[PATCH] xfs: dont remain new blocks in cowfork for unshare

public inbox for linux-xfs@vger.kernel.org
 help / color / mirror / Atom feed

* [PATCH] xfs: dont remain new blocks in cowfork for unshare
@ 2024-05-17 21:26 Wengang Wang
  2024-05-20 18:08 ` Darrick J. Wong
  0 siblings, 1 reply; 9+ messages in thread
From: Wengang Wang @ 2024-05-17 21:26 UTC (permalink / raw)
  To: linux-xfs; +Cc: wen.gang.wang

Unsharing blocks is implemented by doing CoW to those blocks. That has a side
effect that some new allocatd blocks remain in inode Cow fork. As unsharing blocks
has no hint that future writes would like come to the blocks that follow the
unshared ones, the extra blocks in Cow fork is meaningless.

This patch makes that no new blocks caused by unshare remain in Cow fork.
The change in xfs_get_extsz_hint() makes the new blocks have more change to be
contigurous in unshare path when there are multiple extents to unshare.

Signed-off-by: Wengang Wang <wen.gang.wang@oracle.com>
---
 fs/xfs/xfs_inode.c   | 17 ++++++++++++++++
 fs/xfs/xfs_inode.h   | 48 +++++++++++++++++++++++---------------------
 fs/xfs/xfs_reflink.c |  7 +++++--
 3 files changed, 47 insertions(+), 25 deletions(-)

diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index d55b42b2480d..ade945c8d783 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -58,6 +58,15 @@ xfs_get_extsz_hint(
 	 */
 	if (xfs_is_always_cow_inode(ip))
 		return 0;
+
+	/*
+	 * let xfs_buffered_write_iomap_begin() do delayed allocation
+	 * in unshare path so that the new blocks have more chance to
+	 * be contigurous
+	 */
+	if (xfs_iflags_test(ip, XFS_IUNSHARE))
+		return 0;
+
 	if ((ip->i_diflags & XFS_DIFLAG_EXTSIZE) && ip->i_extsize)
 		return ip->i_extsize;
 	if (XFS_IS_REALTIME_INODE(ip))
@@ -77,6 +86,14 @@ xfs_get_cowextsz_hint(
 {
 	xfs_extlen_t		a, b;
 
+	/*
+	 * in unshare path, allocate exactly the number of the blocks to be
+	 * unshared so that no new blocks caused the unshare operation remain
+	 * in Cow fork after the unshare is done
+	 */
+	if (xfs_iflags_test(ip, XFS_IUNSHARE))
+		return 1;
+
 	a = 0;
 	if (ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE)
 		a = ip->i_cowextsize;
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index ab46ffb3ac19..6a8ad68dac1e 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -207,13 +207,13 @@ xfs_new_eof(struct xfs_inode *ip, xfs_fsize_t new_size)
  * i_flags helper functions
  */
 static inline void
-__xfs_iflags_set(xfs_inode_t *ip, unsigned short flags)
+__xfs_iflags_set(xfs_inode_t *ip, unsigned long flags)
 {
 	ip->i_flags |= flags;
 }
 
 static inline void
-xfs_iflags_set(xfs_inode_t *ip, unsigned short flags)
+xfs_iflags_set(xfs_inode_t *ip, unsigned long flags)
 {
 	spin_lock(&ip->i_flags_lock);
 	__xfs_iflags_set(ip, flags);
@@ -221,7 +221,7 @@ xfs_iflags_set(xfs_inode_t *ip, unsigned short flags)
 }
 
 static inline void
-xfs_iflags_clear(xfs_inode_t *ip, unsigned short flags)
+xfs_iflags_clear(xfs_inode_t *ip, unsigned long flags)
 {
 	spin_lock(&ip->i_flags_lock);
 	ip->i_flags &= ~flags;
@@ -229,13 +229,13 @@ xfs_iflags_clear(xfs_inode_t *ip, unsigned short flags)
 }
 
 static inline int
-__xfs_iflags_test(xfs_inode_t *ip, unsigned short flags)
+__xfs_iflags_test(xfs_inode_t *ip, unsigned long flags)
 {
 	return (ip->i_flags & flags);
 }
 
 static inline int
-xfs_iflags_test(xfs_inode_t *ip, unsigned short flags)
+xfs_iflags_test(xfs_inode_t *ip, unsigned long flags)
 {
 	int ret;
 	spin_lock(&ip->i_flags_lock);
@@ -245,7 +245,7 @@ xfs_iflags_test(xfs_inode_t *ip, unsigned short flags)
 }
 
 static inline int
-xfs_iflags_test_and_clear(xfs_inode_t *ip, unsigned short flags)
+xfs_iflags_test_and_clear(xfs_inode_t *ip, unsigned long flags)
 {
 	int ret;
 
@@ -258,7 +258,7 @@ xfs_iflags_test_and_clear(xfs_inode_t *ip, unsigned short flags)
 }
 
 static inline int
-xfs_iflags_test_and_set(xfs_inode_t *ip, unsigned short flags)
+xfs_iflags_test_and_set(xfs_inode_t *ip, unsigned long flags)
 {
 	int ret;
 
@@ -321,25 +321,25 @@ static inline bool xfs_inode_has_large_extent_counts(struct xfs_inode *ip)
 /*
  * In-core inode flags.
  */
-#define XFS_IRECLAIM		(1 << 0) /* started reclaiming this inode */
-#define XFS_ISTALE		(1 << 1) /* inode has been staled */
-#define XFS_IRECLAIMABLE	(1 << 2) /* inode can be reclaimed */
-#define XFS_INEW		(1 << 3) /* inode has just been allocated */
-#define XFS_IPRESERVE_DM_FIELDS	(1 << 4) /* has legacy DMAPI fields set */
-#define XFS_ITRUNCATED		(1 << 5) /* truncated down so flush-on-close */
-#define XFS_IDIRTY_RELEASE	(1 << 6) /* dirty release already seen */
-#define XFS_IFLUSHING		(1 << 7) /* inode is being flushed */
+#define XFS_IRECLAIM		(1UL << 0) /* started reclaiming this inode */
+#define XFS_ISTALE		(1UL << 1) /* inode has been staled */
+#define XFS_IRECLAIMABLE	(1UL<< 2) /* inode can be reclaimed */
+#define XFS_INEW		(1UL<< 3) /* inode has just been allocated */
+#define XFS_IPRESERVE_DM_FIELDS	(1UL << 4) /* has legacy DMAPI fields set */
+#define XFS_ITRUNCATED		(1UL << 5) /* truncated down so flush-on-close */
+#define XFS_IDIRTY_RELEASE	(1UL << 6) /* dirty release already seen */
+#define XFS_IFLUSHING		(1UL << 7) /* inode is being flushed */
 #define __XFS_IPINNED_BIT	8	 /* wakeup key for zero pin count */
-#define XFS_IPINNED		(1 << __XFS_IPINNED_BIT)
-#define XFS_IEOFBLOCKS		(1 << 9) /* has the preallocblocks tag set */
-#define XFS_NEED_INACTIVE	(1 << 10) /* see XFS_INACTIVATING below */
+#define XFS_IPINNED		(1UL << __XFS_IPINNED_BIT)UL
+#define XFS_IEOFBLOCKS		(1UL << 9) /* has the preallocblocks tag set */
+#define XFS_NEED_INACTIVE	(1UL << 10) /* see XFS_INACTIVATING below */
 /*
  * If this unlinked inode is in the middle of recovery, don't let drop_inode
  * truncate and free the inode.  This can happen if we iget the inode during
  * log recovery to replay a bmap operation on the inode.
  */
-#define XFS_IRECOVERY		(1 << 11)
-#define XFS_ICOWBLOCKS		(1 << 12)/* has the cowblocks tag set */
+#define XFS_IRECOVERY		(1UL << 11)
+#define XFS_ICOWBLOCKS		(1UL << 12)/* has the cowblocks tag set */
 
 /*
  * If we need to update on-disk metadata before this IRECLAIMABLE inode can be
@@ -348,10 +348,10 @@ static inline bool xfs_inode_has_large_extent_counts(struct xfs_inode *ip)
  * inactivation completes, both flags will be cleared and the inode is a
  * plain old IRECLAIMABLE inode.
  */
-#define XFS_INACTIVATING	(1 << 13)
+#define XFS_INACTIVATING	(1UL << 13)
 
 /* Quotacheck is running but inode has not been added to quota counts. */
-#define XFS_IQUOTAUNCHECKED	(1 << 14)
+#define XFS_IQUOTAUNCHECKED	(1UL << 14)
 
 /*
  * Remap in progress. Callers that wish to update file data while
@@ -359,7 +359,9 @@ static inline bool xfs_inode_has_large_extent_counts(struct xfs_inode *ip)
  * the lock in exclusive mode. Relocking the file will block until
  * IREMAPPING is cleared.
  */
-#define XFS_IREMAPPING		(1U << 15)
+#define XFS_IREMAPPING		(1UL << 15)
+
+#define XFS_IUNSHARE		(1UL << 16) /* file under unsharing */
 
 /* All inode state flags related to inode reclaim. */
 #define XFS_ALL_IRECLAIM_FLAGS	(XFS_IRECLAIMABLE | \
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 7da0e8f961d3..7867e4a80b16 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -1703,12 +1703,15 @@ xfs_reflink_unshare(
 
 	inode_dio_wait(inode);
 
-	if (IS_DAX(inode))
+	if (IS_DAX(inode)) {
 		error = dax_file_unshare(inode, offset, len,
 				&xfs_dax_write_iomap_ops);
-	else
+	} else {
+		xfs_iflags_set(ip, XFS_IUNSHARE);
 		error = iomap_file_unshare(inode, offset, len,
 				&xfs_buffered_write_iomap_ops);
+		xfs_iflags_clear(ip, XFS_IUNSHARE);
+	}
 	if (error)
 		goto out;
 
-- 
2.39.3 (Apple Git-146)


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH] xfs: dont remain new blocks in cowfork for unshare
  2024-05-17 21:26 [PATCH] xfs: dont remain new blocks in cowfork for unshare Wengang Wang
@ 2024-05-20 18:08 ` Darrick J. Wong
  2024-05-20 22:42   ` Wengang Wang
  0 siblings, 1 reply; 9+ messages in thread
From: Darrick J. Wong @ 2024-05-20 18:08 UTC (permalink / raw)
  To: Wengang Wang; +Cc: linux-xfs

On Fri, May 17, 2024 at 02:26:21PM -0700, Wengang Wang wrote:
> Unsharing blocks is implemented by doing CoW to those blocks. That has a side
> effect that some new allocatd blocks remain in inode Cow fork. As unsharing blocks

                       allocated

> has no hint that future writes would like come to the blocks that follow the
> unshared ones, the extra blocks in Cow fork is meaningless.
> 
> This patch makes that no new blocks caused by unshare remain in Cow fork.
> The change in xfs_get_extsz_hint() makes the new blocks have more change to be
> contigurous in unshare path when there are multiple extents to unshare.

  contiguous

Aha, so you're trying to combat fragmentation by making unshare use
delayed allocation so that we try to allocate one big extent all at once
instead of doing this piece by piece.  Or maybe you also don't want
unshare to preallocate cow extents beyond the range requested?

> Signed-off-by: Wengang Wang <wen.gang.wang@oracle.com>
> ---
>  fs/xfs/xfs_inode.c   | 17 ++++++++++++++++
>  fs/xfs/xfs_inode.h   | 48 +++++++++++++++++++++++---------------------
>  fs/xfs/xfs_reflink.c |  7 +++++--
>  3 files changed, 47 insertions(+), 25 deletions(-)
> 
> diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
> index d55b42b2480d..ade945c8d783 100644
> --- a/fs/xfs/xfs_inode.c
> +++ b/fs/xfs/xfs_inode.c
> @@ -58,6 +58,15 @@ xfs_get_extsz_hint(
>  	 */
>  	if (xfs_is_always_cow_inode(ip))
>  		return 0;
> +
> +	/*
> +	 * let xfs_buffered_write_iomap_begin() do delayed allocation
> +	 * in unshare path so that the new blocks have more chance to
> +	 * be contigurous
> +	 */
> +	if (xfs_iflags_test(ip, XFS_IUNSHARE))
> +		return 0;

What if the inode is a realtime file?  Will this work with the rt
delalloc support coming online in 6.10?

> +
>  	if ((ip->i_diflags & XFS_DIFLAG_EXTSIZE) && ip->i_extsize)
>  		return ip->i_extsize;
>  	if (XFS_IS_REALTIME_INODE(ip))
> @@ -77,6 +86,14 @@ xfs_get_cowextsz_hint(
>  {
>  	xfs_extlen_t		a, b;
>  
> +	/*
> +	 * in unshare path, allocate exactly the number of the blocks to be
> +	 * unshared so that no new blocks caused the unshare operation remain
> +	 * in Cow fork after the unshare is done
> +	 */
> +	if (xfs_iflags_test(ip, XFS_IUNSHARE))
> +		return 1;

Aha, so this is also about turning off speculative preallocations
outside the range that's being unshared?

> +
>  	a = 0;
>  	if (ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE)
>  		a = ip->i_cowextsize;
> diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
> index ab46ffb3ac19..6a8ad68dac1e 100644
> --- a/fs/xfs/xfs_inode.h
> +++ b/fs/xfs/xfs_inode.h
> @@ -207,13 +207,13 @@ xfs_new_eof(struct xfs_inode *ip, xfs_fsize_t new_size)
>   * i_flags helper functions
>   */
>  static inline void
> -__xfs_iflags_set(xfs_inode_t *ip, unsigned short flags)
> +__xfs_iflags_set(xfs_inode_t *ip, unsigned long flags)

I think this is already queued for 6.10.

>  {
>  	ip->i_flags |= flags;
>  }
>  
>  static inline void
> -xfs_iflags_set(xfs_inode_t *ip, unsigned short flags)
> +xfs_iflags_set(xfs_inode_t *ip, unsigned long flags)
>  {
>  	spin_lock(&ip->i_flags_lock);
>  	__xfs_iflags_set(ip, flags);
> @@ -221,7 +221,7 @@ xfs_iflags_set(xfs_inode_t *ip, unsigned short flags)
>  }
>  
>  static inline void
> -xfs_iflags_clear(xfs_inode_t *ip, unsigned short flags)
> +xfs_iflags_clear(xfs_inode_t *ip, unsigned long flags)
>  {
>  	spin_lock(&ip->i_flags_lock);
>  	ip->i_flags &= ~flags;
> @@ -229,13 +229,13 @@ xfs_iflags_clear(xfs_inode_t *ip, unsigned short flags)
>  }
>  
>  static inline int
> -__xfs_iflags_test(xfs_inode_t *ip, unsigned short flags)
> +__xfs_iflags_test(xfs_inode_t *ip, unsigned long flags)
>  {
>  	return (ip->i_flags & flags);
>  }
>  
>  static inline int
> -xfs_iflags_test(xfs_inode_t *ip, unsigned short flags)
> +xfs_iflags_test(xfs_inode_t *ip, unsigned long flags)
>  {
>  	int ret;
>  	spin_lock(&ip->i_flags_lock);
> @@ -245,7 +245,7 @@ xfs_iflags_test(xfs_inode_t *ip, unsigned short flags)
>  }
>  
>  static inline int
> -xfs_iflags_test_and_clear(xfs_inode_t *ip, unsigned short flags)
> +xfs_iflags_test_and_clear(xfs_inode_t *ip, unsigned long flags)
>  {
>  	int ret;
>  
> @@ -258,7 +258,7 @@ xfs_iflags_test_and_clear(xfs_inode_t *ip, unsigned short flags)
>  }
>  
>  static inline int
> -xfs_iflags_test_and_set(xfs_inode_t *ip, unsigned short flags)
> +xfs_iflags_test_and_set(xfs_inode_t *ip, unsigned long flags)
>  {
>  	int ret;
>  
> @@ -321,25 +321,25 @@ static inline bool xfs_inode_has_large_extent_counts(struct xfs_inode *ip)
>  /*
>   * In-core inode flags.
>   */
> -#define XFS_IRECLAIM		(1 << 0) /* started reclaiming this inode */
> -#define XFS_ISTALE		(1 << 1) /* inode has been staled */
> -#define XFS_IRECLAIMABLE	(1 << 2) /* inode can be reclaimed */
> -#define XFS_INEW		(1 << 3) /* inode has just been allocated */
> -#define XFS_IPRESERVE_DM_FIELDS	(1 << 4) /* has legacy DMAPI fields set */
> -#define XFS_ITRUNCATED		(1 << 5) /* truncated down so flush-on-close */
> -#define XFS_IDIRTY_RELEASE	(1 << 6) /* dirty release already seen */
> -#define XFS_IFLUSHING		(1 << 7) /* inode is being flushed */
> +#define XFS_IRECLAIM		(1UL << 0) /* started reclaiming this inode */
> +#define XFS_ISTALE		(1UL << 1) /* inode has been staled */
> +#define XFS_IRECLAIMABLE	(1UL<< 2) /* inode can be reclaimed */
> +#define XFS_INEW		(1UL<< 3) /* inode has just been allocated */
> +#define XFS_IPRESERVE_DM_FIELDS	(1UL << 4) /* has legacy DMAPI fields set */
> +#define XFS_ITRUNCATED		(1UL << 5) /* truncated down so flush-on-close */
> +#define XFS_IDIRTY_RELEASE	(1UL << 6) /* dirty release already seen */
> +#define XFS_IFLUSHING		(1UL << 7) /* inode is being flushed */
>  #define __XFS_IPINNED_BIT	8	 /* wakeup key for zero pin count */
> -#define XFS_IPINNED		(1 << __XFS_IPINNED_BIT)
> -#define XFS_IEOFBLOCKS		(1 << 9) /* has the preallocblocks tag set */
> -#define XFS_NEED_INACTIVE	(1 << 10) /* see XFS_INACTIVATING below */
> +#define XFS_IPINNED		(1UL << __XFS_IPINNED_BIT)UL
> +#define XFS_IEOFBLOCKS		(1UL << 9) /* has the preallocblocks tag set */
> +#define XFS_NEED_INACTIVE	(1UL << 10) /* see XFS_INACTIVATING below */
>  /*
>   * If this unlinked inode is in the middle of recovery, don't let drop_inode
>   * truncate and free the inode.  This can happen if we iget the inode during
>   * log recovery to replay a bmap operation on the inode.
>   */
> -#define XFS_IRECOVERY		(1 << 11)
> -#define XFS_ICOWBLOCKS		(1 << 12)/* has the cowblocks tag set */
> +#define XFS_IRECOVERY		(1UL << 11)
> +#define XFS_ICOWBLOCKS		(1UL << 12)/* has the cowblocks tag set */
>  
>  /*
>   * If we need to update on-disk metadata before this IRECLAIMABLE inode can be
> @@ -348,10 +348,10 @@ static inline bool xfs_inode_has_large_extent_counts(struct xfs_inode *ip)
>   * inactivation completes, both flags will be cleared and the inode is a
>   * plain old IRECLAIMABLE inode.
>   */
> -#define XFS_INACTIVATING	(1 << 13)
> +#define XFS_INACTIVATING	(1UL << 13)
>  
>  /* Quotacheck is running but inode has not been added to quota counts. */
> -#define XFS_IQUOTAUNCHECKED	(1 << 14)
> +#define XFS_IQUOTAUNCHECKED	(1UL << 14)
>  
>  /*
>   * Remap in progress. Callers that wish to update file data while
> @@ -359,7 +359,9 @@ static inline bool xfs_inode_has_large_extent_counts(struct xfs_inode *ip)
>   * the lock in exclusive mode. Relocking the file will block until
>   * IREMAPPING is cleared.
>   */
> -#define XFS_IREMAPPING		(1U << 15)
> +#define XFS_IREMAPPING		(1UL << 15)
> +
> +#define XFS_IUNSHARE		(1UL << 16) /* file under unsharing */
>  
>  /* All inode state flags related to inode reclaim. */
>  #define XFS_ALL_IRECLAIM_FLAGS	(XFS_IRECLAIMABLE | \
> diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
> index 7da0e8f961d3..7867e4a80b16 100644
> --- a/fs/xfs/xfs_reflink.c
> +++ b/fs/xfs/xfs_reflink.c
> @@ -1703,12 +1703,15 @@ xfs_reflink_unshare(
>  
>  	inode_dio_wait(inode);
>  
> -	if (IS_DAX(inode))
> +	if (IS_DAX(inode)) {
>  		error = dax_file_unshare(inode, offset, len,
>  				&xfs_dax_write_iomap_ops);
> -	else
> +	} else {
> +		xfs_iflags_set(ip, XFS_IUNSHARE);
>  		error = iomap_file_unshare(inode, offset, len,
>  				&xfs_buffered_write_iomap_ops);
> +		xfs_iflags_clear(ip, XFS_IUNSHARE);
> +	}
>  	if (error)
>  		goto out;
>  
> -- 
> 2.39.3 (Apple Git-146)
> 
> 

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] xfs: dont remain new blocks in cowfork for unshare
  2024-05-20 18:08 ` Darrick J. Wong
@ 2024-05-20 22:42   ` Wengang Wang
  2024-05-31 15:46     ` Wengang Wang
  2024-05-31 16:00     ` Darrick J. Wong
  0 siblings, 2 replies; 9+ messages in thread
From: Wengang Wang @ 2024-05-20 22:42 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs@vger.kernel.org

Thanks Darrick for review, pls see inlines:

> On May 20, 2024, at 11:08 AM, Darrick J. Wong <djwong@kernel.org> wrote:
> 
> On Fri, May 17, 2024 at 02:26:21PM -0700, Wengang Wang wrote:
>> Unsharing blocks is implemented by doing CoW to those blocks. That has a side
>> effect that some new allocatd blocks remain in inode Cow fork. As unsharing blocks
> 
>                       allocated
> 
>> has no hint that future writes would like come to the blocks that follow the
>> unshared ones, the extra blocks in Cow fork is meaningless.
>> 
>> This patch makes that no new blocks caused by unshare remain in Cow fork.
>> The change in xfs_get_extsz_hint() makes the new blocks have more change to be
>> contigurous in unshare path when there are multiple extents to unshare.
> 
>  contiguous
> 
Sorry for typos.

> Aha, so you're trying to combat fragmentation by making unshare use
> delayed allocation so that we try to allocate one big extent all at once
> instead of doing this piece by piece.  Or maybe you also don't want
> unshare to preallocate cow extents beyond the range requested?
> 

Yes, The main purpose is for the later (avoid preallocating beyond). The patch
also makes unshare use delayed allocation for bigger extent.


>> Signed-off-by: Wengang Wang <wen.gang.wang@oracle.com>
>> ---
>> fs/xfs/xfs_inode.c   | 17 ++++++++++++++++
>> fs/xfs/xfs_inode.h   | 48 +++++++++++++++++++++++---------------------
>> fs/xfs/xfs_reflink.c |  7 +++++--
>> 3 files changed, 47 insertions(+), 25 deletions(-)
>> 
>> diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
>> index d55b42b2480d..ade945c8d783 100644
>> --- a/fs/xfs/xfs_inode.c
>> +++ b/fs/xfs/xfs_inode.c
>> @@ -58,6 +58,15 @@ xfs_get_extsz_hint(
>>  */
>> if (xfs_is_always_cow_inode(ip))
>> return 0;
>> +
>> + /*
>> +  * let xfs_buffered_write_iomap_begin() do delayed allocation
>> +  * in unshare path so that the new blocks have more chance to
>> +  * be contigurous
>> +  */
>> + if (xfs_iflags_test(ip, XFS_IUNSHARE))
>> + return 0;
> 
> What if the inode is a realtime file?  Will this work with the rt
> delalloc support coming online in 6.10?

This XFS_IUNSHARE is not set in xfs_reflink_unshare() for rt inodes. 
So rt inodes will keep current behavior.
 
> 
>> +
>> if ((ip->i_diflags & XFS_DIFLAG_EXTSIZE) && ip->i_extsize)
>> return ip->i_extsize;
>> if (XFS_IS_REALTIME_INODE(ip))
>> @@ -77,6 +86,14 @@ xfs_get_cowextsz_hint(
>> {
>> xfs_extlen_t a, b;
>> 
>> + /*
>> +  * in unshare path, allocate exactly the number of the blocks to be
>> +  * unshared so that no new blocks caused the unshare operation remain
>> +  * in Cow fork after the unshare is done
>> +  */
>> + if (xfs_iflags_test(ip, XFS_IUNSHARE))
>> + return 1;
> 
> Aha, so this is also about turning off speculative preallocations
> outside the range that's being unshared?

Yes.

> 
>> +
>> a = 0;
>> if (ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE)
>> a = ip->i_cowextsize;
>> diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
>> index ab46ffb3ac19..6a8ad68dac1e 100644
>> --- a/fs/xfs/xfs_inode.h
>> +++ b/fs/xfs/xfs_inode.h
>> @@ -207,13 +207,13 @@ xfs_new_eof(struct xfs_inode *ip, xfs_fsize_t new_size)
>>  * i_flags helper functions
>>  */
>> static inline void
>> -__xfs_iflags_set(xfs_inode_t *ip, unsigned short flags)
>> +__xfs_iflags_set(xfs_inode_t *ip, unsigned long flags)
> 
> I think this is already queued for 6.10.

Good to know.

Thanks,
Wengang


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] xfs: dont remain new blocks in cowfork for unshare
  2024-05-20 22:42   ` Wengang Wang
@ 2024-05-31 15:46     ` Wengang Wang
  2024-05-31 16:00     ` Darrick J. Wong
  1 sibling, 0 replies; 9+ messages in thread
From: Wengang Wang @ 2024-05-31 15:46 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs@vger.kernel.org

Hi,

Any idea on this?

Thanks,
wengang

> On May 20, 2024, at 3:42 PM, Wengang Wang <wen.gang.wang@oracle.com> wrote:
> 
> Thanks Darrick for review, pls see inlines:
> 
>> On May 20, 2024, at 11:08 AM, Darrick J. Wong <djwong@kernel.org> wrote:
>> 
>> On Fri, May 17, 2024 at 02:26:21PM -0700, Wengang Wang wrote:
>>> Unsharing blocks is implemented by doing CoW to those blocks. That has a side
>>> effect that some new allocatd blocks remain in inode Cow fork. As unsharing blocks
>> 
>>                      allocated
>> 
>>> has no hint that future writes would like come to the blocks that follow the
>>> unshared ones, the extra blocks in Cow fork is meaningless.
>>> 
>>> This patch makes that no new blocks caused by unshare remain in Cow fork.
>>> The change in xfs_get_extsz_hint() makes the new blocks have more change to be
>>> contigurous in unshare path when there are multiple extents to unshare.
>> 
>> contiguous
>> 
> Sorry for typos.
> 
>> Aha, so you're trying to combat fragmentation by making unshare use
>> delayed allocation so that we try to allocate one big extent all at once
>> instead of doing this piece by piece.  Or maybe you also don't want
>> unshare to preallocate cow extents beyond the range requested?
>> 
> 
> Yes, The main purpose is for the later (avoid preallocating beyond). The patch
> also makes unshare use delayed allocation for bigger extent.
> 
> 
>>> Signed-off-by: Wengang Wang <wen.gang.wang@oracle.com>
>>> ---
>>> fs/xfs/xfs_inode.c   | 17 ++++++++++++++++
>>> fs/xfs/xfs_inode.h   | 48 +++++++++++++++++++++++---------------------
>>> fs/xfs/xfs_reflink.c |  7 +++++--
>>> 3 files changed, 47 insertions(+), 25 deletions(-)
>>> 
>>> diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
>>> index d55b42b2480d..ade945c8d783 100644
>>> --- a/fs/xfs/xfs_inode.c
>>> +++ b/fs/xfs/xfs_inode.c
>>> @@ -58,6 +58,15 @@ xfs_get_extsz_hint(
>>> */
>>> if (xfs_is_always_cow_inode(ip))
>>> return 0;
>>> +
>>> + /*
>>> +  * let xfs_buffered_write_iomap_begin() do delayed allocation
>>> +  * in unshare path so that the new blocks have more chance to
>>> +  * be contigurous
>>> +  */
>>> + if (xfs_iflags_test(ip, XFS_IUNSHARE))
>>> + return 0;
>> 
>> What if the inode is a realtime file?  Will this work with the rt
>> delalloc support coming online in 6.10?
> 
> This XFS_IUNSHARE is not set in xfs_reflink_unshare() for rt inodes. 
> So rt inodes will keep current behavior.
> 
>> 
>>> +
>>> if ((ip->i_diflags & XFS_DIFLAG_EXTSIZE) && ip->i_extsize)
>>> return ip->i_extsize;
>>> if (XFS_IS_REALTIME_INODE(ip))
>>> @@ -77,6 +86,14 @@ xfs_get_cowextsz_hint(
>>> {
>>> xfs_extlen_t a, b;
>>> 
>>> + /*
>>> +  * in unshare path, allocate exactly the number of the blocks to be
>>> +  * unshared so that no new blocks caused the unshare operation remain
>>> +  * in Cow fork after the unshare is done
>>> +  */
>>> + if (xfs_iflags_test(ip, XFS_IUNSHARE))
>>> + return 1;
>> 
>> Aha, so this is also about turning off speculative preallocations
>> outside the range that's being unshared?
> 
> Yes.
> 
>> 
>>> +
>>> a = 0;
>>> if (ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE)
>>> a = ip->i_cowextsize;
>>> diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
>>> index ab46ffb3ac19..6a8ad68dac1e 100644
>>> --- a/fs/xfs/xfs_inode.h
>>> +++ b/fs/xfs/xfs_inode.h
>>> @@ -207,13 +207,13 @@ xfs_new_eof(struct xfs_inode *ip, xfs_fsize_t new_size)
>>> * i_flags helper functions
>>> */
>>> static inline void
>>> -__xfs_iflags_set(xfs_inode_t *ip, unsigned short flags)
>>> +__xfs_iflags_set(xfs_inode_t *ip, unsigned long flags)
>> 
>> I think this is already queued for 6.10.
> 
> Good to know.
> 
> Thanks,
> Wengang



^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] xfs: dont remain new blocks in cowfork for unshare
  2024-05-20 22:42   ` Wengang Wang
  2024-05-31 15:46     ` Wengang Wang
@ 2024-05-31 16:00     ` Darrick J. Wong
  2024-05-31 16:13       ` Christoph Hellwig
  2024-05-31 17:53       ` Wengang Wang
  1 sibling, 2 replies; 9+ messages in thread
From: Darrick J. Wong @ 2024-05-31 16:00 UTC (permalink / raw)
  To: Wengang Wang; +Cc: linux-xfs@vger.kernel.org

On Mon, May 20, 2024 at 10:42:02PM +0000, Wengang Wang wrote:
> Thanks Darrick for review, pls see inlines:
> 
> > On May 20, 2024, at 11:08 AM, Darrick J. Wong <djwong@kernel.org> wrote:
> > 
> > On Fri, May 17, 2024 at 02:26:21PM -0700, Wengang Wang wrote:
> >> Unsharing blocks is implemented by doing CoW to those blocks. That has a side
> >> effect that some new allocatd blocks remain in inode Cow fork. As unsharing blocks
> > 
> >                       allocated
> > 
> >> has no hint that future writes would like come to the blocks that follow the
> >> unshared ones, the extra blocks in Cow fork is meaningless.
> >> 
> >> This patch makes that no new blocks caused by unshare remain in Cow fork.
> >> The change in xfs_get_extsz_hint() makes the new blocks have more change to be
> >> contigurous in unshare path when there are multiple extents to unshare.
> > 
> >  contiguous
> > 
> Sorry for typos.
> 
> > Aha, so you're trying to combat fragmentation by making unshare use
> > delayed allocation so that we try to allocate one big extent all at once
> > instead of doing this piece by piece.  Or maybe you also don't want
> > unshare to preallocate cow extents beyond the range requested?
> > 
> 
> Yes, The main purpose is for the later (avoid preallocating beyond).

But the user set an extent size hint, so presumably they want us to (try
to) obey that even for unshare operations, right?

> The patch also makes unshare use delayed allocation for bigger extent.

If there's a good reason for not trying, can we avoid the iflag by
detecting IOMAP_UNSHARE in the @flags parameter to
xfs_buffered_write_iomap_begin and thereby use delalloc if there isn't
an extent size hint set?

(IOWs I don't really like that an upper layer of the fs sets a flag for
a lower layer to catch based on the context of whatever operation it's
doing, and in the meantime another thread could observe that state and
make different decisions.)

> >> Signed-off-by: Wengang Wang <wen.gang.wang@oracle.com>
> >> ---
> >> fs/xfs/xfs_inode.c   | 17 ++++++++++++++++
> >> fs/xfs/xfs_inode.h   | 48 +++++++++++++++++++++++---------------------
> >> fs/xfs/xfs_reflink.c |  7 +++++--
> >> 3 files changed, 47 insertions(+), 25 deletions(-)
> >> 
> >> diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
> >> index d55b42b2480d..ade945c8d783 100644
> >> --- a/fs/xfs/xfs_inode.c
> >> +++ b/fs/xfs/xfs_inode.c
> >> @@ -58,6 +58,15 @@ xfs_get_extsz_hint(
> >>  */
> >> if (xfs_is_always_cow_inode(ip))
> >> return 0;
> >> +
> >> + /*
> >> +  * let xfs_buffered_write_iomap_begin() do delayed allocation
> >> +  * in unshare path so that the new blocks have more chance to
> >> +  * be contigurous

"contiguous"

> >> +  */
> >> + if (xfs_iflags_test(ip, XFS_IUNSHARE))
> >> + return 0;
> > 
> > What if the inode is a realtime file?  Will this work with the rt
> > delalloc support coming online in 6.10?
> 
> This XFS_IUNSHARE is not set in xfs_reflink_unshare() for rt inodes. 
> So rt inodes will keep current behavior.

<nod> Please rebase this patch against 6.10-rc1 now that it's out.

--D

> > 
> >> +
> >> if ((ip->i_diflags & XFS_DIFLAG_EXTSIZE) && ip->i_extsize)
> >> return ip->i_extsize;
> >> if (XFS_IS_REALTIME_INODE(ip))
> >> @@ -77,6 +86,14 @@ xfs_get_cowextsz_hint(
> >> {
> >> xfs_extlen_t a, b;
> >> 
> >> + /*
> >> +  * in unshare path, allocate exactly the number of the blocks to be
> >> +  * unshared so that no new blocks caused the unshare operation remain
> >> +  * in Cow fork after the unshare is done
> >> +  */
> >> + if (xfs_iflags_test(ip, XFS_IUNSHARE))
> >> + return 1;
> > 
> > Aha, so this is also about turning off speculative preallocations
> > outside the range that's being unshared?
> 
> Yes.
> 
> > 
> >> +
> >> a = 0;
> >> if (ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE)
> >> a = ip->i_cowextsize;
> >> diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
> >> index ab46ffb3ac19..6a8ad68dac1e 100644
> >> --- a/fs/xfs/xfs_inode.h
> >> +++ b/fs/xfs/xfs_inode.h
> >> @@ -207,13 +207,13 @@ xfs_new_eof(struct xfs_inode *ip, xfs_fsize_t new_size)
> >>  * i_flags helper functions
> >>  */
> >> static inline void
> >> -__xfs_iflags_set(xfs_inode_t *ip, unsigned short flags)
> >> +__xfs_iflags_set(xfs_inode_t *ip, unsigned long flags)
> > 
> > I think this is already queued for 6.10.
> 
> Good to know.
> 
> Thanks,
> Wengang
> 

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] xfs: dont remain new blocks in cowfork for unshare
  2024-05-31 16:00     ` Darrick J. Wong
@ 2024-05-31 16:13       ` Christoph Hellwig
  2024-05-31 17:53       ` Wengang Wang
  1 sibling, 0 replies; 9+ messages in thread
From: Christoph Hellwig @ 2024-05-31 16:13 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: Wengang Wang, linux-xfs@vger.kernel.org

On Fri, May 31, 2024 at 09:00:53AM -0700, Darrick J. Wong wrote:
> > Yes, The main purpose is for the later (avoid preallocating beyond).
> 
> But the user set an extent size hint, so presumably they want us to (try
> to) obey that even for unshare operations, right?
> 
> > The patch also makes unshare use delayed allocation for bigger extent.
> 
> If there's a good reason for not trying, can we avoid the iflag by
> detecting IOMAP_UNSHARE in the @flags parameter to
> xfs_buffered_write_iomap_begin and thereby use delalloc if there isn't
> an extent size hint set?

.. or (even if this is scope creep) just make delalloc work for
extent size hints.  Now that we create all extents as unwritten there
is no fundamental reason for that to be dangerous, it'll just need
a little more work.

> (IOWs I don't really like that an upper layer of the fs sets a flag for
> a lower layer to catch based on the context of whatever operation it's
> doing, and in the meantime another thread could observe that state and
> make different decisions.)

Yes, that's pretty much a no-go.


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] xfs: dont remain new blocks in cowfork for unshare
  2024-05-31 16:00     ` Darrick J. Wong
  2024-05-31 16:13       ` Christoph Hellwig
@ 2024-05-31 17:53       ` Wengang Wang
  2024-06-06 18:16         ` Wengang Wang
  1 sibling, 1 reply; 9+ messages in thread
From: Wengang Wang @ 2024-05-31 17:53 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs@vger.kernel.org



> On May 31, 2024, at 9:00 AM, Darrick J. Wong <djwong@kernel.org> wrote:
> 
> On Mon, May 20, 2024 at 10:42:02PM +0000, Wengang Wang wrote:
>> Thanks Darrick for review, pls see inlines:
>> 
>>> On May 20, 2024, at 11:08 AM, Darrick J. Wong <djwong@kernel.org> wrote:
>>> 
>>> On Fri, May 17, 2024 at 02:26:21PM -0700, Wengang Wang wrote:
>>>> Unsharing blocks is implemented by doing CoW to those blocks. That has a side
>>>> effect that some new allocatd blocks remain in inode Cow fork. As unsharing blocks
>>> 
>>>                      allocated
>>> 
>>>> has no hint that future writes would like come to the blocks that follow the
>>>> unshared ones, the extra blocks in Cow fork is meaningless.
>>>> 
>>>> This patch makes that no new blocks caused by unshare remain in Cow fork.
>>>> The change in xfs_get_extsz_hint() makes the new blocks have more change to be
>>>> contigurous in unshare path when there are multiple extents to unshare.
>>> 
>>> contiguous
>>> 
>> Sorry for typos.
>> 
>>> Aha, so you're trying to combat fragmentation by making unshare use
>>> delayed allocation so that we try to allocate one big extent all at once
>>> instead of doing this piece by piece.  Or maybe you also don't want
>>> unshare to preallocate cow extents beyond the range requested?
>>> 
>> 
>> Yes, The main purpose is for the later (avoid preallocating beyond).
> 
> But the user set an extent size hint, so presumably they want us to (try
> to) obey that even for unshare operations, right?

Yeah, user might set extsize for better IO performance. But they don’t really know
much details. Consider this case: 
writing to those over/beyond preallocated blocks would cause Cow. Cow includes
extra meta changes: releasing old blocks, inserting new extents to data fork and removing
staging extents from refcount tree.  That’s a lot, as I think, a Cow is slower than block over-write.
In above case, the Cow is caused by unshare, rather than by shared blocks. That might be
not what user expected by setting extsize.


> 
>> The patch also makes unshare use delayed allocation for bigger extent.
> 
> If there's a good reason for not trying, can we avoid the iflag by
> detecting IOMAP_UNSHARE in the @flags parameter to
> xfs_buffered_write_iomap_begin

Yes, that would be better.

> and thereby use delalloc if there isn't
> an extent size hint set?

Hm… currently it’s using dealloc if extsize is 0. So you meant skip extsize (act as extsize is 0)
when IOMAP_UNSHARE is there?

> 
> (IOWs I don't really like that an upper layer of the fs sets a flag for
> a lower layer to catch based on the context of whatever operation it's
> doing, and in the meantime another thread could observe that state and
> make different decisions.)

Then what I can think the way to go might be to add new parameters to functions.
That might involves a lot of functions in the call path. Is that really better?
Or you have other better ways?

> 
>>>> Signed-off-by: Wengang Wang <wen.gang.wang@oracle.com>
>>>> ---
>>>> fs/xfs/xfs_inode.c   | 17 ++++++++++++++++
>>>> fs/xfs/xfs_inode.h   | 48 +++++++++++++++++++++++---------------------
>>>> fs/xfs/xfs_reflink.c |  7 +++++--
>>>> 3 files changed, 47 insertions(+), 25 deletions(-)
>>>> 
>>>> diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
>>>> index d55b42b2480d..ade945c8d783 100644
>>>> --- a/fs/xfs/xfs_inode.c
>>>> +++ b/fs/xfs/xfs_inode.c
>>>> @@ -58,6 +58,15 @@ xfs_get_extsz_hint(
>>>> */
>>>> if (xfs_is_always_cow_inode(ip))
>>>> return 0;
>>>> +
>>>> + /*
>>>> +  * let xfs_buffered_write_iomap_begin() do delayed allocation
>>>> +  * in unshare path so that the new blocks have more chance to
>>>> +  * be contigurous
> 
> "contiguous"
> 
>>>> +  */
>>>> + if (xfs_iflags_test(ip, XFS_IUNSHARE))
>>>> + return 0;
>>> 
>>> What if the inode is a realtime file?  Will this work with the rt
>>> delalloc support coming online in 6.10?
>> 
>> This XFS_IUNSHARE is not set in xfs_reflink_unshare() for rt inodes. 
>> So rt inodes will keep current behavior.
> 
> <nod> Please rebase this patch against 6.10-rc1 now that it's out.
> 

OK, will do.

Thanks,
Wengang

> --D
> 
>>> 
>>>> +
>>>> if ((ip->i_diflags & XFS_DIFLAG_EXTSIZE) && ip->i_extsize)
>>>> return ip->i_extsize;
>>>> if (XFS_IS_REALTIME_INODE(ip))
>>>> @@ -77,6 +86,14 @@ xfs_get_cowextsz_hint(
>>>> {
>>>> xfs_extlen_t a, b;
>>>> 
>>>> + /*
>>>> +  * in unshare path, allocate exactly the number of the blocks to be
>>>> +  * unshared so that no new blocks caused the unshare operation remain
>>>> +  * in Cow fork after the unshare is done
>>>> +  */
>>>> + if (xfs_iflags_test(ip, XFS_IUNSHARE))
>>>> + return 1;
>>> 
>>> Aha, so this is also about turning off speculative preallocations
>>> outside the range that's being unshared?
>> 
>> Yes.
>> 
>>> 
>>>> +
>>>> a = 0;
>>>> if (ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE)
>>>> a = ip->i_cowextsize;
>>>> diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
>>>> index ab46ffb3ac19..6a8ad68dac1e 100644
>>>> --- a/fs/xfs/xfs_inode.h
>>>> +++ b/fs/xfs/xfs_inode.h
>>>> @@ -207,13 +207,13 @@ xfs_new_eof(struct xfs_inode *ip, xfs_fsize_t new_size)
>>>> * i_flags helper functions
>>>> */
>>>> static inline void
>>>> -__xfs_iflags_set(xfs_inode_t *ip, unsigned short flags)
>>>> +__xfs_iflags_set(xfs_inode_t *ip, unsigned long flags)
>>> 
>>> I think this is already queued for 6.10.
>> 
>> Good to know.
>> 
>> Thanks,
>> Wengang



^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] xfs: dont remain new blocks in cowfork for unshare
  2024-05-31 17:53       ` Wengang Wang
@ 2024-06-06 18:16         ` Wengang Wang
  2024-06-11 17:00           ` Darrick J. Wong
  0 siblings, 1 reply; 9+ messages in thread
From: Wengang Wang @ 2024-06-06 18:16 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs@vger.kernel.org



> On May 31, 2024, at 10:53 AM, Wengang Wang <wen.gang.wang@oracle.com> wrote:
> 
> 
> 
>> On May 31, 2024, at 9:00 AM, Darrick J. Wong <djwong@kernel.org> wrote:
>> 
>> On Mon, May 20, 2024 at 10:42:02PM +0000, Wengang Wang wrote:
>>> Thanks Darrick for review, pls see inlines:
>>> 
>>>> On May 20, 2024, at 11:08 AM, Darrick J. Wong <djwong@kernel.org> wrote:
>>>> 
>>>> On Fri, May 17, 2024 at 02:26:21PM -0700, Wengang Wang wrote:
>>>>> Unsharing blocks is implemented by doing CoW to those blocks. That has a side
>>>>> effect that some new allocatd blocks remain in inode Cow fork. As unsharing blocks
>>>> 
>>>>                     allocated
>>>> 
>>>>> has no hint that future writes would like come to the blocks that follow the
>>>>> unshared ones, the extra blocks in Cow fork is meaningless.
>>>>> 
>>>>> This patch makes that no new blocks caused by unshare remain in Cow fork.
>>>>> The change in xfs_get_extsz_hint() makes the new blocks have more change to be
>>>>> contigurous in unshare path when there are multiple extents to unshare.
>>>> 
>>>> contiguous
>>>> 
>>> Sorry for typos.
>>> 
>>>> Aha, so you're trying to combat fragmentation by making unshare use
>>>> delayed allocation so that we try to allocate one big extent all at once
>>>> instead of doing this piece by piece.  Or maybe you also don't want
>>>> unshare to preallocate cow extents beyond the range requested?
>>>> 
>>> 
>>> Yes, The main purpose is for the later (avoid preallocating beyond).
>> 
>> But the user set an extent size hint, so presumably they want us to (try
>> to) obey that even for unshare operations, right?
> 
> Yeah, user might set extsize for better IO performance. But they don’t really know
> much details. Consider this case: 
> writing to those over/beyond preallocated blocks would cause Cow. Cow includes
> extra meta changes: releasing old blocks, inserting new extents to data fork and removing
> staging extents from refcount tree.  That’s a lot, as I think, a Cow is slower than block over-write.
> In above case, the Cow is caused by unshare, rather than by shared blocks. That might be
> not what user expected by setting extsize.
> 
> 
May I know if this is a good reason to skip extsize setting, or we anyways honor extsize?

Thanks,
Wengang


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] xfs: dont remain new blocks in cowfork for unshare
  2024-06-06 18:16         ` Wengang Wang
@ 2024-06-11 17:00           ` Darrick J. Wong
  0 siblings, 0 replies; 9+ messages in thread
From: Darrick J. Wong @ 2024-06-11 17:00 UTC (permalink / raw)
  To: Wengang Wang; +Cc: linux-xfs@vger.kernel.org

On Thu, Jun 06, 2024 at 06:16:23PM +0000, Wengang Wang wrote:
> 
> 
> > On May 31, 2024, at 10:53 AM, Wengang Wang <wen.gang.wang@oracle.com> wrote:
> > 
> > 
> > 
> >> On May 31, 2024, at 9:00 AM, Darrick J. Wong <djwong@kernel.org> wrote:
> >> 
> >> On Mon, May 20, 2024 at 10:42:02PM +0000, Wengang Wang wrote:
> >>> Thanks Darrick for review, pls see inlines:
> >>> 
> >>>> On May 20, 2024, at 11:08 AM, Darrick J. Wong <djwong@kernel.org> wrote:
> >>>> 
> >>>> On Fri, May 17, 2024 at 02:26:21PM -0700, Wengang Wang wrote:
> >>>>> Unsharing blocks is implemented by doing CoW to those blocks. That has a side
> >>>>> effect that some new allocatd blocks remain in inode Cow fork. As unsharing blocks
> >>>> 
> >>>>                     allocated
> >>>> 
> >>>>> has no hint that future writes would like come to the blocks that follow the
> >>>>> unshared ones, the extra blocks in Cow fork is meaningless.
> >>>>> 
> >>>>> This patch makes that no new blocks caused by unshare remain in Cow fork.
> >>>>> The change in xfs_get_extsz_hint() makes the new blocks have more change to be
> >>>>> contigurous in unshare path when there are multiple extents to unshare.
> >>>> 
> >>>> contiguous
> >>>> 
> >>> Sorry for typos.
> >>> 
> >>>> Aha, so you're trying to combat fragmentation by making unshare use
> >>>> delayed allocation so that we try to allocate one big extent all at once
> >>>> instead of doing this piece by piece.  Or maybe you also don't want
> >>>> unshare to preallocate cow extents beyond the range requested?
> >>>> 
> >>> 
> >>> Yes, The main purpose is for the later (avoid preallocating beyond).
> >> 
> >> But the user set an extent size hint, so presumably they want us to (try
> >> to) obey that even for unshare operations, right?
> > 
> > Yeah, user might set extsize for better IO performance. But they don’t really know
> > much details. Consider this case: 
> > writing to those over/beyond preallocated blocks would cause Cow. Cow includes
> > extra meta changes: releasing old blocks, inserting new extents to data fork and removing
> > staging extents from refcount tree.  That’s a lot, as I think, a Cow is slower than block over-write.
> > In above case, the Cow is caused by unshare, rather than by shared blocks. That might be
> > not what user expected by setting extsize.
> > 
> > 
> May I know if this is a good reason to skip extsize setting, or we
> anyways honor extsize?

I'm not sure -- if someone set (say) a 256k cowextsize and later wants
to unshare a single 4k block of that, they might think it's useful for
the fs to try to push surrounding writes to the same 256k region to
combat fragmentation.

OTOH if the cowextsize is (say) 2M (or 1G) then that might be excessive?
Particularly if the cow reservation shadows an already unshared data
fork block, in which case (as you point out) this turns a cheap
overwrite into an expensive cow.  IOWs, we're trading less fragmentation
for higher individual write times.

Hmm, maybe there's another aspect to think about -- for a directio cow
write, I think we skip the cowextsize thing and only allocate the exact
range the caller asked for.  If that's correct, then perhaps unshare
should follow the directio write allocation pattern if the struct file
has O_DIRECT set?

--D

> Thanks,
> Wengang
> 

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2024-06-11 17:00 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-05-17 21:26 [PATCH] xfs: dont remain new blocks in cowfork for unshare Wengang Wang
2024-05-20 18:08 ` Darrick J. Wong
2024-05-20 22:42   ` Wengang Wang
2024-05-31 15:46     ` Wengang Wang
2024-05-31 16:00     ` Darrick J. Wong
2024-05-31 16:13       ` Christoph Hellwig
2024-05-31 17:53       ` Wengang Wang
2024-06-06 18:16         ` Wengang Wang
2024-06-11 17:00           ` Darrick J. Wong

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox