public inbox for linux-xfs@vger.kernel.org
 help / color / mirror / Atom feed
From: Lukas Herbolt <lukas@herbolt.com>
To: Pankaj Raghav <p.raghav@samsung.com>
Cc: linux-xfs@vger.kernel.org, bfoster@redhat.com,
	"Darrick J . Wong" <djwong@kernel.org>,
	dgc@kernel.org, gost.dev@samsung.com, pankaj.raghav@linux.dev,
	kundan.kumar@samsung.com, cem@kernel.org, hch@infradead.org
Subject: Re: [PATCH v2 2/2] xfs: add support for FALLOC_FL_WRITE_ZEROES
Date: Mon, 13 Apr 2026 17:05:41 +0200	[thread overview]
Message-ID: <c086dc6d3c95d1aed1ac0950ef3278ea@herbolt.com> (raw)
In-Reply-To: <20260413133256.3378243-3-p.raghav@samsung.com>

On 2026-04-13 15:32, Pankaj Raghav wrote:
> If the underlying block device supports the unmap write zeroes
> operation, this flag allows users to quickly preallocate a file with
> written extents that contain zeroes. This is beneficial for subsequent
> overwrites as it prevents the need for unwritten-to-written extent
> conversions, thereby significantly reducing metadata updates and journal
> I/O overhead, improving overwrite performance.
> 
> Co-developed-by: Lukas Herbolt <lukas@herbolt.com>
> Signed-off-by: Lukas Herbolt <lukas@herbolt.com>
> Signed-off-by: Pankaj Raghav <p.raghav@samsung.com>
> ---
>  fs/xfs/xfs_file.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 55 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> index 845a97c9b063..99a02982154a 100644
> --- a/fs/xfs/xfs_file.c
> +++ b/fs/xfs/xfs_file.c
> @@ -1368,6 +1368,57 @@ xfs_falloc_force_zero(
>  	return XFS_TEST_ERROR(ip->i_mount, XFS_ERRTAG_FORCE_ZERO_RANGE);
>  }
> 
> +static int
> +xfs_falloc_write_zeroes(
> +	struct file		*file,
> +	int			mode,
> +	loff_t			offset,
> +	loff_t			len,
> +	struct xfs_zone_alloc_ctx *ac)
> +{
> +	struct inode		*inode = file_inode(file);
> +	struct xfs_inode	*ip = XFS_I(inode);
> +	loff_t			new_size = 0;
> +	loff_t			old_size = XFS_ISIZE(ip);
> +	int			error;
> +	unsigned int		blksize = i_blocksize(inode);
> +	loff_t			offset_aligned = round_down(offset, blksize);
> +	bool			did_zero;
> +
> +	if (xfs_is_always_cow_inode(ip) ||
> +	    !bdev_write_zeroes_unmap_sectors(
> +		    xfs_inode_buftarg(XFS_I(inode))->bt_bdev))
> +		return -EOPNOTSUPP;
> +
> +	error = xfs_falloc_newsize(file, mode, offset, len, &new_size);
> +	if (error)
> +		return error;
> +
> +	error = xfs_free_file_space(ip, offset, len, ac);
> +	if (error)
> +		return error;
> +
> +	/*
> +	 * Zero the tail of the old EOF block and any space up to the new
> +	 * offset.
> +	 * In the usual truncate path, xfs_falloc_setsize takes care of
> +	 * zeroing those blocks.
> +	 */
> +	if (offset_aligned > old_size)
> +		error = xfs_zero_range(ip, old_size, offset_aligned - old_size,
> +				NULL, &did_zero);
> +	if (error)
> +		return error;
> +
> +	error = xfs_bmap_alloc_or_convert_range(ip, offset, len,
> +			XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO,
> +			new_size ? true : false);
> +	if (error)
> +		return error;
> +
> +	return error;
> +}
> +
>  /*
>   * Punch a hole and prealloc the range.  We use a hole punch rather than
>   * unwritten extent conversion for two reasons:
> @@ -1470,7 +1521,7 @@ xfs_falloc_allocate_range(
>  		(FALLOC_FL_ALLOCATE_RANGE | FALLOC_FL_KEEP_SIZE |	\
>  		 FALLOC_FL_PUNCH_HOLE |	FALLOC_FL_COLLAPSE_RANGE |	\
>  		 FALLOC_FL_ZERO_RANGE |	FALLOC_FL_INSERT_RANGE |	\
> -		 FALLOC_FL_UNSHARE_RANGE)
> +		 FALLOC_FL_UNSHARE_RANGE | FALLOC_FL_WRITE_ZEROES)
> 
>  STATIC long
>  __xfs_file_fallocate(
> @@ -1522,6 +1573,9 @@ __xfs_file_fallocate(
>  	case FALLOC_FL_ALLOCATE_RANGE:
>  		error = xfs_falloc_allocate_range(file, mode, offset, len);
>  		break;
> +	case FALLOC_FL_WRITE_ZEROES:
> +		error = xfs_falloc_write_zeroes(file, mode, offset, len, ac);
> +		break;
>  	default:
>  		error = -EOPNOTSUPP;
>  		break;

I have a debug option to skip the LBPRZ/DLFEAT check on the underlying
device, for testing on regular devices.

+       if (xfs_is_always_cow_inode(ip) ||
+           
!bdev_write_zeroes_unmap_sectors(xfs_inode_buftarg(ip)->bt_bdev)) {
+#ifdef DEBUG
+               if (!xfs_globals.allow_write_zero)
+#endif
+                       return -EOPNOTSUPP;


diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c
index 4527119b2961..3436e6b574dd 100644
--- a/fs/xfs/xfs_sysfs.c
+++ b/fs/xfs/xfs_sysfs.c
@@ -314,6 +314,29 @@ bload_node_slack_show(
  }
  XFS_SYSFS_ATTR_RW(bload_node_slack);

+static ssize_t
+allow_write_zero_store(
+       struct kobject  *kobject,
+       const char      *buf,
+       size_t          count)
+{
+       ssize_t         ret;
+
+       ret = kstrtobool(buf, &xfs_globals.allow_write_zero);
+       if (ret < 0)
+               return ret;
+       return count;
+}
+
+static ssize_t
+allow_write_zero_show(
+       struct kobject  *kobject,
+       char            *buf)
+{
+       return sysfs_emit(buf, "%d\n", xfs_globals.allow_write_zero);
+}
+XFS_SYSFS_ATTR_RW(allow_write_zero);
+
  static struct attribute *xfs_dbg_attrs[] = {
         ATTR_LIST(bug_on_assert),
         ATTR_LIST(log_recovery_delay),
@@ -323,6 +346,7 @@ static struct attribute *xfs_dbg_attrs[] = {
         ATTR_LIST(larp),
         ATTR_LIST(bload_leaf_slack),
         ATTR_LIST(bload_node_slack),
+       ATTR_LIST(allow_write_zero),
         NULL,
  };
  ATTRIBUTE_GROUPS(xfs_dbg);

diff --git a/fs/xfs/xfs_sysctl.h b/fs/xfs/xfs_sysctl.h
index ed9d896079c1..464cdfb22f5b 100644
--- a/fs/xfs/xfs_sysctl.h
+++ b/fs/xfs/xfs_sysctl.h
@@ -86,6 +86,7 @@ struct xfs_globals {
         int     mount_delay;            /* mount setup delay (secs) */
         bool    bug_on_assert;          /* BUG() the kernel on assert 
failure */
         bool    always_cow;             /* use COW fork for all 
overwrites */
+       bool    allow_write_zero;       /* Allow WRITE_ZERO on any HW */
  };
  extern struct xfs_globals      xfs_globals;


-- 
-lhe

  reply	other threads:[~2026-04-13 15:13 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-04-13 13:32 [PATCH v2 0/2] add FALLOC_FL_WRITE_ZEROES support to xfs Pankaj Raghav
2026-04-13 13:32 ` [PATCH v2 1/2] xfs: add xfs_bmap_alloc_or_convert_range function Pankaj Raghav
2026-04-13 13:32 ` [PATCH v2 2/2] xfs: add support for FALLOC_FL_WRITE_ZEROES Pankaj Raghav
2026-04-13 15:05   ` Lukas Herbolt [this message]
2026-04-14  6:08     ` Pankaj
2026-04-13 15:07 ` [PATCH v2 0/2] add FALLOC_FL_WRITE_ZEROES support to xfs Lukas Herbolt
2026-04-14  6:11   ` Pankaj

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=c086dc6d3c95d1aed1ac0950ef3278ea@herbolt.com \
    --to=lukas@herbolt.com \
    --cc=bfoster@redhat.com \
    --cc=cem@kernel.org \
    --cc=dgc@kernel.org \
    --cc=djwong@kernel.org \
    --cc=gost.dev@samsung.com \
    --cc=hch@infradead.org \
    --cc=kundan.kumar@samsung.com \
    --cc=linux-xfs@vger.kernel.org \
    --cc=p.raghav@samsung.com \
    --cc=pankaj.raghav@linux.dev \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox