* [PATCH v7] xfs: add FALLOC_FL_WRITE_ZEROES to XFS code base
@ 2026-01-20 13:20 cem
2026-01-20 15:57 ` Darrick J. Wong
2026-01-21 6:56 ` Christoph Hellwig
0 siblings, 2 replies; 4+ messages in thread
From: cem @ 2026-01-20 13:20 UTC (permalink / raw)
To: linux-xfs; +Cc: hch, djwong, lukas
From: Lukas Herbolt <lukas@herbolt.com>
Add support for FALLOC_FL_WRITE_ZEROES if the underlying device enables
the unmap write zeroes operation.
Signed-off-by: Lukas Herbolt <lukas@herbolt.com>
[cem: rewrite xfs_falloc_zero_range() bits]
---
Christoph, Darrick, could you please review/ack this patch again? I
needed to rewrite the xfs_falloc_zero_range() bits, because it
conflicted with 66d78a11479c and 8dc15b7a6e59. This version aims mostly
to remove one of the nested if-else levels to keep it a bit cleaner.
Please let me know if you agree with this version; otherwise I'll ask
Lukas to rebase it on top of the new code.
Thanks!
fs/xfs/xfs_bmap_util.c | 10 ++++++++--
fs/xfs/xfs_bmap_util.h | 2 +-
fs/xfs/xfs_file.c | 38 +++++++++++++++++++++++++++-----------
3 files changed, 36 insertions(+), 14 deletions(-)
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 0ab00615f1ad..74a7597d0998 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -642,11 +642,17 @@ xfs_free_eofblocks(
return error;
}
+/*
+ * Callers can specify bmapi_flags. If XFS_BMAPI_ZERO is used, there are no
+ * further checks whether the hardware supports it, and it can fall back to
+ * software zeroing.
+ */
int
xfs_alloc_file_space(
struct xfs_inode *ip,
xfs_off_t offset,
- xfs_off_t len)
+ xfs_off_t len,
+ uint32_t bmapi_flags)
{
xfs_mount_t *mp = ip->i_mount;
xfs_off_t count;
@@ -748,7 +754,7 @@ xfs_alloc_file_space(
* will eventually reach the requested range.
*/
error = xfs_bmapi_write(tp, ip, startoffset_fsb,
- allocatesize_fsb, XFS_BMAPI_PREALLOC, 0, imapp,
+ allocatesize_fsb, bmapi_flags, 0, imapp,
&nimaps);
if (error) {
if (error != -ENOSR)
diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h
index c477b3361630..2895cc97a572 100644
--- a/fs/xfs/xfs_bmap_util.h
+++ b/fs/xfs/xfs_bmap_util.h
@@ -56,7 +56,7 @@ int xfs_bmap_last_extent(struct xfs_trans *tp, struct xfs_inode *ip,
/* preallocation and hole punch interface */
int xfs_alloc_file_space(struct xfs_inode *ip, xfs_off_t offset,
- xfs_off_t len);
+ xfs_off_t len, uint32_t bmapi_flags);
int xfs_free_file_space(struct xfs_inode *ip, xfs_off_t offset,
xfs_off_t len, struct xfs_zone_alloc_ctx *ac);
int xfs_collapse_file_space(struct xfs_inode *, xfs_off_t offset,
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index d36a9aafa8ab..b23f1373116e 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1302,16 +1302,29 @@ xfs_falloc_zero_range(
if (xfs_falloc_force_zero(ip, ac)) {
error = xfs_zero_range(ip, offset, len, ac, NULL);
- } else {
- error = xfs_free_file_space(ip, offset, len, ac);
- if (error)
- return error;
+ goto out;
+ }
- len = round_up(offset + len, blksize) -
- round_down(offset, blksize);
- offset = round_down(offset, blksize);
- error = xfs_alloc_file_space(ip, offset, len);
+ error = xfs_free_file_space(ip, offset, len, ac);
+ if (error)
+ return error;
+
+ len = round_up(offset + len, blksize) - round_down(offset, blksize);
+ offset = round_down(offset, blksize);
+
+ if (mode & FALLOC_FL_WRITE_ZEROES) {
+ if (xfs_is_always_cow_inode(ip) ||
+ !bdev_write_zeroes_unmap_sectors(
+ xfs_inode_buftarg(ip)->bt_bdev))
+ return -EOPNOTSUPP;
+ error = xfs_alloc_file_space(ip, offset, len,
+ XFS_BMAPI_ZERO);
+ } else {
+ error = xfs_alloc_file_space(ip, offset, len,
+ XFS_BMAPI_PREALLOC);
}
+
+out:
if (error)
return error;
return xfs_falloc_setsize(file, new_size);
@@ -1336,7 +1349,8 @@ xfs_falloc_unshare_range(
if (error)
return error;
- error = xfs_alloc_file_space(XFS_I(inode), offset, len);
+ error = xfs_alloc_file_space(XFS_I(inode), offset, len,
+ XFS_BMAPI_PREALLOC);
if (error)
return error;
return xfs_falloc_setsize(file, new_size);
@@ -1364,7 +1378,8 @@ xfs_falloc_allocate_range(
if (error)
return error;
- error = xfs_alloc_file_space(XFS_I(inode), offset, len);
+ error = xfs_alloc_file_space(XFS_I(inode), offset, len,
+ XFS_BMAPI_PREALLOC);
if (error)
return error;
return xfs_falloc_setsize(file, new_size);
@@ -1374,7 +1389,7 @@ xfs_falloc_allocate_range(
(FALLOC_FL_ALLOCATE_RANGE | FALLOC_FL_KEEP_SIZE | \
FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE | \
FALLOC_FL_ZERO_RANGE | FALLOC_FL_INSERT_RANGE | \
- FALLOC_FL_UNSHARE_RANGE)
+ FALLOC_FL_UNSHARE_RANGE | FALLOC_FL_WRITE_ZEROES)
STATIC long
__xfs_file_fallocate(
@@ -1417,6 +1432,7 @@ __xfs_file_fallocate(
case FALLOC_FL_INSERT_RANGE:
error = xfs_falloc_insert_range(file, offset, len);
break;
+ case FALLOC_FL_WRITE_ZEROES:
case FALLOC_FL_ZERO_RANGE:
error = xfs_falloc_zero_range(file, mode, offset, len, ac);
break;
--
2.52.0
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH v7] xfs: add FALLOC_FL_WRITE_ZEROES to XFS code base
2026-01-20 13:20 [PATCH v7] xfs: add FALLOC_FL_WRITE_ZEROES to XFS code base cem
@ 2026-01-20 15:57 ` Darrick J. Wong
2026-01-21 6:56 ` Christoph Hellwig
1 sibling, 0 replies; 4+ messages in thread
From: Darrick J. Wong @ 2026-01-20 15:57 UTC (permalink / raw)
To: cem; +Cc: linux-xfs, hch, lukas
On Tue, Jan 20, 2026 at 02:20:50PM +0100, cem@kernel.org wrote:
> From: Lukas Herbolt <lukas@herbolt.com>
>
> Add support for FALLOC_FL_WRITE_ZEROES if the underlying device enable
> the unmap write zeroes operation.
>
> Signed-off-by: Lukas Herbolt <lukas@herbolt.com>
> [cem: rewrite xfs_falloc_zero_range() bits]
> ---
>
> Christoph, Darrick, could you please review/ack this patch again? I
> needed to rewrite the xfs_falloc_zero_range() bits, because it
> conflicted with 66d78a11479c and 8dc15b7a6e59. This version aims mostly
> to remove one of the if-else nested levels to keep it a bit cleaner.
>
> please let me know if you agree with this version, otherwise I'll ask
> Lukas to rebase it on top of the new code.
>
> Thanks!
>
> fs/xfs/xfs_bmap_util.c | 10 ++++++++--
> fs/xfs/xfs_bmap_util.h | 2 +-
> fs/xfs/xfs_file.c | 38 +++++++++++++++++++++++++++-----------
> 3 files changed, 36 insertions(+), 14 deletions(-)
>
> diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
> index 0ab00615f1ad..74a7597d0998 100644
> --- a/fs/xfs/xfs_bmap_util.c
> +++ b/fs/xfs/xfs_bmap_util.c
> @@ -642,11 +642,17 @@ xfs_free_eofblocks(
> return error;
> }
>
> +/*
> + * Callers can specify bmapi_flags, if XFS_BMAPI_ZERO is used there are no
> + * further checks whether the hard ware supports and it can fallback to
> + * software zeroing.
> + */
> int
> xfs_alloc_file_space(
> struct xfs_inode *ip,
> xfs_off_t offset,
> - xfs_off_t len)
> + xfs_off_t len,
> + uint32_t bmapi_flags)
> {
> xfs_mount_t *mp = ip->i_mount;
> xfs_off_t count;
> @@ -748,7 +754,7 @@ xfs_alloc_file_space(
> * will eventually reach the requested range.
> */
> error = xfs_bmapi_write(tp, ip, startoffset_fsb,
> - allocatesize_fsb, XFS_BMAPI_PREALLOC, 0, imapp,
> + allocatesize_fsb, bmapi_flags, 0, imapp,
> &nimaps);
> if (error) {
> if (error != -ENOSR)
> diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h
> index c477b3361630..2895cc97a572 100644
> --- a/fs/xfs/xfs_bmap_util.h
> +++ b/fs/xfs/xfs_bmap_util.h
> @@ -56,7 +56,7 @@ int xfs_bmap_last_extent(struct xfs_trans *tp, struct xfs_inode *ip,
>
> /* preallocation and hole punch interface */
> int xfs_alloc_file_space(struct xfs_inode *ip, xfs_off_t offset,
> - xfs_off_t len);
> + xfs_off_t len, uint32_t bmapi_flags);
> int xfs_free_file_space(struct xfs_inode *ip, xfs_off_t offset,
> xfs_off_t len, struct xfs_zone_alloc_ctx *ac);
> int xfs_collapse_file_space(struct xfs_inode *, xfs_off_t offset,
> diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> index d36a9aafa8ab..b23f1373116e 100644
> --- a/fs/xfs/xfs_file.c
> +++ b/fs/xfs/xfs_file.c
> @@ -1302,16 +1302,29 @@ xfs_falloc_zero_range(
>
> if (xfs_falloc_force_zero(ip, ac)) {
> error = xfs_zero_range(ip, offset, len, ac, NULL);
> - } else {
> - error = xfs_free_file_space(ip, offset, len, ac);
> - if (error)
> - return error;
> + goto out;
> + }
>
> - len = round_up(offset + len, blksize) -
> - round_down(offset, blksize);
> - offset = round_down(offset, blksize);
> - error = xfs_alloc_file_space(ip, offset, len);
> + error = xfs_free_file_space(ip, offset, len, ac);
> + if (error)
> + return error;
> +
> + len = round_up(offset + len, blksize) - round_down(offset, blksize);
> + offset = round_down(offset, blksize);
> +
> + if (mode & FALLOC_FL_WRITE_ZEROES) {
> + if (xfs_is_always_cow_inode(ip) ||
> + !bdev_write_zeroes_unmap_sectors(
> + xfs_inode_buftarg(ip)->bt_bdev))
> + return -EOPNOTSUPP;
Taking a second look -- this code allows ZERO_RANGE|WRITE_ZEROES to
punch out the file space but then fail with EOPNOTSUPP. I think if
we're going to error out that way, we should do that at the top of the
function before any changes are made.
--D
> + error = xfs_alloc_file_space(ip, offset, len,
> + XFS_BMAPI_ZERO);
> + } else {
> + error = xfs_alloc_file_space(ip, offset, len,
> + XFS_BMAPI_PREALLOC);
> }
> +
> +out:
> if (error)
> return error;
> return xfs_falloc_setsize(file, new_size);
> @@ -1336,7 +1349,8 @@ xfs_falloc_unshare_range(
> if (error)
> return error;
>
> - error = xfs_alloc_file_space(XFS_I(inode), offset, len);
> + error = xfs_alloc_file_space(XFS_I(inode), offset, len,
> + XFS_BMAPI_PREALLOC);
> if (error)
> return error;
> return xfs_falloc_setsize(file, new_size);
> @@ -1364,7 +1378,8 @@ xfs_falloc_allocate_range(
> if (error)
> return error;
>
> - error = xfs_alloc_file_space(XFS_I(inode), offset, len);
> + error = xfs_alloc_file_space(XFS_I(inode), offset, len,
> + XFS_BMAPI_PREALLOC);
> if (error)
> return error;
> return xfs_falloc_setsize(file, new_size);
> @@ -1374,7 +1389,7 @@ xfs_falloc_allocate_range(
> (FALLOC_FL_ALLOCATE_RANGE | FALLOC_FL_KEEP_SIZE | \
> FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE | \
> FALLOC_FL_ZERO_RANGE | FALLOC_FL_INSERT_RANGE | \
> - FALLOC_FL_UNSHARE_RANGE)
> + FALLOC_FL_UNSHARE_RANGE | FALLOC_FL_WRITE_ZEROES)
>
> STATIC long
> __xfs_file_fallocate(
> @@ -1417,6 +1432,7 @@ __xfs_file_fallocate(
> case FALLOC_FL_INSERT_RANGE:
> error = xfs_falloc_insert_range(file, offset, len);
> break;
> + case FALLOC_FL_WRITE_ZEROES:
> case FALLOC_FL_ZERO_RANGE:
> error = xfs_falloc_zero_range(file, mode, offset, len, ac);
> break;
> --
> 2.52.0
>
>
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH v7] xfs: add FALLOC_FL_WRITE_ZEROES to XFS code base
2026-01-20 13:20 [PATCH v7] xfs: add FALLOC_FL_WRITE_ZEROES to XFS code base cem
2026-01-20 15:57 ` Darrick J. Wong
@ 2026-01-21 6:56 ` Christoph Hellwig
2026-01-29 7:39 ` lukas
1 sibling, 1 reply; 4+ messages in thread
From: Christoph Hellwig @ 2026-01-21 6:56 UTC (permalink / raw)
To: cem; +Cc: linux-xfs, hch, djwong, lukas
On Tue, Jan 20, 2026 at 02:20:50PM +0100, cem@kernel.org wrote:
> From: Lukas Herbolt <lukas@herbolt.com>
>
> Add support for FALLOC_FL_WRITE_ZEROES if the underlying device enable
> the unmap write zeroes operation.
>
> Signed-off-by: Lukas Herbolt <lukas@herbolt.com>
> [cem: rewrite xfs_falloc_zero_range() bits]
Nit: once you modify something substantially and add your marker
you also need to sign off on it.
> ---
>
> Christoph, Darrick, could you please review/ack this patch again? I
> needed to rewrite the xfs_falloc_zero_range() bits, because it
> conflicted with 66d78a11479c and 8dc15b7a6e59. This version aims mostly
> to remove one of the if-else nested levels to keep it a bit cleaner.
Maybe mention the "merge conflict" in the above note?
> index d36a9aafa8ab..b23f1373116e 100644
> --- a/fs/xfs/xfs_file.c
> +++ b/fs/xfs/xfs_file.c
> @@ -1302,16 +1302,29 @@ xfs_falloc_zero_range(
>
> if (xfs_falloc_force_zero(ip, ac)) {
> error = xfs_zero_range(ip, offset, len, ac, NULL);
> + goto out;
> + }
>
> + error = xfs_free_file_space(ip, offset, len, ac);
> + if (error)
> + return error;
> +
> + len = round_up(offset + len, blksize) - round_down(offset, blksize);
> + offset = round_down(offset, blksize);
> +
> + if (mode & FALLOC_FL_WRITE_ZEROES) {
> + if (xfs_is_always_cow_inode(ip) ||
> + !bdev_write_zeroes_unmap_sectors(
> + xfs_inode_buftarg(ip)->bt_bdev))
> + return -EOPNOTSUPP;
> + error = xfs_alloc_file_space(ip, offset, len,
> + XFS_BMAPI_ZERO);
Darrick made a good point that we should check the not supported cases
earlier, even if that is an issue in the original version. Also I don't
think we should hit the force zero case for FALLOC_FL_WRITE_ZEROES.
I.e., this should probably become something like:
if (mode & FALLOC_FL_WRITE_ZEROES) {
if (xfs_is_always_cow_inode(ip) ||
!bdev_write_zeroes_unmap_sectors(
xfs_inode_buftarg(ip)->bt_bdev))
return -EOPNOTSUPP;
bmapi_flags = XFS_BMAPI_ZERO;
} else {
if (xfs_falloc_force_zero(ip, ac)) {
error = xfs_zero_range(ip, offset, len, ac, NULL);
goto set_filesize;
}
bmapi_flags = XFS_BMAPI_PREALLOC;
}
< free file space, round, etc.. >
error = xfs_alloc_file_space(ip, offset, len, bmapi_flags);
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH v7] xfs: add FALLOC_FL_WRITE_ZEROES to XFS code base
2026-01-21 6:56 ` Christoph Hellwig
@ 2026-01-29 7:39 ` lukas
0 siblings, 0 replies; 4+ messages in thread
From: lukas @ 2026-01-29 7:39 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: cem, linux-xfs, djwong
On 2026-01-21 07:56, Christoph Hellwig wrote:
> On Tue, Jan 20, 2026 at 02:20:50PM +0100, cem@kernel.org wrote:
>> From: Lukas Herbolt <lukas@herbolt.com>
>>
>> Add support for FALLOC_FL_WRITE_ZEROES if the underlying device enable
>> the unmap write zeroes operation.
>>
>> Signed-off-by: Lukas Herbolt <lukas@herbolt.com>
>> [cem: rewrite xfs_falloc_zero_range() bits]
>
> Nit: once you modify something substantially and add your marker
> you also need to sign off on it.
>
>> ---
>>
>> Christoph, Darrick, could you please review/ack this patch again? I
>> needed to rewrite the xfs_falloc_zero_range() bits, because it
>> conflicted with 66d78a11479c and 8dc15b7a6e59. This version aims
>> mostly
>> to remove one of the if-else nested levels to keep it a bit cleaner.
>
> Maybe mention the "merge conflict" in the above note?
>
>> index d36a9aafa8ab..b23f1373116e 100644
>> --- a/fs/xfs/xfs_file.c
>> +++ b/fs/xfs/xfs_file.c
>> @@ -1302,16 +1302,29 @@ xfs_falloc_zero_range(
>>
>> if (xfs_falloc_force_zero(ip, ac)) {
>> error = xfs_zero_range(ip, offset, len, ac, NULL);
>> + goto out;
>> + }
>>
>> + error = xfs_free_file_space(ip, offset, len, ac);
>> + if (error)
>> + return error;
>> +
>> + len = round_up(offset + len, blksize) - round_down(offset, blksize);
>> + offset = round_down(offset, blksize);
>> +
>> + if (mode & FALLOC_FL_WRITE_ZEROES) {
>> + if (xfs_is_always_cow_inode(ip) ||
>> + !bdev_write_zeroes_unmap_sectors(
>> + xfs_inode_buftarg(ip)->bt_bdev))
>> + return -EOPNOTSUPP;
>> + error = xfs_alloc_file_space(ip, offset, len,
>> + XFS_BMAPI_ZERO);
>
> Darrick made a good point that we should check the not supported cases
> earlier, even if that is an issue in the original version. Also I
> don't
> think we should hit the force zero case for FALLOC_FL_WRITE_ZEROES.
> I.e., this should probably become something like:
>
> if (mode & FALLOC_FL_WRITE_ZEROES) {
> if (xfs_is_always_cow_inode(ip) ||
> !bdev_write_zeroes_unmap_sectors(
> xfs_inode_buftarg(ip)->bt_bdev))
> return -EOPNOTSUPP;
> bmapi_flags = XFS_BMAPI_ZERO;
> } else {
> if (xfs_falloc_force_zero(ip, ac)) {
> error = xfs_zero_range(ip, offset, len, ac, NULL);
> goto set_filesize;
> }
> bmapi_flags = XFS_BMAPI_PREALLOC;
> }
>
> < free file space, round, etc.. >
>
> error = xfs_alloc_file_space(ip, offset, len, bmapi_flags);
Ugh, I missed this one. I will add the earlier check that Darrick and
Christoph suggested and rebase onto a more recent version.
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2026-01-29 7:47 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz follow: Atom feed)
-- links below jump to the message on this page --
2026-01-20 13:20 [PATCH v7] xfs: add FALLOC_FL_WRITE_ZEROES to XFS code base cem
2026-01-20 15:57 ` Darrick J. Wong
2026-01-21 6:56 ` Christoph Hellwig
2026-01-29 7:39 ` lukas
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox