From: Zhang Yi <yi.zhang@huaweicloud.com>
To: linux-fsdevel@vger.kernel.org, linux-ext4@vger.kernel.org,
linux-block@vger.kernel.org, dm-devel@lists.linux.dev,
linux-nvme@lists.infradead.org, linux-scsi@vger.kernel.org
Cc: linux-xfs@vger.kernel.org, linux-kernel@vger.kernel.org,
hch@lst.de, tytso@mit.edu, djwong@kernel.org,
john.g.garry@oracle.com, bmarzins@redhat.com,
chaitanyak@nvidia.com, shinichiro.kawasaki@wdc.com,
brauner@kernel.org, martin.petersen@oracle.com,
yi.zhang@huawei.com, yi.zhang@huaweicloud.com,
chengzhihao1@huawei.com, yukuai3@huawei.com,
yangerkun@huawei.com
Subject: [PATCH 10/10] ext4: add FALLOC_FL_WRITE_ZEROES support
Date: Wed, 4 Jun 2025 10:08:50 +0800 [thread overview]
Message-ID: <20250604020850.1304633-11-yi.zhang@huaweicloud.com> (raw)
In-Reply-To: <20250604020850.1304633-1-yi.zhang@huaweicloud.com>
From: Zhang Yi <yi.zhang@huawei.com>
Add support for FALLOC_FL_WRITE_ZEROES if the underlying device enable
the unmap write zeroes operation. This first allocates blocks as
unwritten, then issues a zero command outside of the running journal
handle, and finally converts them to a written state.
Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
---
fs/ext4/extents.c | 66 ++++++++++++++++++++++++++++++-------
include/trace/events/ext4.h | 3 +-
2 files changed, 57 insertions(+), 12 deletions(-)
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index b543a46fc809..29ce9f6287d0 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4501,6 +4501,8 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
struct ext4_map_blocks map;
unsigned int credits;
loff_t epos, old_size = i_size_read(inode);
+ unsigned int blkbits = inode->i_blkbits;
+ bool alloc_zero = false;
BUG_ON(!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS));
map.m_lblk = offset;
@@ -4513,6 +4515,17 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
if (len <= EXT_UNWRITTEN_MAX_LEN)
flags |= EXT4_GET_BLOCKS_NO_NORMALIZE;
+ /*
+ * Do the actual write zero during a running journal transaction
+ * costs a lot. First allocate an unwritten extent and then
+ * convert it to written after zeroing it out.
+ */
+ if (flags & EXT4_GET_BLOCKS_ZERO) {
+ flags &= ~EXT4_GET_BLOCKS_ZERO;
+ flags |= EXT4_GET_BLOCKS_UNWRIT_EXT;
+ alloc_zero = true;
+ }
+
/*
* credits to insert 1 extent into extent tree
*/
@@ -4549,9 +4562,7 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
* allow a full retry cycle for any remaining allocations
*/
retries = 0;
- map.m_lblk += ret;
- map.m_len = len = len - ret;
- epos = (loff_t)map.m_lblk << inode->i_blkbits;
+ epos = (loff_t)(map.m_lblk + ret) << blkbits;
inode_set_ctime_current(inode);
if (new_size) {
if (epos > new_size)
@@ -4571,6 +4582,21 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
ret2 = ret3 ? ret3 : ret2;
if (unlikely(ret2))
break;
+
+ if (alloc_zero &&
+ (map.m_flags & (EXT4_MAP_MAPPED | EXT4_MAP_UNWRITTEN))) {
+ ret2 = ext4_issue_zeroout(inode, map.m_lblk, map.m_pblk,
+ map.m_len);
+ if (likely(!ret2))
+ ret2 = ext4_convert_unwritten_extents(NULL,
+ inode, (loff_t)map.m_lblk << blkbits,
+ (loff_t)map.m_len << blkbits);
+ if (ret2)
+ break;
+ }
+
+ map.m_lblk += ret;
+ map.m_len = len = len - ret;
}
if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
goto retry;
@@ -4636,7 +4662,11 @@ static long ext4_zero_range(struct file *file, loff_t offset,
if (end_lblk > start_lblk) {
ext4_lblk_t zero_blks = end_lblk - start_lblk;
- flags |= (EXT4_GET_BLOCKS_CONVERT_UNWRITTEN | EXT4_EX_NOCACHE);
+ if (mode & FALLOC_FL_WRITE_ZEROES)
+ flags = EXT4_GET_BLOCKS_CREATE_ZERO | EXT4_EX_NOCACHE;
+ else
+ flags |= (EXT4_GET_BLOCKS_CONVERT_UNWRITTEN |
+ EXT4_EX_NOCACHE);
ret = ext4_alloc_file_blocks(file, start_lblk, zero_blks,
new_size, flags);
if (ret)
@@ -4745,11 +4775,18 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
if (IS_ENCRYPTED(inode) &&
(mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_INSERT_RANGE)))
return -EOPNOTSUPP;
+ /*
+ * Don't allow writing zeroes if the underlying device does not
+ * enable the unmap write zeroes operation.
+ */
+ if (!bdev_write_zeroes_unmap(inode->i_sb->s_bdev) &&
+ (mode & FALLOC_FL_WRITE_ZEROES))
+ return -EOPNOTSUPP;
/* Return error if mode is not supported */
if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
- FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE |
- FALLOC_FL_INSERT_RANGE))
+ FALLOC_FL_ZERO_RANGE | FALLOC_FL_COLLAPSE_RANGE |
+ FALLOC_FL_INSERT_RANGE | FALLOC_FL_WRITE_ZEROES))
return -EOPNOTSUPP;
inode_lock(inode);
@@ -4780,16 +4817,23 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
if (ret)
goto out_invalidate_lock;
- if (mode & FALLOC_FL_PUNCH_HOLE)
+ switch (mode & FALLOC_FL_MODE_MASK) {
+ case FALLOC_FL_PUNCH_HOLE:
ret = ext4_punch_hole(file, offset, len);
- else if (mode & FALLOC_FL_COLLAPSE_RANGE)
+ break;
+ case FALLOC_FL_COLLAPSE_RANGE:
ret = ext4_collapse_range(file, offset, len);
- else if (mode & FALLOC_FL_INSERT_RANGE)
+ break;
+ case FALLOC_FL_INSERT_RANGE:
ret = ext4_insert_range(file, offset, len);
- else if (mode & FALLOC_FL_ZERO_RANGE)
+ break;
+ case FALLOC_FL_ZERO_RANGE:
+ case FALLOC_FL_WRITE_ZEROES:
ret = ext4_zero_range(file, offset, len, mode);
- else
+ break;
+ default:
ret = -EOPNOTSUPP;
+ }
out_invalidate_lock:
filemap_invalidate_unlock(mapping);
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index 156908641e68..6f9cf2811733 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -92,7 +92,8 @@ TRACE_DEFINE_ENUM(ES_REFERENCED_B);
{ FALLOC_FL_KEEP_SIZE, "KEEP_SIZE"}, \
{ FALLOC_FL_PUNCH_HOLE, "PUNCH_HOLE"}, \
{ FALLOC_FL_COLLAPSE_RANGE, "COLLAPSE_RANGE"}, \
- { FALLOC_FL_ZERO_RANGE, "ZERO_RANGE"})
+ { FALLOC_FL_ZERO_RANGE, "ZERO_RANGE"}, \
+ { FALLOC_FL_WRITE_ZEROES, "WRITE_ZEROES"})
TRACE_DEFINE_ENUM(EXT4_FC_REASON_XATTR);
TRACE_DEFINE_ENUM(EXT4_FC_REASON_CROSS_RENAME);
--
2.46.1
next prev parent reply other threads:[~2025-06-04 2:21 UTC|newest]
Thread overview: 28+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-06-04 2:08 [PATCH 00/10] fallocate: introduce FALLOC_FL_WRITE_ZEROES flag Zhang Yi
2025-06-04 2:08 ` [PATCH 01/10] block: introduce BLK_FEAT_WRITE_ZEROES_UNMAP to queue limits features Zhang Yi
2025-06-11 6:09 ` Christoph Hellwig
2025-06-11 7:31 ` Zhang Yi
2025-06-12 4:47 ` Christoph Hellwig
2025-06-12 11:20 ` Zhang Yi
2025-06-12 15:03 ` Darrick J. Wong
2025-06-13 3:15 ` Zhang Yi
2025-06-13 5:56 ` Christoph Hellwig
2025-06-13 14:54 ` Darrick J. Wong
2025-06-14 4:48 ` Zhang Yi
2025-06-16 5:39 ` Christoph Hellwig
2025-06-04 2:08 ` [PATCH 02/10] nvme: set BLK_FEAT_WRITE_ZEROES_UNMAP if device supports DEAC bit Zhang Yi
2025-06-04 2:08 ` [PATCH 03/10] nvme-multipath: add BLK_FEAT_WRITE_ZEROES_UNMAP support Zhang Yi
2025-06-04 2:08 ` [PATCH 04/10] nvmet: set WZDS and DRB if device supports BLK_FEAT_WRITE_ZEROES_UNMAP Zhang Yi
2025-06-04 2:08 ` [PATCH 05/10] scsi: sd: set BLK_FEAT_WRITE_ZEROES_UNMAP if device supports unmap zeroing mode Zhang Yi
2025-06-04 2:08 ` [PATCH 06/10] dm: add BLK_FEAT_WRITE_ZEROES_UNMAP support Zhang Yi
2025-06-04 2:08 ` [PATCH 07/10] fs: introduce FALLOC_FL_WRITE_ZEROES to fallocate Zhang Yi
2025-06-11 15:05 ` Darrick J. Wong
2025-06-12 11:37 ` Zhang Yi
2025-06-04 2:08 ` [PATCH 08/10] block: factor out common part in blkdev_fallocate() Zhang Yi
2025-06-04 2:08 ` [PATCH 09/10] block: add FALLOC_FL_WRITE_ZEROES support Zhang Yi
2025-06-11 6:10 ` Christoph Hellwig
2025-06-04 2:08 ` Zhang Yi [this message]
2025-06-10 1:47 ` [PATCH 00/10] fallocate: introduce FALLOC_FL_WRITE_ZEROES flag Martin K. Petersen
2025-06-16 14:27 ` Christian Brauner
2025-06-16 16:59 ` Martin K. Petersen
2025-06-17 2:25 ` Zhang Yi
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250604020850.1304633-11-yi.zhang@huaweicloud.com \
--to=yi.zhang@huaweicloud.com \
--cc=bmarzins@redhat.com \
--cc=brauner@kernel.org \
--cc=chaitanyak@nvidia.com \
--cc=chengzhihao1@huawei.com \
--cc=djwong@kernel.org \
--cc=dm-devel@lists.linux.dev \
--cc=hch@lst.de \
--cc=john.g.garry@oracle.com \
--cc=linux-block@vger.kernel.org \
--cc=linux-ext4@vger.kernel.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-nvme@lists.infradead.org \
--cc=linux-scsi@vger.kernel.org \
--cc=linux-xfs@vger.kernel.org \
--cc=martin.petersen@oracle.com \
--cc=shinichiro.kawasaki@wdc.com \
--cc=tytso@mit.edu \
--cc=yangerkun@huawei.com \
--cc=yi.zhang@huawei.com \
--cc=yukuai3@huawei.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).