[PATCH 0/5] exfat: convert to iomap

public inbox for linux-fsdevel@vger.kernel.org
 help / color / mirror / Atom feed

* [PATCH 0/5] exfat: convert to iomap
@ 2026-03-26 11:50 Namjae Jeon
  2026-03-26 11:50 ` [PATCH 1/5] exfat: add iomap support Namjae Jeon
                   ` (5 more replies)
  0 siblings, 6 replies; 31+ messages in thread
From: Namjae Jeon @ 2026-03-26 11:50 UTC (permalink / raw)
  To: sj1557.seo, yuezhang.mo
  Cc: linux-fsdevel, anmuxixixi, dxdt, chizhiling, hch, Namjae Jeon

This patch series converts the exfat filesystem to the iomap framework for
buffered I/O, direct I/O, and llseek (SEEK_HOLE/SEEK_DATA) support.

Namjae Jeon (5):
  exfat: add iomap support
  exfat: add iomap direct I/O support
  exfat: add iomap buffered I/O support
  exfat: add support for multi-cluster allocation
  exfat: add support for SEEK_HOLE and SEEK_DATA in llseek

 fs/exfat/Makefile   |   2 +-
 fs/exfat/dir.c      |   2 +-
 fs/exfat/exfat_fs.h |  17 ++-
 fs/exfat/fatent.c   |  26 ++--
 fs/exfat/file.c     | 243 ++++++++++++++++++++++---------
 fs/exfat/inode.c    | 341 +++++---------------------------------------
 fs/exfat/iomap.c    | 305 +++++++++++++++++++++++++++++++++++++++
 fs/exfat/iomap.h    |  16 +++
 fs/exfat/namei.c    |   2 +-
 fs/exfat/super.c    |   1 +
 10 files changed, 565 insertions(+), 390 deletions(-)
 create mode 100644 fs/exfat/iomap.c
 create mode 100644 fs/exfat/iomap.h

-- 
2.25.1


^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH 1/5] exfat: add iomap support
  2026-03-26 11:50 [PATCH 0/5] exfat: convert to iomap Namjae Jeon
@ 2026-03-26 11:50 ` Namjae Jeon
  2026-03-30  2:45   ` Chi Zhiling
                     ` (4 more replies)
  2026-03-26 11:50 ` [PATCH 2/5] exfat: add iomap direct I/O support Namjae Jeon
                   ` (4 subsequent siblings)
  5 siblings, 5 replies; 31+ messages in thread
From: Namjae Jeon @ 2026-03-26 11:50 UTC (permalink / raw)
  To: sj1557.seo, yuezhang.mo
  Cc: linux-fsdevel, anmuxixixi, dxdt, chizhiling, hch, Namjae Jeon

Add iomap support to the exfat filesystem. This patch introduces the
necessary iomap infrastructure by adding a new iomap.c file and related
iomap operations. The main change is converting exfat_extend_valid_size()
to use iomap_zero_range() instead of the legacy write_begin/write_end path.
To support this, exfat_map_cluster() gains a balloc output flag that
reports whether a new cluster was allocated, and a new helper function
exfat_cluster_to_phys() is added. Also, data_start_bytes is added to
struct exfat_sb_info for easier conversion from cluster number to physical
byte offset.

Signed-off-by: Namjae Jeon <linkinjeon@kernel.org>
---
 fs/exfat/Makefile   |   2 +-
 fs/exfat/exfat_fs.h |  12 ++
 fs/exfat/file.c     |  54 +++-----
 fs/exfat/inode.c    |   9 +-
 fs/exfat/iomap.c    | 305 ++++++++++++++++++++++++++++++++++++++++++++
 fs/exfat/iomap.h    |  16 +++
 fs/exfat/super.c    |   1 +
 7 files changed, 361 insertions(+), 38 deletions(-)
 create mode 100644 fs/exfat/iomap.c
 create mode 100644 fs/exfat/iomap.h

diff --git a/fs/exfat/Makefile b/fs/exfat/Makefile
index ed51926a4971..e06bf85870ae 100644
--- a/fs/exfat/Makefile
+++ b/fs/exfat/Makefile
@@ -5,4 +5,4 @@
 obj-$(CONFIG_EXFAT_FS) += exfat.o
 
 exfat-y	:= inode.o namei.o dir.o super.o fatent.o cache.o nls.o misc.o \
-	   file.o balloc.o
+	   file.o balloc.o iomap.o
diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h
index 9fed9fb33cae..860f2e438b63 100644
--- a/fs/exfat/exfat_fs.h
+++ b/fs/exfat/exfat_fs.h
@@ -259,6 +259,7 @@ struct exfat_sb_info {
 	unsigned long long FAT1_start_sector; /* FAT1 start sector */
 	unsigned long long FAT2_start_sector; /* FAT2 start sector */
 	unsigned long long data_start_sector; /* data area start sector */
+	unsigned long long data_start_bytes;
 	unsigned int num_FAT_sectors; /* num of FAT sectors */
 	unsigned int root_dir; /* root dir cluster */
 	unsigned int dentries_per_clu; /* num of dentries per cluster */
@@ -432,6 +433,13 @@ static inline loff_t exfat_ondisk_size(const struct inode *inode)
 	return ((loff_t)inode->i_blocks) << 9;
 }
 
+static inline loff_t exfat_cluster_to_phys(struct exfat_sb_info *sbi,
+		unsigned int clus)
+{
+	return ((loff_t)(clus - EXFAT_RESERVED_CLUSTERS) << sbi->cluster_size_bits) +
+		sbi->data_start_bytes;
+}
+
 /* super.c */
 int exfat_set_volume_dirty(struct super_block *sb);
 int exfat_clear_volume_dirty(struct super_block *sb);
@@ -480,6 +488,7 @@ long exfat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
 long exfat_compat_ioctl(struct file *filp, unsigned int cmd,
 				unsigned long arg);
 int exfat_force_shutdown(struct super_block *sb, u32 flags);
+int exfat_extend_valid_size(struct inode *inode, loff_t off, bool bsync);
 
 /* namei.c */
 extern const struct dentry_operations exfat_dentry_ops;
@@ -543,6 +552,9 @@ int __exfat_write_inode(struct inode *inode, int sync);
 int exfat_write_inode(struct inode *inode, struct writeback_control *wbc);
 void exfat_evict_inode(struct inode *inode);
 int exfat_block_truncate_page(struct inode *inode, loff_t from);
+int exfat_map_cluster(struct inode *inode, unsigned int clu_offset,
+		unsigned int *clu, unsigned int *count, int create,
+		bool *balloc);
 
 /* exfat/nls.c */
 unsigned short exfat_toupper(struct super_block *sb, unsigned short a);
diff --git a/fs/exfat/file.c b/fs/exfat/file.c
index 2daf0dbabb24..756846b774c4 100644
--- a/fs/exfat/file.c
+++ b/fs/exfat/file.c
@@ -14,9 +14,11 @@
 #include <linux/writeback.h>
 #include <linux/filelock.h>
 #include <linux/falloc.h>
+#include <linux/iomap.h>
 
 #include "exfat_raw.h"
 #include "exfat_fs.h"
+#include "iomap.h"
 
 static int exfat_cont_expand(struct inode *inode, loff_t size)
 {
@@ -628,44 +630,28 @@ int exfat_file_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
 	return blkdev_issue_flush(inode->i_sb->s_bdev);
 }
 
-static int exfat_extend_valid_size(struct inode *inode, loff_t new_valid_size)
+int exfat_extend_valid_size(struct inode *inode, loff_t off, bool bsync)
 {
-	int err;
-	loff_t pos;
 	struct exfat_inode_info *ei = EXFAT_I(inode);
-	struct address_space *mapping = inode->i_mapping;
-	const struct address_space_operations *ops = mapping->a_ops;
-
-	pos = ei->valid_size;
-	while (pos < new_valid_size) {
-		u32 len;
-		struct folio *folio;
-		unsigned long off;
-
-		len = PAGE_SIZE - (pos & (PAGE_SIZE - 1));
-		if (pos + len > new_valid_size)
-			len = new_valid_size - pos;
-
-		err = ops->write_begin(NULL, mapping, pos, len, &folio, NULL);
-		if (err)
-			goto out;
-
-		off = offset_in_folio(folio, pos);
-		folio_zero_new_buffers(folio, off, off + len);
+	struct exfat_sb_info *sbi = EXFAT_SB(inode->i_sb);
+	loff_t old_valid_size;
+	int ret = 0;
 
-		err = ops->write_end(NULL, mapping, pos, len, len, folio, NULL);
-		if (err < 0)
-			goto out;
-		pos += len;
+	mutex_lock(&sbi->s_lock);
+	old_valid_size = ei->valid_size;
+	mutex_unlock(&sbi->s_lock);
 
-		balance_dirty_pages_ratelimited(mapping);
-		cond_resched();
+	if (old_valid_size < off) {
+		ret = iomap_zero_range(inode, old_valid_size,
+				off - old_valid_size, NULL,
+				&exfat_write_iomap_ops, &exfat_iomap_folio_ops,
+				NULL);
+		if (!ret && bsync)
+			ret = filemap_write_and_wait_range(inode->i_mapping,
+					old_valid_size, off - 1);
 	}
 
-	return 0;
-
-out:
-	return err;
+	return ret;
 }
 
 static ssize_t exfat_file_write_iter(struct kiocb *iocb, struct iov_iter *iter)
@@ -702,7 +688,7 @@ static ssize_t exfat_file_write_iter(struct kiocb *iocb, struct iov_iter *iter)
 	}
 
 	if (pos > valid_size) {
-		ret = exfat_extend_valid_size(inode, pos);
+		ret = exfat_extend_valid_size(inode, pos, false);
 		if (ret < 0 && ret != -ENOSPC) {
 			exfat_err(inode->i_sb,
 				"write: fail to zero from %llu to %llu(%zd)",
@@ -760,7 +746,7 @@ static vm_fault_t exfat_page_mkwrite(struct vm_fault *vmf)
 	new_valid_size = min(new_valid_size, i_size_read(inode));
 
 	if (ei->valid_size < new_valid_size) {
-		err = exfat_extend_valid_size(inode, new_valid_size);
+		err = exfat_extend_valid_size(inode, new_valid_size, false);
 		if (err < 0) {
 			inode_unlock(inode);
 			return vmf_fs_error(err);
diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c
index beb9ea7cca9f..cc54cce65a31 100644
--- a/fs/exfat/inode.c
+++ b/fs/exfat/inode.c
@@ -123,8 +123,9 @@ void exfat_sync_inode(struct inode *inode)
  * Output: errcode, cluster number
  * *clu = (~0), if it's unable to allocate a new cluster
  */
-static int exfat_map_cluster(struct inode *inode, unsigned int clu_offset,
-		unsigned int *clu, unsigned int *count, int create)
+int exfat_map_cluster(struct inode *inode, unsigned int clu_offset,
+		unsigned int *clu, unsigned int *count, int create,
+		bool *balloc)
 {
 	int ret;
 	unsigned int last_clu;
@@ -235,6 +236,7 @@ static int exfat_map_cluster(struct inode *inode, unsigned int clu_offset,
 			}
 		}
 		*count = 1;
+		*balloc = true;
 	}
 
 	/* hint information */
@@ -258,6 +260,7 @@ static int exfat_get_block(struct inode *inode, sector_t iblock,
 	sector_t phys = 0;
 	sector_t valid_blks;
 	loff_t i_size;
+	bool balloc;
 
 	mutex_lock(&sbi->s_lock);
 	i_size = i_size_read(inode);
@@ -268,7 +271,7 @@ static int exfat_get_block(struct inode *inode, sector_t iblock,
 	/* Is this block already allocated? */
 	count = EXFAT_B_TO_CLU_ROUND_UP(bh_result->b_size, sbi);
 	err = exfat_map_cluster(inode, iblock >> sbi->sect_per_clus_bits,
-			&cluster, &count, create);
+			&cluster, &count, create, &balloc);
 	if (err) {
 		if (err != -ENOSPC)
 			exfat_fs_error_ratelimit(sb,
diff --git a/fs/exfat/iomap.c b/fs/exfat/iomap.c
new file mode 100644
index 000000000000..e4135a13454f
--- /dev/null
+++ b/fs/exfat/iomap.c
@@ -0,0 +1,305 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * iomap callback functions
+ *
+ * Copyright (C) 2026 Namjae Jeon <linkinjeon@kernel.org>
+ */
+
+#include <linux/iomap.h>
+#include <linux/pagemap.h>
+
+#include "exfat_raw.h"
+#include "exfat_fs.h"
+#include "iomap.h"
+
+/*
+ * exfat_iomap_put_folio - Put folio after iomap operation
+ *
+ * Called when iomap is finished with a folio. Zero-fills the portions of
+ * the folio beyond ->valid_size to prevent exposing uninitialized data.
+ */
+static void exfat_iomap_put_folio(struct inode *inode, loff_t pos,
+		unsigned int len, struct folio *folio)
+{
+	struct exfat_inode_info *ei = EXFAT_I(inode);
+	struct exfat_sb_info *sbi = EXFAT_SB(inode->i_sb);
+	unsigned long sector_size = 1UL << inode->i_blkbits;
+	loff_t start_down, end_up, init;
+
+	mutex_lock(&sbi->s_lock);
+	start_down = round_down(pos, sector_size);
+	end_up = (pos + len - 1) | (sector_size - 1);
+	init = ei->valid_size;
+
+	if (init >= start_down && init <= end_up) {
+		if (init < pos) {
+			loff_t offset = offset_in_folio(folio, pos + len);
+
+			if (offset == 0)
+				offset = folio_size(folio);
+			folio_zero_segments(folio,
+					offset_in_folio(folio, init),
+					offset_in_folio(folio, pos),
+					offset,
+					folio_size(folio));
+
+		} else  {
+			loff_t offset = max_t(loff_t, pos + len, init);
+
+			offset = offset_in_folio(folio, offset);
+			if (offset == 0)
+				offset = folio_size(folio);
+			folio_zero_segment(folio,
+					offset,
+					folio_size(folio));
+		}
+	} else if (init <= pos) {
+		loff_t offset = 0, offset2 = offset_in_folio(folio, pos + len);
+
+		if ((init >> folio_shift(folio)) == (pos >> folio_shift(folio)))
+			offset = offset_in_folio(folio, init);
+		if (offset2 == 0)
+			offset2 = folio_size(folio);
+		folio_zero_segments(folio,
+				offset,
+				offset_in_folio(folio, pos),
+				offset2,
+				folio_size(folio));
+	}
+
+	folio_unlock(folio);
+	folio_put(folio);
+	mutex_unlock(&sbi->s_lock);
+}
+
+const struct iomap_write_ops exfat_iomap_folio_ops = {
+	.put_folio = exfat_iomap_put_folio,
+};
+
+/*
+ * exfat_file_write_dio_end_io - Direct I/O write completion handler
+ *
+ * Updates i_size if the write extended the file. Called from the dio layer
+ * after I/O completion.
+ */
+static int exfat_file_write_dio_end_io(struct kiocb *iocb, ssize_t size,
+		int error, unsigned int flags)
+{
+	struct inode *inode = file_inode(iocb->ki_filp);
+
+	if (error)
+		return error;
+
+	if (size && i_size_read(inode) < iocb->ki_pos + size) {
+		i_size_write(inode, iocb->ki_pos + size);
+		mark_inode_dirty(inode);
+	}
+
+	return 0;
+}
+
+const struct iomap_dio_ops exfat_write_dio_ops = {
+	.end_io		= exfat_file_write_dio_end_io,
+};
+
+/*
+ * exfat_read_iomap_begin - Begin mapping for reads
+ *
+ * Maps file range to disk location for read operations (read folio,
+ * readahead, direct I/O read, etc.).
+ *
+ * Returns IOMAP_MAPPED for areas within ->valid_size, and IOMAP_UNWRITTEN
+ * for allocated but uninitialized regions beyond ->valid_size.
+ */
+static int exfat_read_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
+		unsigned int flags, struct iomap *iomap, struct iomap *srcmap)
+{
+	struct super_block *sb = inode->i_sb;
+	struct exfat_sb_info *sbi = EXFAT_SB(sb);
+	struct exfat_inode_info *ei = EXFAT_I(inode);
+	unsigned int cluster, num_clusters = EXFAT_B_TO_CLU_ROUND_UP(length, sbi);
+	loff_t cluster_offset, cluster_length;
+	int err = 0;
+	bool balloc = false;
+
+	mutex_lock(&sbi->s_lock);
+	iomap->bdev = inode->i_sb->s_bdev;
+	iomap->offset = offset;
+
+	err = exfat_map_cluster(inode, EXFAT_B_TO_CLU(offset, sbi),
+			&cluster, &num_clusters, false, &balloc);
+	if (err)
+		goto out;
+
+	cluster_offset = EXFAT_CLU_OFFSET(offset, sbi);
+	cluster_length = EXFAT_CLU_TO_B(num_clusters, sbi);
+	if (length > cluster_length - cluster_offset)
+		iomap->length = cluster_length - cluster_offset;
+	else
+		iomap->length = length;
+
+	iomap->addr = exfat_cluster_to_phys(sbi, cluster) + cluster_offset;
+	if (offset >= ei->valid_size)
+		iomap->type = IOMAP_UNWRITTEN;
+	else
+		iomap->type = IOMAP_MAPPED;
+
+	if (!(flags & IOMAP_ZERO) && iomap->type == IOMAP_MAPPED &&
+	    iomap->offset < ei->valid_size &&
+	    iomap->offset + iomap->length > ei->valid_size) {
+		iomap->length = round_up(ei->valid_size, 1 << inode->i_blkbits) -
+			iomap->offset;
+	}
+
+	iomap->flags |= IOMAP_F_MERGED;
+out:
+	mutex_unlock(&sbi->s_lock);
+	return err;
+}
+
+const struct iomap_ops exfat_read_iomap_ops = {
+	.iomap_begin = exfat_read_iomap_begin,
+};
+
+/*
+ * __exfat_write_iomap_begin - mapping logic for writes
+ *
+ * Maps the requested range and allocates clusters if needed.
+ */
+static int __exfat_write_iomap_begin(struct inode *inode, loff_t offset,
+		loff_t length, struct iomap *iomap)
+{
+	struct super_block *sb = inode->i_sb;
+	struct exfat_sb_info *sbi = EXFAT_SB(sb);
+	unsigned int cluster, num_clusters;
+	loff_t cluster_offset, cluster_length;
+	int err;
+	bool balloc = false;
+
+	num_clusters = max(EXFAT_B_TO_CLU_ROUND_UP(offset + length, sbi) -
+		EXFAT_B_TO_CLU_ROUND_UP(offset, sbi), 1);
+	mutex_lock(&sbi->s_lock);
+	err = exfat_map_cluster(inode, EXFAT_B_TO_CLU(offset, sbi),
+			&cluster, &num_clusters, true, &balloc);
+	if (err)
+		goto out;
+
+	iomap->bdev = inode->i_sb->s_bdev;
+	iomap->offset = offset;
+
+	cluster_offset = EXFAT_CLU_OFFSET(offset, sbi);
+	cluster_length = EXFAT_CLU_TO_B(num_clusters, sbi);
+	if (length > cluster_length - cluster_offset)
+		iomap->length = cluster_length - cluster_offset;
+	else
+		iomap->length = length;
+	iomap->addr = exfat_cluster_to_phys(sbi, cluster) + cluster_offset;
+	iomap->type = IOMAP_MAPPED;
+	if (balloc)
+		iomap->flags = IOMAP_F_NEW;
+out:
+	mutex_unlock(&sbi->s_lock);
+	return err;
+}
+
+/*
+ * exfat_write_iomap_begin - Mapping for write operations
+ *
+ * Extends ->valid_size if the write starts beyond current initialized size.
+ * Then performs actual block mapping (possibly allocating clusters).
+ */
+static int exfat_write_iomap_begin(struct inode *inode, loff_t offset,
+		loff_t length, unsigned int flags, struct iomap *iomap,
+		struct iomap *srcmap)
+{
+	int ret;
+
+	if (EXFAT_I(inode)->valid_size < offset) {
+		ret = exfat_extend_valid_size(inode, offset,
+				flags & IOMAP_DIRECT ? true : false);
+		if (ret)
+			return ret;
+	}
+
+	ret = __exfat_write_iomap_begin(inode, offset, length, iomap);
+
+	if (!(flags & IOMAP_DIRECT) && !ret &&
+	    i_size_read(inode) < iomap->offset + iomap->length) {
+		i_size_write(inode, iomap->offset + iomap->length);
+		mark_inode_dirty(inode);
+	}
+
+	return ret;
+}
+
+/*
+ * exfat_write_iomap_end - Update the state after write
+ *
+ * Extends ->valid_size to cover the newly written range.
+ * Marks the inode dirty if metadata was changed.
+ */
+static int exfat_write_iomap_end(struct inode *inode, loff_t pos, loff_t length,
+		ssize_t written, unsigned int flags, struct iomap *iomap)
+{
+	if (written) {
+		struct exfat_sb_info *sbi = EXFAT_SB(inode->i_sb);
+		struct exfat_inode_info *ei = EXFAT_I(inode);
+		bool dirtied = false;
+		loff_t end = pos + written;
+
+		mutex_lock(&sbi->s_lock);
+		if (ei->valid_size < end) {
+			ei->valid_size = end;
+			dirtied = true;
+		}
+		mutex_unlock(&sbi->s_lock);
+		if (dirtied)
+			mark_inode_dirty(inode);
+	}
+
+	return written;
+}
+
+const struct iomap_ops exfat_write_iomap_ops = {
+	.iomap_begin	= exfat_write_iomap_begin,
+	.iomap_end	= exfat_write_iomap_end,
+};
+
+static int exfat_mkwrite_iomap_begin(struct inode *inode, loff_t offset,
+		loff_t length, unsigned int flags, struct iomap *iomap,
+		struct iomap *srcmap)
+{
+	return __exfat_write_iomap_begin(inode, offset, length, iomap);
+}
+
+const struct iomap_ops exfat_mkwrite_iomap_ops = {
+	.iomap_begin	= exfat_mkwrite_iomap_begin,
+	.iomap_end	= exfat_write_iomap_end,
+};
+
+/*
+ * exfat_writeback_range - Map folio during writeback
+ *
+ * Called for each folio during writeback. If the folio falls outside the
+ * current iomap, remaps by calling read_iomap_begin.
+ */
+static ssize_t exfat_writeback_range(struct iomap_writepage_ctx *wpc,
+		struct folio *folio, u64 offset, unsigned int len, u64 end_pos)
+{
+	if (offset < wpc->iomap.offset ||
+	    offset >= wpc->iomap.offset + wpc->iomap.length) {
+		int error;
+
+		error = exfat_read_iomap_begin(wpc->inode, offset, len,
+				0, &wpc->iomap, NULL);
+		if (error)
+			return error;
+	}
+
+	return iomap_add_to_ioend(wpc, folio, offset, end_pos, len);
+}
+
+const struct iomap_writeback_ops exfat_writeback_ops = {
+	.writeback_range	= exfat_writeback_range,
+	.writeback_submit	= iomap_ioend_writeback_submit,
+};
diff --git a/fs/exfat/iomap.h b/fs/exfat/iomap.h
new file mode 100644
index 000000000000..4abe0dc452ee
--- /dev/null
+++ b/fs/exfat/iomap.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (c) 2026 Namjae Jeon <linkinjeon@kernel.org>
+ */
+
+#ifndef _LINUX_EXFAT_IOMAP_H
+#define _LINUX_EXFAT_IOMAP_H
+
+extern const struct iomap_write_ops exfat_iomap_folio_ops;
+extern const struct iomap_ops exfat_read_iomap_ops;
+extern const struct iomap_ops exfat_write_iomap_ops;
+extern const struct iomap_dio_ops exfat_write_dio_ops;
+extern const struct iomap_writeback_ops exfat_writeback_ops;
+extern const struct iomap_ops exfat_mkwrite_iomap_ops;
+
+#endif /* _LINUX_EXFAT_IOMAP_H */
diff --git a/fs/exfat/super.c b/fs/exfat/super.c
index 83396fd265cd..b69c4b0a926b 100644
--- a/fs/exfat/super.c
+++ b/fs/exfat/super.c
@@ -499,6 +499,7 @@ static int exfat_read_boot_sector(struct super_block *sb)
 	if (p_boot->num_fats == 2)
 		sbi->FAT2_start_sector += sbi->num_FAT_sectors;
 	sbi->data_start_sector = le32_to_cpu(p_boot->clu_offset);
+	sbi->data_start_bytes = sbi->data_start_sector << p_boot->sect_size_bits;
 	sbi->num_sectors = le64_to_cpu(p_boot->vol_length);
 	/* because the cluster index starts with 2 */
 	sbi->num_clusters = le32_to_cpu(p_boot->clu_count) +
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH 2/5] exfat: add iomap direct I/O support
  2026-03-26 11:50 [PATCH 0/5] exfat: convert to iomap Namjae Jeon
  2026-03-26 11:50 ` [PATCH 1/5] exfat: add iomap support Namjae Jeon
@ 2026-03-26 11:50 ` Namjae Jeon
  2026-03-30  6:33   ` Christoph Hellwig
  2026-03-26 11:50 ` [PATCH 3/5] exfat: add iomap buffered " Namjae Jeon
                   ` (3 subsequent siblings)
  5 siblings, 1 reply; 31+ messages in thread
From: Namjae Jeon @ 2026-03-26 11:50 UTC (permalink / raw)
  To: sj1557.seo, yuezhang.mo
  Cc: linux-fsdevel, anmuxixixi, dxdt, chizhiling, hch, Namjae Jeon

Add iomap-based direct I/O support to the exfat filesystem. This replaces
the previous exfat_direct_IO() implementation that used
blockdev_direct_IO() with the modern iomap_dio_rw() interface.

Signed-off-by: Namjae Jeon <linkinjeon@kernel.org>
---
 fs/exfat/file.c  | 30 ++++++++++++++++++++++++++++--
 fs/exfat/inode.c | 45 +--------------------------------------------
 2 files changed, 29 insertions(+), 46 deletions(-)

diff --git a/fs/exfat/file.c b/fs/exfat/file.c
index 756846b774c4..2a9263b4433b 100644
--- a/fs/exfat/file.c
+++ b/fs/exfat/file.c
@@ -698,7 +698,13 @@ static ssize_t exfat_file_write_iter(struct kiocb *iocb, struct iov_iter *iter)
 			goto unlock;
 	}
 
-	ret = __generic_file_write_iter(iocb, iter);
+	if (iocb->ki_flags & IOCB_DIRECT) {
+		ret = iomap_dio_rw(iocb, iter, &exfat_write_iomap_ops,
+				&exfat_write_dio_ops, 0, NULL, 0);
+		if (ret == -ENOTBLK)
+			ret = 0;
+	} else
+		ret = __generic_file_write_iter(iocb, iter);
 	if (ret < 0)
 		goto unlock;
 
@@ -725,11 +731,31 @@ static ssize_t exfat_file_write_iter(struct kiocb *iocb, struct iov_iter *iter)
 static ssize_t exfat_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
 {
 	struct inode *inode = file_inode(iocb->ki_filp);
+	ssize_t ret;
 
 	if (unlikely(exfat_forced_shutdown(inode->i_sb)))
 		return -EIO;
 
-	return generic_file_read_iter(iocb, iter);
+	inode_lock_shared(inode);
+	if (iocb->ki_flags & IOCB_DIRECT) {
+		size_t count = iov_iter_count(iter);
+
+		if ((iocb->ki_pos | count) & (inode->i_sb->s_blocksize - 1)) {
+			ret = -EINVAL;
+			goto inode_unlock;
+		}
+
+		file_accessed(iocb->ki_filp);
+		ret = iomap_dio_rw(iocb, iter, &exfat_read_iomap_ops, NULL, 0,
+				NULL, 0);
+	} else {
+		ret = generic_file_read_iter(iocb, iter);
+	}
+
+inode_unlock:
+	inode_unlock_shared(inode);
+
+	return ret;
 }
 
 static vm_fault_t exfat_page_mkwrite(struct vm_fault *vmf)
diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c
index cc54cce65a31..2985b5d736f6 100644
--- a/fs/exfat/inode.c
+++ b/fs/exfat/inode.c
@@ -478,49 +478,6 @@ static int exfat_write_end(const struct kiocb *iocb,
 	return err;
 }
 
-static ssize_t exfat_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
-{
-	struct address_space *mapping = iocb->ki_filp->f_mapping;
-	struct inode *inode = mapping->host;
-	struct exfat_inode_info *ei = EXFAT_I(inode);
-	loff_t pos = iocb->ki_pos;
-	loff_t size = pos + iov_iter_count(iter);
-	int rw = iov_iter_rw(iter);
-	ssize_t ret;
-
-	/*
-	 * Need to use the DIO_LOCKING for avoiding the race
-	 * condition of exfat_get_block() and ->truncate().
-	 */
-	ret = blockdev_direct_IO(iocb, inode, iter, exfat_get_block);
-	if (ret < 0) {
-		if (rw == WRITE && ret != -EIOCBQUEUED)
-			exfat_write_failed(mapping, size);
-
-		return ret;
-	}
-
-	size = pos + ret;
-
-	if (rw == WRITE) {
-		/*
-		 * If the block had been partially written before this write,
-		 * ->valid_size will not be updated in exfat_get_block(),
-		 * update it here.
-		 */
-		if (ei->valid_size < size) {
-			ei->valid_size = size;
-			mark_inode_dirty(inode);
-		}
-	} else if (pos < ei->valid_size && ei->valid_size < size) {
-		/* zero the unwritten part in the partially written block */
-		iov_iter_revert(iter, size - ei->valid_size);
-		iov_iter_zero(size - ei->valid_size, iter);
-	}
-
-	return ret;
-}
-
 static sector_t exfat_aop_bmap(struct address_space *mapping, sector_t block)
 {
 	sector_t blocknr;
@@ -552,7 +509,7 @@ static const struct address_space_operations exfat_aops = {
 	.writepages	= exfat_writepages,
 	.write_begin	= exfat_write_begin,
 	.write_end	= exfat_write_end,
-	.direct_IO	= exfat_direct_IO,
+	.direct_IO	= noop_direct_IO,
 	.bmap		= exfat_aop_bmap,
 	.migrate_folio	= buffer_migrate_folio,
 };
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH 3/5] exfat: add iomap buffered I/O support
  2026-03-26 11:50 [PATCH 0/5] exfat: convert to iomap Namjae Jeon
  2026-03-26 11:50 ` [PATCH 1/5] exfat: add iomap support Namjae Jeon
  2026-03-26 11:50 ` [PATCH 2/5] exfat: add iomap direct I/O support Namjae Jeon
@ 2026-03-26 11:50 ` Namjae Jeon
  2026-03-30  6:38   ` Christoph Hellwig
  2026-04-06 13:09   ` David Timber
  2026-03-26 11:50 ` [PATCH 4/5] exfat: add support for multi-cluster allocation Namjae Jeon
                   ` (2 subsequent siblings)
  5 siblings, 2 replies; 31+ messages in thread
From: Namjae Jeon @ 2026-03-26 11:50 UTC (permalink / raw)
  To: sj1557.seo, yuezhang.mo
  Cc: linux-fsdevel, anmuxixixi, dxdt, chizhiling, hch, Namjae Jeon

Add full buffered I/O support using the iomap framework to the exfat
filesystem. This replaces the old exfat_get_block(), exfat_write_begin(),
exfat_write_end(), and exfat_block_truncate_page() functions with their
iomap equivalents. Buffered writes now use iomap_file_buffered_write(),
reads use iomap_bio_read_folio() and iomap_bio_readahead(), and writeback
is handled through iomap_writepages().

Signed-off-by: Namjae Jeon <linkinjeon@kernel.org>
---
 fs/exfat/exfat_fs.h |   3 +-
 fs/exfat/file.c     | 160 +++++++++++++++++++--------
 fs/exfat/inode.c    | 261 ++++----------------------------------------
 3 files changed, 142 insertions(+), 282 deletions(-)

diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h
index 860f2e438b63..54da001a8f55 100644
--- a/fs/exfat/exfat_fs.h
+++ b/fs/exfat/exfat_fs.h
@@ -87,7 +87,7 @@ enum {
 /*
  * helpers for cluster size to byte conversion.
  */
-#define EXFAT_CLU_TO_B(b, sbi)		((b) << (sbi)->cluster_size_bits)
+#define EXFAT_CLU_TO_B(b, sbi)		((loff_t)(b) << (sbi)->cluster_size_bits)
 #define EXFAT_B_TO_CLU(b, sbi)		((b) >> (sbi)->cluster_size_bits)
 #define EXFAT_B_TO_CLU_ROUND_UP(b, sbi)	\
 	(((b - 1) >> (sbi)->cluster_size_bits) + 1)
@@ -551,7 +551,6 @@ struct inode *exfat_iget(struct super_block *sb, loff_t i_pos);
 int __exfat_write_inode(struct inode *inode, int sync);
 int exfat_write_inode(struct inode *inode, struct writeback_control *wbc);
 void exfat_evict_inode(struct inode *inode);
-int exfat_block_truncate_page(struct inode *inode, loff_t from);
 int exfat_map_cluster(struct inode *inode, unsigned int clu_offset,
 		unsigned int *clu, unsigned int *count, int create,
 		bool *balloc);
diff --git a/fs/exfat/file.c b/fs/exfat/file.c
index 2a9263b4433b..5f85e2e0a71e 100644
--- a/fs/exfat/file.c
+++ b/fs/exfat/file.c
@@ -337,7 +337,18 @@ int exfat_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
 
 	if ((attr->ia_valid & ATTR_SIZE) &&
 	    attr->ia_size > i_size_read(inode)) {
+		loff_t old_size = i_size_read(inode);
+
 		error = exfat_cont_expand(inode, attr->ia_size);
+		if (!error && attr->ia_size > old_size &&
+		    old_size % PAGE_SIZE != 0) {
+			loff_t len = min_t(loff_t,
+					round_up(old_size, PAGE_SIZE) - old_size,
+					attr->ia_size - old_size);
+			error = iomap_zero_range(inode, old_size, len,
+					NULL, &exfat_read_iomap_ops,
+					&exfat_iomap_folio_ops, NULL);
+		}
 		if (error || attr->ia_valid == ATTR_SIZE)
 			return error;
 		attr->ia_valid &= ~ATTR_SIZE;
@@ -384,7 +395,10 @@ int exfat_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
 	exfat_truncate_inode_atime(inode);
 
 	if (attr->ia_valid & ATTR_SIZE) {
-		error = exfat_block_truncate_page(inode, attr->ia_size);
+		inode_dio_wait(inode);
+		error = iomap_truncate_page(inode, attr->ia_size, NULL,
+				&exfat_read_iomap_ops,
+				&exfat_iomap_folio_ops, NULL);
 		if (error)
 			goto out;
 
@@ -619,10 +633,14 @@ int exfat_file_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
 	if (unlikely(exfat_forced_shutdown(inode->i_sb)))
 		return -EIO;
 
-	err = __generic_file_fsync(filp, start, end, datasync);
+	err = file_write_and_wait_range(filp, start, end);
 	if (err)
 		return err;
 
+	if (!datasync)
+		err = __exfat_write_inode(inode, 1);
+	write_inode_now(inode, !datasync);
+
 	err = sync_blockdev(inode->i_sb->s_bdev);
 	if (err)
 		return err;
@@ -648,12 +666,56 @@ int exfat_extend_valid_size(struct inode *inode, loff_t off, bool bsync)
 				NULL);
 		if (!ret && bsync)
 			ret = filemap_write_and_wait_range(inode->i_mapping,
-					old_valid_size, off - 1);
+							   old_valid_size,
+							   off - 1);
 	}
 
 	return ret;
 }
 
+static ssize_t exfat_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+	ssize_t ret;
+
+	ret = iomap_dio_rw(iocb, from, &exfat_write_iomap_ops,
+			&exfat_write_dio_ops, 0, NULL, 0);
+	if (ret == -ENOTBLK)
+		ret = 0;
+	else if (ret < 0)
+		goto out;
+
+	if (iov_iter_count(from)) {
+		loff_t offset, end;
+		ssize_t written;
+		int ret2;
+
+		offset = iocb->ki_pos;
+		iocb->ki_flags &= ~IOCB_DIRECT;
+		written = iomap_file_buffered_write(iocb, from,
+				&exfat_write_iomap_ops, &exfat_iomap_folio_ops,
+				NULL);
+		if (written < 0) {
+			ret = written;
+			goto out;
+		}
+
+		ret += written;
+		end = iocb->ki_pos + written - 1;
+		ret2 = filemap_write_and_wait_range(iocb->ki_filp->f_mapping,
+				offset, end);
+		if (ret2) {
+			ret = -EIO;
+			goto out;
+		}
+		if (!ret2)
+			invalidate_mapping_pages(iocb->ki_filp->f_mapping,
+					offset >> PAGE_SHIFT,
+					end >> PAGE_SHIFT);
+	}
+out:
+	return ret;
+}
+
 static ssize_t exfat_file_write_iter(struct kiocb *iocb, struct iov_iter *iter)
 {
 	ssize_t ret;
@@ -662,6 +724,7 @@ static ssize_t exfat_file_write_iter(struct kiocb *iocb, struct iov_iter *iter)
 	struct exfat_inode_info *ei = EXFAT_I(inode);
 	loff_t pos = iocb->ki_pos;
 	loff_t valid_size;
+	int err;
 
 	if (unlikely(exfat_forced_shutdown(inode->i_sb)))
 		return -EIO;
@@ -677,34 +740,18 @@ static ssize_t exfat_file_write_iter(struct kiocb *iocb, struct iov_iter *iter)
 	if (ret <= 0)
 		goto unlock;
 
-	if (iocb->ki_flags & IOCB_DIRECT) {
-		unsigned long align = pos | iov_iter_alignment(iter);
-
-		if (!IS_ALIGNED(align, i_blocksize(inode)) &&
-		    !IS_ALIGNED(align, bdev_logical_block_size(inode->i_sb->s_bdev))) {
-			ret = -EINVAL;
-			goto unlock;
-		}
-	}
-
-	if (pos > valid_size) {
-		ret = exfat_extend_valid_size(inode, pos, false);
-		if (ret < 0 && ret != -ENOSPC) {
-			exfat_err(inode->i_sb,
-				"write: fail to zero from %llu to %llu(%zd)",
-				valid_size, pos, ret);
-		}
-		if (ret < 0)
-			goto unlock;
+	err = file_modified(iocb->ki_filp);
+	if (err) {
+		ret = err;
+		goto unlock;
 	}
 
-	if (iocb->ki_flags & IOCB_DIRECT) {
-		ret = iomap_dio_rw(iocb, iter, &exfat_write_iomap_ops,
-				&exfat_write_dio_ops, 0, NULL, 0);
-		if (ret == -ENOTBLK)
-			ret = 0;
-	} else
-		ret = __generic_file_write_iter(iocb, iter);
+	if (iocb->ki_flags & IOCB_DIRECT)
+		ret = exfat_dio_write_iter(iocb, iter);
+	else
+		ret = iomap_file_buffered_write(iocb, iter,
+				&exfat_write_iomap_ops, &exfat_iomap_folio_ops,
+				NULL);
 	if (ret < 0)
 		goto unlock;
 
@@ -737,6 +784,7 @@ static ssize_t exfat_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
 		return -EIO;
 
 	inode_lock_shared(inode);
+
 	if (iocb->ki_flags & IOCB_DIRECT) {
 		size_t count = iov_iter_count(iter);
 
@@ -760,28 +808,22 @@ static ssize_t exfat_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
 
 static vm_fault_t exfat_page_mkwrite(struct vm_fault *vmf)
 {
-	int err;
 	struct inode *inode = file_inode(vmf->vma->vm_file);
-	struct exfat_inode_info *ei = EXFAT_I(inode);
-	loff_t new_valid_size;
+	vm_fault_t ret;
 
 	if (!inode_trylock(inode))
 		return VM_FAULT_RETRY;
 
-	new_valid_size = ((loff_t)vmf->pgoff + 1) << PAGE_SHIFT;
-	new_valid_size = min(new_valid_size, i_size_read(inode));
-
-	if (ei->valid_size < new_valid_size) {
-		err = exfat_extend_valid_size(inode, new_valid_size, false);
-		if (err < 0) {
-			inode_unlock(inode);
-			return vmf_fs_error(err);
-		}
-	}
+	sb_start_pagefault(inode->i_sb);
+	file_update_time(vmf->vma->vm_file);
 
+	filemap_invalidate_lock_shared(inode->i_mapping);
+	ret = iomap_page_mkwrite(vmf, &exfat_mkwrite_iomap_ops, NULL);
+	filemap_invalidate_unlock_shared(inode->i_mapping);
+	sb_end_pagefault(inode->i_sb);
 	inode_unlock(inode);
 
-	return filemap_page_mkwrite(vmf);
+	return ret;
 }
 
 static const struct vm_operations_struct exfat_file_vm_ops = {
@@ -797,6 +839,21 @@ static int exfat_file_mmap_prepare(struct vm_area_desc *desc)
 	if (unlikely(exfat_forced_shutdown(file_inode(desc->file)->i_sb)))
 		return -EIO;
 
+	if (vma_desc_test_flags(desc, VMA_WRITE_BIT)) {
+		struct inode *inode = file_inode(file);
+		loff_t from, to;
+		int err;
+
+		from = ((loff_t)desc->pgoff << PAGE_SHIFT);
+		to = min_t(loff_t, i_size_read(inode),
+				from + vma_desc_size(desc));
+		if (EXFAT_I(inode)->valid_size < to) {
+			err = exfat_extend_valid_size(inode, to, false);
+			if (err)
+				return err;
+		}
+	}
+
 	file_accessed(file);
 	desc->vm_ops = &exfat_file_vm_ops;
 	return 0;
@@ -811,7 +868,24 @@ static ssize_t exfat_splice_read(struct file *in, loff_t *ppos,
 	return filemap_splice_read(in, ppos, pipe, len, flags);
 }
 
+static int exfat_file_open(struct inode *inode, struct file *filp)
+{
+	int err;
+
+	if (unlikely(exfat_forced_shutdown(inode->i_sb)))
+		return -EIO;
+
+	err = generic_file_open(inode, filp);
+	if (err)
+		return err;
+
+	filp->f_mode |= FMODE_CAN_ODIRECT;
+
+	return 0;
+}
+
 const struct file_operations exfat_file_operations = {
+	.open		= exfat_file_open,
 	.llseek		= generic_file_llseek,
 	.read_iter	= exfat_file_read_iter,
 	.write_iter	= exfat_file_write_iter,
diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c
index 2985b5d736f6..c53ae9293cfe 100644
--- a/fs/exfat/inode.c
+++ b/fs/exfat/inode.c
@@ -13,9 +13,11 @@
 #include <linux/uio.h>
 #include <linux/random.h>
 #include <linux/iversion.h>
+#include <linux/iomap.h>
 
 #include "exfat_raw.h"
 #include "exfat_fs.h"
+#include "iomap.h"
 
 int __exfat_write_inode(struct inode *inode, int sync)
 {
@@ -76,15 +78,7 @@ int __exfat_write_inode(struct inode *inode, int sync)
 		on_disk_size = 0;
 
 	ep2->dentry.stream.size = cpu_to_le64(on_disk_size);
-	/*
-	 * mmap write does not use exfat_write_end(), valid_size may be
-	 * extended to the sector-aligned length in exfat_get_block().
-	 * So we need to fixup valid_size to the writren length.
-	 */
-	if (on_disk_size < ei->valid_size)
-		ep2->dentry.stream.valid_size = ep2->dentry.stream.size;
-	else
-		ep2->dentry.stream.valid_size = cpu_to_le64(ei->valid_size);
+	ep2->dentry.stream.valid_size = cpu_to_le64(ei->valid_size);
 
 	if (on_disk_size) {
 		ep2->dentry.stream.flags = ei->flags;
@@ -246,155 +240,10 @@ int exfat_map_cluster(struct inode *inode, unsigned int clu_offset,
 	return 0;
 }
 
-static int exfat_get_block(struct inode *inode, sector_t iblock,
-		struct buffer_head *bh_result, int create)
-{
-	struct exfat_inode_info *ei = EXFAT_I(inode);
-	struct super_block *sb = inode->i_sb;
-	struct exfat_sb_info *sbi = EXFAT_SB(sb);
-	unsigned long max_blocks = bh_result->b_size >> inode->i_blkbits;
-	int err = 0;
-	unsigned long mapped_blocks = 0;
-	unsigned int cluster, sec_offset, count;
-	sector_t last_block;
-	sector_t phys = 0;
-	sector_t valid_blks;
-	loff_t i_size;
-	bool balloc;
-
-	mutex_lock(&sbi->s_lock);
-	i_size = i_size_read(inode);
-	last_block = EXFAT_B_TO_BLK_ROUND_UP(i_size, sb);
-	if (iblock >= last_block && !create)
-		goto done;
-
-	/* Is this block already allocated? */
-	count = EXFAT_B_TO_CLU_ROUND_UP(bh_result->b_size, sbi);
-	err = exfat_map_cluster(inode, iblock >> sbi->sect_per_clus_bits,
-			&cluster, &count, create, &balloc);
-	if (err) {
-		if (err != -ENOSPC)
-			exfat_fs_error_ratelimit(sb,
-				"failed to bmap (inode : %p iblock : %llu, err : %d)",
-				inode, (unsigned long long)iblock, err);
-		goto unlock_ret;
-	}
-
-	if (cluster == EXFAT_EOF_CLUSTER)
-		goto done;
-
-	/* sector offset in cluster */
-	sec_offset = iblock & (sbi->sect_per_clus - 1);
-
-	phys = exfat_cluster_to_sector(sbi, cluster) + sec_offset;
-	mapped_blocks = ((unsigned long)count << sbi->sect_per_clus_bits) - sec_offset;
-	max_blocks = min(mapped_blocks, max_blocks);
-
-	map_bh(bh_result, sb, phys);
-	if (buffer_delay(bh_result))
-		clear_buffer_delay(bh_result);
-
-	/*
-	 * In most cases, we just need to set bh_result to mapped, unmapped
-	 * or new status as follows:
-	 *  1. i_size == valid_size
-	 *  2. write case (create == 1)
-	 *  3. direct_read (!bh_result->b_folio)
-	 *     -> the unwritten part will be zeroed in exfat_direct_IO()
-	 *
-	 * Otherwise, in the case of buffered read, it is necessary to take
-	 * care the last nested block if valid_size is not equal to i_size.
-	 */
-	if (i_size == ei->valid_size || create || !bh_result->b_folio)
-		valid_blks = EXFAT_B_TO_BLK_ROUND_UP(ei->valid_size, sb);
-	else
-		valid_blks = EXFAT_B_TO_BLK(ei->valid_size, sb);
-
-	/* The range has been fully written, map it */
-	if (iblock + max_blocks < valid_blks)
-		goto done;
-
-	/* The range has been partially written, map the written part */
-	if (iblock < valid_blks) {
-		max_blocks = valid_blks - iblock;
-		goto done;
-	}
-
-	/* The area has not been written, map and mark as new for create case */
-	if (create) {
-		set_buffer_new(bh_result);
-		ei->valid_size = EXFAT_BLK_TO_B(iblock + max_blocks, sb);
-		mark_inode_dirty(inode);
-		goto done;
-	}
-
-	/*
-	 * The area has just one block partially written.
-	 * In that case, we should read and fill the unwritten part of
-	 * a block with zero.
-	 */
-	if (bh_result->b_folio && iblock == valid_blks &&
-	    (ei->valid_size & (sb->s_blocksize - 1))) {
-		loff_t size, pos;
-		void *addr;
-
-		max_blocks = 1;
-
-		/*
-		 * No buffer_head is allocated.
-		 * (1) bmap: It's enough to set blocknr without I/O.
-		 * (2) read: The unwritten part should be filled with zero.
-		 *           If a folio does not have any buffers,
-		 *           let's returns -EAGAIN to fallback to
-		 *           block_read_full_folio() for per-bh IO.
-		 */
-		if (!folio_buffers(bh_result->b_folio)) {
-			err = -EAGAIN;
-			goto done;
-		}
-
-		pos = EXFAT_BLK_TO_B(iblock, sb);
-		size = ei->valid_size - pos;
-		addr = folio_address(bh_result->b_folio) +
-			offset_in_folio(bh_result->b_folio, pos);
-
-		/* Check if bh->b_data points to proper addr in folio */
-		if (bh_result->b_data != addr) {
-			exfat_fs_error_ratelimit(sb,
-					"b_data(%p) != folio_addr(%p)",
-					bh_result->b_data, addr);
-			err = -EINVAL;
-			goto done;
-		}
-
-		/* Read a block */
-		err = bh_read(bh_result, 0);
-		if (err < 0)
-			goto done;
-
-		/* Zero unwritten part of a block */
-		memset(bh_result->b_data + size, 0, bh_result->b_size - size);
-		err = 0;
-		goto done;
-	}
-
-	/*
-	 * The area has not been written, clear mapped for read/bmap cases.
-	 * If so, it will be filled with zero without reading from disk.
-	 */
-	clear_buffer_mapped(bh_result);
-done:
-	bh_result->b_size = EXFAT_BLK_TO_B(max_blocks, sb);
-	if (err < 0)
-		clear_buffer_mapped(bh_result);
-unlock_ret:
-	mutex_unlock(&sbi->s_lock);
-	return err;
-}
-
 static int exfat_read_folio(struct file *file, struct folio *folio)
 {
-	return mpage_read_folio(folio, exfat_get_block);
+	iomap_bio_read_folio(folio, &exfat_read_iomap_ops);
+	return 0;
 }
 
 static void exfat_readahead(struct readahead_control *rac)
@@ -410,108 +259,46 @@ static void exfat_readahead(struct readahead_control *rac)
 	    ei->valid_size < pos + readahead_length(rac))
 		return;
 
-	mpage_readahead(rac, exfat_get_block);
+	iomap_bio_readahead(rac, &exfat_read_iomap_ops);
 }
 
 static int exfat_writepages(struct address_space *mapping,
 		struct writeback_control *wbc)
 {
-	if (unlikely(exfat_forced_shutdown(mapping->host->i_sb)))
-		return -EIO;
-
-	return mpage_writepages(mapping, wbc, exfat_get_block);
-}
-
-static void exfat_write_failed(struct address_space *mapping, loff_t to)
-{
-	struct inode *inode = mapping->host;
-
-	if (to > i_size_read(inode)) {
-		truncate_pagecache(inode, i_size_read(inode));
-		inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
-		exfat_truncate(inode);
-	}
-}
-
-static int exfat_write_begin(const struct kiocb *iocb,
-			     struct address_space *mapping,
-			     loff_t pos, unsigned int len,
-			     struct folio **foliop, void **fsdata)
-{
-	int ret;
+	struct iomap_writepage_ctx wpc = {
+		.inode		= mapping->host,
+		.wbc		= wbc,
+		.ops		= &exfat_writeback_ops,
+	};
 
 	if (unlikely(exfat_forced_shutdown(mapping->host->i_sb)))
 		return -EIO;
 
-	ret = block_write_begin(mapping, pos, len, foliop, exfat_get_block);
-
-	if (ret < 0)
-		exfat_write_failed(mapping, pos+len);
-
-	return ret;
-}
-
-static int exfat_write_end(const struct kiocb *iocb,
-			   struct address_space *mapping,
-			   loff_t pos, unsigned int len, unsigned int copied,
-			   struct folio *folio, void *fsdata)
-{
-	struct inode *inode = mapping->host;
-	struct exfat_inode_info *ei = EXFAT_I(inode);
-	int err;
-
-	err = generic_write_end(iocb, mapping, pos, len, copied, folio, fsdata);
-	if (err < len)
-		exfat_write_failed(mapping, pos+len);
-
-	if (!(err < 0) && pos + err > ei->valid_size) {
-		ei->valid_size = pos + err;
-		mark_inode_dirty(inode);
-	}
-
-	if (!(err < 0) && !(ei->attr & EXFAT_ATTR_ARCHIVE)) {
-		inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
-		ei->attr |= EXFAT_ATTR_ARCHIVE;
-		mark_inode_dirty(inode);
-	}
-
-	return err;
+	return iomap_writepages(&wpc);
 }
 
 static sector_t exfat_aop_bmap(struct address_space *mapping, sector_t block)
 {
 	sector_t blocknr;
 
-	/* exfat_get_cluster() assumes the requested blocknr isn't truncated. */
 	down_read(&EXFAT_I(mapping->host)->truncate_lock);
-	blocknr = generic_block_bmap(mapping, block, exfat_get_block);
+	blocknr = iomap_bmap(mapping, block, &exfat_read_iomap_ops);
 	up_read(&EXFAT_I(mapping->host)->truncate_lock);
 	return blocknr;
 }
 
-/*
- * exfat_block_truncate_page() zeroes out a mapping from file offset `from'
- * up to the end of the block which corresponds to `from'.
- * This is required during truncate to physically zeroout the tail end
- * of that block so it doesn't yield old data if the file is later grown.
- * Also, avoid causing failure from fsx for cases of "data past EOF"
- */
-int exfat_block_truncate_page(struct inode *inode, loff_t from)
-{
-	return block_truncate_page(inode->i_mapping, from, exfat_get_block);
-}
-
 static const struct address_space_operations exfat_aops = {
-	.dirty_folio	= block_dirty_folio,
-	.invalidate_folio = block_invalidate_folio,
-	.read_folio	= exfat_read_folio,
-	.readahead	= exfat_readahead,
-	.writepages	= exfat_writepages,
-	.write_begin	= exfat_write_begin,
-	.write_end	= exfat_write_end,
-	.direct_IO	= noop_direct_IO,
-	.bmap		= exfat_aop_bmap,
-	.migrate_folio	= buffer_migrate_folio,
+	.read_folio		= exfat_read_folio,
+	.readahead		= exfat_readahead,
+	.writepages		= exfat_writepages,
+	.direct_IO		= noop_direct_IO,
+	.dirty_folio		= iomap_dirty_folio,
+	.bmap			= exfat_aop_bmap,
+	.migrate_folio		= filemap_migrate_folio,
+	.is_partially_uptodate	= iomap_is_partially_uptodate,
+	.error_remove_folio	= generic_error_remove_folio,
+	.release_folio		= iomap_release_folio,
+	.invalidate_folio	= iomap_invalidate_folio,
 };
 
 static inline unsigned long exfat_hash(loff_t i_pos)
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH 4/5] exfat: add support for multi-cluster allocation
  2026-03-26 11:50 [PATCH 0/5] exfat: convert to iomap Namjae Jeon
                   ` (2 preceding siblings ...)
  2026-03-26 11:50 ` [PATCH 3/5] exfat: add iomap buffered " Namjae Jeon
@ 2026-03-26 11:50 ` Namjae Jeon
  2026-03-26 11:50 ` [PATCH 5/5] exfat: add support for SEEK_HOLE and SEEK_DATA in llseek Namjae Jeon
  2026-03-27  6:33 ` [PATCH 0/5] exfat: convert to iomap Christoph Hellwig
  5 siblings, 0 replies; 31+ messages in thread
From: Namjae Jeon @ 2026-03-26 11:50 UTC (permalink / raw)
  To: sj1557.seo, yuezhang.mo
  Cc: linux-fsdevel, anmuxixixi, dxdt, chizhiling, hch, Namjae Jeon

Currently exfat_map_cluster() allocates and returns only one cluster
at a time even when more clusters are needed. This causes multiple
FAT walks and repeated allocation calls during large sequential writes
or when using iomap for writes. Change exfat_map_cluster() and
exfat_alloc_cluster() so that they can allocate multiple contiguous
clusters.

Signed-off-by: Namjae Jeon <linkinjeon@kernel.org>
---
 fs/exfat/dir.c      |  2 +-
 fs/exfat/exfat_fs.h |  2 +-
 fs/exfat/fatent.c   | 26 ++++++++++++++++----------
 fs/exfat/file.c     |  2 +-
 fs/exfat/inode.c    | 32 +++++---------------------------
 fs/exfat/namei.c    |  2 +-
 6 files changed, 25 insertions(+), 41 deletions(-)

diff --git a/fs/exfat/dir.c b/fs/exfat/dir.c
index a2c2b998808c..857b22e431cd 100644
--- a/fs/exfat/dir.c
+++ b/fs/exfat/dir.c
@@ -308,7 +308,7 @@ int exfat_alloc_new_dir(struct inode *inode, struct exfat_chain *clu)
 
 	exfat_chain_set(clu, EXFAT_EOF_CLUSTER, 0, ALLOC_NO_FAT_CHAIN);
 
-	ret = exfat_alloc_cluster(inode, 1, clu, IS_DIRSYNC(inode));
+	ret = exfat_alloc_cluster(inode, 1, clu, IS_DIRSYNC(inode), false);
 	if (ret)
 		return ret;
 
diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h
index 54da001a8f55..5992755b5ab3 100644
--- a/fs/exfat/exfat_fs.h
+++ b/fs/exfat/exfat_fs.h
@@ -448,7 +448,7 @@ int exfat_clear_volume_dirty(struct super_block *sb);
 #define exfat_get_next_cluster(sb, pclu) exfat_ent_get(sb, *(pclu), pclu, NULL)
 
 int exfat_alloc_cluster(struct inode *inode, unsigned int num_alloc,
-		struct exfat_chain *p_chain, bool sync_bmap);
+		struct exfat_chain *p_chain, bool sync_bmap, bool contig);
 int exfat_free_cluster(struct inode *inode, struct exfat_chain *p_chain);
 int exfat_ent_get(struct super_block *sb, unsigned int loc,
 		unsigned int *content, struct buffer_head **last);
diff --git a/fs/exfat/fatent.c b/fs/exfat/fatent.c
index f2e5d5dde393..758c2d971e73 100644
--- a/fs/exfat/fatent.c
+++ b/fs/exfat/fatent.c
@@ -424,7 +424,7 @@ int exfat_zeroed_cluster(struct inode *dir, unsigned int clu)
 }
 
 int exfat_alloc_cluster(struct inode *inode, unsigned int num_alloc,
-		struct exfat_chain *p_chain, bool sync_bmap)
+		struct exfat_chain *p_chain, bool sync_bmap, bool contig)
 {
 	int ret = -ENOSPC;
 	unsigned int total_cnt;
@@ -475,14 +475,20 @@ int exfat_alloc_cluster(struct inode *inode, unsigned int num_alloc,
 
 	while ((new_clu = exfat_find_free_bitmap(sb, hint_clu)) !=
 	       EXFAT_EOF_CLUSTER) {
-		if (new_clu != hint_clu &&
-		    p_chain->flags == ALLOC_NO_FAT_CHAIN) {
-			if (exfat_chain_cont_cluster(sb, p_chain->dir,
-					p_chain->size)) {
-				ret = -EIO;
-				goto free_cluster;
+		if (new_clu != hint_clu) {
+			if (p_chain->flags == ALLOC_NO_FAT_CHAIN) {
+				if (exfat_chain_cont_cluster(sb, p_chain->dir,
+							     p_chain->size)) {
+					ret = -EIO;
+					goto free_cluster;
+				}
+				p_chain->flags = ALLOC_FAT_CHAIN;
+			}
+
+			if (contig && p_chain->size > 0) {
+				hint_clu--;
+				goto done;
 			}
-			p_chain->flags = ALLOC_FAT_CHAIN;
 		}
 
 		/* update allocation bitmap */
@@ -512,9 +518,9 @@ int exfat_alloc_cluster(struct inode *inode, unsigned int num_alloc,
 		last_clu = new_clu;
 
 		if (p_chain->size == num_alloc) {
+done:
 			sbi->clu_srch_ptr = hint_clu;
-			sbi->used_clusters += num_alloc;
-
+			sbi->used_clusters += p_chain->size;
 			mutex_unlock(&sbi->bitmap_lock);
 			return 0;
 		}
diff --git a/fs/exfat/file.c b/fs/exfat/file.c
index 5f85e2e0a71e..d7857aec072b 100644
--- a/fs/exfat/file.c
+++ b/fs/exfat/file.c
@@ -57,7 +57,7 @@ static int exfat_cont_expand(struct inode *inode, loff_t size)
 	clu.flags = ei->flags;
 
 	ret = exfat_alloc_cluster(inode, new_num_clusters - num_clusters,
-			&clu, inode_needs_sync(inode));
+			&clu, inode_needs_sync(inode), false);
 	if (ret)
 		return ret;
 
diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c
index c53ae9293cfe..46dc98ef1afb 100644
--- a/fs/exfat/inode.c
+++ b/fs/exfat/inode.c
@@ -128,14 +128,10 @@ int exfat_map_cluster(struct inode *inode, unsigned int clu_offset,
 	struct exfat_sb_info *sbi = EXFAT_SB(sb);
 	struct exfat_inode_info *ei = EXFAT_I(inode);
 	unsigned int local_clu_offset = clu_offset;
-	unsigned int num_to_be_allocated = 0, num_clusters;
+	unsigned int num_to_be_allocated = *count, num_clusters;
 
 	num_clusters = EXFAT_B_TO_CLU(exfat_ondisk_size(inode), sbi);
-
-	if (clu_offset >= num_clusters)
-		num_to_be_allocated = clu_offset - num_clusters + 1;
-
-	if (!create && (num_to_be_allocated > 0)) {
+	if (!create && clu_offset >= num_clusters) {
 		*clu = EXFAT_EOF_CLUSTER;
 		return 0;
 	}
@@ -176,7 +172,7 @@ int exfat_map_cluster(struct inode *inode, unsigned int clu_offset,
 		}
 
 		ret = exfat_alloc_cluster(inode, num_to_be_allocated, &new_clu,
-				inode_needs_sync(inode));
+				inode_needs_sync(inode), true);
 		if (ret)
 			return ret;
 
@@ -210,26 +206,8 @@ int exfat_map_cluster(struct inode *inode, unsigned int clu_offset,
 		}
 
 		*clu = new_clu.dir;
-
-		inode->i_blocks += EXFAT_CLU_TO_B(num_to_be_allocated, sbi) >> 9;
-
-		/*
-		 * Move *clu pointer along FAT chains (hole care) because the
-		 * caller of this function expect *clu to be the last cluster.
-		 * This only works when num_to_be_allocated >= 2,
-		 * *clu = (the first cluster of the allocated chain) =>
-		 * (the last cluster of ...)
-		 */
-		if (ei->flags == ALLOC_NO_FAT_CHAIN) {
-			*clu += num_to_be_allocated - 1;
-		} else {
-			while (num_to_be_allocated > 1) {
-				if (exfat_get_next_cluster(sb, clu))
-					return -EIO;
-				num_to_be_allocated--;
-			}
-		}
-		*count = 1;
+		*count = new_clu.size;
+		inode->i_blocks += EXFAT_CLU_TO_B(new_clu.size, sbi) >> 9;
 		*balloc = true;
 	}
 
diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c
index d0ea1ff81c09..6261cd994d1d 100644
--- a/fs/exfat/namei.c
+++ b/fs/exfat/namei.c
@@ -347,7 +347,7 @@ int exfat_find_empty_entry(struct inode *inode,
 		}
 
 		/* allocate a cluster */
-		ret = exfat_alloc_cluster(inode, 1, &clu, IS_DIRSYNC(inode));
+		ret = exfat_alloc_cluster(inode, 1, &clu, IS_DIRSYNC(inode), false);
 		if (ret)
 			return ret;
 
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH 5/5] exfat: add support for SEEK_HOLE and SEEK_DATA in llseek
  2026-03-26 11:50 [PATCH 0/5] exfat: convert to iomap Namjae Jeon
                   ` (3 preceding siblings ...)
  2026-03-26 11:50 ` [PATCH 4/5] exfat: add support for multi-cluster allocation Namjae Jeon
@ 2026-03-26 11:50 ` Namjae Jeon
  2026-03-30  6:39   ` Christoph Hellwig
  2026-03-27  6:33 ` [PATCH 0/5] exfat: convert to iomap Christoph Hellwig
  5 siblings, 1 reply; 31+ messages in thread
From: Namjae Jeon @ 2026-03-26 11:50 UTC (permalink / raw)
  To: sj1557.seo, yuezhang.mo
  Cc: linux-fsdevel, anmuxixixi, dxdt, chizhiling, hch, Namjae Jeon

Add exfat_file_llseek(), which implements the SEEK_HOLE and SEEK_DATA
whence values via the iomap layer (iomap_seek_hole() and
iomap_seek_data()) using the existing exfat_read_iomap_ops.
Unlike many other modern filesystems, exFAT does not support sparse files
with unallocated clusters (holes). In exFAT, clusters are always fully
allocated once they are written or preallocated. In addition, exFAT
maintains a separate "Valid Data Length" (valid_size) that is distinct
from the logical file size. This affects how holes are reported during
seeking. In exfat_read_iomap_begin(), ranges where the offset is greater
than or equal to ei->valid_size are mapped as IOMAP_UNWRITTEN, while ranges
below valid_size are mapped as IOMAP_MAPPED. This mapping behavior is used
by the iomap seek functions to correctly report SEEK_HOLE and SEEK_DATA
positions.

  - Ranges with offset >= ei->valid_size are mapped as IOMAP_UNWRITTEN.
  - Ranges with offset < ei->valid_size are mapped as IOMAP_MAPPED.

Signed-off-by: Namjae Jeon <linkinjeon@kernel.org>
---
 fs/exfat/file.c | 27 ++++++++++++++++++++++++++-
 1 file changed, 26 insertions(+), 1 deletion(-)

diff --git a/fs/exfat/file.c b/fs/exfat/file.c
index d7857aec072b..9a30c32b3a05 100644
--- a/fs/exfat/file.c
+++ b/fs/exfat/file.c
@@ -884,9 +884,34 @@ static int exfat_file_open(struct inode *inode, struct file *filp)
 	return 0;
 }
 
+static loff_t exfat_file_llseek(struct file *file, loff_t offset, int whence)
+{
+	struct inode *inode = file->f_mapping->host;
+
+	switch (whence) {
+	case SEEK_HOLE:
+		inode_lock_shared(inode);
+		offset = iomap_seek_hole(inode, offset, &exfat_read_iomap_ops);
+		inode_unlock_shared(inode);
+		break;
+	case SEEK_DATA:
+		inode_lock_shared(inode);
+		offset = iomap_seek_data(inode, offset, &exfat_read_iomap_ops);
+		inode_unlock_shared(inode);
+		break;
+	default:
+		return generic_file_llseek_size(file, offset, whence,
+						inode->i_sb->s_maxbytes,
+						i_size_read(inode));
+	}
+	if (offset < 0)
+		return offset;
+	return vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
+}
+
 const struct file_operations exfat_file_operations = {
 	.open		= exfat_file_open,
-	.llseek		= generic_file_llseek,
+	.llseek		= exfat_file_llseek,
 	.read_iter	= exfat_file_read_iter,
 	.write_iter	= exfat_file_write_iter,
 	.unlocked_ioctl = exfat_ioctl,
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 31+ messages in thread

* Re: [PATCH 0/5] exfat: convert to iomap
  2026-03-26 11:50 [PATCH 0/5] exfat: convert to iomap Namjae Jeon
                   ` (4 preceding siblings ...)
  2026-03-26 11:50 ` [PATCH 5/5] exfat: add support for SEEK_HOLE and SEEK_DATA in llseek Namjae Jeon
@ 2026-03-27  6:33 ` Christoph Hellwig
  2026-03-27  6:46   ` Namjae Jeon
  5 siblings, 1 reply; 31+ messages in thread
From: Christoph Hellwig @ 2026-03-27  6:33 UTC (permalink / raw)
  To: Namjae Jeon
  Cc: sj1557.seo, yuezhang.mo, linux-fsdevel, anmuxixixi, dxdt,
	chizhiling, hch

On Thu, Mar 26, 2026 at 08:50:40PM +0900, Namjae Jeon wrote:
> This patch series converts the exfat filesystem to the iomap framework for
> buffered I/O, direct I/O, and llseek (SEEK_HOLE/SEEK_DATA) support.

What tree is this against?  Patch fails to apply to current mainline.
Or if you have a link to a git tree, that could be useful as well.


^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 0/5] exfat: convert to iomap
  2026-03-27  6:33 ` [PATCH 0/5] exfat: convert to iomap Christoph Hellwig
@ 2026-03-27  6:46   ` Namjae Jeon
  0 siblings, 0 replies; 31+ messages in thread
From: Namjae Jeon @ 2026-03-27  6:46 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: sj1557.seo, yuezhang.mo, linux-fsdevel, anmuxixixi, dxdt,
	chizhiling

On Fri, Mar 27, 2026 at 3:33 PM Christoph Hellwig <hch@lst.de> wrote:
>
> On Thu, Mar 26, 2026 at 08:50:40PM +0900, Namjae Jeon wrote:
> > This patch series converts the exfat filesystem to the iomap framework for
> > buffered I/O, direct I/O, and llseek (SEEK_HOLE/SEEK_DATA) support.
>
> What tree is this against?  Patch fails to apply to current mainline.
This patch series was created against the exfat #dev branch.
> Or if you have a link to a git tree, that could be useful as well.
You can find the git tree here:
    git://git.kernel.org/pub/scm/linux/kernel/git/linkinjeon/exfat.git dev

Thanks!

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 1/5] exfat: add iomap support
  2026-03-26 11:50 ` [PATCH 1/5] exfat: add iomap support Namjae Jeon
@ 2026-03-30  2:45   ` Chi Zhiling
  2026-03-31  5:29     ` Namjae Jeon
  2026-03-30  6:30   ` Christoph Hellwig
                     ` (3 subsequent siblings)
  4 siblings, 1 reply; 31+ messages in thread
From: Chi Zhiling @ 2026-03-30  2:45 UTC (permalink / raw)
  To: Namjae Jeon, sj1557.seo, yuezhang.mo
  Cc: linux-fsdevel, anmuxixixi, dxdt, chizhiling, hch

Hi, Namjae

Could you CC my personal email "chizhiling@163.com" when sending v2?
My work email can’t receive messages immediately.


On 3/26/26 7:50 PM, Namjae Jeon wrote:
> Add iomap support to the exfat filesystem. This patch introduces the
> necessary iomap infrastructure by adding a new iomap.c file and related
> iomap operations. The main change is converting exfat_extend_valid_size()
> to use iomap_zero_range() instead of the legacy write_begin/write_end path.
> To support this, exfat_map_cluster() is extended to return whether a new
> cluster was allocated via a balloc flag, and a new helper function
> exfat_cluster_to_phys() is added. Also, data_start_bytes is added to
> struct exfat_sb_info for easier conversion from cluster number to physical
> byte offset.
> 
> Signed-off-by: Namjae Jeon <linkinjeon@kernel.org>
> ---
>   fs/exfat/Makefile   |   2 +-
>   fs/exfat/exfat_fs.h |  12 ++
>   fs/exfat/file.c     |  54 +++-----
>   fs/exfat/inode.c    |   9 +-
>   fs/exfat/iomap.c    | 305 ++++++++++++++++++++++++++++++++++++++++++++
>   fs/exfat/iomap.h    |  16 +++
>   fs/exfat/super.c    |   1 +
>   7 files changed, 361 insertions(+), 38 deletions(-)
>   create mode 100644 fs/exfat/iomap.c
>   create mode 100644 fs/exfat/iomap.h
> 
> diff --git a/fs/exfat/Makefile b/fs/exfat/Makefile
> index ed51926a4971..e06bf85870ae 100644
> --- a/fs/exfat/Makefile
> +++ b/fs/exfat/Makefile
> @@ -5,4 +5,4 @@
>   obj-$(CONFIG_EXFAT_FS) += exfat.o
>   
>   exfat-y	:= inode.o namei.o dir.o super.o fatent.o cache.o nls.o misc.o \
> -	   file.o balloc.o
> +	   file.o balloc.o iomap.o
> diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h
> index 9fed9fb33cae..860f2e438b63 100644
> --- a/fs/exfat/exfat_fs.h
> +++ b/fs/exfat/exfat_fs.h
> @@ -259,6 +259,7 @@ struct exfat_sb_info {
>   	unsigned long long FAT1_start_sector; /* FAT1 start sector */
>   	unsigned long long FAT2_start_sector; /* FAT2 start sector */
>   	unsigned long long data_start_sector; /* data area start sector */
> +	unsigned long long data_start_bytes;
>   	unsigned int num_FAT_sectors; /* num of FAT sectors */
>   	unsigned int root_dir; /* root dir cluster */
>   	unsigned int dentries_per_clu; /* num of dentries per cluster */
> @@ -432,6 +433,13 @@ static inline loff_t exfat_ondisk_size(const struct inode *inode)
>   	return ((loff_t)inode->i_blocks) << 9;
>   }
>   
> +static inline loff_t exfat_cluster_to_phys(struct exfat_sb_info *sbi,
> +		unsigned int clus)
> +{
> +	return ((loff_t)(clus - EXFAT_RESERVED_CLUSTERS) << sbi->cluster_size_bits) +
> +		sbi->data_start_bytes;
> +}
> +
>   /* super.c */
>   int exfat_set_volume_dirty(struct super_block *sb);
>   int exfat_clear_volume_dirty(struct super_block *sb);
> @@ -480,6 +488,7 @@ long exfat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
>   long exfat_compat_ioctl(struct file *filp, unsigned int cmd,
>   				unsigned long arg);
>   int exfat_force_shutdown(struct super_block *sb, u32 flags);
> +int exfat_extend_valid_size(struct inode *inode, loff_t off, bool bsync);
>   
>   /* namei.c */
>   extern const struct dentry_operations exfat_dentry_ops;
> @@ -543,6 +552,9 @@ int __exfat_write_inode(struct inode *inode, int sync);
>   int exfat_write_inode(struct inode *inode, struct writeback_control *wbc);
>   void exfat_evict_inode(struct inode *inode);
>   int exfat_block_truncate_page(struct inode *inode, loff_t from);
> +int exfat_map_cluster(struct inode *inode, unsigned int clu_offset,
> +		unsigned int *clu, unsigned int *count, int create,
> +		bool *balloc);
>   
>   /* exfat/nls.c */
>   unsigned short exfat_toupper(struct super_block *sb, unsigned short a);
> diff --git a/fs/exfat/file.c b/fs/exfat/file.c
> index 2daf0dbabb24..756846b774c4 100644
> --- a/fs/exfat/file.c
> +++ b/fs/exfat/file.c
> @@ -14,9 +14,11 @@
>   #include <linux/writeback.h>
>   #include <linux/filelock.h>
>   #include <linux/falloc.h>
> +#include <linux/iomap.h>
>   
>   #include "exfat_raw.h"
>   #include "exfat_fs.h"
> +#include "iomap.h"
>   
>   static int exfat_cont_expand(struct inode *inode, loff_t size)
>   {
> @@ -628,44 +630,28 @@ int exfat_file_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
>   	return blkdev_issue_flush(inode->i_sb->s_bdev);
>   }
>   
> -static int exfat_extend_valid_size(struct inode *inode, loff_t new_valid_size)
> +int exfat_extend_valid_size(struct inode *inode, loff_t off, bool bsync)
>   {
> -	int err;
> -	loff_t pos;
>   	struct exfat_inode_info *ei = EXFAT_I(inode);
> -	struct address_space *mapping = inode->i_mapping;
> -	const struct address_space_operations *ops = mapping->a_ops;
> -
> -	pos = ei->valid_size;
> -	while (pos < new_valid_size) {
> -		u32 len;
> -		struct folio *folio;
> -		unsigned long off;
> -
> -		len = PAGE_SIZE - (pos & (PAGE_SIZE - 1));
> -		if (pos + len > new_valid_size)
> -			len = new_valid_size - pos;
> -
> -		err = ops->write_begin(NULL, mapping, pos, len, &folio, NULL);
> -		if (err)
> -			goto out;
> -
> -		off = offset_in_folio(folio, pos);
> -		folio_zero_new_buffers(folio, off, off + len);
> +	struct exfat_sb_info *sbi = EXFAT_SB(inode->i_sb);
> +	loff_t old_valid_size;
> +	int ret = 0;
>   
> -		err = ops->write_end(NULL, mapping, pos, len, len, folio, NULL);
> -		if (err < 0)
> -			goto out;
> -		pos += len;
> +	mutex_lock(&sbi->s_lock);
> +	old_valid_size = ei->valid_size;
> +	mutex_unlock(&sbi->s_lock);

I’ve recently been refactoring around this lock :)

>   
> -		balance_dirty_pages_ratelimited(mapping);
> -		cond_resched();
> +	if (old_valid_size < off) {
> +		ret = iomap_zero_range(inode, old_valid_size,
> +				off - old_valid_size, NULL,
> +				&exfat_write_iomap_ops, &exfat_iomap_folio_ops,
> +				NULL);
> +		if (!ret && bsync)
> +			ret = filemap_write_and_wait_range(inode->i_mapping,
> +					old_valid_size, off - 1);
>   	}
>   
> -	return 0;
> -
> -out:
> -	return err;
> +	return ret;
>   }
>   
>   static ssize_t exfat_file_write_iter(struct kiocb *iocb, struct iov_iter *iter)
> @@ -702,7 +688,7 @@ static ssize_t exfat_file_write_iter(struct kiocb *iocb, struct iov_iter *iter)
>   	}
>   
>   	if (pos > valid_size) {
> -		ret = exfat_extend_valid_size(inode, pos);
> +		ret = exfat_extend_valid_size(inode, pos, false);
>   		if (ret < 0 && ret != -ENOSPC) {
>   			exfat_err(inode->i_sb,
>   				"write: fail to zero from %llu to %llu(%zd)",
> @@ -760,7 +746,7 @@ static vm_fault_t exfat_page_mkwrite(struct vm_fault *vmf)
>   	new_valid_size = min(new_valid_size, i_size_read(inode));
>   
>   	if (ei->valid_size < new_valid_size) {
> -		err = exfat_extend_valid_size(inode, new_valid_size);
> +		err = exfat_extend_valid_size(inode, new_valid_size, false);
>   		if (err < 0) {
>   			inode_unlock(inode);
>   			return vmf_fs_error(err);
> diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c
> index beb9ea7cca9f..cc54cce65a31 100644
> --- a/fs/exfat/inode.c
> +++ b/fs/exfat/inode.c
> @@ -123,8 +123,9 @@ void exfat_sync_inode(struct inode *inode)
>    * Output: errcode, cluster number
>    * *clu = (~0), if it's unable to allocate a new cluster
>    */
> -static int exfat_map_cluster(struct inode *inode, unsigned int clu_offset,
> -		unsigned int *clu, unsigned int *count, int create)
> +int exfat_map_cluster(struct inode *inode, unsigned int clu_offset,
> +		unsigned int *clu, unsigned int *count, int create,
> +		bool *balloc)
>   {
>   	int ret;
>   	unsigned int last_clu;
> @@ -235,6 +236,7 @@ static int exfat_map_cluster(struct inode *inode, unsigned int clu_offset,
>   			}
>   		}
>   		*count = 1;
> +		*balloc = true;
>   	}
>   
>   	/* hint information */
> @@ -258,6 +260,7 @@ static int exfat_get_block(struct inode *inode, sector_t iblock,
>   	sector_t phys = 0;
>   	sector_t valid_blks;
>   	loff_t i_size;
> +	bool balloc;
>   
>   	mutex_lock(&sbi->s_lock);
>   	i_size = i_size_read(inode);
> @@ -268,7 +271,7 @@ static int exfat_get_block(struct inode *inode, sector_t iblock,
>   	/* Is this block already allocated? */
>   	count = EXFAT_B_TO_CLU_ROUND_UP(bh_result->b_size, sbi);
>   	err = exfat_map_cluster(inode, iblock >> sbi->sect_per_clus_bits,
> -			&cluster, &count, create);
> +			&cluster, &count, create, &balloc);
>   	if (err) {
>   		if (err != -ENOSPC)
>   			exfat_fs_error_ratelimit(sb,
> diff --git a/fs/exfat/iomap.c b/fs/exfat/iomap.c
> new file mode 100644
> index 000000000000..e4135a13454f
> --- /dev/null
> +++ b/fs/exfat/iomap.c
> @@ -0,0 +1,305 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +/*
> + * iomap callback functions
> + *
> + * Copyright (C) 2026 Namjae Jeon <linkinjeon@kernel.org>
> + */
> +
> +#include <linux/iomap.h>
> +#include <linux/pagemap.h>
> +
> +#include "exfat_raw.h"
> +#include "exfat_fs.h"
> +#include "iomap.h"
> +
> +/*
> + * exfat_iomap_put_folio - Put folio after iomap operation
> + *
> + * Called when iomap is finished with a folio zero-fills portions of
> + * the folio beyond ->valid_size to prevent exposing uninitialized data.
> + */
> +static void exfat_iomap_put_folio(struct inode *inode, loff_t pos,
> +		unsigned int len, struct folio *folio)
> +{
> +	struct exfat_inode_info *ei = EXFAT_I(inode);
> +	struct exfat_sb_info *sbi = EXFAT_SB(inode->i_sb);
> +	unsigned long sector_size = 1UL << inode->i_blkbits;
> +	loff_t start_down, end_up, init;
> +
> +	mutex_lock(&sbi->s_lock);
> +	start_down = round_down(pos, sector_size);
> +	end_up = (pos + len - 1) | (sector_size - 1);
> +	init = ei->valid_size;
> +
> +	if (init >= start_down && init <= end_up) {
> +		if (init < pos) {
> +			loff_t offset = offset_in_folio(folio, pos + len);
> +
> +			if (offset == 0)
> +				offset = folio_size(folio);
> +			folio_zero_segments(folio,
> +					offset_in_folio(folio, init),
> +					offset_in_folio(folio, pos),
> +					offset,
> +					folio_size(folio));
> +
> +		} else  {
> +			loff_t offset = max_t(loff_t, pos + len, init);
> +
> +			offset = offset_in_folio(folio, offset);
> +			if (offset == 0)
> +				offset = folio_size(folio);
> +			folio_zero_segment(folio,
> +					offset,
> +					folio_size(folio));
> +		}
> +	} else if (init <= pos) {
> +		loff_t offset = 0, offset2 = offset_in_folio(folio, pos + len);
> +
> +		if ((init >> folio_shift(folio)) == (pos >> folio_shift(folio)))
> +			offset = offset_in_folio(folio, init);
> +		if (offset2 == 0)
> +			offset2 = folio_size(folio);
> +		folio_zero_segments(folio,
> +				offset,
> +				offset_in_folio(folio, pos),
> +				offset2,
> +				folio_size(folio));
> +	}
> +
> +	folio_unlock(folio);
> +	folio_put(folio);
> +	mutex_unlock(&sbi->s_lock);
> +}
> +
> +const struct iomap_write_ops exfat_iomap_folio_ops = {
> +	.put_folio = exfat_iomap_put_folio,
> +};
> +
> +/*
> + * exfat_file_write_dio_end_io - Direct I/O write completion handler
> + *
> + * Updates i_size if the write extended the file. Called from the dio layer
> + * after I/O completion.
> + */
> +static int exfat_file_write_dio_end_io(struct kiocb *iocb, ssize_t size,
> +		int error, unsigned int flags)
> +{
> +	struct inode *inode = file_inode(iocb->ki_filp);
> +
> +	if (error)
> +		return error;
> +
> +	if (size && i_size_read(inode) < iocb->ki_pos + size) {
> +		i_size_write(inode, iocb->ki_pos + size);
> +		mark_inode_dirty(inode);
> +	}
> +
> +	return 0;
> +}
> +
> +const struct iomap_dio_ops exfat_write_dio_ops = {
> +	.end_io		= exfat_file_write_dio_end_io,
> +};
> +
> +/*
> + * exfat_read_iomap_begin - Begin mapping for reads
> + *
> + * Maps file range to disk location for read operations (read folio,
> + * readahead, direct I/O read, etc.).
> + *
> + * Returns IOMAP_MAPPED for areas within ->valid_size, and IOMAP_UNWRITTEN
> + * for allocated but uninitialized regions beyond ->valid_size.
> + */
> +static int exfat_read_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
> +		unsigned int flags, struct iomap *iomap, struct iomap *srcmap)
> +{
> +	struct super_block *sb = inode->i_sb;
> +	struct exfat_sb_info *sbi = EXFAT_SB(sb);
> +	struct exfat_inode_info *ei = EXFAT_I(inode);
> +	unsigned int cluster, num_clusters = EXFAT_B_TO_CLU_ROUND_UP(length, sbi);
> +	loff_t cluster_offset, cluster_length;
> +	int err = 0;
> +	bool balloc = false;
> +
> +	mutex_lock(&sbi->s_lock);
> +	iomap->bdev = inode->i_sb->s_bdev;
> +	iomap->offset = offset;
> +
> +	err = exfat_map_cluster(inode, EXFAT_B_TO_CLU(offset, sbi),
> +			&cluster, &num_clusters, false, &balloc);
> +	if (err)
> +		goto out;
> +
> +	cluster_offset = EXFAT_CLU_OFFSET(offset, sbi);
> +	cluster_length = EXFAT_CLU_TO_B(num_clusters, sbi);
> +	if (length > cluster_length - cluster_offset)
> +		iomap->length = cluster_length - cluster_offset;
> +	else
> +		iomap->length = length;

Using min() here would be clearer.

> +
> +	iomap->addr = exfat_cluster_to_phys(sbi, cluster) + cluster_offset;
> +	if (offset >= ei->valid_size)
> +		iomap->type = IOMAP_UNWRITTEN;
> +	else
> +		iomap->type = IOMAP_MAPPED;
> +
> +	if (!(flags & IOMAP_ZERO) && iomap->type == IOMAP_MAPPED &&
> +	    iomap->offset < ei->valid_size &&
> +	    iomap->offset + iomap->length > ei->valid_size) {
> +		iomap->length = round_up(ei->valid_size, 1 << inode->i_blkbits) -
> +			iomap->offset;

Aligning the end of the extent to the block size seems like a good 
option, since iomap can’t handle unaligned cases.

However, if valid_size isn’t block-aligned, then the extent will cover 
valid_size, and we should clear the region beyond valid_size, right?

But it seems that reads don’t do this, and they don’t call 
exfat_iomap_put_folio.
Have I missed something?


Thanks,

> +	}
> +
> +	iomap->flags |= IOMAP_F_MERGED;
> +out:
> +	mutex_unlock(&sbi->s_lock);
> +	return err;
> +}
> +
> +const struct iomap_ops exfat_read_iomap_ops = {
> +	.iomap_begin = exfat_read_iomap_begin,
> +};
> +
> +/*
> + * __exfat_write_iomap_begin - mapping logic for writes
> + *
> + * Maps the requested range and allocates clusters if needed.
> + */
> +static int __exfat_write_iomap_begin(struct inode *inode, loff_t offset,
> +		loff_t length, struct iomap *iomap)
> +{
> +	struct super_block *sb = inode->i_sb;
> +	struct exfat_sb_info *sbi = EXFAT_SB(sb);
> +	unsigned int cluster, num_clusters;
> +	loff_t cluster_offset, cluster_length;
> +	int err;
> +	bool balloc = false;
> +
> +	num_clusters = max(EXFAT_B_TO_CLU_ROUND_UP(offset + length, sbi) -
> +		EXFAT_B_TO_CLU_ROUND_UP(offset, sbi), 1);
> +	mutex_lock(&sbi->s_lock);
> +	err = exfat_map_cluster(inode, EXFAT_B_TO_CLU(offset, sbi),
> +			&cluster, &num_clusters, true, &balloc);
> +	if (err)
> +		goto out;
> +
> +	iomap->bdev = inode->i_sb->s_bdev;
> +	iomap->offset = offset;
> +
> +	cluster_offset = EXFAT_CLU_OFFSET(offset, sbi);
> +	cluster_length = EXFAT_CLU_TO_B(num_clusters, sbi);
> +	if (length > cluster_length - cluster_offset)
> +		iomap->length = cluster_length - cluster_offset;
> +	else
> +		iomap->length = length;
> +	iomap->addr = exfat_cluster_to_phys(sbi, cluster) + cluster_offset;
> +	iomap->type = IOMAP_MAPPED;
> +	if (balloc)
> +		iomap->flags = IOMAP_F_NEW;
> +out:
> +	mutex_unlock(&sbi->s_lock);
> +	return err;
> +}
> +
> +/*
> + * exfat_write_iomap_begin - Mapping for write operations
> + *
> + * Extends ->valid_size if the write starts beyond current initialized size.
> + * Then performs actual block mapping (possibly allocating clusters).
> + */
> +static int exfat_write_iomap_begin(struct inode *inode, loff_t offset,
> +		loff_t length, unsigned int flags, struct iomap *iomap,
> +		struct iomap *srcmap)
> +{
> +	int ret;
> +
> +	if (EXFAT_I(inode)->valid_size < offset) {
> +		ret = exfat_extend_valid_size(inode, offset,
> +				flags & IOMAP_DIRECT ? true : false);
> +		if (ret)
> +			return ret;
> +	}
> +
> +	ret = __exfat_write_iomap_begin(inode, offset, length, iomap);
> +
> +	if (!(flags & IOMAP_DIRECT) && !ret &&
> +	    i_size_read(inode) < iomap->offset + iomap->length) {
> +		i_size_write(inode, iomap->offset + iomap->length);
> +		mark_inode_dirty(inode);
> +	}
> +
> +	return ret;
> +}
> +
> +/*
> + * exfat_write_iomap_end - Update the state after write
> + *
> + * Extends ->valid_size to cover the newly written range.
> + * Marks the inode dirty if metadata was changed.
> + */
> +static int exfat_write_iomap_end(struct inode *inode, loff_t pos, loff_t length,
> +		ssize_t written, unsigned int flags, struct iomap *iomap)
> +{
> +	if (written) {
> +		struct exfat_sb_info *sbi = EXFAT_SB(inode->i_sb);
> +		struct exfat_inode_info *ei = EXFAT_I(inode);
> +		bool dirtied = false;
> +		loff_t end = pos + written;
> +
> +		mutex_lock(&sbi->s_lock);
> +		if (ei->valid_size < end) {
> +			ei->valid_size = end;
> +			dirtied = true;
> +		}
> +		mutex_unlock(&sbi->s_lock);
> +		if (dirtied)
> +			mark_inode_dirty(inode);
> +	}
> +
> +	return written;
> +}
> +
> +const struct iomap_ops exfat_write_iomap_ops = {
> +	.iomap_begin	= exfat_write_iomap_begin,
> +	.iomap_end	= exfat_write_iomap_end,
> +};
> +
> +static int exfat_mkwrite_iomap_begin(struct inode *inode, loff_t offset,
> +		loff_t length, unsigned int flags, struct iomap *iomap,
> +		struct iomap *srcmap)
> +{
> +	return __exfat_write_iomap_begin(inode, offset, length, iomap);
> +}
> +
> +const struct iomap_ops exfat_mkwrite_iomap_ops = {
> +	.iomap_begin	= exfat_mkwrite_iomap_begin,
> +	.iomap_end	= exfat_write_iomap_end,
> +};
> +
> +/*
> + * exfat_writeback_range - Map folio during writeback
> + *
> + * Called for each folio during writeback. If the folio falls outside the
> + * current iomap, remaps by calling read_iomap_begin.
> + */
> +static ssize_t exfat_writeback_range(struct iomap_writepage_ctx *wpc,
> +		struct folio *folio, u64 offset, unsigned int len, u64 end_pos)
> +{
> +	if (offset < wpc->iomap.offset ||
> +	    offset >= wpc->iomap.offset + wpc->iomap.length) {
> +		int error;
> +
> +		error = exfat_read_iomap_begin(wpc->inode, offset, len,
> +				0, &wpc->iomap, NULL);
> +		if (error)
> +			return error;
> +	}
> +
> +	return iomap_add_to_ioend(wpc, folio, offset, end_pos, len);
> +}
> +
> +const struct iomap_writeback_ops exfat_writeback_ops = {
> +	.writeback_range	= exfat_writeback_range,
> +	.writeback_submit	= iomap_ioend_writeback_submit,
> +};
> diff --git a/fs/exfat/iomap.h b/fs/exfat/iomap.h
> new file mode 100644
> index 000000000000..4abe0dc452ee
> --- /dev/null
> +++ b/fs/exfat/iomap.h
> @@ -0,0 +1,16 @@
> +/* SPDX-License-Identifier: GPL-2.0-or-later */
> +/*
> + * Copyright (c) 2026 Namjae Jeon <linkinjeon@kernel.org>
> + */
> +
> +#ifndef _LINUX_EXFAT_IOMAP_H
> +#define _LINUX_EXFAT_IOMAP_H
> +
> +extern const struct iomap_write_ops exfat_iomap_folio_ops;
> +extern const struct iomap_ops exfat_read_iomap_ops;
> +extern const struct iomap_ops exfat_write_iomap_ops;
> +extern const struct iomap_dio_ops exfat_write_dio_ops;
> +extern const struct iomap_writeback_ops exfat_writeback_ops;
> +extern const struct iomap_ops exfat_mkwrite_iomap_ops;
> +
> +#endif /* _LINUX_EXFAT_IOMAP_H */
> diff --git a/fs/exfat/super.c b/fs/exfat/super.c
> index 83396fd265cd..b69c4b0a926b 100644
> --- a/fs/exfat/super.c
> +++ b/fs/exfat/super.c
> @@ -499,6 +499,7 @@ static int exfat_read_boot_sector(struct super_block *sb)
>   	if (p_boot->num_fats == 2)
>   		sbi->FAT2_start_sector += sbi->num_FAT_sectors;
>   	sbi->data_start_sector = le32_to_cpu(p_boot->clu_offset);
> +	sbi->data_start_bytes = sbi->data_start_sector << p_boot->sect_size_bits;
>   	sbi->num_sectors = le64_to_cpu(p_boot->vol_length);
>   	/* because the cluster index starts with 2 */
>   	sbi->num_clusters = le32_to_cpu(p_boot->clu_count) +


^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 1/5] exfat: add iomap support
  2026-03-26 11:50 ` [PATCH 1/5] exfat: add iomap support Namjae Jeon
  2026-03-30  2:45   ` Chi Zhiling
@ 2026-03-30  6:30   ` Christoph Hellwig
  2026-03-31  5:26     ` Namjae Jeon
  2026-04-01  2:24   ` Yuezhang.Mo
                     ` (2 subsequent siblings)
  4 siblings, 1 reply; 31+ messages in thread
From: Christoph Hellwig @ 2026-03-30  6:30 UTC (permalink / raw)
  To: Namjae Jeon
  Cc: sj1557.seo, yuezhang.mo, linux-fsdevel, anmuxixixi, dxdt,
	chizhiling, chizhiling

>  
> -static int exfat_extend_valid_size(struct inode *inode, loff_t new_valid_size)
> +int exfat_extend_valid_size(struct inode *inode, loff_t off, bool bsync)

This looks like at least partially unrelated refactoring.  Can you
split this out into a separate well-documented patch, or maybe even
two where one has the bulk reformatting changes, and one adds the
new bsync option?  Also it might help to document the bsync option.
Often it might be more readable to add a flags parameter with a named
flag to make things easier to follow.

> -static int exfat_map_cluster(struct inode *inode, unsigned int clu_offset,
> -		unsigned int *clu, unsigned int *count, int create)
> +int exfat_map_cluster(struct inode *inode, unsigned int clu_offset,
> +		unsigned int *clu, unsigned int *count, int create,
> +		bool *balloc)

Similarly, this would be a good prep patch.

> diff --git a/fs/exfat/iomap.c b/fs/exfat/iomap.c
> new file mode 100644
> index 000000000000..e4135a13454f
> --- /dev/null
> +++ b/fs/exfat/iomap.c
> @@ -0,0 +1,305 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +/*
> + * iomap callack functions
> + *
> + * Copyright (C) 2026 Namjae Jeon <linkinjeon@kernel.org>
> + */

Also normally new infrastructure would get added with the users.
I.e. the bits you're using in the first iomap conversion would go
with that and so on.  But that's not a strict rule.

> +/*
> + * exfat_iomap_put_folio - Put folio after iomap operation
> + *
> + * Called when iomap is finished with a folio zero-fills portions of
> + * the folio beyond ->valid_size to prevent exposing uninitialized data.
> + */
> +static void exfat_iomap_put_folio(struct inode *inode, loff_t pos,
> +		unsigned int len, struct folio *folio)

Can you explain the logic here?  Shouldn't the iomap buffered I/O
code do all the needed zeroing for you based on the map type?  If not,
how could we enhance the core iomap code so that we don't need this
in the file system, which feels like a bit of a break of abstraction
barriers?

> +/*
> + * exfat_file_write_dio_end_io - Direct I/O write completion handler
> + *
> + * Updates i_size if the write extended the file. Called from the dio layer
> + * after I/O completion.
> + */
> +static int exfat_file_write_dio_end_io(struct kiocb *iocb, ssize_t size,
> +		int error, unsigned int flags)
> +{
> +	struct inode *inode = file_inode(iocb->ki_filp);
> +
> +	if (error)
> +		return error;
> +
> +	if (size && i_size_read(inode) < iocb->ki_pos + size) {
> +		i_size_write(inode, iocb->ki_pos + size);
> +		mark_inode_dirty(inode);
> +	}
> +
> +	return 0;
> +}

I think in the long run we should just do this as the default in
the core iomap dio code when no end_io routine is provided for
writes.  But I can refactor this later to not hold you up.

> +static int exfat_read_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
> +		unsigned int flags, struct iomap *iomap, struct iomap *srcmap)
> +{

The read and write routines look very similar; any chance we could
extract most of the logic into a common helper?  Also the rounding of
the length using the start in the write version looks like it would
be the right thing for the read side anyway.

> +static int exfat_mkwrite_iomap_begin(struct inode *inode, loff_t offset,
> +		loff_t length, unsigned int flags, struct iomap *iomap,
> +		struct iomap *srcmap)
> +{
> +	return __exfat_write_iomap_begin(inode, offset, length, iomap);
> +}

The special mkwrite handling looks a bit odd to me.  I'll return to
that later in the series, but this is a good example where keeping
all the related code together would help the reviewer.

> +	if (offset < wpc->iomap.offset ||
> +	    offset >= wpc->iomap.offset + wpc->iomap.length) {
> +		int error;
> +
> +		error = exfat_read_iomap_begin(wpc->inode, offset, len,
> +				0, &wpc->iomap, NULL);

The read confused me a bit here, but I guess by the time we do writeback
everything is allocated in exfat.  This would be another good candidate
to directly call the low-level helper suggested above.

> diff --git a/fs/exfat/super.c b/fs/exfat/super.c
> index 83396fd265cd..b69c4b0a926b 100644
> --- a/fs/exfat/super.c
> +++ b/fs/exfat/super.c
> @@ -499,6 +499,7 @@ static int exfat_read_boot_sector(struct super_block *sb)
>  	if (p_boot->num_fats == 2)
>  		sbi->FAT2_start_sector += sbi->num_FAT_sectors;
>  	sbi->data_start_sector = le32_to_cpu(p_boot->clu_offset);
> +	sbi->data_start_bytes = sbi->data_start_sector << p_boot->sect_size_bits;

Is this related to iomap?

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 2/5] exfat: add iomap direct I/O support
  2026-03-26 11:50 ` [PATCH 2/5] exfat: add iomap direct I/O support Namjae Jeon
@ 2026-03-30  6:33   ` Christoph Hellwig
  2026-03-31  5:23     ` Namjae Jeon
  0 siblings, 1 reply; 31+ messages in thread
From: Christoph Hellwig @ 2026-03-30  6:33 UTC (permalink / raw)
  To: Namjae Jeon
  Cc: sj1557.seo, yuezhang.mo, linux-fsdevel, anmuxixixi, dxdt,
	chizhiling, chizhiling

On Thu, Mar 26, 2026 at 08:50:42PM +0900, Namjae Jeon wrote:
> +	if (iocb->ki_flags & IOCB_DIRECT) {
> +		ret = iomap_dio_rw(iocb, iter, &exfat_write_iomap_ops,
> +				&exfat_write_dio_ops, 0, NULL, 0);
> +		if (ret == -ENOTBLK)
> +			ret = 0;

This seems to miss a fallback to __generic_file_write_iter?

> +	inode_lock_shared(inode);
> +	if (iocb->ki_flags & IOCB_DIRECT) {
> +		size_t count = iov_iter_count(iter);
> +
> +		if ((iocb->ki_pos | count) & (inode->i_sb->s_blocksize - 1)) {
> +			ret = -EINVAL;
> +			goto inode_unlock;
> +		}

iomap_dio_bio_iter() should already take care of the alignment check.

> -	.direct_IO	= exfat_direct_IO,
> +	.direct_IO	= noop_direct_IO,

This should not be needed, setting FMODE_CAN_ODIRECT in ->open should
be all that is needed.


^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 3/5] exfat: add iomap buffered I/O support
  2026-03-26 11:50 ` [PATCH 3/5] exfat: add iomap buffered " Namjae Jeon
@ 2026-03-30  6:38   ` Christoph Hellwig
  2026-03-31  5:22     ` Namjae Jeon
  2026-04-06 13:09   ` David Timber
  1 sibling, 1 reply; 31+ messages in thread
From: Christoph Hellwig @ 2026-03-30  6:38 UTC (permalink / raw)
  To: Namjae Jeon
  Cc: sj1557.seo, yuezhang.mo, linux-fsdevel, anmuxixixi, dxdt,
	chizhiling, chizhiling

> -#define EXFAT_CLU_TO_B(b, sbi)		((b) << (sbi)->cluster_size_bits)
> +#define EXFAT_CLU_TO_B(b, sbi)		((loff_t)(b) << (sbi)->cluster_size_bits)

This should be a prep patch.

>  	if ((attr->ia_valid & ATTR_SIZE) &&
>  	    attr->ia_size > i_size_read(inode)) {
> +		loff_t old_size = i_size_read(inode);
> +
>  		error = exfat_cont_expand(inode, attr->ia_size);
> +		if (!error && attr->ia_size > old_size &&
> +		    old_size % PAGE_SIZE != 0) {
> +			loff_t len = min_t(loff_t,
> +					round_up(old_size, PAGE_SIZE) - old_size,
> +					attr->ia_size - old_size);
> +			error = iomap_zero_range(inode, old_size, len,
> +					NULL, &exfat_read_iomap_ops,
> +					&exfat_iomap_folio_ops, NULL);
> +		}

Why is this needed for iomap?  Shouldn't explicit zeroing on size change
be a prep patch?

>  	if (unlikely(exfat_forced_shutdown(inode->i_sb)))
>  		return -EIO;
>  
> -	err = __generic_file_fsync(filp, start, end, datasync);
> +	err = file_write_and_wait_range(filp, start, end);
>  	if (err)
>  		return err;
>  
> +	if (!datasync)
> +		err = __exfat_write_inode(inode, 1);
> +	write_inode_now(inode, !datasync);

This seems to have lost the i_state check to see if a sync
is needed before __exfat_write_inode.  Also doing write_inode_now
plus the explicit inode sync seems duplicate.  Last but not least
I don't understand how this is related to iomap?

> +	ssize_t ret;
> +
> +	ret = iomap_dio_rw(iocb, from, &exfat_write_iomap_ops,
> +			&exfat_write_dio_ops, 0, NULL, 0);
> +	if (ret == -ENOTBLK)
> +		ret = 0;
> +	else if (ret < 0)
> +		goto out;
> +
> +	if (iov_iter_count(from)) {
> +		loff_t offset, end;
> +		ssize_t written;
> +		int ret2;
> +
> +		offset = iocb->ki_pos;
> +		iocb->ki_flags &= ~IOCB_DIRECT;
> +		written = iomap_file_buffered_write(iocb, from,
> +				&exfat_write_iomap_ops, &exfat_iomap_folio_ops,
> +				NULL);
> +		if (written < 0) {
> +			ret = written;
> +			goto out;
> +		}
> +
> +		ret += written;
> +		end = iocb->ki_pos + written - 1;
> +		ret2 = filemap_write_and_wait_range(iocb->ki_filp->f_mapping,
> +				offset, end);
> +		if (ret2) {
> +			ret = -EIO;
> +			goto out;
> +		}
> +		if (!ret2)
> +			invalidate_mapping_pages(iocb->ki_filp->f_mapping,
> +					offset >> PAGE_SHIFT,
> +					end >> PAGE_SHIFT);
> +	}

Eventually we should factor this code into a common helper.  Not needed
for this submission, though.

>  {
> -	int err;
>  	struct inode *inode = file_inode(vmf->vma->vm_file);
> -	struct exfat_inode_info *ei = EXFAT_I(inode);
> -	loff_t new_valid_size;
> +	vm_fault_t ret;
>  
>  	if (!inode_trylock(inode))
>  		return VM_FAULT_RETRY;
>  
> -	new_valid_size = ((loff_t)vmf->pgoff + 1) << PAGE_SHIFT;
> -	new_valid_size = min(new_valid_size, i_size_read(inode));
> -
> -	if (ei->valid_size < new_valid_size) {
> -		err = exfat_extend_valid_size(inode, new_valid_size, false);

Why is this moving out?  I think the zeroing changes in this patch
are really something that should be split out and explained in the
commit log for better understanding and bisectability.

> +static int exfat_file_open(struct inode *inode, struct file *filp)
> +{
> +	int err;
> +
> +	if (unlikely(exfat_forced_shutdown(inode->i_sb)))
> +		return -EIO;

This doesn't look iomap-related.

> +
> +	err = generic_file_open(inode, filp);
> +	if (err)
> +		return err;
> +
> +	filp->f_mode |= FMODE_CAN_ODIRECT;

This should go into the previous patch.


^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 5/5] exfat: add support for SEEK_HOLE and SEEK_DATA in llseek
  2026-03-26 11:50 ` [PATCH 5/5] exfat: add support for SEEK_HOLE and SEEK_DATA in llseek Namjae Jeon
@ 2026-03-30  6:39   ` Christoph Hellwig
  2026-03-31  4:55     ` Namjae Jeon
  0 siblings, 1 reply; 31+ messages in thread
From: Christoph Hellwig @ 2026-03-30  6:39 UTC (permalink / raw)
  To: Namjae Jeon
  Cc: sj1557.seo, yuezhang.mo, linux-fsdevel, anmuxixixi, dxdt,
	chizhiling, chizhiling

> +	default:
> +		return generic_file_llseek_size(file, offset, whence,
> +						inode->i_sb->s_maxbytes,
> +						i_size_read(inode));

Just use generic_file_llseek directly here.


^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 5/5] exfat: add support for SEEK_HOLE and SEEK_DATA in llseek
  2026-03-30  6:39   ` Christoph Hellwig
@ 2026-03-31  4:55     ` Namjae Jeon
  0 siblings, 0 replies; 31+ messages in thread
From: Namjae Jeon @ 2026-03-31  4:55 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: sj1557.seo, yuezhang.mo, linux-fsdevel, anmuxixixi, dxdt,
	chizhiling, chizhiling

On Mon, Mar 30, 2026 at 3:39 PM Christoph Hellwig <hch@lst.de> wrote:
>
> > +     default:
> > +             return generic_file_llseek_size(file, offset, whence,
> > +                                             inode->i_sb->s_maxbytes,
> > +                                             i_size_read(inode));
>
> Just use generic_file_llseek directly here.
Okay, I will update it like this.
Thanks for your review!
>

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 3/5] exfat: add iomap buffered I/O support
  2026-03-30  6:38   ` Christoph Hellwig
@ 2026-03-31  5:22     ` Namjae Jeon
  2026-03-31  5:46       ` Christoph Hellwig
  0 siblings, 1 reply; 31+ messages in thread
From: Namjae Jeon @ 2026-03-31  5:22 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: sj1557.seo, yuezhang.mo, linux-fsdevel, anmuxixixi, dxdt,
	chizhiling, chizhiling

On Mon, Mar 30, 2026 at 3:38 PM Christoph Hellwig <hch@lst.de> wrote:
>
> > -#define EXFAT_CLU_TO_B(b, sbi)               ((b) << (sbi)->cluster_size_bits)
> > +#define EXFAT_CLU_TO_B(b, sbi)               ((loff_t)(b) << (sbi)->cluster_size_bits)
>
> This should be a prep patch.
Okay.
>
> >       if ((attr->ia_valid & ATTR_SIZE) &&
> >           attr->ia_size > i_size_read(inode)) {
> > +             loff_t old_size = i_size_read(inode);
> > +
> >               error = exfat_cont_expand(inode, attr->ia_size);
> > +             if (!error && attr->ia_size > old_size &&
> > +                 old_size % PAGE_SIZE != 0) {
> > +                     loff_t len = min_t(loff_t,
> > +                                     round_up(old_size, PAGE_SIZE) - old_size,
> > +                                     attr->ia_size - old_size);
> > +                     error = iomap_zero_range(inode, old_size, len,
> > +                                     NULL, &exfat_read_iomap_ops,
> > +                                     &exfat_iomap_folio_ops, NULL);
> > +             }
>
> Why is this needed for iomap?  Shouldn't explicit zeroing on size change
> be a prep patch?
Okay, I will change it to a prep patch.
>
> >       if (unlikely(exfat_forced_shutdown(inode->i_sb)))
> >               return -EIO;
> >
> > -     err = __generic_file_fsync(filp, start, end, datasync);
> > +     err = file_write_and_wait_range(filp, start, end);
> >       if (err)
> >               return err;
> >
> > +     if (!datasync)
> > +             err = __exfat_write_inode(inode, 1);
> > +     write_inode_now(inode, !datasync);
>
> This seems to have lost the i_state check to see if a sync
> is needed before __exfat_write_inode.  Also doing write_inode_now
> plus the explicit inode sync seems duplicate.  Last but not least
> I don't understand how this is related to iomap?
There is a deadlock issue when using iomap_dio_rw(). In
exfat_write_iter(), the inode lock is held while calling iomap_dio_rw.
generic_write_sync()->__generic_file_fsync() in iomap_dio_complete()
can be triggered, which attempts to acquire the inode lock again.
I will fix the i_state check and the duplicate inode syncing problem that was introduced.
> > +     ssize_t ret;
> > +
> > +     ret = iomap_dio_rw(iocb, from, &exfat_write_iomap_ops,
> > +                     &exfat_write_dio_ops, 0, NULL, 0);
> > +     if (ret == -ENOTBLK)
> > +             ret = 0;
> > +     else if (ret < 0)
> > +             goto out;
> > +
> > +     if (iov_iter_count(from)) {
> > +             loff_t offset, end;
> > +             ssize_t written;
> > +             int ret2;
> > +
> > +             offset = iocb->ki_pos;
> > +             iocb->ki_flags &= ~IOCB_DIRECT;
> > +             written = iomap_file_buffered_write(iocb, from,
> > +                             &exfat_write_iomap_ops, &exfat_iomap_folio_ops,
> > +                             NULL);
> > +             if (written < 0) {
> > +                     ret = written;
> > +                     goto out;
> > +             }
> > +
> > +             ret += written;
> > +             end = iocb->ki_pos + written - 1;
> > +             ret2 = filemap_write_and_wait_range(iocb->ki_filp->f_mapping,
> > +                             offset, end);
> > +             if (ret2) {
> > +                     ret = -EIO;
> > +                     goto out;
> > +             }
> > +             if (!ret2)
> > +                     invalidate_mapping_pages(iocb->ki_filp->f_mapping,
> > +                                     offset >> PAGE_SHIFT,
> > +                                     end >> PAGE_SHIFT);
> > +     }
>
> Eventually we should factor this code int oa common helper.  Not needed
> for this submission, though.
Okay.
>
> >  {
> > -     int err;
> >       struct inode *inode = file_inode(vmf->vma->vm_file);
> > -     struct exfat_inode_info *ei = EXFAT_I(inode);
> > -     loff_t new_valid_size;
> > +     vm_fault_t ret;
> >
> >       if (!inode_trylock(inode))
> >               return VM_FAULT_RETRY;
> >
> > -     new_valid_size = ((loff_t)vmf->pgoff + 1) << PAGE_SHIFT;
> > -     new_valid_size = min(new_valid_size, i_size_read(inode));
> > -
> > -     if (ei->valid_size < new_valid_size) {
> > -             err = exfat_extend_valid_size(inode, new_valid_size, false);
>
> Why is this moving out?  I think the zeroing changes in this patch
> are really something that should be split out and explained in the
> commit log for better understanding and bisectability.
Okay, I will split it out and add detailed commit log.
>
> > +static int exfat_file_open(struct inode *inode, struct file *filp)
> > +{
> > +     int err;
> > +
> > +     if (unlikely(exfat_forced_shutdown(inode->i_sb)))
> > +             return -EIO;
>
> This doesn't look iomap-related.
Okay. I will split it out also.
>
> > +
> > +     err = generic_file_open(inode, filp);
> > +     if (err)
> > +             return err;
> > +
> > +     filp->f_mode |= FMODE_CAN_ODIRECT;
>
> This should go into the previous patch.
Right. I will fix it.
Thanks!
>

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 2/5] exfat: add iomap direct I/O support
  2026-03-30  6:33   ` Christoph Hellwig
@ 2026-03-31  5:23     ` Namjae Jeon
  0 siblings, 0 replies; 31+ messages in thread
From: Namjae Jeon @ 2026-03-31  5:23 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: sj1557.seo, yuezhang.mo, linux-fsdevel, anmuxixixi, dxdt,
	chizhiling, chizhiling

On Mon, Mar 30, 2026 at 3:33 PM Christoph Hellwig <hch@lst.de> wrote:
>
> On Thu, Mar 26, 2026 at 08:50:42PM +0900, Namjae Jeon wrote:
> > +     if (iocb->ki_flags & IOCB_DIRECT) {
> > +             ret = iomap_dio_rw(iocb, iter, &exfat_write_iomap_ops,
> > +                             &exfat_write_dio_ops, 0, NULL, 0);
> > +             if (ret == -ENOTBLK)
> > +                     ret = 0;
>
> This seems to miss a fallback to __generic_file_write_iter?
There is a follow-up patch in the series that adds the fallback to
buffered write. I'll reorganize the patches so that this change and
the fallback are combined together in a clearer way.
>
> > +     inode_lock_shared(inode);
> > +     if (iocb->ki_flags & IOCB_DIRECT) {
> > +             size_t count = iov_iter_count(iter);
> > +
> > +             if ((iocb->ki_pos | count) & (inode->i_sb->s_blocksize - 1)) {
> > +                     ret = -EINVAL;
> > +                     goto inode_unlock;
> > +             }
>
> iomap_dio_bio_iter() should already take care of the alignment check.
Right, I will remove it.
>
> > -     .direct_IO      = exfat_direct_IO,
> > +     .direct_IO      = noop_direct_IO,
>
> This should not be needed, setting FMODE_CAN_ODIRECT in ->open should
> be all that is needed.
Okay. I will remove it.
Thanks!
>

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 1/5] exfat: add iomap support
  2026-03-30  6:30   ` Christoph Hellwig
@ 2026-03-31  5:26     ` Namjae Jeon
  2026-03-31  5:48       ` Christoph Hellwig
  2026-04-01  3:07       ` Chi Zhiling
  0 siblings, 2 replies; 31+ messages in thread
From: Namjae Jeon @ 2026-03-31  5:26 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: sj1557.seo, yuezhang.mo, linux-fsdevel, anmuxixixi, dxdt,
	chizhiling, chizhiling

On Mon, Mar 30, 2026 at 3:31 PM Christoph Hellwig <hch@lst.de> wrote:
>
> >
> > -static int exfat_extend_valid_size(struct inode *inode, loff_t new_valid_size)
> > +int exfat_extend_valid_size(struct inode *inode, loff_t off, bool bsync)
>
> This looks like at least partially unrelated refactoring.  Can you
> split this out into a separate well-documented patch, or maybe even
> two where one has the bulk reformatting changes, and one adds the
> new bsync option?  Also it might help to document the bsync option.
> Often it might be more readable to add a flags parameter with a named
> flag to make things easier to follow.
Okay. I'll split it out into a separate patch and add a flags
parameter with documentation.
>
> > -static int exfat_map_cluster(struct inode *inode, unsigned int clu_offset,
> > -             unsigned int *clu, unsigned int *count, int create)
> > +int exfat_map_cluster(struct inode *inode, unsigned int clu_offset,
> > +             unsigned int *clu, unsigned int *count, int create,
> > +             bool *balloc)
>
> Similarly, this would be a good prep patch.
Okay.
>
> > diff --git a/fs/exfat/iomap.c b/fs/exfat/iomap.c
> > new file mode 100644
> > index 000000000000..e4135a13454f
> > --- /dev/null
> > +++ b/fs/exfat/iomap.c
> > @@ -0,0 +1,305 @@
> > +// SPDX-License-Identifier: GPL-2.0-or-later
> > +/*
> > + * iomap callack functions
> > + *
> > + * Copyright (C) 2026 Namjae Jeon <linkinjeon@kernel.org>
> > + */
>
> Also normally new infrastructure would get added with the users.
> I.e. the bits you're using in the first iomap conversion would go
> with that and so on.  But that's not a strict rule.
I'll try to split the introduction of the iomap infrastructure so that
only the parts actually used in each conversion step are added
together with the users.
>
> > +/*
> > + * exfat_iomap_put_folio - Put folio after iomap operation
> > + *
> > + * Called when iomap is finished with a folio zero-fills portions of
> > + * the folio beyond ->valid_size to prevent exposing uninitialized data.
> > + */
> > +static void exfat_iomap_put_folio(struct inode *inode, loff_t pos,
> > +             unsigned int len, struct folio *folio)
>
> Can you explain the logic here?  Shouldn't the iomap buffered I/O
> code do all the needed zeroing for you based on the map type?  If not
> how could we enhance the core iomap code so that we don't need this
> in the file system, which feels like a bit of break of abstraction
> barriers?
The reason we have exfat_iomap_put_folio() is because of exFAT's
VDL(->valid_size). When we map a range beyond ->valid_size, if we set
the map type to IOMAP_UNWRITTEN, iomap_zero_range() does not perform
zeroing.
However, if we set it to IOMAP_MAPPED, then iomap_zero_range() treats
the area beyond ->valid_size as valid data and could expose
uninitialized garbage data from disk. So we explicitly zero out the
portion beyond ->valid_size in exfat_iomap_put_folio().
I agree that it would be better if core iomap could handle this case by itself.
I'll check whether we can improve the core iomap layer to avoid
filesystem-specific put_folio handling for ->valid_size.
>
> > +/*
> > + * exfat_file_write_dio_end_io - Direct I/O write completion handler
> > + *
> > + * Updates i_size if the write extended the file. Called from the dio layer
> > + * after I/O completion.
> > + */
> > +static int exfat_file_write_dio_end_io(struct kiocb *iocb, ssize_t size,
> > +             int error, unsigned int flags)
> > +{
> > +     struct inode *inode = file_inode(iocb->ki_filp);
> > +
> > +     if (error)
> > +             return error;
> > +
> > +     if (size && i_size_read(inode) < iocb->ki_pos + size) {
> > +             i_size_write(inode, iocb->ki_pos + size);
> > +             mark_inode_dirty(inode);
> > +     }
> > +
> > +     return 0;
> > +}
>
> I think in the long run we should just do this as the default in
> the core iomap dio code when no end_io routine is provided for
> writes.  But I can refactor this later to not hold you up.
Okay, Thanks!
>
> > +static int exfat_read_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
> > +             unsigned int flags, struct iomap *iomap, struct iomap *srcmap)
> > +{
>
> > The read and write routines look very similar, any chance we could
> > extract most of the logic into a common helper?  Also the rounding of
> the length using the start in the write version looks like it would
> be the right thing for the read side anyway.
Okay. I'll factor out the common logic into a helper.
>
> > +static int exfat_mkwrite_iomap_begin(struct inode *inode, loff_t offset,
> > +             loff_t length, unsigned int flags, struct iomap *iomap,
> > +             struct iomap *srcmap)
> > +{
> > +     return __exfat_write_iomap_begin(inode, offset, length, iomap);
> > +}
>
> The special mkwrite handling looks a bit odd to me.  I'll return to
> that later in the series, but this is a good example where keeping
> all the related code together would help the reviewer.
Okay. I will fix it.
>
> > +     if (offset < wpc->iomap.offset ||
> > +         offset >= wpc->iomap.offset + wpc->iomap.length) {
> > +             int error;
> > +
> > +             error = exfat_read_iomap_begin(wpc->inode, offset, len,
> > +                             0, &wpc->iomap, NULL);
>
> The read confused me a bit here, but I guess by the time we do writeback
> everything is allocated in exfat.  This would be another good candidate
> to directly call the low-level helper suggested above.
Agreed. As you suggested, I'll change it to directly call the
low-level common helper.
>
> > diff --git a/fs/exfat/super.c b/fs/exfat/super.c
> > index 83396fd265cd..b69c4b0a926b 100644
> > --- a/fs/exfat/super.c
> > +++ b/fs/exfat/super.c
> > @@ -499,6 +499,7 @@ static int exfat_read_boot_sector(struct super_block *sb)
> >       if (p_boot->num_fats == 2)
> >               sbi->FAT2_start_sector += sbi->num_FAT_sectors;
> >       sbi->data_start_sector = le32_to_cpu(p_boot->clu_offset);
> > +     sbi->data_start_bytes = sbi->data_start_sector << p_boot->sect_size_bits;
>
> Is this related to iomap?
Yes, I'll split it out into a separate prep patch together with the
other related changes, so it will be clearer.
Thanks for your review!
>

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 1/5] exfat: add iomap support
  2026-03-30  2:45   ` Chi Zhiling
@ 2026-03-31  5:29     ` Namjae Jeon
  0 siblings, 0 replies; 31+ messages in thread
From: Namjae Jeon @ 2026-03-31  5:29 UTC (permalink / raw)
  To: Chi Zhiling
  Cc: sj1557.seo, yuezhang.mo, linux-fsdevel, anmuxixixi, dxdt,
	chizhiling, hch

On Mon, Mar 30, 2026 at 11:46 AM Chi Zhiling <chizhiling@163.com> wrote:
>
> Hi, Namjae
Hi Chi,
>
> Could you CC my personal email "chizhiling@163.com" when sending v2?
> since my work email can’t receive messages immediately.
Sure.
> > +     mutex_lock(&sbi->s_lock);
> > +     old_valid_size = ei->valid_size;
> > +     mutex_unlock(&sbi->s_lock);
>
> I’ve recently been refactoring around this lock :)
Okay.
> > +     cluster_offset = EXFAT_CLU_OFFSET(offset, sbi);
> > +     cluster_length = EXFAT_CLU_TO_B(num_clusters, sbi);
> > +     if (length > cluster_length - cluster_offset)
> > +             iomap->length = cluster_length - cluster_offset;
> > +     else
> > +             iomap->length = length;
>
> Using min here would be clearer
Okay.
>
> > +
> > +     iomap->addr = exfat_cluster_to_phys(sbi, cluster) + cluster_offset;
> > +     if (offset >= ei->valid_size)
> > +             iomap->type = IOMAP_UNWRITTEN;
> > +     else
> > +             iomap->type = IOMAP_MAPPED;
> > +
> > +     if (!(flags & IOMAP_ZERO) && iomap->type == IOMAP_MAPPED &&
> > +         iomap->offset < ei->valid_size &&
> > +         iomap->offset + iomap->length > ei->valid_size) {
> > +             iomap->length = round_up(ei->valid_size, 1 << inode->i_blkbits) -
> > +                     iomap->offset;
>
> Aligning the end of the extent to the block size seems like a good
> option, since iomap can’t handle unaligned cases.
>
> However, If valid_size isn’t block-aligned, then the extent will cover
> valid_size, and we should clear the region beyond valid_size, right?
>
> But it seems that reads don’t do this, and they don’t call
> exfat_iomap_put_folio.
> Have I missed something?
You seem right, I will check it more.
Thanks for your review:)

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 3/5] exfat: add iomap buffered I/O support
  2026-03-31  5:22     ` Namjae Jeon
@ 2026-03-31  5:46       ` Christoph Hellwig
  2026-03-31  6:36         ` Namjae Jeon
  0 siblings, 1 reply; 31+ messages in thread
From: Christoph Hellwig @ 2026-03-31  5:46 UTC (permalink / raw)
  To: Namjae Jeon
  Cc: Christoph Hellwig, sj1557.seo, yuezhang.mo, linux-fsdevel,
	anmuxixixi, dxdt, chizhiling, chizhiling

On Tue, Mar 31, 2026 at 02:22:57PM +0900, Namjae Jeon wrote:
> There is a deadlock issue when using iomap_dio_rw(). In
> exfat_write_iter(), the inode lock is held while calling iomap_dio_rw.
> generic_write_sync()->__generic_file_fsync() in iomap_dio_complete()
> can be triggered, which attempts to acquire the inode lock again.

Can you document this?  Note that Jan has looked into __generic_file_fsync
and decided i_rwsem isn't needed there.  So maybe as a first step just
switch exfat to generic_buffers_fsync_noflush, which doesn't need this
(or wait if/when Jan's series gets merged) and keep using a generic
version?  If not document in the commit log why this is changed.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 1/5] exfat: add iomap support
  2026-03-31  5:26     ` Namjae Jeon
@ 2026-03-31  5:48       ` Christoph Hellwig
  2026-03-31  6:44         ` Namjae Jeon
  2026-04-01  3:07       ` Chi Zhiling
  1 sibling, 1 reply; 31+ messages in thread
From: Christoph Hellwig @ 2026-03-31  5:48 UTC (permalink / raw)
  To: Namjae Jeon
  Cc: Christoph Hellwig, sj1557.seo, yuezhang.mo, linux-fsdevel,
	anmuxixixi, dxdt, chizhiling, chizhiling, Darrick J. Wong

On Tue, Mar 31, 2026 at 02:26:55PM +0900, Namjae Jeon wrote:
> > Can you explain the logic here?  Shouldn't the iomap buffered I/O
> > code do all the needed zeroing for you based on the map type?  If not
> > how could we enhance the core iomap code so that we don't need this
> > in the file system, which feels like a bit of break of abstraction
> > barriers?
> The reason we have exfat_iomap_put_folio() is because of exFAT's
> VDL(->valid_size). When we map a range beyond ->valid_size, if we set
> the map type to IOMAP_UNWRITTEN, iomap_zero_range() does not perform
> zeroing.
> However, if we set it to IOMAP_MAPPED, then iomap_zero_range() treats
> the area beyond ->valid_size as valid data and could expose
> uninitialized garbage data from disk. So it explicitly zero out the
> portion beyond ->valid_size in exfat_iomap_put_folio().
> I agree that it would be better if core iomap could handle this case by itself.
> I'll check whether we can improve the core iomap layer to avoid
> filesystem-specific put_folio handling for ->valid_size.

I suspect just adding a new IOMAP_ type for this should be fine, as
the above description sounds coherent enough to add it to documentation.
I just can't really think of a good name.


^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 3/5] exfat: add iomap buffered I/O support
  2026-03-31  5:46       ` Christoph Hellwig
@ 2026-03-31  6:36         ` Namjae Jeon
  2026-03-31  6:37           ` Christoph Hellwig
  0 siblings, 1 reply; 31+ messages in thread
From: Namjae Jeon @ 2026-03-31  6:36 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: sj1557.seo, yuezhang.mo, linux-fsdevel, anmuxixixi, dxdt,
	chizhiling, chizhiling

On Tue, Mar 31, 2026 at 2:46 PM Christoph Hellwig <hch@lst.de> wrote:
>
> On Tue, Mar 31, 2026 at 02:22:57PM +0900, Namjae Jeon wrote:
> > There is a deadlock issue when using iomap_dio_rw(). In
> > exfat_write_iter(), the inode lock is held while calling iomap_dio_rw.
> > generic_write_sync()->__generic_file_fsync() in iomap_dio_complete()
> > can be triggered, which attempts to acquire the inode lock again.
>
> Can you document this?  Note that Jan has looked into __generic_file_fsync
> and decided i_rwsem isn't needed there.  So maybe as a first step just
> switch exfat to generic_buffers_fsync_noflush, which doesn't need this
> (or wait if/when Jan's series gets merged) and keep using a generic
> version?  If not document in the commit log why this is changed.
Since Jan's series renames __generic_file_fsync() to
simple_fsync_noflush(), switching to generic_buffers_fsync_noflush()
now would cause a conflict with his patchset. I think that we can wait
for Jan’s series to be merged first. So I will drop the
exfat_file_fsync changes in the exfat iomap patchset.
Thanks!
>

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 3/5] exfat: add iomap buffered I/O support
  2026-03-31  6:36         ` Namjae Jeon
@ 2026-03-31  6:37           ` Christoph Hellwig
  2026-03-31  6:58             ` Namjae Jeon
  0 siblings, 1 reply; 31+ messages in thread
From: Christoph Hellwig @ 2026-03-31  6:37 UTC (permalink / raw)
  To: Namjae Jeon
  Cc: Christoph Hellwig, sj1557.seo, yuezhang.mo, linux-fsdevel,
	anmuxixixi, dxdt, chizhiling, chizhiling

On Tue, Mar 31, 2026 at 03:36:35PM +0900, Namjae Jeon wrote:
> Since Jan's series renames __generic_file_fsync() to
> simple_fsync_noflush(), switching to generic_buffers_fsync_noflush()
> now would cause a conflict with his patchset. I think that we can wait
> for Jan’s series to be merged first. So I will drop the
> exfat_file_fsync changes in the exfat iomap patchset.

Or just open code it in exfat for now and switch back to the generic
one once that becomes suitable again.  But we're pretty late for the
7.1 merge window anyway, so maybe not rushing this might be a better
idea.


^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 1/5] exfat: add iomap support
  2026-03-31  5:48       ` Christoph Hellwig
@ 2026-03-31  6:44         ` Namjae Jeon
  0 siblings, 0 replies; 31+ messages in thread
From: Namjae Jeon @ 2026-03-31  6:44 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: sj1557.seo, yuezhang.mo, linux-fsdevel, anmuxixixi, dxdt,
	chizhiling, chizhiling, Darrick J. Wong

On Tue, Mar 31, 2026 at 2:49 PM Christoph Hellwig <hch@lst.de> wrote:
>
> On Tue, Mar 31, 2026 at 02:26:55PM +0900, Namjae Jeon wrote:
> > > Can you explain the logic here?  Shouldn't the iomap buffered I/O
> > > code do all the needed zeroing for you based on the map type?  If not
> > > how could we enhance the core iomap code so that we don't need this
> > > in the file system, which feels like a bit of break of abstraction
> > > barriers?
> > The reason we have exfat_iomap_put_folio() is because of exFAT's
> > VDL(->valid_size). When we map a range beyond ->valid_size, if we set
> > the map type to IOMAP_UNWRITTEN, iomap_zero_range() does not perform
> > zeroing.
> > However, if we set it to IOMAP_MAPPED, then iomap_zero_range() treats
> > the area beyond ->valid_size as valid data and could expose
> > uninitialized garbage data from disk. So it explicitly zero out the
> > portion beyond ->valid_size in exfat_iomap_put_folio().
> > I agree that it would be better if core iomap could handle this case by itself.
> > I'll check whether we can improve the core iomap layer to avoid
> > filesystem-specific put_folio handling for ->valid_size.
>
> I suspect just adding a new IOMAP_ type for this should be fine, as
> the above description sounds coherent enough to add it to documentation.
> I just can't really think of a good name.
Let me check it:)
Thanks for the feedback.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 3/5] exfat: add iomap buffered I/O support
  2026-03-31  6:37           ` Christoph Hellwig
@ 2026-03-31  6:58             ` Namjae Jeon
  0 siblings, 0 replies; 31+ messages in thread
From: Namjae Jeon @ 2026-03-31  6:58 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: sj1557.seo, yuezhang.mo, linux-fsdevel, anmuxixixi, dxdt,
	chizhiling, chizhiling

On Tue, Mar 31, 2026 at 3:38 PM Christoph Hellwig <hch@lst.de> wrote:
>
> On Tue, Mar 31, 2026 at 03:36:35PM +0900, Namjae Jeon wrote:
> > Since Jan's series renames __generic_file_fsync() to
> > simple_fsync_noflush(), switching to generic_buffers_fsync_noflush()
> > now would cause a conflict with his patchset. I think that we can wait
> > for Jan’s series to be merged first. So I will drop the
> > exfat_file_fsync changes in the exfat iomap patchset.
>
> Or just open code it in exfat for now and switch back to the generic
> one once that becomes suitable again.  But we're pretty late for the
> 7.1 merge window anyway, so maybe not rushing this might be a better
> idea.
There are still several things I need to check and improve, including
possible changes to the core iomap layer.
So I agree that it looks difficult to get this series merged in the
7.1 window. But I'll submit v2 patch-set so it can receive further
review.
Thanks!
>

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 1/5] exfat: add iomap support
  2026-03-26 11:50 ` [PATCH 1/5] exfat: add iomap support Namjae Jeon
  2026-03-30  2:45   ` Chi Zhiling
  2026-03-30  6:30   ` Christoph Hellwig
@ 2026-04-01  2:24   ` Yuezhang.Mo
  2026-04-01  2:47     ` Namjae Jeon
  2026-04-06 13:45   ` David Timber
  2026-04-06 14:13   ` David Timber
  4 siblings, 1 reply; 31+ messages in thread
From: Yuezhang.Mo @ 2026-04-01  2:24 UTC (permalink / raw)
  To: Namjae Jeon, sj1557.seo@samsung.com
  Cc: linux-fsdevel@vger.kernel.org, anmuxixixi@gmail.com,
	dxdt@dev.snart.me, chizhiling@kylinos.cn, hch@lst.de

> +static int __exfat_write_iomap_begin(struct inode *inode, loff_t offset,
> +               loff_t length, struct iomap *iomap)
> +{
> +       struct super_block *sb = inode->i_sb;
> +       struct exfat_sb_info *sbi = EXFAT_SB(sb);
> +       unsigned int cluster, num_clusters;
> +       loff_t cluster_offset, cluster_length;
> +       int err;
> +       bool balloc = false;
> +
> +       num_clusters = max(EXFAT_B_TO_CLU_ROUND_UP(offset + length, sbi) -
> +               EXFAT_B_TO_CLU_ROUND_UP(offset, sbi), 1);
> +       mutex_lock(&sbi->s_lock);
> +       err = exfat_map_cluster(inode, EXFAT_B_TO_CLU(offset, sbi),
> +                       &cluster, &num_clusters, true, &balloc);

num_clusters is the number of clusters over which the region
[offset, offset+length] is distributed.

num_clusters should be set as follows in both __exfat_write_iomap_begin and
exfat_read_iomap_begin.

  num_clusters = EXFAT_B_TO_CLU_ROUND_UP(offset + length, sbi) - EXFAT_B_TO_CLU(offset, sbi);

And num_to_be_allocated in exfat_map_cluster() should be set as follows.

     num_to_be_allocated = 0;
     if (clu_offset + *count > num_clusters)
             num_to_be_allocated = clu_offset + *count - num_clusters;

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 1/5] exfat: add iomap support
  2026-04-01  2:24   ` Yuezhang.Mo
@ 2026-04-01  2:47     ` Namjae Jeon
  0 siblings, 0 replies; 31+ messages in thread
From: Namjae Jeon @ 2026-04-01  2:47 UTC (permalink / raw)
  To: Yuezhang.Mo@sony.com
  Cc: sj1557.seo@samsung.com, linux-fsdevel@vger.kernel.org,
	anmuxixixi@gmail.com, dxdt@dev.snart.me, chizhiling@kylinos.cn,
	hch@lst.de

On Wed, Apr 1, 2026 at 11:24 AM Yuezhang.Mo@sony.com
<Yuezhang.Mo@sony.com> wrote:
>
> > +static int __exfat_write_iomap_begin(struct inode *inode, loff_t offset,
> > +               loff_t length, struct iomap *iomap)
> > +{
> > +       struct super_block *sb = inode->i_sb;
> > +       struct exfat_sb_info *sbi = EXFAT_SB(sb);
> > +       unsigned int cluster, num_clusters;
> > +       loff_t cluster_offset, cluster_length;
> > +       int err;
> > +       bool balloc = false;
> > +
> > +       num_clusters = max(EXFAT_B_TO_CLU_ROUND_UP(offset + length, sbi) -
> > +               EXFAT_B_TO_CLU_ROUND_UP(offset, sbi), 1);
> > +       mutex_lock(&sbi->s_lock);
> > +       err = exfat_map_cluster(inode, EXFAT_B_TO_CLU(offset, sbi),
> > +                       &cluster, &num_clusters, true, &balloc);
>
> num_clusters is the number of clusters over which the region
> [offset, offset+length] is distributed.
>
> num_clusters should be set as follows in both __exfat_write_iomap_begin and
> exfat_read_iomap_begin.
>
>   num_clusters = EXFAT_B_TO_CLU_ROUND_UP(offset + length, sbi) - EXFAT_B_TO_CLU(offset, sbi);
>
> And num_to_be_allocated in exfat_map_cluster() should be set as follows.
>
>      num_to_be_allocated = 0;
>      if (clu_offset + *count > num_clusters)
>              num_to_be_allocated = clu_offset + *count - num_clusters;
Okay, I will update it on the next version.
Thanks for the review:)

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 1/5] exfat: add iomap support
  2026-03-31  5:26     ` Namjae Jeon
  2026-03-31  5:48       ` Christoph Hellwig
@ 2026-04-01  3:07       ` Chi Zhiling
  1 sibling, 0 replies; 31+ messages in thread
From: Chi Zhiling @ 2026-04-01  3:07 UTC (permalink / raw)
  To: Namjae Jeon, Christoph Hellwig
  Cc: sj1557.seo, yuezhang.mo, linux-fsdevel, anmuxixixi, dxdt,
	chizhiling

On 3/31/26 1:26 PM, Namjae Jeon wrote:
> On Mon, Mar 30, 2026 at 3:31 PM Christoph Hellwig <hch@lst.de> wrote:
>>
>>>
>>> -static int exfat_extend_valid_size(struct inode *inode, loff_t new_valid_size)
>>> +int exfat_extend_valid_size(struct inode *inode, loff_t off, bool bsync)
>>
>> This looks like at least partially unrelated refactoring.  Can you
>> split this out into a separate well-documented patch, or maybe even
>> two where one has the bulk reformatting changes, and one adds the
>> new bsync option?  Also it might help to document the bsync option.
>> Often it might be more readable to add a flags parameter with a named
>> flag to make things easier to follow.
> Okay. I'll split it out into a separate patch and add a flags
> parameter with documentation.
>>
>>> -static int exfat_map_cluster(struct inode *inode, unsigned int clu_offset,
>>> -             unsigned int *clu, unsigned int *count, int create)
>>> +int exfat_map_cluster(struct inode *inode, unsigned int clu_offset,
>>> +             unsigned int *clu, unsigned int *count, int create,
>>> +             bool *balloc)
>>
>> Similarly, this would be a good prep patch.
> Okay.
>>
>>> diff --git a/fs/exfat/iomap.c b/fs/exfat/iomap.c
>>> new file mode 100644
>>> index 000000000000..e4135a13454f
>>> --- /dev/null
>>> +++ b/fs/exfat/iomap.c
>>> @@ -0,0 +1,305 @@
>>> +// SPDX-License-Identifier: GPL-2.0-or-later
>>> +/*
>>> + * iomap callack functions
>>> + *
>>> + * Copyright (C) 2026 Namjae Jeon <linkinjeon@kernel.org>
>>> + */
>>
>> Also normally new infrastructure would get added with the users.
>> I.e. the bits you're using in the first iomap conversion would go
>> with that and so on.  But that's not a strict rule.
> I'll try to split the introduction of the iomap infrastructure so that
> only the parts actually used in each conversion step are added
> together with the users.
>>
>>> +/*
>>> + * exfat_iomap_put_folio - Put folio after iomap operation
>>> + *
>>> + * Called when iomap is finished with a folio zero-fills portions of
>>> + * the folio beyond ->valid_size to prevent exposing uninitialized data.
>>> + */
>>> +static void exfat_iomap_put_folio(struct inode *inode, loff_t pos,
>>> +             unsigned int len, struct folio *folio)
>>
>> Can you explain the logic here?  Shouldn't the iomap buffered I/O
>> code do all the needed zeroing for you based on the map type?  If not
>> how could we enhance the core iomap code so that we don't need this
>> in the file system, which feels like a bit of break of abstraction
>> barriers?
> The reason we have exfat_iomap_put_folio() is because of exFAT's
> VDL(->valid_size). When we map a range beyond ->valid_size, if we set
> the map type to IOMAP_UNWRITTEN, iomap_zero_range() does not perform
> zeroing.
> However, if we set it to IOMAP_MAPPED, then iomap_zero_range() treats
> the area beyond ->valid_size as valid data and could expose
> uninitialized garbage data from disk. So it explicitly zero out the
> portion beyond ->valid_size in exfat_iomap_put_folio().
> I agree that it would be better if core iomap could handle this case by itself.
> I'll check whether we can improve the core iomap layer to avoid
> filesystem-specific put_folio handling for ->valid_size.

Perhaps all we need is to use a new iomap_ops in exfat_extend_valid_size 
that treats regions beyond ->valid_size as mapped.

And call exfat_extend_valid_size in advance to extend valid_size so that 
it is aligned to the block size before reads and writes.

Just for what it's worth


Thanks,


^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 3/5] exfat: add iomap buffered I/O support
  2026-03-26 11:50 ` [PATCH 3/5] exfat: add iomap buffered " Namjae Jeon
  2026-03-30  6:38   ` Christoph Hellwig
@ 2026-04-06 13:09   ` David Timber
  2026-04-07  6:28     ` Christoph Hellwig
  1 sibling, 1 reply; 31+ messages in thread
From: David Timber @ 2026-04-06 13:09 UTC (permalink / raw)
  To: Namjae Jeon, sj1557.seo, yuezhang.mo
  Cc: linux-fsdevel, anmuxixixi, hch, chizhiling

On 3/26/26 20:50, Namjae Jeon wrote:
> Add full buffered I/O support using the iomap framework to the exfat
> filesystem. This replaces the old exfat_get_block(), exfat_write_begin(),
> exfat_write_end(), and exfat_block_truncate_page() functions with their
> iomap equivalents. Buffered writes now use iomap_file_buffered_write(),
> read uses iomap_bio_read_folio() and iomap_bio_readahead(), and writeback
> is handled through iomap_writepages().
> 
> Signed-off-by: Namjae Jeon <linkinjeon@kernel.org>
> ---
>  fs/exfat/exfat_fs.h |   3 +-
>  fs/exfat/file.c     | 160 +++++++++++++++++++--------
>  fs/exfat/inode.c    | 261 ++++----------------------------------------
>  3 files changed, 142 insertions(+), 282 deletions(-)
> 
> diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h
> index 860f2e438b63..54da001a8f55 100644
> --- a/fs/exfat/exfat_fs.h
> +++ b/fs/exfat/exfat_fs.h
> @@ -87,7 +87,7 @@ enum {
>  /*
>   * helpers for cluster size to byte conversion.
>   */
> -#define EXFAT_CLU_TO_B(b, sbi)		((b) << (sbi)->cluster_size_bits)
> +#define EXFAT_CLU_TO_B(b, sbi)		((loff_t)(b) << (sbi)->cluster_size_bits)

I think type casting should be left to the users of the macro. These
helper macros are quite dangerous in case of (ex)FAT because integer
overflows are unchecked for shift op. The on-disk format is 32 bit but
the kernel has switched over to "all-64-bit".

That particular macro is used in dirent update code and, in particular,
in exfat_map_cluster():

    inode->i_blocks += EXFAT_CLU_TO_B(num_to_be_allocated, sbi) >> 9;

The type of i_blocks is blkcnt_t, which is unsigned. loff_t is signed.
exFAT got away with it because Linux write calls expand the size of
files no more than 0x7ffff000 bytes and the size of dir is limited to
256MB. However, someone could come in and decide to use
exfat_map_cluster() to allocate more than this limit.

I don't think we can win because of the inherent discrepancy between the
on-disk format and the kernel interface. How the calculated value is
used cannot be safely assumed.

>  #define EXFAT_B_TO_CLU(b, sbi)		((b) >> (sbi)->cluster_size_bits)
>  #define EXFAT_B_TO_CLU_ROUND_UP(b, sbi)	\
>  	(((b - 1) >> (sbi)->cluster_size_bits) + 1)
> @@ -551,7 +551,6 @@ struct inode *exfat_iget(struct super_block *sb, loff_t i_pos);
>  int __exfat_write_inode(struct inode *inode, int sync);
>  int exfat_write_inode(struct inode *inode, struct writeback_control *wbc);
>  void exfat_evict_inode(struct inode *inode);
> -int exfat_block_truncate_page(struct inode *inode, loff_t from);
>  int exfat_map_cluster(struct inode *inode, unsigned int clu_offset,
>  		unsigned int *clu, unsigned int *count, int create,
>  		bool *balloc);
> diff --git a/fs/exfat/file.c b/fs/exfat/file.c
> index 2a9263b4433b..5f85e2e0a71e 100644
> --- a/fs/exfat/file.c
> +++ b/fs/exfat/file.c
> @@ -337,7 +337,18 @@ int exfat_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
>  
>  	if ((attr->ia_valid & ATTR_SIZE) &&
>  	    attr->ia_size > i_size_read(inode)) {
> +		loff_t old_size = i_size_read(inode);
> +
>  		error = exfat_cont_expand(inode, attr->ia_size);
> +		if (!error && attr->ia_size > old_size &&
> +		    old_size % PAGE_SIZE != 0) {
> +			loff_t len = min_t(loff_t,
> +					round_up(old_size, PAGE_SIZE) - old_size,
> +					attr->ia_size - old_size);
> +			error = iomap_zero_range(inode, old_size, len,
> +					NULL, &exfat_read_iomap_ops,
> +					&exfat_iomap_folio_ops, NULL);
> +		}
>  		if (error || attr->ia_valid == ATTR_SIZE)
>  			return error;
>  		attr->ia_valid &= ~ATTR_SIZE;
> @@ -384,7 +395,10 @@ int exfat_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
>  	exfat_truncate_inode_atime(inode);
>  
>  	if (attr->ia_valid & ATTR_SIZE) {
> -		error = exfat_block_truncate_page(inode, attr->ia_size);
> +		inode_dio_wait(inode);
> +		error = iomap_truncate_page(inode, attr->ia_size, NULL,
> +				&exfat_read_iomap_ops,
> +				&exfat_iomap_folio_ops, NULL);
>  		if (error)
>  			goto out;
>  
> @@ -619,10 +633,14 @@ int exfat_file_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
>  	if (unlikely(exfat_forced_shutdown(inode->i_sb)))
>  		return -EIO;
>  
> -	err = __generic_file_fsync(filp, start, end, datasync);
> +	err = file_write_and_wait_range(filp, start, end);
>  	if (err)
>  		return err;
>  
> +	if (!datasync)
> +		err = __exfat_write_inode(inode, 1);
> +	write_inode_now(inode, !datasync);
> +
>  	err = sync_blockdev(inode->i_sb->s_bdev);
>  	if (err)
>  		return err;
> @@ -648,12 +666,56 @@ int exfat_extend_valid_size(struct inode *inode, loff_t off, bool bsync)
>  				NULL);
>  		if (!ret && bsync)
>  			ret = filemap_write_and_wait_range(inode->i_mapping,
> -					old_valid_size, off - 1);
> +							   old_valid_size,
> +							   off - 1);
>  	}
>  
>  	return ret;
>  }
>  
> +static ssize_t exfat_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
> +{
> +	ssize_t ret;
> +
> +	ret = iomap_dio_rw(iocb, from, &exfat_write_iomap_ops,
> +			&exfat_write_dio_ops, 0, NULL, 0);
> +	if (ret == -ENOTBLK)
> +		ret = 0;
> +	else if (ret < 0)
> +		goto out;
> +
> +	if (iov_iter_count(from)) {
> +		loff_t offset, end;
> +		ssize_t written;
> +		int ret2;
> +
> +		offset = iocb->ki_pos;
> +		iocb->ki_flags &= ~IOCB_DIRECT;
> +		written = iomap_file_buffered_write(iocb, from,
> +				&exfat_write_iomap_ops, &exfat_iomap_folio_ops,
> +				NULL);
> +		if (written < 0) {
> +			ret = written;
> +			goto out;
> +		}
> +
> +		ret += written;
> +		end = iocb->ki_pos + written - 1;
> +		ret2 = filemap_write_and_wait_range(iocb->ki_filp->f_mapping,
> +				offset, end);
> +		if (ret2) {
> +			ret = -EIO;
> +			goto out;
> +		}
> +		if (!ret2)
> +			invalidate_mapping_pages(iocb->ki_filp->f_mapping,
> +					offset >> PAGE_SHIFT,
> +					end >> PAGE_SHIFT);
> +	}
> +out:
> +	return ret;
> +}

There's nothing to clean up upon error here. Suggest removing goto. I
noticed that gotos are generally removed where appropriate in this
series of patches, but it looks like you've missed this bit.


^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 1/5] exfat: add iomap support
  2026-03-26 11:50 ` [PATCH 1/5] exfat: add iomap support Namjae Jeon
                     ` (2 preceding siblings ...)
  2026-04-01  2:24   ` Yuezhang.Mo
@ 2026-04-06 13:45   ` David Timber
  2026-04-06 14:13   ` David Timber
  4 siblings, 0 replies; 31+ messages in thread
From: David Timber @ 2026-04-06 13:45 UTC (permalink / raw)
  To: Namjae Jeon, sj1557.seo, yuezhang.mo
  Cc: linux-fsdevel, anmuxixixi, chizhiling, hch

On 3/26/26 20:50, Namjae Jeon wrote:
> Add iomap support to the exfat filesystem. This patch introduces the
> necessary iomap infrastructure by adding a new iomap.c file and related
> iomap operations. The main change is converting exfat_extend_valid_size()
> to use iomap_zero_range() instead of the legacy write_begin/write_end path.
> To support this, exfat_map_cluster() is extended to return whether a new
> cluster was allocated via a balloc flag, and a new helper function
> exfat_cluster_to_phys() is added. Also, data_start_bytes is added to
> struct exfat_sb_info for easier conversion from cluster number to physical
> byte offset.
> 
> Signed-off-by: Namjae Jeon <linkinjeon@kernel.org>
> ---
>  fs/exfat/Makefile   |   2 +-
>  fs/exfat/exfat_fs.h |  12 ++
>  fs/exfat/file.c     |  54 +++-----
>  fs/exfat/inode.c    |   9 +-
>  fs/exfat/iomap.c    | 305 ++++++++++++++++++++++++++++++++++++++++++++
>  fs/exfat/iomap.h    |  16 +++
>  fs/exfat/super.c    |   1 +
>  7 files changed, 361 insertions(+), 38 deletions(-)
>  create mode 100644 fs/exfat/iomap.c
>  create mode 100644 fs/exfat/iomap.h
> 
> diff --git a/fs/exfat/Makefile b/fs/exfat/Makefile
> index ed51926a4971..e06bf85870ae 100644
> --- a/fs/exfat/Makefile
> +++ b/fs/exfat/Makefile
> @@ -5,4 +5,4 @@
>  obj-$(CONFIG_EXFAT_FS) += exfat.o
>  
>  exfat-y	:= inode.o namei.o dir.o super.o fatent.o cache.o nls.o misc.o \
> -	   file.o balloc.o
> +	   file.o balloc.o iomap.o
> diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h
> index 9fed9fb33cae..860f2e438b63 100644
> --- a/fs/exfat/exfat_fs.h
> +++ b/fs/exfat/exfat_fs.h
> @@ -259,6 +259,7 @@ struct exfat_sb_info {
>  	unsigned long long FAT1_start_sector; /* FAT1 start sector */
>  	unsigned long long FAT2_start_sector; /* FAT2 start sector */
>  	unsigned long long data_start_sector; /* data area start sector */
> +	unsigned long long data_start_bytes;
>  	unsigned int num_FAT_sectors; /* num of FAT sectors */
>  	unsigned int root_dir; /* root dir cluster */
>  	unsigned int dentries_per_clu; /* num of dentries per cluster */
> @@ -432,6 +433,13 @@ static inline loff_t exfat_ondisk_size(const struct inode *inode)
>  	return ((loff_t)inode->i_blocks) << 9;
>  }
>  
> +static inline loff_t exfat_cluster_to_phys(struct exfat_sb_info *sbi,
> +		unsigned int clus)
> +{
> +	return ((loff_t)(clus - EXFAT_RESERVED_CLUSTERS) << sbi->cluster_size_bits) +
> +		sbi->data_start_bytes;
> +}
> +
>  /* super.c */
>  int exfat_set_volume_dirty(struct super_block *sb);
>  int exfat_clear_volume_dirty(struct super_block *sb);
> @@ -480,6 +488,7 @@ long exfat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
>  long exfat_compat_ioctl(struct file *filp, unsigned int cmd,
>  				unsigned long arg);
>  int exfat_force_shutdown(struct super_block *sb, u32 flags);
> +int exfat_extend_valid_size(struct inode *inode, loff_t off, bool bsync);
>  
>  /* namei.c */
>  extern const struct dentry_operations exfat_dentry_ops;
> @@ -543,6 +552,9 @@ int __exfat_write_inode(struct inode *inode, int sync);
>  int exfat_write_inode(struct inode *inode, struct writeback_control *wbc);
>  void exfat_evict_inode(struct inode *inode);
>  int exfat_block_truncate_page(struct inode *inode, loff_t from);
> +int exfat_map_cluster(struct inode *inode, unsigned int clu_offset,
> +		unsigned int *clu, unsigned int *count, int create,
> +		bool *balloc);
>  
>  /* exfat/nls.c */
>  unsigned short exfat_toupper(struct super_block *sb, unsigned short a);
> diff --git a/fs/exfat/file.c b/fs/exfat/file.c
> index 2daf0dbabb24..756846b774c4 100644
> --- a/fs/exfat/file.c
> +++ b/fs/exfat/file.c
> @@ -14,9 +14,11 @@
>  #include <linux/writeback.h>
>  #include <linux/filelock.h>
>  #include <linux/falloc.h>
> +#include <linux/iomap.h>
>  
>  #include "exfat_raw.h"
>  #include "exfat_fs.h"
> +#include "iomap.h"
>  
>  static int exfat_cont_expand(struct inode *inode, loff_t size)
>  {
> @@ -628,44 +630,28 @@ int exfat_file_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
>  	return blkdev_issue_flush(inode->i_sb->s_bdev);
>  }
>  
> -static int exfat_extend_valid_size(struct inode *inode, loff_t new_valid_size)
> +int exfat_extend_valid_size(struct inode *inode, loff_t off, bool bsync)
>  {
> -	int err;
> -	loff_t pos;
>  	struct exfat_inode_info *ei = EXFAT_I(inode);
> -	struct address_space *mapping = inode->i_mapping;
> -	const struct address_space_operations *ops = mapping->a_ops;
> -
> -	pos = ei->valid_size;
> -	while (pos < new_valid_size) {
> -		u32 len;
> -		struct folio *folio;
> -		unsigned long off;
> -
> -		len = PAGE_SIZE - (pos & (PAGE_SIZE - 1));
> -		if (pos + len > new_valid_size)
> -			len = new_valid_size - pos;
> -
> -		err = ops->write_begin(NULL, mapping, pos, len, &folio, NULL);
> -		if (err)
> -			goto out;
> -
> -		off = offset_in_folio(folio, pos);
> -		folio_zero_new_buffers(folio, off, off + len);
> +	struct exfat_sb_info *sbi = EXFAT_SB(inode->i_sb);
> +	loff_t old_valid_size;
> +	int ret = 0;
>  
> -		err = ops->write_end(NULL, mapping, pos, len, len, folio, NULL);
> -		if (err < 0)
> -			goto out;
> -		pos += len;
> +	mutex_lock(&sbi->s_lock);
> +	old_valid_size = ei->valid_size;
> +	mutex_unlock(&sbi->s_lock);
>  
> -		balance_dirty_pages_ratelimited(mapping);
> -		cond_resched();
> +	if (old_valid_size < off) {
> +		ret = iomap_zero_range(inode, old_valid_size,
> +				off - old_valid_size, NULL,
> +				&exfat_write_iomap_ops, &exfat_iomap_folio_ops,
> +				NULL);
> +		if (!ret && bsync)
> +			ret = filemap_write_and_wait_range(inode->i_mapping,
> +					old_valid_size, off - 1);
>  	}
>  
> -	return 0;
> -
> -out:
> -	return err;
> +	return ret;
>  }
>  
>  static ssize_t exfat_file_write_iter(struct kiocb *iocb, struct iov_iter *iter)
> @@ -702,7 +688,7 @@ static ssize_t exfat_file_write_iter(struct kiocb *iocb, struct iov_iter *iter)
>  	}
>  
>  	if (pos > valid_size) {
> -		ret = exfat_extend_valid_size(inode, pos);
> +		ret = exfat_extend_valid_size(inode, pos, false);
>  		if (ret < 0 && ret != -ENOSPC) {
>  			exfat_err(inode->i_sb,
>  				"write: fail to zero from %llu to %llu(%zd)",
> @@ -760,7 +746,7 @@ static vm_fault_t exfat_page_mkwrite(struct vm_fault *vmf)
>  	new_valid_size = min(new_valid_size, i_size_read(inode));
>  
>  	if (ei->valid_size < new_valid_size) {
> -		err = exfat_extend_valid_size(inode, new_valid_size);
> +		err = exfat_extend_valid_size(inode, new_valid_size, false);
>  		if (err < 0) {
>  			inode_unlock(inode);
>  			return vmf_fs_error(err);
> diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c
> index beb9ea7cca9f..cc54cce65a31 100644
> --- a/fs/exfat/inode.c
> +++ b/fs/exfat/inode.c
> @@ -123,8 +123,9 @@ void exfat_sync_inode(struct inode *inode)
>   * Output: errcode, cluster number
>   * *clu = (~0), if it's unable to allocate a new cluster
>   */
> -static int exfat_map_cluster(struct inode *inode, unsigned int clu_offset,
> -		unsigned int *clu, unsigned int *count, int create)
> +int exfat_map_cluster(struct inode *inode, unsigned int clu_offset,
> +		unsigned int *clu, unsigned int *count, int create,
> +		bool *balloc)
>  {
>  	int ret;
>  	unsigned int last_clu;
> @@ -235,6 +236,7 @@ static int exfat_map_cluster(struct inode *inode, unsigned int clu_offset,
>  			}
>  		}
>  		*count = 1;
> +		*balloc = true;
>  	}
>  
>  	/* hint information */
> @@ -258,6 +260,7 @@ static int exfat_get_block(struct inode *inode, sector_t iblock,
>  	sector_t phys = 0;
>  	sector_t valid_blks;
>  	loff_t i_size;
> +	bool balloc;
>  
>  	mutex_lock(&sbi->s_lock);
>  	i_size = i_size_read(inode);
> @@ -268,7 +271,7 @@ static int exfat_get_block(struct inode *inode, sector_t iblock,
>  	/* Is this block already allocated? */
>  	count = EXFAT_B_TO_CLU_ROUND_UP(bh_result->b_size, sbi);
>  	err = exfat_map_cluster(inode, iblock >> sbi->sect_per_clus_bits,
> -			&cluster, &count, create);
> +			&cluster, &count, create, &balloc);
>  	if (err) {
>  		if (err != -ENOSPC)
>  			exfat_fs_error_ratelimit(sb,
> diff --git a/fs/exfat/iomap.c b/fs/exfat/iomap.c
> new file mode 100644
> index 000000000000..e4135a13454f
> --- /dev/null
> +++ b/fs/exfat/iomap.c
> @@ -0,0 +1,305 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +/*
> + * iomap callack functions
> + *
> + * Copyright (C) 2026 Namjae Jeon <linkinjeon@kernel.org>
> + */
> +
> +#include <linux/iomap.h>
> +#include <linux/pagemap.h>
> +
> +#include "exfat_raw.h"
> +#include "exfat_fs.h"
> +#include "iomap.h"
> +
> +/*
> + * exfat_iomap_put_folio - Put folio after iomap operation
> + *
> + * Called when iomap is finished with a folio zero-fills portions of
> + * the folio beyond ->valid_size to prevent exposing uninitialized data.
> + */
> +static void exfat_iomap_put_folio(struct inode *inode, loff_t pos,
> +		unsigned int len, struct folio *folio)
> +{
> +	struct exfat_inode_info *ei = EXFAT_I(inode);
> +	struct exfat_sb_info *sbi = EXFAT_SB(inode->i_sb);
> +	unsigned long sector_size = 1UL << inode->i_blkbits;
> +	loff_t start_down, end_up, init;
> +
> +	mutex_lock(&sbi->s_lock);
> +	start_down = round_down(pos, sector_size);
> +	end_up = (pos + len - 1) | (sector_size - 1);
> +	init = ei->valid_size;
> +
> +	if (init >= start_down && init <= end_up) {
> +		if (init < pos) {
> +			loff_t offset = offset_in_folio(folio, pos + len);
> +
> +			if (offset == 0)
> +				offset = folio_size(folio);
> +			folio_zero_segments(folio,
> +					offset_in_folio(folio, init),
> +					offset_in_folio(folio, pos),
> +					offset,
> +					folio_size(folio));
> +
> +		} else  {
> +			loff_t offset = max_t(loff_t, pos + len, init);
> +
> +			offset = offset_in_folio(folio, offset);
> +			if (offset == 0)
> +				offset = folio_size(folio);
> +			folio_zero_segment(folio,
> +					offset,
> +					folio_size(folio));
> +		}
> +	} else if (init <= pos) {
> +		loff_t offset = 0, offset2 = offset_in_folio(folio, pos + len);
> +
> +		if ((init >> folio_shift(folio)) == (pos >> folio_shift(folio)))
> +			offset = offset_in_folio(folio, init);
> +		if (offset2 == 0)
> +			offset2 = folio_size(folio);
> +		folio_zero_segments(folio,
> +				offset,
> +				offset_in_folio(folio, pos),
> +				offset2,
> +				folio_size(folio));
> +	}
> +
> +	folio_unlock(folio);
> +	folio_put(folio);
> +	mutex_unlock(&sbi->s_lock);
> +}
> +
> +const struct iomap_write_ops exfat_iomap_folio_ops = {
> +	.put_folio = exfat_iomap_put_folio,
> +};
> +
> +/*
> + * exfat_file_write_dio_end_io - Direct I/O write completion handler
> + *
> + * Updates i_size if the write extended the file. Called from the dio layer
> + * after I/O completion.
> + */
> +static int exfat_file_write_dio_end_io(struct kiocb *iocb, ssize_t size,
> +		int error, unsigned int flags)
> +{
> +	struct inode *inode = file_inode(iocb->ki_filp);
> +
> +	if (error)
> +		return error;
> +
> +	if (size && i_size_read(inode) < iocb->ki_pos + size) {
> +		i_size_write(inode, iocb->ki_pos + size);
> +		mark_inode_dirty(inode);
> +	}
> +
> +	return 0;
> +}
> +
> +const struct iomap_dio_ops exfat_write_dio_ops = {
> +	.end_io		= exfat_file_write_dio_end_io,
> +};
> +
> +/*
> + * exfat_read_iomap_begin - Begin mapping for reads
> + *
> + * Maps file range to disk location for read operations (read folio,
> + * readahead, direct I/O read, etc.).
> + *
> + * Returns IOMAP_MAPPED for areas within ->valid_size, and IOMAP_UNWRITTEN
> + * for allocated but uninitialized regions beyond ->valid_size.
> + */
> +static int exfat_read_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
> +		unsigned int flags, struct iomap *iomap, struct iomap *srcmap)
> +{
> +	struct super_block *sb = inode->i_sb;
> +	struct exfat_sb_info *sbi = EXFAT_SB(sb);
> +	struct exfat_inode_info *ei = EXFAT_I(inode);
> +	unsigned int cluster, num_clusters = EXFAT_B_TO_CLU_ROUND_UP(length, sbi);
> +	loff_t cluster_offset, cluster_length;
> +	int err = 0;
> +	bool balloc = false;
> +
> +	mutex_lock(&sbi->s_lock);
> +	iomap->bdev = inode->i_sb->s_bdev;
> +	iomap->offset = offset;
> +
> +	err = exfat_map_cluster(inode, EXFAT_B_TO_CLU(offset, sbi),
> +			&cluster, &num_clusters, false, &balloc);
> +	if (err)
> +		goto out;
> +
> +	cluster_offset = EXFAT_CLU_OFFSET(offset, sbi);
> +	cluster_length = EXFAT_CLU_TO_B(num_clusters, sbi);
> +	if (length > cluster_length - cluster_offset)
> +		iomap->length = cluster_length - cluster_offset;
> +	else
> +		iomap->length = length;
> +
> +	iomap->addr = exfat_cluster_to_phys(sbi, cluster) + cluster_offset;
> +	if (offset >= ei->valid_size)
> +		iomap->type = IOMAP_UNWRITTEN;
> +	else
> +		iomap->type = IOMAP_MAPPED;
> +
> +	if (!(flags & IOMAP_ZERO) && iomap->type == IOMAP_MAPPED &&
> +	    iomap->offset < ei->valid_size &&
> +	    iomap->offset + iomap->length > ei->valid_size) {
> +		iomap->length = round_up(ei->valid_size, 1 << inode->i_blkbits) -
> +			iomap->offset;
> +	}
> +
> +	iomap->flags |= IOMAP_F_MERGED;
> +out:
> +	mutex_unlock(&sbi->s_lock);
> +	return err;
> +}
> +
> +const struct iomap_ops exfat_read_iomap_ops = {
> +	.iomap_begin = exfat_read_iomap_begin,
> +};
> +
> +/*
> + * __exfat_write_iomap_begin - mapping logic for writes
> + *
> + * Maps the requested range and allocates clusters if needed.
> + */
> +static int __exfat_write_iomap_begin(struct inode *inode, loff_t offset,
> +		loff_t length, struct iomap *iomap)
> +{
> +	struct super_block *sb = inode->i_sb;
> +	struct exfat_sb_info *sbi = EXFAT_SB(sb);
> +	unsigned int cluster, num_clusters;
> +	loff_t cluster_offset, cluster_length;
> +	int err;
> +	bool balloc = false;
> +
> +	num_clusters = max(EXFAT_B_TO_CLU_ROUND_UP(offset + length, sbi) -
> +		EXFAT_B_TO_CLU_ROUND_UP(offset, sbi), 1);
> +	mutex_lock(&sbi->s_lock);
> +	err = exfat_map_cluster(inode, EXFAT_B_TO_CLU(offset, sbi),
> +			&cluster, &num_clusters, true, &balloc);
> +	if (err)
> +		goto out;
> +
> +	iomap->bdev = inode->i_sb->s_bdev;
> +	iomap->offset = offset;
> +
> +	cluster_offset = EXFAT_CLU_OFFSET(offset, sbi);
> +	cluster_length = EXFAT_CLU_TO_B(num_clusters, sbi);
> +	if (length > cluster_length - cluster_offset)
> +		iomap->length = cluster_length - cluster_offset;
> +	else
> +		iomap->length = length;
> +	iomap->addr = exfat_cluster_to_phys(sbi, cluster) + cluster_offset;
> +	iomap->type = IOMAP_MAPPED;
> +	if (balloc)
> +		iomap->flags = IOMAP_F_NEW;
> +out:
> +	mutex_unlock(&sbi->s_lock);
> +	return err;
> +}
> +
> +/*
> + * exfat_write_iomap_begin - Mapping for write operations
> + *
> + * Extends ->valid_size if the write starts beyond current initialized size.
> + * Then performs actual block mapping (possibly allocating clusters).
> + */
> +static int exfat_write_iomap_begin(struct inode *inode, loff_t offset,
> +		loff_t length, unsigned int flags, struct iomap *iomap,
> +		struct iomap *srcmap)
> +{
> +	int ret;
> +
> +	if (EXFAT_I(inode)->valid_size < offset) {
> +		ret = exfat_extend_valid_size(inode, offset,
> +				flags & IOMAP_DIRECT ? true : false);
> +		if (ret)
> +			return ret;
> +	}
> +
> +	ret = __exfat_write_iomap_begin(inode, offset, length, iomap);
> +
> +	if (!(flags & IOMAP_DIRECT) && !ret &&
> +	    i_size_read(inode) < iomap->offset + iomap->length) {
> +		i_size_write(inode, iomap->offset + iomap->length);
> +		mark_inode_dirty(inode);
> +	}
> +
> +	return ret;
> +}

Might be a stupid question, but why should this be handled differently
from mkwrite? Also, dirtying the inode here seems redundant. If this is
for the device going away due to power/link failure, I wonder if the
inode would get the chance to be synced between begin() and end() for
this to be effective. Sure, the new clusters are allocated for the
mapping, but before end() is called, what's the point in updating the
isize if there's no data written to the new clusters, yet?

I don't think it matters that much anyways since exFAT is not
journalled. I'm only pointing this out because mark_inode_dirty() is not
a cheap operation.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 1/5] exfat: add iomap support
  2026-03-26 11:50 ` [PATCH 1/5] exfat: add iomap support Namjae Jeon
                     ` (3 preceding siblings ...)
  2026-04-06 13:45   ` David Timber
@ 2026-04-06 14:13   ` David Timber
  4 siblings, 0 replies; 31+ messages in thread
From: David Timber @ 2026-04-06 14:13 UTC (permalink / raw)
  To: Namjae Jeon, sj1557.seo, yuezhang.mo
  Cc: linux-fsdevel, anmuxixixi, chizhiling, hch

On 3/26/26 20:50, Namjae Jeon wrote:
> +int exfat_extend_valid_size(struct inode *inode, loff_t off, bool bsync);

Since exFAT is fully embracing bool from now on, would you consider
refactoring the entire exFAT codebase to use bool where it fits, to cut
down on stack usage and improve readability?

int and unsigned char being used as bool in the code copied and pasted
from vfat has been bugging me for a while.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 3/5] exfat: add iomap buffered I/O support
  2026-04-06 13:09   ` David Timber
@ 2026-04-07  6:28     ` Christoph Hellwig
  0 siblings, 0 replies; 31+ messages in thread
From: Christoph Hellwig @ 2026-04-07  6:28 UTC (permalink / raw)
  To: David Timber
  Cc: Namjae Jeon, sj1557.seo, yuezhang.mo, linux-fsdevel, anmuxixixi,
	hch, chizhiling

On Mon, Apr 06, 2026 at 10:09:42PM +0900, David Timber wrote:
> > -#define EXFAT_CLU_TO_B(b, sbi)		((b) << (sbi)->cluster_size_bits)
> > +#define EXFAT_CLU_TO_B(b, sbi)		((loff_t)(b) << (sbi)->cluster_size_bits)
> 
> I think type casting should be left to the users of the macro. These
> helper macros are quite dangerous in case of (ex)FAT because integer
> overflows are unchecked for shift op. The on-disk format is 32 bit but
> the kernel has switched over to "all-64-bit".
> 
> That particular macro is used in dirent update code and, in particular,
> in exfat_map_cluster():
> 
>     inode->i_blocks += EXFAT_CLU_TO_B(num_to_be_allocated, sbi) >> 9;
> 
> The type of i_blocks is blkcnt_t, which is unsigned. loff_t is signed.
> exFAT got away with it because Linux write calls expand the size of
> files no more than 0x7ffff000 bytes and the size of dir is limited to
> 256MB. However, someone could come in and decide to use
> exfat_map_cluster() to allocate more than this limit.

But that will blow up in so many other ways.  The only sane thing
to do here is to replace those macros with inline functions that
include the proper casting.


^ permalink raw reply	[flat|nested] 31+ messages in thread

end of thread, other threads:[~2026-04-07  6:28 UTC | newest]

Thread overview: 31+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-03-26 11:50 [PATCH 0/5] exfat: convert to iomap Namjae Jeon
2026-03-26 11:50 ` [PATCH 1/5] exfat: add iomap support Namjae Jeon
2026-03-30  2:45   ` Chi Zhiling
2026-03-31  5:29     ` Namjae Jeon
2026-03-30  6:30   ` Christoph Hellwig
2026-03-31  5:26     ` Namjae Jeon
2026-03-31  5:48       ` Christoph Hellwig
2026-03-31  6:44         ` Namjae Jeon
2026-04-01  3:07       ` Chi Zhiling
2026-04-01  2:24   ` Yuezhang.Mo
2026-04-01  2:47     ` Namjae Jeon
2026-04-06 13:45   ` David Timber
2026-04-06 14:13   ` David Timber
2026-03-26 11:50 ` [PATCH 2/5] exfat: add iomap direct I/O support Namjae Jeon
2026-03-30  6:33   ` Christoph Hellwig
2026-03-31  5:23     ` Namjae Jeon
2026-03-26 11:50 ` [PATCH 3/5] exfat: add iomap buffered " Namjae Jeon
2026-03-30  6:38   ` Christoph Hellwig
2026-03-31  5:22     ` Namjae Jeon
2026-03-31  5:46       ` Christoph Hellwig
2026-03-31  6:36         ` Namjae Jeon
2026-03-31  6:37           ` Christoph Hellwig
2026-03-31  6:58             ` Namjae Jeon
2026-04-06 13:09   ` David Timber
2026-04-07  6:28     ` Christoph Hellwig
2026-03-26 11:50 ` [PATCH 4/5] exfat: add support for multi-cluster allocation Namjae Jeon
2026-03-26 11:50 ` [PATCH 5/5] exfat: add support for SEEK_HOLE and SEEK_DATA in llseek Namjae Jeon
2026-03-30  6:39   ` Christoph Hellwig
2026-03-31  4:55     ` Namjae Jeon
2026-03-27  6:33 ` [PATCH 0/5] exfat: convert to iomap Christoph Hellwig
2026-03-27  6:46   ` Namjae Jeon

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox