* [PATCH 01/19] fuse2fs: implement bare minimum iomap for file mapping reporting
2026-04-29 14:20 ` [PATCHSET v8 2/6] fuse2fs: use fuse iomap data paths for better file I/O performance Darrick J. Wong
@ 2026-04-29 14:52 ` Darrick J. Wong
2026-04-29 14:53 ` [PATCH 02/19] fuse2fs: add iomap= mount option Darrick J. Wong
` (17 subsequent siblings)
18 siblings, 0 replies; 50+ messages in thread
From: Darrick J. Wong @ 2026-04-29 14:52 UTC (permalink / raw)
To: tytso
Cc: bernd, miklos, linux-ext4, neal, linux-fsdevel, fuse-devel,
joannelkoong
From: Darrick J. Wong <djwong@kernel.org>
Add enough of an iomap implementation that we can do FIEMAP and
SEEK_DATA and SEEK_HOLE.
Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
---
configure | 49 +++++
configure.ac | 31 +++
fuse4fs/fuse4fs.c | 540 +++++++++++++++++++++++++++++++++++++++++++++++++++++
lib/config.h.in | 3
misc/fuse2fs.c | 540 +++++++++++++++++++++++++++++++++++++++++++++++++++++
5 files changed, 1161 insertions(+), 2 deletions(-)
diff --git a/configure b/configure
index 80aad505da550c..344c7af2ee48f8 100755
--- a/configure
+++ b/configure
@@ -14608,6 +14608,7 @@ printf "%s\n" "yes" >&6; }
fi
+have_fuse_iomap=
if test -n "$FUSE_LIB"
then
FUSE_USE_VERSION=319
@@ -14634,12 +14635,60 @@ esac
fi
done
+
+ { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for iomap_begin in libfuse" >&5
+printf %s "checking for iomap_begin in libfuse... " >&6; }
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+ #define _GNU_SOURCE
+ #define _FILE_OFFSET_BITS 64
+ #define FUSE_USE_VERSION 399
+ #include <fuse.h>
+
+int
+main (void)
+{
+
+ struct fuse_operations fs_ops = {
+ .iomap_begin = NULL,
+ .iomap_end = NULL,
+ };
+ struct fuse_file_iomap narf = { };
+
+ ;
+ return 0;
+}
+
+_ACEOF
+if ac_fn_c_try_link "$LINENO"
+then :
+ have_fuse_iomap=yes
+ { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+printf "%s\n" "yes" >&6; }
+else case e in #(
+ e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; } ;;
+esac
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam \
+ conftest$ac_exeext conftest.$ac_ext
+ if test "$have_fuse_iomap" = yes
+ then
+ FUSE_USE_VERSION=399
+ fi
fi
if test -n "$FUSE_USE_VERSION"
then
printf "%s\n" "#define FUSE_USE_VERSION $FUSE_USE_VERSION" >>confdefs.h
+fi
+if test -n "$have_fuse_iomap"
+then
+
+printf "%s\n" "#define HAVE_FUSE_IOMAP 1" >>confdefs.h
+
fi
have_fuse_lowlevel=
diff --git a/configure.ac b/configure.ac
index 63a5cd697a6dde..8d85e9966877ea 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1385,6 +1385,7 @@ dnl
dnl Set FUSE_USE_VERSION, which is how fuse servers build against a particular
dnl libfuse ABI. Currently we link against the libfuse 3.19 ABI (hence 319)
dnl
+have_fuse_iomap=
if test -n "$FUSE_LIB"
then
FUSE_USE_VERSION=319
@@ -1394,12 +1395,42 @@ then
[AC_MSG_FAILURE([Cannot build against fuse3 headers])],
[#define _FILE_OFFSET_BITS 64
#define FUSE_USE_VERSION 319])
+
+ dnl
+ dnl Check if the fuse library supports iomap, which requires a higher
+ dnl FUSE_USE_VERSION ABI version (3.99)
+ dnl
+ AC_MSG_CHECKING(for iomap_begin in libfuse)
+ AC_LINK_IFELSE(
+ [ AC_LANG_PROGRAM([[
+ #define _GNU_SOURCE
+ #define _FILE_OFFSET_BITS 64
+ #define FUSE_USE_VERSION 399
+ #include <fuse.h>
+ ]], [[
+ struct fuse_operations fs_ops = {
+ .iomap_begin = NULL,
+ .iomap_end = NULL,
+ };
+ struct fuse_file_iomap narf = { };
+ ]])
+ ], have_fuse_iomap=yes
+ AC_MSG_RESULT(yes),
+ AC_MSG_RESULT(no))
+ if test "$have_fuse_iomap" = yes
+ then
+ FUSE_USE_VERSION=399
+ fi
fi
if test -n "$FUSE_USE_VERSION"
then
AC_DEFINE_UNQUOTED(FUSE_USE_VERSION, $FUSE_USE_VERSION,
[Define to the version of FUSE to use])
fi
+if test -n "$have_fuse_iomap"
+then
+ AC_DEFINE(HAVE_FUSE_IOMAP, 1, [Define to 1 if fuse supports iomap])
+fi
dnl
dnl Check if the FUSE lowlevel library is supported
diff --git a/fuse4fs/fuse4fs.c b/fuse4fs/fuse4fs.c
index dc5a0ede9f5072..a159024f778ba2 100644
--- a/fuse4fs/fuse4fs.c
+++ b/fuse4fs/fuse4fs.c
@@ -155,6 +155,9 @@ static inline uint64_t round_down(uint64_t b, unsigned int align)
return b - m;
}
+#define max(a, b) ((a) > (b) ? (a) : (b))
+#define min(a, b) ((a) < (b) ? (a) : (b))
+
#define dbg_printf(fuse4fs, format, ...) \
while ((fuse4fs)->debug) { \
printf("FUSE4FS (%s): tid=%llu " format, (fuse4fs)->shortdev, get_thread_id(), ##__VA_ARGS__); \
@@ -233,6 +236,14 @@ enum fuse4fs_opstate {
F4OP_SHUTDOWN,
};
+#ifdef HAVE_FUSE_IOMAP
+enum fuse4fs_iomap_state {
+ IOMAP_DISABLED,
+ IOMAP_UNKNOWN,
+ IOMAP_ENABLED,
+};
+#endif
+
/* Main program context */
#define FUSE4FS_MAGIC (0xEF53DEADUL)
struct fuse4fs {
@@ -260,6 +271,9 @@ struct fuse4fs {
int logfd;
int blocklog;
int oom_score_adj;
+#ifdef HAVE_FUSE_IOMAP
+ enum fuse4fs_iomap_state iomap_state;
+#endif
unsigned int blockmask;
unsigned long offset;
unsigned int next_generation;
@@ -882,6 +896,15 @@ fuse4fs_set_handle(struct fuse_file_info *fp, struct fuse4fs_file_handle *fh)
fp->keep_cache = 1;
}
+#ifdef HAVE_FUSE_IOMAP
+/*
+ * Return nonzero if iomap is enabled for this mount. The >= comparison
+ * depends on IOMAP_ENABLED being the last value of enum fuse4fs_iomap_state.
+ */
+static inline int fuse4fs_iomap_enabled(const struct fuse4fs *ff)
+{
+ return ff->iomap_state >= IOMAP_ENABLED;
+}
+#else
+/* Built without libfuse iomap support: iomap is never enabled. */
+# define fuse4fs_iomap_enabled(...) (0)
+#endif
+
static void get_now(struct timespec *now)
{
#ifdef CLOCK_REALTIME
@@ -1514,7 +1537,7 @@ static errcode_t fuse4fs_open(struct fuse4fs *ff)
char options[128];
double deadline;
int flags = EXT2_FLAG_64BITS | EXT2_FLAG_THREADS | EXT2_FLAG_RW |
- EXT2_FLAG_EXCLUSIVE;
+ EXT2_FLAG_EXCLUSIVE | EXT2_FLAG_WRITE_FULL_SUPER;
errcode_t err;
if (ff->lockfile) {
@@ -1808,6 +1831,15 @@ static void op_destroy(void *userdata)
(stats->cache_hits + stats->cache_misses));
}
+	/*
+	 * If we're mounting in iomap mode, we need to unmount in op_destroy so
+	 * that the block device will be released before umount(2) returns.
+	 *
+	 * Go through fuse4fs_iomap_enabled() instead of reading
+	 * ff->iomap_state directly: the field only exists when
+	 * HAVE_FUSE_IOMAP is defined, whereas the helper has a fallback that
+	 * evaluates to 0, so this still builds against older libfuse.
+	 */
+	if (fuse4fs_iomap_enabled(ff)) {
+		fuse4fs_mmp_cancel(ff);
+		fuse4fs_unmount(ff);
+	}
+ }
+
fuse4fs_finish(ff, 0);
}
@@ -1948,6 +1980,44 @@ static inline int fuse_set_feature_flag(struct fuse_conn_info *conn,
}
#endif
+#ifdef HAVE_FUSE_IOMAP
+/*
+ * Decide whether this mount should try to negotiate iomap: it must not have
+ * been disabled already, the IO channel must be flagged as a block device,
+ * and the filesystem offset must be a multiple of the fs block size.
+ */
+static inline bool fuse4fs_wants_iomap(struct fuse4fs *ff)
+{
+ if (ff->iomap_state == IOMAP_DISABLED)
+ return false;
+
+ /* iomap only works with block devices */
+ if (!(ff->fs->io->flags & CHANNEL_FLAGS_BLOCK_DEVICE))
+ return false;
+
+ /*
+ * iomap addrs must be aligned to the bdev lba size; we use fs
+ * blocksize as a proxy here
+ */
+ if (ff->offset % ff->fs->blocksize > 0)
+ return false;
+
+ return true;
+}
+
+/*
+ * Called from op_init to settle the iomap state for this mount.
+ *
+ * NOTE: the unconditional early return below forces iomap off, so the
+ * negotiation code that follows it is intentionally unreachable for now.
+ */
+static void fuse4fs_iomap_enable(struct fuse_conn_info *conn,
+ struct fuse4fs *ff)
+{
+ /* Don't let anyone touch iomap until the end of the patchset. */
+ ff->iomap_state = IOMAP_DISABLED;
+ return;
+
+ /* Enable iomap only if we want it and the feature flag could be set. */
+ if (fuse4fs_wants_iomap(ff) &&
+ fuse_set_feature_flag(conn, FUSE_CAP_IOMAP))
+ ff->iomap_state = IOMAP_ENABLED;
+
+ /* Anything still undecided at this point becomes "disabled". */
+ if (ff->iomap_state == IOMAP_UNKNOWN)
+ ff->iomap_state = IOMAP_DISABLED;
+}
+#else
+# define fuse4fs_iomap_enable(...) ((void)0)
+#endif
+
static void op_init(void *userdata, struct fuse_conn_info *conn)
{
struct fuse4fs *ff = userdata;
@@ -1970,6 +2040,7 @@ static void op_init(void *userdata, struct fuse_conn_info *conn)
#ifdef FUSE_CAP_NO_EXPORT_SUPPORT
fuse_set_feature_flag(conn, FUSE_CAP_NO_EXPORT_SUPPORT);
#endif
+ fuse4fs_iomap_enable(conn, ff);
conn->time_gran = 1;
if (ff->opstate == F4OP_WRITABLE)
@@ -5917,6 +5988,466 @@ static void op_fallocate(fuse_req_t req, fuse_ino_t fino EXT2FS_ATTR((unused)),
}
#endif /* SUPPORT_FALLOCATE */
+#ifdef HAVE_FUSE_IOMAP
+/*
+ * Describe a hole of @count bytes starting at file offset @pos in @iomap.
+ * Only dev, addr, offset, length and type are written; iomap->flags is left
+ * as the caller set it.  @ff is not used.
+ */
+static void fuse4fs_iomap_hole(struct fuse4fs *ff, struct fuse_file_iomap *iomap,
+			       off_t pos, uint64_t count)
+{
+	/* The kind of mapping and the file range it spans... */
+	iomap->type = FUSE_IOMAP_TYPE_HOLE;
+	iomap->offset = pos;
+	iomap->length = count;
+
+	/* ...and a hole has neither a device nor a disk address. */
+	iomap->addr = FUSE_IOMAP_NULL_ADDR;
+	iomap->dev = FUSE_IOMAP_DEV_NULL;
+}
+
+/*
+ * Describe a hole that starts at the block containing @pos and runs to the
+ * block-aligned end of the request or of the file, whichever is further out.
+ *
+ * @count is unsigned, as it is in every caller and in fuse4fs_iomap_hole, so
+ * that pos + count is computed with unsigned arithmetic instead of a signed
+ * off_t addition that could overflow.
+ */
+static void fuse4fs_iomap_hole_to_eof(struct fuse4fs *ff,
+				      struct fuse_file_iomap *iomap, off_t pos,
+				      uint64_t count,
+				      const struct ext2_inode_large *inode)
+{
+	ext2_filsys fs = ff->fs;
+	uint64_t isize = EXT2_I_SIZE(inode);
+
+	/*
+	 * We have to be careful about handling a hole to the right of the
+	 * entire mapping tree.  First, the mapping must start and end on a
+	 * block boundary because they must be aligned to at least an LBA for
+	 * the block layer; and to the fsblock for smoother operation.
+	 *
+	 * As for the length -- we could return a mapping all the way to
+	 * i_size, but i_size could be less than pos + count if we're zeroing
+	 * the EOF block in anticipation of a truncate operation.  Similarly,
+	 * we don't want to end the mapping at pos + count because we know
+	 * there's nothing mapped beyond here.
+	 */
+	uint64_t startoff = round_down(pos, fs->blocksize);
+	uint64_t eofoff = round_up(max(pos + count, isize), fs->blocksize);
+
+	dbg_printf(ff,
+ "pos=0x%llx count=0x%llx isize=0x%llx startoff=0x%llx eofoff=0x%llx\n",
+		   (unsigned long long)pos,
+		   (unsigned long long)count,
+		   (unsigned long long)isize,
+		   (unsigned long long)startoff,
+		   (unsigned long long)eofoff);
+
+	fuse4fs_iomap_hole(ff, iomap, startoff, eofoff - startoff);
+}
+
+/*
+ * Debugging helpers that print an extent record (DUMP_EXTENT) or the extent
+ * tree cursor position (DUMP_INFO) through dbg_printf.  DEBUG_IOMAP is
+ * defined unconditionally right here; if it were not, DUMP_EXTENT,
+ * __DUMP_EXTENT and DUMP_INFO would compile to nothing.
+ */
+#define DEBUG_IOMAP
+#ifdef DEBUG_IOMAP
+# define __DUMP_EXTENT(ff, func, tag, startoff, err, extent) \
+ do { \
+ dbg_printf((ff), \
+ "%s: %s startoff 0x%llx err %ld lblk 0x%llx pblk 0x%llx len 0x%x flags 0x%x\n", \
+ (func), (tag), (startoff), (err), (extent)->e_lblk, \
+ (extent)->e_pblk, (extent)->e_len, \
+ (extent)->e_flags & EXT2_EXTENT_FLAGS_UNINIT); \
+ } while(0)
+/* Same as __DUMP_EXTENT, labelled with the calling function's name. */
+# define DUMP_EXTENT(ff, tag, startoff, err, extent) \
+ __DUMP_EXTENT((ff), __func__, (tag), (startoff), (err), (extent))
+
+# define __DUMP_INFO(ff, func, tag, startoff, err, info) \
+ do { \
+ dbg_printf((ff), \
+ "%s: %s startoff 0x%llx err %ld entry %d/%d/%d level %d/%d\n", \
+ (func), (tag), (startoff), (err), \
+ (info)->curr_entry, (info)->num_entries, \
+ (info)->max_entries, (info)->curr_level, \
+ (info)->max_depth); \
+ } while(0)
+/* Same as __DUMP_INFO, labelled with the calling function's name. */
+# define DUMP_INFO(ff, tag, startoff, err, info) \
+ __DUMP_INFO((ff), __func__, (tag), (startoff), (err), (info))
+#else
+/* NOTE(review): __DUMP_INFO has no stub here; it is only used via DUMP_INFO. */
+# define __DUMP_EXTENT(...) ((void)0)
+# define DUMP_EXTENT(...) ((void)0)
+# define DUMP_INFO(...) ((void)0)
+#endif
+
+/*
+ * Position the extent cursor at or near logical block @startoff and copy the
+ * extent record found there into @bmap.  The returned extent does not
+ * necessarily cover @startoff (see the comment in the body), so callers must
+ * compare it against @startoff themselves.
+ *
+ * Returns 0 on success, EXT2_ET_EXTENT_NOT_FOUND if the cursor has no current
+ * node, or whatever other error ext2fs_extent_get reported.  @func is only
+ * used to label the debug output.
+ */
+static inline errcode_t __fuse4fs_get_mapping_at(struct fuse4fs *ff,
+ ext2_extent_handle_t handle,
+ blk64_t startoff,
+ struct ext2fs_extent *bmap,
+ const char *func)
+{
+ errcode_t err;
+
+ /*
+ * Find the file mapping at startoff. We don't check the return value
+ * of _goto because _get will error out if _goto failed. There's a
+ * subtlety to the outcome of _goto when startoff falls in a sparse
+ * hole however:
+ *
+ * Most of the time, _goto points the cursor at the mapping whose lblk
+ * is just to the left of startoff. The mapping may or may not overlap
+ * startoff; this is ok. In other words, the tree lookup behaves as if
+ * we asked it to use a less than or equals comparison.
+ *
+ * However, if startoff is to the left of the first mapping in the
+ * extent tree, _goto points the cursor at that first mapping because
+ * it doesn't know how to deal with this situation. In this case,
+ * the tree lookup behaves as if we asked it to use a greater than
+ * or equals comparison.
+ *
+ * Note: If _get() returns 'no current node', that means that there
+ * aren't any mappings at all.
+ */
+ ext2fs_extent_goto(handle, startoff);
+ err = ext2fs_extent_get(handle, EXT2_EXTENT_CURRENT, bmap);
+ __DUMP_EXTENT(ff, func, "lookup", startoff, err, bmap);
+ /* Fold "no current node" into the error code that callers test for. */
+ if (err == EXT2_ET_NO_CURRENT_NODE)
+ err = EXT2_ET_EXTENT_NOT_FOUND;
+ return err;
+}
+
+/*
+ * Advance the extent cursor to the next leaf extent and copy it into @bmap.
+ * @bmap is only written on success; on any error it keeps its old contents.
+ *
+ * Returns 0 on success, EXT2_ET_EXTENT_NOT_FOUND when the cursor is already
+ * at the last entry of the root level, or the error reported by the extent
+ * code.  @startoff is only used for debug output; @func is accepted but not
+ * referenced in the body.
+ */
+static inline errcode_t __fuse4fs_get_next_mapping(struct fuse4fs *ff,
+ ext2_extent_handle_t handle,
+ blk64_t startoff,
+ struct ext2fs_extent *bmap,
+ const char *func)
+{
+ struct ext2fs_extent newex;
+ struct ext2_extent_info info;
+ errcode_t err;
+
+ /*
+ * The extent tree code has this (probably broken) behavior that if
+ * more than two of the highest levels of the cursor point at the
+ * rightmost edge of an extent tree block, a _NEXT_LEAF movement fails
+ * to move the cursor position of any of the lower levels. IOWs, if
+ * leaf level N is at the right edge, it will only advance level N-1
+ * to the right. If N-1 was at the right edge, the cursor resets to
+ * record 0 of that level and goes down to the wrong leaf.
+ *
+ * Work around this by walking up (towards root level 0) the extent
+ * tree until we find a level where we're not already at the rightmost
+ * edge. The _NEXT_LEAF movement will walk down the tree to find the
+ * leaves.
+ */
+ err = ext2fs_extent_get_info(handle, &info);
+ DUMP_INFO(ff, "UP?", startoff, err, &info);
+ if (err)
+ return err;
+
+ /* Climb while the current level is sitting on its last entry. */
+ while (info.curr_entry == info.num_entries && info.curr_level > 0) {
+ err = ext2fs_extent_get(handle, EXT2_EXTENT_UP, &newex);
+ DUMP_EXTENT(ff, "UP", startoff, err, &newex);
+ if (err)
+ return err;
+ err = ext2fs_extent_get_info(handle, &info);
+ DUMP_INFO(ff, "UP", startoff, err, &info);
+ if (err)
+ return err;
+ }
+
+ /*
+ * If we're at the root and there are no more entries, there's nothing
+ * else to be found.
+ */
+ if (info.curr_level == 0 && info.curr_entry == info.num_entries)
+ return EXT2_ET_EXTENT_NOT_FOUND;
+
+ /* Otherwise grab this next leaf and return it. */
+ err = ext2fs_extent_get(handle, EXT2_EXTENT_NEXT_LEAF, &newex);
+ DUMP_EXTENT(ff, "NEXT", startoff, err, &newex);
+ if (err)
+ return err;
+
+ *bmap = newex;
+ return 0;
+}
+
+#define fuse4fs_get_mapping_at(ff, handle, startoff, bmap) \
+ __fuse4fs_get_mapping_at((ff), (handle), (startoff), (bmap), __func__)
+#define fuse4fs_get_next_mapping(ff, handle, startoff, bmap) \
+ __fuse4fs_get_next_mapping((ff), (handle), (startoff), (bmap), __func__)
+
+/*
+ * Report the file mapping at @pos for an extent-mapped file.
+ *
+ * On success, @iomap describes either the extent that covers the block
+ * containing @pos, or a hole that starts at that block.  Returns 0 or the
+ * value produced by translate_error(), like the other fuse4fs_iomap_begin_*
+ * helpers (hence the int return type).  @opflags is not used here.
+ */
+static int fuse4fs_iomap_begin_extent(struct fuse4fs *ff, uint64_t ino,
+				      struct ext2_inode_large *inode,
+				      off_t pos, uint64_t count,
+				      uint32_t opflags,
+				      struct fuse_file_iomap *iomap)
+{
+	ext2_extent_handle_t handle;
+	struct ext2fs_extent extent = { };
+	ext2_filsys fs = ff->fs;
+	const blk64_t startoff = FUSE4FS_B_TO_FSBT(ff, pos);
+	errcode_t err;
+	int ret = 0;
+
+	err = ext2fs_extent_open2(fs, ino, EXT2_INODE(inode), &handle);
+	if (err)
+		return translate_error(fs, ino, err);
+
+	err = fuse4fs_get_mapping_at(ff, handle, startoff, &extent);
+	if (err == EXT2_ET_EXTENT_NOT_FOUND) {
+		/* No mappings at all; the whole range is a hole. */
+		fuse4fs_iomap_hole_to_eof(ff, iomap, pos, count, inode);
+		goto out_handle;
+	}
+	if (err) {
+		ret = translate_error(fs, ino, err);
+		goto out_handle;
+	}
+
+	if (startoff < extent.e_lblk) {
+		/*
+		 * Mapping starts to the right of the current position.
+		 * Synthesize a hole going to that next extent.
+		 */
+		fuse4fs_iomap_hole(ff, iomap, FUSE4FS_FSB_TO_B(ff, startoff),
+				FUSE4FS_FSB_TO_B(ff, extent.e_lblk - startoff));
+		goto out_handle;
+	}
+
+	if (startoff >= extent.e_lblk + extent.e_len) {
+		/*
+		 * Mapping ends to the left of the current position.  Try to
+		 * find the next mapping.  If there is no next mapping, the
+		 * whole range is in a hole.
+		 */
+		err = fuse4fs_get_next_mapping(ff, handle, startoff, &extent);
+		if (err == EXT2_ET_EXTENT_NOT_FOUND) {
+			fuse4fs_iomap_hole_to_eof(ff, iomap, pos, count, inode);
+			goto out_handle;
+		}
+		if (err) {
+			/*
+			 * The lookup failed, so @extent still holds the old
+			 * mapping that ends before startoff.  Return the
+			 * error instead of reporting that stale mapping.
+			 */
+			ret = translate_error(fs, ino, err);
+			goto out_handle;
+		}
+
+		/*
+		 * If the new mapping starts to the right of startoff, there's
+		 * a hole from startoff to the start of the new mapping.
+		 */
+		if (startoff < extent.e_lblk) {
+			fuse4fs_iomap_hole(ff, iomap,
+				FUSE4FS_FSB_TO_B(ff, startoff),
+				FUSE4FS_FSB_TO_B(ff, extent.e_lblk - startoff));
+			goto out_handle;
+		}
+
+		/*
+		 * The new mapping starts at startoff.  Something weird
+		 * happened in the extent tree lookup, but we found a valid
+		 * mapping so we'll run with it.
+		 */
+	}
+
+	/* Mapping overlaps startoff, report this. */
+	iomap->dev = FUSE_IOMAP_DEV_NULL;
+	iomap->addr = FUSE4FS_FSB_TO_B(ff, extent.e_pblk) + ff->offset;
+	iomap->offset = FUSE4FS_FSB_TO_B(ff, extent.e_lblk);
+	iomap->length = FUSE4FS_FSB_TO_B(ff, extent.e_len);
+	if (extent.e_flags & EXT2_EXTENT_FLAGS_UNINIT)
+		iomap->type = FUSE_IOMAP_TYPE_UNWRITTEN;
+	else
+		iomap->type = FUSE_IOMAP_TYPE_MAPPED;
+
+out_handle:
+	ext2fs_extent_free(handle);
+	return ret;
+}
+
+/*
+ * Report the file mapping at @pos for a file that uses neither inline data
+ * nor extents, by probing one block at a time with ext2fs_bmap2.  The scan
+ * looks at no more than 131072 bytes starting at @pos.  Returns 0, or the
+ * translate_error() value if the very first lookup fails.  @opflags is not
+ * used here.
+ */
+static int fuse4fs_iomap_begin_indirect(struct fuse4fs *ff, uint64_t ino,
+ struct ext2_inode_large *inode,
+ off_t pos, uint64_t count,
+ uint32_t opflags,
+ struct fuse_file_iomap *iomap)
+{
+ ext2_filsys fs = ff->fs;
+ blk64_t startoff = FUSE4FS_B_TO_FSBT(ff, pos);
+ uint64_t isize = EXT2_I_SIZE(inode);
+ uint64_t real_count = min(count, 131072);
+ const blk64_t endoff = FUSE4FS_B_TO_FSB(ff, pos + real_count);
+ blk64_t startblock;
+ errcode_t err;
+
+ err = ext2fs_bmap2(fs, ino, EXT2_INODE(inode), NULL, 0, startoff, NULL,
+ &startblock);
+ if (err)
+ return translate_error(fs, ino, err);
+
+ /* Start with a one-block mapping; a zero physical block means a hole. */
+ iomap->dev = FUSE_IOMAP_DEV_NULL;
+ iomap->offset = FUSE4FS_FSB_TO_B(ff, startoff);
+ iomap->flags |= FUSE_IOMAP_F_MERGED;
+ if (startblock) {
+ iomap->addr = FUSE4FS_FSB_TO_B(ff, startblock) + ff->offset;
+ iomap->type = FUSE_IOMAP_TYPE_MAPPED;
+ } else {
+ iomap->addr = FUSE_IOMAP_NULL_ADDR;
+ iomap->type = FUSE_IOMAP_TYPE_HOLE;
+ }
+ iomap->length = fs->blocksize;
+
+ /* See how long the mapping goes for. */
+ for (startoff++; startoff < endoff; startoff++) {
+ blk64_t prev_startblock = startblock;
+
+ /*
+ * A lookup failure here only ends the scan; the mapping built
+ * so far is still returned to the caller.
+ */
+ err = ext2fs_bmap2(fs, ino, EXT2_INODE(inode), NULL, 0,
+ startoff, NULL, &startblock);
+ if (err)
+ break;
+
+ /*
+ * Grow a mapped range only while the physical blocks stay
+ * contiguous, and grow a hole only while blocks stay unmapped.
+ */
+ if (iomap->type == FUSE_IOMAP_TYPE_MAPPED) {
+ if (startblock == prev_startblock + 1)
+ iomap->length += fs->blocksize;
+ else
+ break;
+ } else {
+ if (startblock == 0)
+ iomap->length += fs->blocksize;
+ else
+ break;
+ }
+ }
+
+ /*
+ * If this is a hole that goes beyond EOF, report this as a hole to the
+ * end of the range queried so that FIEMAP doesn't go mad.
+ */
+ if (iomap->type == FUSE_IOMAP_TYPE_HOLE &&
+ iomap->offset + iomap->length >= isize)
+ fuse4fs_iomap_hole_to_eof(ff, iomap, pos, count, inode);
+
+ return 0;
+}
+
+/*
+ * Report the file mapping at @pos for an inline-data file.  Positions inside
+ * the first fsblock get a single inline mapping covering that whole block;
+ * positions at or past it get a hole.  Always returns 0.
+ */
+static int fuse4fs_iomap_begin_inline(struct fuse4fs *ff, ext2_ino_t ino,
+				      struct ext2_inode_large *inode, off_t pos,
+				      uint64_t count, struct fuse_file_iomap *iomap)
+{
+	/* ext4 only supports inline data files up to 1 fsb */
+	const uint64_t inline_limit = FUSE4FS_FSB_TO_B(ff, 1);
+
+	if (pos >= inline_limit) {
+		fuse4fs_iomap_hole_to_eof(ff, iomap, pos, count, inode);
+		return 0;
+	}
+
+	iomap->type = FUSE_IOMAP_TYPE_INLINE;
+	iomap->offset = 0;
+	iomap->length = inline_limit;
+	iomap->addr = FUSE_IOMAP_NULL_ADDR;
+	iomap->dev = FUSE_IOMAP_DEV_NULL;
+	return 0;
+}
+
+/*
+ * Build a mapping for a reporting request by handing off to the helper that
+ * matches how the inode stores its data: inline data first, then extents,
+ * and the block-map helper for everything else.
+ */
+static int fuse4fs_iomap_begin_report(struct fuse4fs *ff, ext2_ino_t ino,
+				      struct ext2_inode_large *inode,
+				      off_t pos, uint64_t count,
+				      uint32_t opflags,
+				      struct fuse_file_iomap *read)
+{
+	int ret;
+
+	if (inode->i_flags & EXT4_INLINE_DATA_FL)
+		ret = fuse4fs_iomap_begin_inline(ff, ino, inode, pos, count,
+						 read);
+	else if (inode->i_flags & EXT4_EXTENTS_FL)
+		ret = fuse4fs_iomap_begin_extent(ff, ino, inode, pos, count,
+						 opflags, read);
+	else
+		ret = fuse4fs_iomap_begin_indirect(ff, ino, inode, pos, count,
+						   opflags, read);
+
+	return ret;
+}
+
+/* Stub: mappings for reads are not implemented; always returns -ENOSYS. */
+static int fuse4fs_iomap_begin_read(struct fuse4fs *ff, ext2_ino_t ino,
+ struct ext2_inode_large *inode, off_t pos,
+ uint64_t count, uint32_t opflags,
+ struct fuse_file_iomap *read)
+{
+ return -ENOSYS;
+}
+
+/* Stub: mappings for writes are not implemented; always returns -ENOSYS. */
+static int fuse4fs_iomap_begin_write(struct fuse4fs *ff, ext2_ino_t ino,
+ struct ext2_inode_large *inode, off_t pos,
+ uint64_t count, uint32_t opflags,
+ struct fuse_file_iomap *read)
+{
+ return -ENOSYS;
+}
+
+/*
+ * iomap_begin handler.  Reads the inode, asks the helper selected by
+ * @opflags for one mapping at @pos, checks that the mapping covers @pos, and
+ * replies with it.  Only the FUSE_IOMAP_OP_REPORT path produces mappings; the
+ * read and write helpers return -ENOSYS.  @dontcare is not used.
+ */
+static void op_iomap_begin(fuse_req_t req, fuse_ino_t fino, uint64_t dontcare,
+ off_t pos, uint64_t count, uint32_t opflags)
+{
+ struct fuse4fs *ff = fuse4fs_get(req);
+ struct ext2_inode_large inode;
+ struct fuse_file_iomap read = { };
+ ext2_filsys fs;
+ ext2_ino_t ino;
+ errcode_t err;
+ int ret = 0;
+
+ FUSE4FS_CHECK_CONTEXT(req);
+ FUSE4FS_CONVERT_FINO(req, &ino, fino);
+
+ dbg_printf(ff, "%s: ino=%d pos=0x%llx count=0x%llx opflags=0x%x\n",
+ __func__, ino,
+ (unsigned long long)pos,
+ (unsigned long long)count,
+ opflags);
+
+ fs = fuse4fs_start(ff);
+ err = fuse4fs_read_inode(fs, ino, &inode);
+ if (err) {
+ ret = translate_error(fs, ino, err);
+ goto out_unlock;
+ }
+
+ /* Reporting requests are checked first, then writes, then reads. */
+ if (opflags & FUSE_IOMAP_OP_REPORT)
+ ret = fuse4fs_iomap_begin_report(ff, ino, &inode, pos, count,
+ opflags, &read);
+ else if (fuse_iomap_is_write(opflags))
+ ret = fuse4fs_iomap_begin_write(ff, ino, &inode, pos, count,
+ opflags, &read);
+ else
+ ret = fuse4fs_iomap_begin_read(ff, ino, &inode, pos, count,
+ opflags, &read);
+ if (ret)
+ goto out_unlock;
+
+ dbg_printf(ff,
+ "%s: ino=%d pos=0x%llx -> addr=0x%llx offset=0x%llx length=0x%llx type=%u flags=0x%x\n",
+ __func__, ino,
+ (unsigned long long)pos,
+ (unsigned long long)read.addr,
+ (unsigned long long)read.offset,
+ (unsigned long long)read.length,
+ read.type,
+ read.flags);
+
+ /*
+ * Not filling even the first byte will make the kernel unhappy.  A
+ * mapping that does not contain pos is reported as inode corruption.
+ */
+ if (read.offset > pos || read.offset + read.length <= pos) {
+ ret = translate_error(fs, ino, EXT2_ET_INODE_CORRUPTED);
+ goto out_unlock;
+ }
+
+out_unlock:
+ fuse4fs_finish(ff, ret);
+ /* ret is negated before it is handed to fuse_reply_err. */
+ if (ret)
+ fuse_reply_err(req, -ret);
+ else
+ fuse_reply_iomap_begin(req, &read, NULL);
+}
+
+/*
+ * iomap_end handler.  For now this only logs its arguments and replies with
+ * success; nothing is done with @written or @iomap beyond printing them.
+ * @dontcare is not used.
+ */
+static void op_iomap_end(fuse_req_t req, fuse_ino_t fino, uint64_t dontcare,
+ off_t pos, uint64_t count, uint32_t opflags,
+ ssize_t written, const struct fuse_file_iomap *iomap)
+{
+ struct fuse4fs *ff = fuse4fs_get(req);
+ ext2_ino_t ino;
+
+ FUSE4FS_CHECK_CONTEXT(req);
+ FUSE4FS_CONVERT_FINO(req, &ino, fino);
+
+ dbg_printf(ff,
+ "%s: ino=%d pos=0x%llx count=0x%llx opflags=0x%x written=0x%zx mapflags=0x%x\n",
+ __func__, ino,
+ (unsigned long long)pos,
+ (unsigned long long)count,
+ opflags,
+ written,
+ iomap->flags);
+
+ fuse_reply_err(req, 0);
+}
+#endif /* HAVE_FUSE_IOMAP */
+
static struct fuse_lowlevel_ops fs_ops = {
.lookup = op_lookup,
.setattr = op_setattr,
@@ -5960,6 +6491,10 @@ static struct fuse_lowlevel_ops fs_ops = {
#ifdef SUPPORT_FALLOCATE
.fallocate = op_fallocate,
#endif
+#ifdef HAVE_FUSE_IOMAP
+ .iomap_begin = op_iomap_begin,
+ .iomap_end = op_iomap_end,
+#endif /* HAVE_FUSE_IOMAP */
};
static int get_random_bytes(void *p, size_t sz)
@@ -6413,6 +6948,9 @@ int main(int argc, char *argv[])
.opstate = F4OP_WRITABLE,
#ifdef HAVE_FUSE4FS_SERVICE
.bdev_fd = -1,
+#endif
+#ifdef HAVE_FUSE_IOMAP
+ .iomap_state = IOMAP_UNKNOWN,
#endif
};
errcode_t err;
diff --git a/lib/config.h.in b/lib/config.h.in
index 2c25632188e4f3..58338cc926590e 100644
--- a/lib/config.h.in
+++ b/lib/config.h.in
@@ -148,6 +148,9 @@
/* Define to 1 if you have the <fuse.h> header file. */
#undef HAVE_FUSE_H
+/* Define to 1 if fuse supports iomap */
+#undef HAVE_FUSE_IOMAP
+
/* Define to 1 if fuse supports lowlevel API */
#undef HAVE_FUSE_LOWLEVEL
diff --git a/misc/fuse2fs.c b/misc/fuse2fs.c
index 0f4781bc49f18f..63c9b59e54fb04 100644
--- a/misc/fuse2fs.c
+++ b/misc/fuse2fs.c
@@ -138,6 +138,9 @@ static inline uint64_t round_down(uint64_t b, unsigned int align)
return b - m;
}
+#define max(a, b) ((a) > (b) ? (a) : (b))
+#define min(a, b) ((a) < (b) ? (a) : (b))
+
#define dbg_printf(fuse2fs, format, ...) \
while ((fuse2fs)->debug) { \
printf("FUSE2FS (%s): tid=%llu " format, (fuse2fs)->shortdev, get_thread_id(), ##__VA_ARGS__); \
@@ -215,6 +218,14 @@ enum fuse2fs_opstate {
F2OP_SHUTDOWN,
};
+#ifdef HAVE_FUSE_IOMAP
+enum fuse2fs_iomap_state {
+ IOMAP_DISABLED,
+ IOMAP_UNKNOWN,
+ IOMAP_ENABLED,
+};
+#endif
+
/* Main program context */
#define FUSE2FS_MAGIC (0xEF53DEADUL)
struct fuse2fs {
@@ -242,6 +253,9 @@ struct fuse2fs {
int logfd;
int blocklog;
int oom_score_adj;
+#ifdef HAVE_FUSE_IOMAP
+ enum fuse2fs_iomap_state iomap_state;
+#endif
unsigned int blockmask;
unsigned long offset;
unsigned int next_generation;
@@ -693,6 +707,15 @@ fuse2fs_set_handle(struct fuse_file_info *fp, struct fuse2fs_file_handle *fh)
fp->fh = (uintptr_t)fh;
}
+#ifdef HAVE_FUSE_IOMAP
+/*
+ * Return nonzero if iomap is enabled for this mount. The >= comparison
+ * depends on IOMAP_ENABLED being the last value of enum fuse2fs_iomap_state.
+ */
+static inline int fuse2fs_iomap_enabled(const struct fuse2fs *ff)
+{
+ return ff->iomap_state >= IOMAP_ENABLED;
+}
+#else
+/* Built without libfuse iomap support: iomap is never enabled. */
+# define fuse2fs_iomap_enabled(...) (0)
+#endif
+
static void get_now(struct timespec *now)
{
#ifdef CLOCK_REALTIME
@@ -1122,7 +1145,7 @@ static errcode_t fuse2fs_open(struct fuse2fs *ff)
char options[128];
double deadline;
int flags = EXT2_FLAG_64BITS | EXT2_FLAG_THREADS | EXT2_FLAG_RW |
- EXT2_FLAG_EXCLUSIVE;
+ EXT2_FLAG_EXCLUSIVE | EXT2_FLAG_WRITE_FULL_SUPER;
errcode_t err;
if (ff->lockfile) {
@@ -1409,6 +1432,15 @@ static void op_destroy(void *p EXT2FS_ATTR((unused)))
(stats->cache_hits + stats->cache_misses));
}
+	/*
+	 * If we're mounting in iomap mode, we need to unmount in op_destroy so
+	 * that the block device will be released before umount(2) returns.
+	 *
+	 * Go through fuse2fs_iomap_enabled() instead of reading
+	 * ff->iomap_state directly: the field only exists when
+	 * HAVE_FUSE_IOMAP is defined, whereas the helper has a fallback that
+	 * evaluates to 0, so this still builds against older libfuse.
+	 */
+	if (fuse2fs_iomap_enabled(ff)) {
+		fuse2fs_mmp_cancel(ff);
+		fuse2fs_unmount(ff);
+	}
+ }
+
fuse2fs_finish(ff, 0);
}
@@ -1545,6 +1577,44 @@ static inline int fuse_set_feature_flag(struct fuse_conn_info *conn,
}
#endif
+#ifdef HAVE_FUSE_IOMAP
+/*
+ * Decide whether this mount should try to negotiate iomap: it must not have
+ * been disabled already, the IO channel must be flagged as a block device,
+ * and the filesystem offset must be a multiple of the fs block size.
+ */
+static inline bool fuse2fs_wants_iomap(struct fuse2fs *ff)
+{
+ if (ff->iomap_state == IOMAP_DISABLED)
+ return false;
+
+ /* iomap only works with block devices */
+ if (!(ff->fs->io->flags & CHANNEL_FLAGS_BLOCK_DEVICE))
+ return false;
+
+ /*
+ * iomap addrs must be aligned to the bdev lba size; we use fs
+ * blocksize as a proxy here
+ */
+ if (ff->offset % ff->fs->blocksize > 0)
+ return false;
+
+ return true;
+}
+
+/*
+ * Called from op_init to settle the iomap state for this mount.
+ *
+ * NOTE: the unconditional early return below forces iomap off, so the
+ * negotiation code that follows it is intentionally unreachable for now.
+ */
+static void fuse2fs_iomap_enable(struct fuse_conn_info *conn,
+ struct fuse2fs *ff)
+{
+ /* Don't let anyone touch iomap until the end of the patchset. */
+ ff->iomap_state = IOMAP_DISABLED;
+ return;
+
+ /* Enable iomap only if we want it and the feature flag could be set. */
+ if (fuse2fs_wants_iomap(ff) &&
+ fuse_set_feature_flag(conn, FUSE_CAP_IOMAP))
+ ff->iomap_state = IOMAP_ENABLED;
+
+ /* Anything still undecided at this point becomes "disabled". */
+ if (ff->iomap_state == IOMAP_UNKNOWN)
+ ff->iomap_state = IOMAP_DISABLED;
+}
+#else
+# define fuse2fs_iomap_enable(...) ((void)0)
+#endif
+
static void *op_init(struct fuse_conn_info *conn,
struct fuse_config *cfg EXT2FS_ATTR((unused)))
{
@@ -1578,6 +1648,8 @@ static void *op_init(struct fuse_conn_info *conn,
#ifdef FUSE_CAP_NO_EXPORT_SUPPORT
fuse_set_feature_flag(conn, FUSE_CAP_NO_EXPORT_SUPPORT);
#endif
+ fuse2fs_iomap_enable(conn, ff);
+
conn->time_gran = 1;
cfg->use_ino = 1;
if (ff->debug)
@@ -5150,6 +5222,465 @@ static int op_fallocate(const char *path EXT2FS_ATTR((unused)), int mode,
}
#endif /* SUPPORT_FALLOCATE */
+#ifdef HAVE_FUSE_IOMAP
+static void fuse2fs_iomap_hole(struct fuse2fs *ff, struct fuse_file_iomap *iomap,
+ off_t pos, uint64_t count)
+{
+ iomap->dev = FUSE_IOMAP_DEV_NULL;
+ iomap->addr = FUSE_IOMAP_NULL_ADDR;
+ iomap->offset = pos;
+ iomap->length = count;
+ iomap->type = FUSE_IOMAP_TYPE_HOLE;
+}
+
+static void fuse2fs_iomap_hole_to_eof(struct fuse2fs *ff,
+ struct fuse_file_iomap *iomap, off_t pos,
+ off_t count,
+ const struct ext2_inode_large *inode)
+{
+ ext2_filsys fs = ff->fs;
+ uint64_t isize = EXT2_I_SIZE(inode);
+
+ /*
+ * We have to be careful about handling a hole to the right of the
+ * entire mapping tree. First, the mapping must start and end on a
+ * block boundary because they must be aligned to at least an LBA for
+ * the block layer; and to the fsblock for smoother operation.
+ *
+ * As for the length -- we could return a mapping all the way to
+ * i_size, but i_size could be less than pos/count if we're zeroing the
+ * EOF block in anticipation of a truncate operation. Similarly, we
+ * don't want to end the mapping at pos+count because we know there's
+ * nothing mapped beyond here.
+ */
+ uint64_t startoff = round_down(pos, fs->blocksize);
+ uint64_t eofoff = round_up(max(pos + count, isize), fs->blocksize);
+
+ dbg_printf(ff,
+ "pos=0x%llx count=0x%llx isize=0x%llx startoff=0x%llx eofoff=0x%llx\n",
+ (unsigned long long)pos,
+ (unsigned long long)count,
+ (unsigned long long)isize,
+ (unsigned long long)startoff,
+ (unsigned long long)eofoff);
+
+ fuse2fs_iomap_hole(ff, iomap, startoff, eofoff - startoff);
+}
+
+#define DEBUG_IOMAP
+#ifdef DEBUG_IOMAP
+# define __DUMP_EXTENT(ff, func, tag, startoff, err, extent) \
+ do { \
+ dbg_printf((ff), \
+ "%s: %s startoff 0x%llx err %ld lblk 0x%llx pblk 0x%llx len 0x%x flags 0x%x\n", \
+ (func), (tag), (startoff), (err), (extent)->e_lblk, \
+ (extent)->e_pblk, (extent)->e_len, \
+ (extent)->e_flags & EXT2_EXTENT_FLAGS_UNINIT); \
+ } while(0)
+# define DUMP_EXTENT(ff, tag, startoff, err, extent) \
+ __DUMP_EXTENT((ff), __func__, (tag), (startoff), (err), (extent))
+
+# define __DUMP_INFO(ff, func, tag, startoff, err, info) \
+ do { \
+ dbg_printf((ff), \
+ "%s: %s startoff 0x%llx err %ld entry %d/%d/%d level %d/%d\n", \
+ (func), (tag), (startoff), (err), \
+ (info)->curr_entry, (info)->num_entries, \
+ (info)->max_entries, (info)->curr_level, \
+ (info)->max_depth); \
+ } while(0)
+# define DUMP_INFO(ff, tag, startoff, err, info) \
+ __DUMP_INFO((ff), __func__, (tag), (startoff), (err), (info))
+#else
+# define __DUMP_EXTENT(...) ((void)0)
+# define DUMP_EXTENT(...) ((void)0)
+# define DUMP_INFO(...) ((void)0)
+#endif
+
+static inline errcode_t __fuse2fs_get_mapping_at(struct fuse2fs *ff,
+ ext2_extent_handle_t handle,
+ blk64_t startoff,
+ struct ext2fs_extent *bmap,
+ const char *func)
+{
+ errcode_t err;
+
+ /*
+ * Find the file mapping at startoff. We don't check the return value
+ * of _goto because _get will error out if _goto failed. There's a
+ * subtlety to the outcome of _goto when startoff falls in a sparse
+ * hole however:
+ *
+ * Most of the time, _goto points the cursor at the mapping whose lblk
+ * is just to the left of startoff. The mapping may or may not overlap
+ * startoff; this is ok. In other words, the tree lookup behaves as if
+ * we asked it to use a less than or equals comparison.
+ *
+ * However, if startoff is to the left of the first mapping in the
+ * extent tree, _goto points the cursor at that first mapping because
+ * it doesn't know how to deal with this situation. In this case,
+ * the tree lookup behaves as if we asked it to use a greater than
+ * or equals comparison.
+ *
+ * Note: If _get() returns 'no current node', that means that there
+ * aren't any mappings at all.
+ */
+ ext2fs_extent_goto(handle, startoff);
+ err = ext2fs_extent_get(handle, EXT2_EXTENT_CURRENT, bmap);
+ __DUMP_EXTENT(ff, func, "lookup", startoff, err, bmap);
+ if (err == EXT2_ET_NO_CURRENT_NODE)
+ err = EXT2_ET_EXTENT_NOT_FOUND;
+ return err;
+}
+
+static inline errcode_t __fuse2fs_get_next_mapping(struct fuse2fs *ff,
+ ext2_extent_handle_t handle,
+ blk64_t startoff,
+ struct ext2fs_extent *bmap,
+ const char *func)
+{
+ struct ext2fs_extent newex;
+ struct ext2_extent_info info;
+ errcode_t err;
+
+ /*
+ * The extent tree code has this (probably broken) behavior that if
+ * more than two of the highest levels of the cursor point at the
+ * rightmost edge of an extent tree block, a _NEXT_LEAF movement fails
+ * to move the cursor position of any of the lower levels. IOWs, if
+ * leaf level N is at the right edge, it will only advance level N-1
+ * to the right. If N-1 was at the right edge, the cursor resets to
+ * record 0 of that level and goes down to the wrong leaf.
+ *
+ * Work around this by walking up (towards root level 0) the extent
+ * tree until we find a level where we're not already at the rightmost
+ * edge. The _NEXT_LEAF movement will walk down the tree to find the
+ * leaves.
+ */
+ err = ext2fs_extent_get_info(handle, &info);
+ DUMP_INFO(ff, "UP?", startoff, err, &info);
+ if (err)
+ return err;
+
+ while (info.curr_entry == info.num_entries && info.curr_level > 0) {
+ err = ext2fs_extent_get(handle, EXT2_EXTENT_UP, &newex);
+ DUMP_EXTENT(ff, "UP", startoff, err, &newex);
+ if (err)
+ return err;
+ err = ext2fs_extent_get_info(handle, &info);
+ DUMP_INFO(ff, "UP", startoff, err, &info);
+ if (err)
+ return err;
+ }
+
+ /*
+ * If we're at the root and there are no more entries, there's nothing
+ * else to be found.
+ */
+ if (info.curr_level == 0 && info.curr_entry == info.num_entries)
+ return EXT2_ET_EXTENT_NOT_FOUND;
+
+ /* Otherwise grab this next leaf and return it. */
+ err = ext2fs_extent_get(handle, EXT2_EXTENT_NEXT_LEAF, &newex);
+ DUMP_EXTENT(ff, "NEXT", startoff, err, &newex);
+ if (err)
+ return err;
+
+ *bmap = newex;
+ return 0;
+}
+
+#define fuse2fs_get_mapping_at(ff, handle, startoff, bmap) \
+ __fuse2fs_get_mapping_at((ff), (handle), (startoff), (bmap), __func__)
+#define fuse2fs_get_next_mapping(ff, handle, startoff, bmap) \
+ __fuse2fs_get_next_mapping((ff), (handle), (startoff), (bmap), __func__)
+
+static errcode_t fuse2fs_iomap_begin_extent(struct fuse2fs *ff, uint64_t ino,
+ struct ext2_inode_large *inode,
+ off_t pos, uint64_t count,
+ uint32_t opflags,
+ struct fuse_file_iomap *iomap)
+{
+ ext2_extent_handle_t handle;
+ struct ext2fs_extent extent = { };
+ ext2_filsys fs = ff->fs;
+ const blk64_t startoff = FUSE2FS_B_TO_FSBT(ff, pos);
+ errcode_t err;
+ int ret = 0;
+
+ err = ext2fs_extent_open2(fs, ino, EXT2_INODE(inode), &handle);
+ if (err)
+ return translate_error(fs, ino, err);
+
+ err = fuse2fs_get_mapping_at(ff, handle, startoff, &extent);
+ if (err == EXT2_ET_EXTENT_NOT_FOUND) {
+ /* No mappings at all; the whole range is a hole. */
+ fuse2fs_iomap_hole_to_eof(ff, iomap, pos, count, inode);
+ goto out_handle;
+ }
+ if (err) {
+ ret = translate_error(fs, ino, err);
+ goto out_handle;
+ }
+
+ if (startoff < extent.e_lblk) {
+ /*
+ * Mapping starts to the right of the current position.
+ * Synthesize a hole going to that next extent.
+ */
+ fuse2fs_iomap_hole(ff, iomap, FUSE2FS_FSB_TO_B(ff, startoff),
+ FUSE2FS_FSB_TO_B(ff, extent.e_lblk - startoff));
+ goto out_handle;
+ }
+
+ if (startoff >= extent.e_lblk + extent.e_len) {
+ /*
+ * Mapping ends to the left of the current position. Try to
+ * find the next mapping. If there is no next mapping, the
+ * whole range is in a hole.
+ */
+ err = fuse2fs_get_next_mapping(ff, handle, startoff, &extent);
+ if (err == EXT2_ET_EXTENT_NOT_FOUND) {
+ fuse2fs_iomap_hole_to_eof(ff, iomap, pos, count, inode);
+ goto out_handle;
+ }
+
+ /*
+ * If the new mapping starts to the right of startoff, there's
+ * a hole from startoff to the start of the new mapping.
+ */
+ if (startoff < extent.e_lblk) {
+ fuse2fs_iomap_hole(ff, iomap,
+ FUSE2FS_FSB_TO_B(ff, startoff),
+ FUSE2FS_FSB_TO_B(ff, extent.e_lblk - startoff));
+ goto out_handle;
+ }
+
+ /*
+ * The new mapping starts at startoff. Something weird
+ * happened in the extent tree lookup, but we found a valid
+ * mapping so we'll run with it.
+ */
+ }
+
+ /* Mapping overlaps startoff, report this. */
+ iomap->dev = FUSE_IOMAP_DEV_NULL;
+ iomap->addr = FUSE2FS_FSB_TO_B(ff, extent.e_pblk) + ff->offset;
+ iomap->offset = FUSE2FS_FSB_TO_B(ff, extent.e_lblk);
+ iomap->length = FUSE2FS_FSB_TO_B(ff, extent.e_len);
+ if (extent.e_flags & EXT2_EXTENT_FLAGS_UNINIT)
+ iomap->type = FUSE_IOMAP_TYPE_UNWRITTEN;
+ else
+ iomap->type = FUSE_IOMAP_TYPE_MAPPED;
+
+out_handle:
+ ext2fs_extent_free(handle);
+ return ret;
+}
+
+static int fuse2fs_iomap_begin_indirect(struct fuse2fs *ff, uint64_t ino,
+ struct ext2_inode_large *inode,
+ off_t pos, uint64_t count,
+ uint32_t opflags,
+ struct fuse_file_iomap *iomap)
+{
+ ext2_filsys fs = ff->fs;
+ blk64_t startoff = FUSE2FS_B_TO_FSBT(ff, pos);
+ uint64_t isize = EXT2_I_SIZE(inode);
+ uint64_t real_count = min(count, 131072);
+ const blk64_t endoff = FUSE2FS_B_TO_FSB(ff, pos + real_count);
+ blk64_t startblock;
+ errcode_t err;
+
+ err = ext2fs_bmap2(fs, ino, EXT2_INODE(inode), NULL, 0, startoff, NULL,
+ &startblock);
+ if (err)
+ return translate_error(fs, ino, err);
+
+ iomap->dev = FUSE_IOMAP_DEV_NULL;
+ iomap->offset = FUSE2FS_FSB_TO_B(ff, startoff);
+ iomap->flags |= FUSE_IOMAP_F_MERGED;
+ if (startblock) {
+ iomap->addr = FUSE2FS_FSB_TO_B(ff, startblock) + ff->offset;
+ iomap->type = FUSE_IOMAP_TYPE_MAPPED;
+ } else {
+ iomap->addr = FUSE_IOMAP_NULL_ADDR;
+ iomap->type = FUSE_IOMAP_TYPE_HOLE;
+ }
+ iomap->length = fs->blocksize;
+
+ /* See how long the mapping goes for. */
+ for (startoff++; startoff < endoff; startoff++) {
+ blk64_t prev_startblock = startblock;
+
+ err = ext2fs_bmap2(fs, ino, EXT2_INODE(inode), NULL, 0,
+ startoff, NULL, &startblock);
+ if (err)
+ break;
+
+ if (iomap->type == FUSE_IOMAP_TYPE_MAPPED) {
+ if (startblock == prev_startblock + 1)
+ iomap->length += fs->blocksize;
+ else
+ break;
+ } else {
+ if (startblock == 0)
+ iomap->length += fs->blocksize;
+ else
+ break;
+ }
+ }
+
+ /*
+ * If this is a hole that goes beyond EOF, report this as a hole to the
+ * end of the range queried so that FIEMAP doesn't go mad.
+ */
+ if (iomap->type == FUSE_IOMAP_TYPE_HOLE &&
+ iomap->offset + iomap->length >= isize)
+ fuse2fs_iomap_hole_to_eof(ff, iomap, pos, count, inode);
+
+ return 0;
+}
+
+static int fuse2fs_iomap_begin_inline(struct fuse2fs *ff, ext2_ino_t ino,
+ struct ext2_inode_large *inode, off_t pos,
+ uint64_t count, struct fuse_file_iomap *iomap)
+{
+ uint64_t one_fsb = FUSE2FS_FSB_TO_B(ff, 1);
+
+ if (pos >= one_fsb) {
+ fuse2fs_iomap_hole_to_eof(ff, iomap, pos, count, inode);
+ } else {
+ /* ext4 only supports inline data files up to 1 fsb */
+ iomap->dev = FUSE_IOMAP_DEV_NULL;
+ iomap->addr = FUSE_IOMAP_NULL_ADDR;
+ iomap->offset = 0;
+ iomap->length = one_fsb;
+ iomap->type = FUSE_IOMAP_TYPE_INLINE;
+ }
+
+ return 0;
+}
+
+static int fuse2fs_iomap_begin_report(struct fuse2fs *ff, ext2_ino_t ino,
+ struct ext2_inode_large *inode,
+ off_t pos, uint64_t count,
+ uint32_t opflags,
+ struct fuse_file_iomap *read)
+{
+ if (inode->i_flags & EXT4_INLINE_DATA_FL)
+ return fuse2fs_iomap_begin_inline(ff, ino, inode, pos, count,
+ read);
+
+ if (inode->i_flags & EXT4_EXTENTS_FL)
+ return fuse2fs_iomap_begin_extent(ff, ino, inode, pos, count,
+ opflags, read);
+
+ return fuse2fs_iomap_begin_indirect(ff, ino, inode, pos, count,
+ opflags, read);
+}
+
+static int fuse2fs_iomap_begin_read(struct fuse2fs *ff, ext2_ino_t ino,
+ struct ext2_inode_large *inode, off_t pos,
+ uint64_t count, uint32_t opflags,
+ struct fuse_file_iomap *read)
+{
+ return -ENOSYS;
+}
+
+static int fuse2fs_iomap_begin_write(struct fuse2fs *ff, ext2_ino_t ino,
+ struct ext2_inode_large *inode, off_t pos,
+ uint64_t count, uint32_t opflags,
+ struct fuse_file_iomap *read)
+{
+ return -ENOSYS;
+}
+
+static int op_iomap_begin(const char *path, uint64_t nodeid, uint64_t attr_ino,
+ off_t pos, uint64_t count, uint32_t opflags,
+ struct fuse_file_iomap *read,
+ struct fuse_file_iomap *write)
+{
+ struct fuse2fs *ff = fuse2fs_get();
+ struct ext2_inode_large inode;
+ ext2_filsys fs;
+ errcode_t err;
+ int ret = 0;
+
+ FUSE2FS_CHECK_CONTEXT(ff);
+
+ dbg_printf(ff,
+ "%s: path=%s nodeid=%llu attr_ino=%llu pos=0x%llx count=0x%llx opflags=0x%x\n",
+ __func__, path,
+ (unsigned long long)nodeid,
+ (unsigned long long)attr_ino,
+ (unsigned long long)pos,
+ (unsigned long long)count,
+ opflags);
+
+ fs = fuse2fs_start(ff);
+ err = fuse2fs_read_inode(fs, attr_ino, &inode);
+ if (err) {
+ ret = translate_error(fs, attr_ino, err);
+ goto out_unlock;
+ }
+
+ if (opflags & FUSE_IOMAP_OP_REPORT)
+ ret = fuse2fs_iomap_begin_report(ff, attr_ino, &inode, pos,
+ count, opflags, read);
+ else if (fuse_iomap_is_write(opflags))
+ ret = fuse2fs_iomap_begin_write(ff, attr_ino, &inode, pos,
+ count, opflags, read);
+ else
+ ret = fuse2fs_iomap_begin_read(ff, attr_ino, &inode, pos,
+ count, opflags, read);
+ if (ret)
+ goto out_unlock;
+
+ dbg_printf(ff, "%s: nodeid=%llu attr_ino=%llu pos=0x%llx -> addr=0x%llx offset=0x%llx length=0x%llx type=%u\n",
+ __func__,
+ (unsigned long long)nodeid,
+ (unsigned long long)attr_ino,
+ (unsigned long long)pos,
+ (unsigned long long)read->addr,
+ (unsigned long long)read->offset,
+ (unsigned long long)read->length,
+ read->type);
+
+ /* Not filling even the first byte will make the kernel unhappy. */
+ if (read->offset > pos || read->offset + read->length <= pos) {
+ ret = translate_error(fs, attr_ino, EXT2_ET_INODE_CORRUPTED);
+ goto out_unlock;
+ }
+
+out_unlock:
+ fuse2fs_finish(ff, ret);
+ return ret;
+}
+
+static int op_iomap_end(const char *path, uint64_t nodeid, uint64_t attr_ino,
+ off_t pos, uint64_t count, uint32_t opflags,
+ ssize_t written, const struct fuse_file_iomap *iomap)
+{
+ struct fuse2fs *ff = fuse2fs_get();
+
+ FUSE2FS_CHECK_CONTEXT(ff);
+
+ dbg_printf(ff,
+ "%s: path=%s nodeid=%llu attr_ino=%llu pos=0x%llx count=0x%llx opflags=0x%x written=0x%zx mapflags=0x%x\n",
+ __func__, path,
+ (unsigned long long)nodeid,
+ (unsigned long long)attr_ino,
+ (unsigned long long)pos,
+ (unsigned long long)count,
+ opflags,
+ written,
+ iomap->flags);
+
+ return 0;
+}
+#endif /* HAVE_FUSE_IOMAP */
+
static struct fuse_operations fs_ops = {
.init = op_init,
.destroy = op_destroy,
@@ -5191,6 +5722,10 @@ static struct fuse_operations fs_ops = {
#ifdef SUPPORT_FALLOCATE
.fallocate = op_fallocate,
#endif
+#ifdef HAVE_FUSE_IOMAP
+ .iomap_begin = op_iomap_begin,
+ .iomap_end = op_iomap_end,
+#endif /* HAVE_FUSE_IOMAP */
};
static int get_random_bytes(void *p, size_t sz)
@@ -5477,6 +6012,9 @@ int main(int argc, char *argv[])
.bfl = (pthread_mutex_t)PTHREAD_MUTEX_INITIALIZER,
.oom_score_adj = -500,
.opstate = F2OP_WRITABLE,
+#ifdef HAVE_FUSE_IOMAP
+ .iomap_state = IOMAP_UNKNOWN,
+#endif
};
errcode_t err;
FILE *orig_stderr = stderr;
^ permalink raw reply related [flat|nested] 50+ messages in thread* [PATCH 02/19] fuse2fs: add iomap= mount option
2026-04-29 14:20 ` [PATCHSET v8 2/6] fuse2fs: use fuse iomap data paths for better file I/O performance Darrick J. Wong
2026-04-29 14:52 ` [PATCH 01/19] fuse2fs: implement bare minimum iomap for file mapping reporting Darrick J. Wong
@ 2026-04-29 14:53 ` Darrick J. Wong
2026-04-29 14:53 ` [PATCH 03/19] fuse2fs: implement iomap configuration Darrick J. Wong
` (16 subsequent siblings)
18 siblings, 0 replies; 50+ messages in thread
From: Darrick J. Wong @ 2026-04-29 14:53 UTC (permalink / raw)
To: tytso
Cc: bernd, miklos, linux-ext4, neal, linux-fsdevel, fuse-devel,
joannelkoong
From: Darrick J. Wong <djwong@kernel.org>
Add a mount option to control iomap usage so that we can test before and
after scenarios.
Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
---
fuse4fs/fuse4fs.1.in | 6 ++++++
fuse4fs/fuse4fs.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++
misc/fuse2fs.1.in | 6 ++++++
misc/fuse2fs.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 104 insertions(+)
diff --git a/fuse4fs/fuse4fs.1.in b/fuse4fs/fuse4fs.1.in
index 8bef5f48802385..8855867d27101d 100644
--- a/fuse4fs/fuse4fs.1.in
+++ b/fuse4fs/fuse4fs.1.in
@@ -75,6 +75,12 @@ .SS "fuse4fs options:"
\fB-o\fR fuse4fs_debug
enable fuse4fs debugging
.TP
+\fB-o\fR iomap=
+If set to \fI1\fR, requires iomap to be enabled.
+If set to \fI0\fR, forbids use of iomap.
+If set to \fIdefault\fR (or not set), enables iomap if present.
+Using iomap substantially improves the performance of the fuse4fs server.
+.TP
\fB-o\fR kernel
Behave more like the kernel ext4 driver in the following ways:
Allows processes owned by other users to access the filesystem.
diff --git a/fuse4fs/fuse4fs.c b/fuse4fs/fuse4fs.c
index a159024f778ba2..df2bda7cc22bf2 100644
--- a/fuse4fs/fuse4fs.c
+++ b/fuse4fs/fuse4fs.c
@@ -236,6 +236,12 @@ enum fuse4fs_opstate {
F4OP_SHUTDOWN,
};
+enum fuse4fs_feature_toggle {
+ FT_DISABLE,
+ FT_ENABLE,
+ FT_DEFAULT,
+};
+
#ifdef HAVE_FUSE_IOMAP
enum fuse4fs_iomap_state {
IOMAP_DISABLED,
@@ -272,6 +278,7 @@ struct fuse4fs {
int blocklog;
int oom_score_adj;
#ifdef HAVE_FUSE_IOMAP
+ enum fuse4fs_feature_toggle iomap_want;
enum fuse4fs_iomap_state iomap_state;
#endif
unsigned int blockmask;
@@ -2013,6 +2020,12 @@ static void fuse4fs_iomap_enable(struct fuse_conn_info *conn,
if (ff->iomap_state == IOMAP_UNKNOWN)
ff->iomap_state = IOMAP_DISABLED;
+
+ if (!fuse4fs_iomap_enabled(ff)) {
+ if (ff->iomap_want == FT_ENABLE)
+ err_printf(ff, "%s\n", _("Could not enable iomap."));
+ return;
+ }
}
#else
# define fuse4fs_iomap_enable(...) ((void)0)
@@ -6522,6 +6535,9 @@ enum {
FUSE4FS_CACHE_SIZE,
FUSE4FS_DIRSYNC,
FUSE4FS_ERRORS_BEHAVIOR,
+#ifdef HAVE_FUSE_IOMAP
+ FUSE4FS_IOMAP,
+#endif
};
#define FUSE4FS_OPT(t, p, v) { t, offsetof(struct fuse4fs, p), v }
@@ -6553,6 +6569,10 @@ static struct fuse_opt fuse4fs_opts[] = {
FUSE_OPT_KEY("cache_size=%s", FUSE4FS_CACHE_SIZE),
FUSE_OPT_KEY("dirsync", FUSE4FS_DIRSYNC),
FUSE_OPT_KEY("errors=%s", FUSE4FS_ERRORS_BEHAVIOR),
+#ifdef HAVE_FUSE_IOMAP
+ FUSE_OPT_KEY("iomap=%s", FUSE4FS_IOMAP),
+ FUSE_OPT_KEY("iomap", FUSE4FS_IOMAP),
+#endif
FUSE_OPT_KEY("-V", FUSE4FS_VERSION),
FUSE_OPT_KEY("--version", FUSE4FS_VERSION),
@@ -6604,6 +6624,23 @@ static int fuse4fs_opt_proc(void *data, const char *arg,
/* do not pass through to libfuse */
return 0;
+#ifdef HAVE_FUSE_IOMAP
+ case FUSE4FS_IOMAP:
+ if (strcmp(arg, "iomap") == 0 || strcmp(arg + 6, "1") == 0)
+ ff->iomap_want = FT_ENABLE;
+ else if (strcmp(arg + 6, "0") == 0)
+ ff->iomap_want = FT_DISABLE;
+ else if (strcmp(arg + 6, "default") == 0)
+ ff->iomap_want = FT_DEFAULT;
+ else {
+ fprintf(stderr, "%s: %s\n", arg,
+ _("unknown iomap= behavior."));
+ return -1;
+ }
+
+ /* do not pass through to libfuse */
+ return 0;
+#endif
case FUSE4FS_IGNORED:
return 0;
case FUSE4FS_HELP:
@@ -6631,6 +6668,9 @@ static int fuse4fs_opt_proc(void *data, const char *arg,
" -o cache_size=N[KMG] use a disk cache of this size\n"
" -o errors= behavior when an error is encountered:\n"
" continue|remount-ro|panic\n"
+#ifdef HAVE_FUSE_IOMAP
+ " -o iomap= 0 to disable iomap, 1 to enable iomap\n"
+#endif
"\n",
outargs->argv[0]);
if (key == FUSE4FS_HELPFULL) {
@@ -6950,6 +6990,7 @@ int main(int argc, char *argv[])
.bdev_fd = -1,
#endif
#ifdef HAVE_FUSE_IOMAP
+ .iomap_want = FT_DEFAULT,
.iomap_state = IOMAP_UNKNOWN,
#endif
};
@@ -6983,6 +7024,11 @@ int main(int argc, char *argv[])
if (fuse4fs_is_service(&fctx))
fuse4fs_service_set_proc_cmdline(&fctx, argc, argv, &args);
+#ifdef HAVE_FUSE_IOMAP
+ if (fctx.iomap_want == FT_DISABLE)
+ fctx.iomap_state = IOMAP_DISABLED;
+#endif
+
/* /dev/sda -> sda for reporting */
fctx.shortdev = strrchr(fctx.device, '/');
if (fctx.shortdev)
diff --git a/misc/fuse2fs.1.in b/misc/fuse2fs.1.in
index 6acfa092851292..2b55fa0e723966 100644
--- a/misc/fuse2fs.1.in
+++ b/misc/fuse2fs.1.in
@@ -75,6 +75,12 @@ .SS "fuse2fs options:"
\fB-o\fR fuse2fs_debug
enable fuse2fs debugging
.TP
+\fB-o\fR iomap=
+If set to \fI1\fR, requires iomap to be enabled.
+If set to \fI0\fR, forbids use of iomap.
+If set to \fIdefault\fR (or not set), enables iomap if present.
+Using iomap substantially improves the performance of the fuse2fs server.
+.TP
\fB-o\fR kernel
Behave more like the kernel ext4 driver in the following ways:
Allows processes owned by other users to access the filesystem.
diff --git a/misc/fuse2fs.c b/misc/fuse2fs.c
index 63c9b59e54fb04..15ebe6b39f1288 100644
--- a/misc/fuse2fs.c
+++ b/misc/fuse2fs.c
@@ -218,6 +218,12 @@ enum fuse2fs_opstate {
F2OP_SHUTDOWN,
};
+enum fuse2fs_feature_toggle {
+ FT_DISABLE,
+ FT_ENABLE,
+ FT_DEFAULT,
+};
+
#ifdef HAVE_FUSE_IOMAP
enum fuse2fs_iomap_state {
IOMAP_DISABLED,
@@ -254,6 +260,7 @@ struct fuse2fs {
int blocklog;
int oom_score_adj;
#ifdef HAVE_FUSE_IOMAP
+ enum fuse2fs_feature_toggle iomap_want;
enum fuse2fs_iomap_state iomap_state;
#endif
unsigned int blockmask;
@@ -1610,6 +1617,12 @@ static void fuse2fs_iomap_enable(struct fuse_conn_info *conn,
if (ff->iomap_state == IOMAP_UNKNOWN)
ff->iomap_state = IOMAP_DISABLED;
+
+ if (!fuse2fs_iomap_enabled(ff)) {
+ if (ff->iomap_want == FT_ENABLE)
+ err_printf(ff, "%s\n", _("Could not enable iomap."));
+ return;
+ }
}
#else
# define fuse2fs_iomap_enable(...) ((void)0)
@@ -5753,6 +5766,9 @@ enum {
FUSE2FS_CACHE_SIZE,
FUSE2FS_DIRSYNC,
FUSE2FS_ERRORS_BEHAVIOR,
+#ifdef HAVE_FUSE_IOMAP
+ FUSE2FS_IOMAP,
+#endif
};
#define FUSE2FS_OPT(t, p, v) { t, offsetof(struct fuse2fs, p), v }
@@ -5784,6 +5800,10 @@ static struct fuse_opt fuse2fs_opts[] = {
FUSE_OPT_KEY("cache_size=%s", FUSE2FS_CACHE_SIZE),
FUSE_OPT_KEY("dirsync", FUSE2FS_DIRSYNC),
FUSE_OPT_KEY("errors=%s", FUSE2FS_ERRORS_BEHAVIOR),
+#ifdef HAVE_FUSE_IOMAP
+ FUSE_OPT_KEY("iomap=%s", FUSE2FS_IOMAP),
+ FUSE_OPT_KEY("iomap", FUSE2FS_IOMAP),
+#endif
FUSE_OPT_KEY("-V", FUSE2FS_VERSION),
FUSE_OPT_KEY("--version", FUSE2FS_VERSION),
@@ -5835,6 +5855,23 @@ static int fuse2fs_opt_proc(void *data, const char *arg,
/* do not pass through to libfuse */
return 0;
+#ifdef HAVE_FUSE_IOMAP
+ case FUSE2FS_IOMAP:
+ if (strcmp(arg, "iomap") == 0 || strcmp(arg + 6, "1") == 0)
+ ff->iomap_want = FT_ENABLE;
+ else if (strcmp(arg + 6, "0") == 0)
+ ff->iomap_want = FT_DISABLE;
+ else if (strcmp(arg + 6, "default") == 0)
+ ff->iomap_want = FT_DEFAULT;
+ else {
+ fprintf(stderr, "%s: %s\n", arg,
+ _("unknown iomap= behavior."));
+ return -1;
+ }
+
+ /* do not pass through to libfuse */
+ return 0;
+#endif
case FUSE2FS_IGNORED:
return 0;
case FUSE2FS_HELP:
@@ -5862,6 +5899,9 @@ static int fuse2fs_opt_proc(void *data, const char *arg,
" -o cache_size=N[KMG] use a disk cache of this size\n"
" -o errors= behavior when an error is encountered:\n"
" continue|remount-ro|panic\n"
+#ifdef HAVE_FUSE_IOMAP
+ " -o iomap= 0 to disable iomap, 1 to enable iomap\n"
+#endif
"\n",
outargs->argv[0]);
if (key == FUSE2FS_HELPFULL) {
@@ -6013,6 +6053,7 @@ int main(int argc, char *argv[])
.oom_score_adj = -500,
.opstate = F2OP_WRITABLE,
#ifdef HAVE_FUSE_IOMAP
+ .iomap_want = FT_DEFAULT,
.iomap_state = IOMAP_UNKNOWN,
#endif
};
@@ -6029,6 +6070,11 @@ int main(int argc, char *argv[])
exit(1);
}
+#ifdef HAVE_FUSE_IOMAP
+ if (fctx.iomap_want == FT_DISABLE)
+ fctx.iomap_state = IOMAP_DISABLED;
+#endif
+
/* /dev/sda -> sda for reporting */
fctx.shortdev = strrchr(fctx.device, '/');
if (fctx.shortdev)
^ permalink raw reply related [flat|nested] 50+ messages in thread* [PATCH 03/19] fuse2fs: implement iomap configuration
2026-04-29 14:20 ` [PATCHSET v8 2/6] fuse2fs: use fuse iomap data paths for better file I/O performance Darrick J. Wong
2026-04-29 14:52 ` [PATCH 01/19] fuse2fs: implement bare minimum iomap for file mapping reporting Darrick J. Wong
2026-04-29 14:53 ` [PATCH 02/19] fuse2fs: add iomap= mount option Darrick J. Wong
@ 2026-04-29 14:53 ` Darrick J. Wong
2026-04-29 14:53 ` [PATCH 04/19] fuse2fs: register block devices for use with iomap Darrick J. Wong
` (15 subsequent siblings)
18 siblings, 0 replies; 50+ messages in thread
From: Darrick J. Wong @ 2026-04-29 14:53 UTC (permalink / raw)
To: tytso
Cc: bernd, miklos, linux-ext4, neal, linux-fsdevel, fuse-devel,
joannelkoong
From: Darrick J. Wong <djwong@kernel.org>
Upload the filesystem geometry to the kernel when asked.
Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
---
fuse4fs/fuse4fs.c | 98 +++++++++++++++++++++++++++++++++++++++++++++++++++--
misc/fuse2fs.c | 96 ++++++++++++++++++++++++++++++++++++++++++++++++++--
2 files changed, 188 insertions(+), 6 deletions(-)
diff --git a/fuse4fs/fuse4fs.c b/fuse4fs/fuse4fs.c
index df2bda7cc22bf2..feb46bdfbac39b 100644
--- a/fuse4fs/fuse4fs.c
+++ b/fuse4fs/fuse4fs.c
@@ -208,6 +208,10 @@ static inline uint64_t round_down(uint64_t b, unsigned int align)
# define FL_ZERO_RANGE_FLAG (0)
#endif
+#ifndef NSEC_PER_SEC
+# define NSEC_PER_SEC (1000000000L)
+#endif
+
errcode_t ext2fs_check_ext3_journal(ext2_filsys fs);
errcode_t ext2fs_run_ext3_journal(ext2_filsys *fs);
@@ -995,9 +999,9 @@ static int update_atime(ext2_filsys fs, ext2_ino_t ino)
EXT4_INODE_GET_XTIME(i_mtime, &mtime, pinode);
get_now(&now);
- datime = atime.tv_sec + ((double)atime.tv_nsec / 1000000000);
- dmtime = mtime.tv_sec + ((double)mtime.tv_nsec / 1000000000);
- dnow = now.tv_sec + ((double)now.tv_nsec / 1000000000);
+ datime = atime.tv_sec + ((double)atime.tv_nsec / NSEC_PER_SEC);
+ dmtime = mtime.tv_sec + ((double)mtime.tv_nsec / NSEC_PER_SEC);
+ dnow = now.tv_sec + ((double)now.tv_nsec / NSEC_PER_SEC);
/*
* If atime is newer than mtime and atime hasn't been updated in thirty
@@ -6459,6 +6463,93 @@ static void op_iomap_end(fuse_req_t req, fuse_ino_t fino, uint64_t dontcare,
fuse_reply_err(req, 0);
}
+
+/*
+ * Maximal extent format file size.
+ * Resulting logical blkno at s_maxbytes must fit in our on-disk
+ * extent format containers, within a sector_t, and within i_blocks
+ * in the vfs. ext4 inode has 48 bits of i_block in fsblock units,
+ * so that won't be a limiting factor.
+ *
+ * However, there is another limiting factor. We store extents in the form
+ * of starting block and length, hence the resulting length of the extent
+ * covering the maximum file size must fit into on-disk format containers as
+ * well. Given that the length is always 1 unit bigger than the max unit
+ * (because we count 0 as well), we have to lower s_maxbytes by one fs block.
+ *
+ * Note, this does *not* consider any metadata overhead for vfs i_blocks.
+ */
+static off_t fuse4fs_max_size(struct fuse4fs *ff, off_t upper_limit)
+{
+ off_t res;
+
+ if (!ext2fs_has_feature_huge_file(ff->fs->super)) {
+ upper_limit = (1LL << 32) - 1;
+
+ /* total blocks in file system block size */
+ upper_limit >>= (ff->blocklog - 9);
+ upper_limit <<= ff->blocklog;
+ }
+
+ /*
+ * 32-bit extent-start container, ee_block. We lower the maxbytes
+ * by one fs block, so ee_len can cover the extent of maximum file
+ * size
+ */
+ res = (1LL << 32) - 1;
+ res <<= ff->blocklog;
+
+ /* Sanity check against vm- & vfs- imposed limits */
+ if (res > upper_limit)
+ res = upper_limit;
+
+ return res;
+}
+
+static void op_iomap_config(fuse_req_t req,
+ const struct fuse_iomap_config_params *p,
+ size_t psize)
+{
+ struct fuse_iomap_config cfg = { };
+ struct fuse4fs *ff = fuse4fs_get(req);
+ ext2_filsys fs;
+
+ FUSE4FS_CHECK_CONTEXT(req);
+
+ dbg_printf(ff, "%s: flags=0x%llx maxbytes=0x%llx\n", __func__,
+ (unsigned long long)p->flags,
+ (unsigned long long)p->maxbytes);
+ fs = fuse4fs_start(ff);
+
+ cfg.flags |= FUSE_IOMAP_CONFIG_UUID;
+ memcpy(cfg.s_uuid, fs->super->s_uuid, sizeof(cfg.s_uuid));
+ cfg.s_uuid_len = sizeof(fs->super->s_uuid);
+
+ cfg.flags |= FUSE_IOMAP_CONFIG_BLOCKSIZE;
+ cfg.s_blocksize = FUSE4FS_FSB_TO_B(ff, 1);
+
+ /*
+ * If the inode is large enough to house i_[acm]time_extra then we
+ * can turn on nanosecond timestamps; i_crtime was the next field added
+ * after i_atime_extra.
+ */
+ cfg.flags |= FUSE_IOMAP_CONFIG_TIME;
+ if (fs->super->s_inode_size >=
+ offsetof(struct ext2_inode_large, i_crtime)) {
+ cfg.s_time_gran = 1;
+ cfg.s_time_max = EXT4_EXTRA_TIMESTAMP_MAX;
+ } else {
+ cfg.s_time_gran = NSEC_PER_SEC;
+ cfg.s_time_max = EXT4_NON_EXTRA_TIMESTAMP_MAX;
+ }
+ cfg.s_time_min = EXT4_TIMESTAMP_MIN;
+
+ cfg.flags |= FUSE_IOMAP_CONFIG_MAXBYTES;
+ cfg.s_maxbytes = fuse4fs_max_size(ff, p->maxbytes);
+
+ fuse4fs_finish(ff, 0);
+ fuse_reply_iomap_config(req, &cfg);
+}
#endif /* HAVE_FUSE_IOMAP */
static struct fuse_lowlevel_ops fs_ops = {
@@ -6507,6 +6598,7 @@ static struct fuse_lowlevel_ops fs_ops = {
#ifdef HAVE_FUSE_IOMAP
.iomap_begin = op_iomap_begin,
.iomap_end = op_iomap_end,
+ .iomap_config = op_iomap_config,
#endif /* HAVE_FUSE_IOMAP */
};
diff --git a/misc/fuse2fs.c b/misc/fuse2fs.c
index 15ebe6b39f1288..7df4e127e5981a 100644
--- a/misc/fuse2fs.c
+++ b/misc/fuse2fs.c
@@ -191,6 +191,10 @@ static inline uint64_t round_down(uint64_t b, unsigned int align)
# define FL_ZERO_RANGE_FLAG (0)
#endif
+#ifndef NSEC_PER_SEC
+# define NSEC_PER_SEC (1000000000L)
+#endif
+
errcode_t ext2fs_check_ext3_journal(ext2_filsys fs);
errcode_t ext2fs_run_ext3_journal(ext2_filsys *fs);
@@ -806,9 +810,9 @@ static int update_atime(ext2_filsys fs, ext2_ino_t ino)
EXT4_INODE_GET_XTIME(i_mtime, &mtime, pinode);
get_now(&now);
- datime = atime.tv_sec + ((double)atime.tv_nsec / 1000000000);
- dmtime = mtime.tv_sec + ((double)mtime.tv_nsec / 1000000000);
- dnow = now.tv_sec + ((double)now.tv_nsec / 1000000000);
+ datime = atime.tv_sec + ((double)atime.tv_nsec / NSEC_PER_SEC);
+ dmtime = mtime.tv_sec + ((double)mtime.tv_nsec / NSEC_PER_SEC);
+ dnow = now.tv_sec + ((double)now.tv_nsec / NSEC_PER_SEC);
/*
* If atime is newer than mtime and atime hasn't been updated in thirty
@@ -5692,6 +5696,91 @@ static int op_iomap_end(const char *path, uint64_t nodeid, uint64_t attr_ino,
return 0;
}
+
+/*
+ * Maximal extent format file size.
+ * Resulting logical blkno at s_maxbytes must fit in our on-disk
+ * extent format containers, within a sector_t, and within i_blocks
+ * in the vfs. ext4 inode has 48 bits of i_block in fsblock units,
+ * so that won't be a limiting factor.
+ *
+ * However, there is another limiting factor. We store extents in the form
+ * of starting block and length, hence the resulting length of the extent
+ * covering the maximum file size must fit into on-disk format containers as
+ * well. Given that the length is always 1 unit bigger than the max unit
+ * (because we count 0 as well), we have to lower s_maxbytes by one fs block.
+ *
+ * Note, this does *not* consider any metadata overhead for vfs i_blocks.
+ */
+static off_t fuse2fs_max_size(struct fuse2fs *ff, off_t upper_limit)
+{
+ off_t res;
+
+ if (!ext2fs_has_feature_huge_file(ff->fs->super)) {
+ upper_limit = (1LL << 32) - 1;
+
+ /* total blocks in file system block size */
+ upper_limit >>= (ff->blocklog - 9);
+ upper_limit <<= ff->blocklog;
+ }
+
+ /*
+ * 32-bit extent-start container, ee_block. We lower the maxbytes
+ * by one fs block, so ee_len can cover the extent of maximum file
+ * size
+ */
+ res = (1LL << 32) - 1;
+ res <<= ff->blocklog;
+
+ /* Sanity check against vm- & vfs- imposed limits */
+ if (res > upper_limit)
+ res = upper_limit;
+
+ return res;
+}
+
+static int op_iomap_config(const struct fuse_iomap_config_params *p,
+ size_t psize, struct fuse_iomap_config *cfg)
+{
+ struct fuse2fs *ff = fuse2fs_get();
+ ext2_filsys fs;
+
+ FUSE2FS_CHECK_CONTEXT(ff);
+
+ dbg_printf(ff, "%s: flags=0x%llx maxbytes=0x%llx\n", __func__,
+ (unsigned long long)p->flags,
+ (unsigned long long)p->maxbytes);
+ fs = fuse2fs_start(ff);
+
+ cfg->flags |= FUSE_IOMAP_CONFIG_UUID;
+ memcpy(cfg->s_uuid, fs->super->s_uuid, sizeof(cfg->s_uuid));
+ cfg->s_uuid_len = sizeof(fs->super->s_uuid);
+
+ cfg->flags |= FUSE_IOMAP_CONFIG_BLOCKSIZE;
+ cfg->s_blocksize = FUSE2FS_FSB_TO_B(ff, 1);
+
+ /*
+ * If the inode is large enough to house i_[acm]time_extra then we
+ * can turn on nanosecond timestamps; i_crtime was the next field added
+ * after i_atime_extra.
+ */
+ cfg->flags |= FUSE_IOMAP_CONFIG_TIME;
+ if (fs->super->s_inode_size >=
+ offsetof(struct ext2_inode_large, i_crtime)) {
+ cfg->s_time_gran = 1;
+ cfg->s_time_max = EXT4_EXTRA_TIMESTAMP_MAX;
+ } else {
+ cfg->s_time_gran = NSEC_PER_SEC;
+ cfg->s_time_max = EXT4_NON_EXTRA_TIMESTAMP_MAX;
+ }
+ cfg->s_time_min = EXT4_TIMESTAMP_MIN;
+
+ cfg->flags |= FUSE_IOMAP_CONFIG_MAXBYTES;
+ cfg->s_maxbytes = fuse2fs_max_size(ff, p->maxbytes);
+
+ fuse2fs_finish(ff, 0);
+ return 0;
+}
#endif /* HAVE_FUSE_IOMAP */
static struct fuse_operations fs_ops = {
@@ -5738,6 +5827,7 @@ static struct fuse_operations fs_ops = {
#ifdef HAVE_FUSE_IOMAP
.iomap_begin = op_iomap_begin,
.iomap_end = op_iomap_end,
+ .iomap_config = op_iomap_config,
#endif /* HAVE_FUSE_IOMAP */
};
^ permalink raw reply related [flat|nested] 50+ messages in thread* [PATCH 04/19] fuse2fs: register block devices for use with iomap
2026-04-29 14:20 ` [PATCHSET v8 2/6] fuse2fs: use fuse iomap data paths for better file I/O performance Darrick J. Wong
` (2 preceding siblings ...)
2026-04-29 14:53 ` [PATCH 03/19] fuse2fs: implement iomap configuration Darrick J. Wong
@ 2026-04-29 14:53 ` Darrick J. Wong
2026-04-29 14:53 ` [PATCH 05/19] fuse2fs: implement directio file reads Darrick J. Wong
` (14 subsequent siblings)
18 siblings, 0 replies; 50+ messages in thread
From: Darrick J. Wong @ 2026-04-29 14:53 UTC (permalink / raw)
To: tytso
Cc: bernd, miklos, linux-ext4, neal, linux-fsdevel, fuse-devel,
joannelkoong
From: Darrick J. Wong <djwong@kernel.org>
Register the ext4 block device with the kernel for use with iomap.
Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
---
fuse4fs/fuse4fs.c | 44 ++++++++++++++++++++++++++++++++++++++++----
misc/fuse2fs.c | 42 ++++++++++++++++++++++++++++++++++++++----
2 files changed, 78 insertions(+), 8 deletions(-)
diff --git a/fuse4fs/fuse4fs.c b/fuse4fs/fuse4fs.c
index feb46bdfbac39b..3e9852f585302d 100644
--- a/fuse4fs/fuse4fs.c
+++ b/fuse4fs/fuse4fs.c
@@ -284,6 +284,7 @@ struct fuse4fs {
#ifdef HAVE_FUSE_IOMAP
enum fuse4fs_feature_toggle iomap_want;
enum fuse4fs_iomap_state iomap_state;
+ uint32_t iomap_dev;
#endif
unsigned int blockmask;
unsigned long offset;
@@ -6247,7 +6248,7 @@ static errcode_t fuse4fs_iomap_begin_extent(struct fuse4fs *ff, uint64_t ino,
}
/* Mapping overlaps startoff, report this. */
- iomap->dev = FUSE_IOMAP_DEV_NULL;
+ iomap->dev = ff->iomap_dev;
iomap->addr = FUSE4FS_FSB_TO_B(ff, extent.e_pblk) + ff->offset;
iomap->offset = FUSE4FS_FSB_TO_B(ff, extent.e_lblk);
iomap->length = FUSE4FS_FSB_TO_B(ff, extent.e_len);
@@ -6280,13 +6281,14 @@ static int fuse4fs_iomap_begin_indirect(struct fuse4fs *ff, uint64_t ino,
if (err)
return translate_error(fs, ino, err);
- iomap->dev = FUSE_IOMAP_DEV_NULL;
iomap->offset = FUSE4FS_FSB_TO_B(ff, startoff);
iomap->flags |= FUSE_IOMAP_F_MERGED;
if (startblock) {
+ iomap->dev = ff->iomap_dev;
iomap->addr = FUSE4FS_FSB_TO_B(ff, startblock) + ff->offset;
iomap->type = FUSE_IOMAP_TYPE_MAPPED;
} else {
+ iomap->dev = FUSE_IOMAP_DEV_NULL;
iomap->addr = FUSE_IOMAP_NULL_ADDR;
iomap->type = FUSE_IOMAP_TYPE_HOLE;
}
@@ -6506,6 +6508,30 @@ static off_t fuse4fs_max_size(struct fuse4fs *ff, off_t upper_limit)
return res;
}
+/*
+ * Register the filesystem's block device fd with the kernel for use with
+ * iomap and remember the returned device id in ff->iomap_dev.
+ *
+ * Returns 0 on success, or the result of translate_error() if the fd cannot
+ * be obtained from the io channel or the registration fails.
+ */
+static int fuse4fs_iomap_config_devices(struct fuse4fs *ff)
+{
+ errcode_t err;
+ int fd;
+ int ret;
+
+ err = io_channel_get_fd(ff->fs->io, &fd);
+ if (err)
+ return translate_error(ff->fs, 0, err);
+
+ ret = fuse_lowlevel_iomap_device_add(ff->fuse, fd, 0);
+ if (ret < 0) {
+ dbg_printf(ff, "%s: cannot register iomap dev fd=%d, err=%d\n",
+ __func__, fd, -ret);
+ return translate_error(ff->fs, 0, -ret);
+ }
+
+ /* Record the id first so the debug message reports the new value. */
+ ff->iomap_dev = ret;
+
+ dbg_printf(ff, "%s: registered iomap dev fd=%d iomap_dev=%u\n",
+ __func__, fd, ff->iomap_dev);
+
+ return 0;
+}
+
static void op_iomap_config(fuse_req_t req,
const struct fuse_iomap_config_params *p,
size_t psize)
@@ -6513,6 +6539,7 @@ static void op_iomap_config(fuse_req_t req,
struct fuse_iomap_config cfg = { };
struct fuse4fs *ff = fuse4fs_get(req);
ext2_filsys fs;
+ int ret = 0;
FUSE4FS_CHECK_CONTEXT(req);
@@ -6547,8 +6574,16 @@ static void op_iomap_config(fuse_req_t req,
cfg.flags |= FUSE_IOMAP_CONFIG_MAXBYTES;
cfg.s_maxbytes = fuse4fs_max_size(ff, p->maxbytes);
- fuse4fs_finish(ff, 0);
- fuse_reply_iomap_config(req, &cfg);
+ ret = fuse4fs_iomap_config_devices(ff);
+ if (ret)
+ goto out_unlock;
+
+out_unlock:
+ fuse4fs_finish(ff, ret);
+ if (ret)
+ fuse_reply_err(req, -ret);
+ else
+ fuse_reply_iomap_config(req, &cfg);
}
#endif /* HAVE_FUSE_IOMAP */
@@ -7084,6 +7119,7 @@ int main(int argc, char *argv[])
#ifdef HAVE_FUSE_IOMAP
.iomap_want = FT_DEFAULT,
.iomap_state = IOMAP_UNKNOWN,
+ .iomap_dev = FUSE_IOMAP_DEV_NULL,
#endif
};
errcode_t err;
diff --git a/misc/fuse2fs.c b/misc/fuse2fs.c
index 7df4e127e5981a..c24ae461dad2ad 100644
--- a/misc/fuse2fs.c
+++ b/misc/fuse2fs.c
@@ -40,6 +40,7 @@
# define _FILE_OFFSET_BITS 64
#endif /* _FILE_OFFSET_BITS */
#include <fuse.h>
+#include <fuse_lowlevel.h>
#ifdef __SET_FOB_FOR_FUSE
# undef _FILE_OFFSET_BITS
#endif /* __SET_FOB_FOR_FUSE */
@@ -266,6 +267,7 @@ struct fuse2fs {
#ifdef HAVE_FUSE_IOMAP
enum fuse2fs_feature_toggle iomap_want;
enum fuse2fs_iomap_state iomap_state;
+ uint32_t iomap_dev;
#endif
unsigned int blockmask;
unsigned long offset;
@@ -5481,7 +5483,7 @@ static errcode_t fuse2fs_iomap_begin_extent(struct fuse2fs *ff, uint64_t ino,
}
/* Mapping overlaps startoff, report this. */
- iomap->dev = FUSE_IOMAP_DEV_NULL;
+ iomap->dev = ff->iomap_dev;
iomap->addr = FUSE2FS_FSB_TO_B(ff, extent.e_pblk) + ff->offset;
iomap->offset = FUSE2FS_FSB_TO_B(ff, extent.e_lblk);
iomap->length = FUSE2FS_FSB_TO_B(ff, extent.e_len);
@@ -5514,13 +5516,14 @@ static int fuse2fs_iomap_begin_indirect(struct fuse2fs *ff, uint64_t ino,
if (err)
return translate_error(fs, ino, err);
- iomap->dev = FUSE_IOMAP_DEV_NULL;
iomap->offset = FUSE2FS_FSB_TO_B(ff, startoff);
iomap->flags |= FUSE_IOMAP_F_MERGED;
if (startblock) {
+ iomap->dev = ff->iomap_dev;
iomap->addr = FUSE2FS_FSB_TO_B(ff, startblock) + ff->offset;
iomap->type = FUSE_IOMAP_TYPE_MAPPED;
} else {
+ iomap->dev = FUSE_IOMAP_DEV_NULL;
iomap->addr = FUSE_IOMAP_NULL_ADDR;
iomap->type = FUSE_IOMAP_TYPE_HOLE;
}
@@ -5739,11 +5742,36 @@ static off_t fuse2fs_max_size(struct fuse2fs *ff, off_t upper_limit)
return res;
}
+/*
+ * Register the filesystem's block device fd with the kernel for use with
+ * iomap and remember the returned device id in ff->iomap_dev.
+ *
+ * Returns 0 on success, or the result of translate_error() if the fd cannot
+ * be obtained from the io channel or the registration fails.
+ */
+static int fuse2fs_iomap_config_devices(struct fuse2fs *ff)
+{
+ errcode_t err;
+ int fd;
+ int ret;
+
+ err = io_channel_get_fd(ff->fs->io, &fd);
+ if (err)
+ return translate_error(ff->fs, 0, err);
+
+ ret = fuse_fs_iomap_device_add(fd, 0);
+ if (ret < 0) {
+ dbg_printf(ff, "%s: cannot register iomap dev fd=%d, err=%d\n",
+ __func__, fd, -ret);
+ return translate_error(ff->fs, 0, -ret);
+ }
+
+ /* Record the id first so the debug message reports the new value. */
+ ff->iomap_dev = ret;
+
+ dbg_printf(ff, "%s: registered iomap dev fd=%d iomap_dev=%u\n",
+ __func__, fd, ff->iomap_dev);
+
+ return 0;
+}
+
static int op_iomap_config(const struct fuse_iomap_config_params *p,
size_t psize, struct fuse_iomap_config *cfg)
{
struct fuse2fs *ff = fuse2fs_get();
ext2_filsys fs;
+ int ret = 0;
FUSE2FS_CHECK_CONTEXT(ff);
@@ -5778,8 +5806,13 @@ static int op_iomap_config(const struct fuse_iomap_config_params *p,
cfg->flags |= FUSE_IOMAP_CONFIG_MAXBYTES;
cfg->s_maxbytes = fuse2fs_max_size(ff, p->maxbytes);
- fuse2fs_finish(ff, 0);
- return 0;
+ ret = fuse2fs_iomap_config_devices(ff);
+ if (ret)
+ goto out_unlock;
+
+out_unlock:
+ fuse2fs_finish(ff, ret);
+ return ret;
}
#endif /* HAVE_FUSE_IOMAP */
@@ -6145,6 +6178,7 @@ int main(int argc, char *argv[])
#ifdef HAVE_FUSE_IOMAP
.iomap_want = FT_DEFAULT,
.iomap_state = IOMAP_UNKNOWN,
+ .iomap_dev = FUSE_IOMAP_DEV_NULL,
#endif
};
errcode_t err;
^ permalink raw reply related [flat|nested] 50+ messages in thread* [PATCH 05/19] fuse2fs: implement directio file reads
2026-04-29 14:20 ` [PATCHSET v8 2/6] fuse2fs: use fuse iomap data paths for better file I/O performance Darrick J. Wong
` (3 preceding siblings ...)
2026-04-29 14:53 ` [PATCH 04/19] fuse2fs: register block devices for use with iomap Darrick J. Wong
@ 2026-04-29 14:53 ` Darrick J. Wong
2026-04-29 14:54 ` [PATCH 06/19] fuse2fs: add extent dump function for debugging Darrick J. Wong
` (13 subsequent siblings)
18 siblings, 0 replies; 50+ messages in thread
From: Darrick J. Wong @ 2026-04-29 14:53 UTC (permalink / raw)
To: tytso
Cc: bernd, miklos, linux-ext4, neal, linux-fsdevel, fuse-devel,
joannelkoong
From: Darrick J. Wong <djwong@kernel.org>
Implement file reads via iomap. Currently only directio is supported.
Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
---
fuse4fs/fuse4fs.c | 14 +++++++++++++-
misc/fuse2fs.c | 14 +++++++++++++-
2 files changed, 26 insertions(+), 2 deletions(-)
diff --git a/fuse4fs/fuse4fs.c b/fuse4fs/fuse4fs.c
index 3e9852f585302d..a1d931aed8f393 100644
--- a/fuse4fs/fuse4fs.c
+++ b/fuse4fs/fuse4fs.c
@@ -6370,7 +6370,19 @@ static int fuse4fs_iomap_begin_read(struct fuse4fs *ff, ext2_ino_t ino,
uint64_t count, uint32_t opflags,
struct fuse_file_iomap *read)
{
- return -ENOSYS;
+ if (!(opflags & FUSE_IOMAP_OP_DIRECT))
+ return -ENOSYS;
+
+ /* fall back to slow path for inline data reads */
+ if (inode->i_flags & EXT4_INLINE_DATA_FL)
+ return -ENOSYS;
+
+ if (inode->i_flags & EXT4_EXTENTS_FL)
+ return fuse4fs_iomap_begin_extent(ff, ino, inode, pos, count,
+ opflags, read);
+
+ return fuse4fs_iomap_begin_indirect(ff, ino, inode, pos, count,
+ opflags, read);
}
static int fuse4fs_iomap_begin_write(struct fuse4fs *ff, ext2_ino_t ino,
diff --git a/misc/fuse2fs.c b/misc/fuse2fs.c
index c24ae461dad2ad..739867fa41dd91 100644
--- a/misc/fuse2fs.c
+++ b/misc/fuse2fs.c
@@ -5605,7 +5605,19 @@ static int fuse2fs_iomap_begin_read(struct fuse2fs *ff, ext2_ino_t ino,
uint64_t count, uint32_t opflags,
struct fuse_file_iomap *read)
{
- return -ENOSYS;
+ if (!(opflags & FUSE_IOMAP_OP_DIRECT))
+ return -ENOSYS;
+
+ /* fall back to slow path for inline data reads */
+ if (inode->i_flags & EXT4_INLINE_DATA_FL)
+ return -ENOSYS;
+
+ if (inode->i_flags & EXT4_EXTENTS_FL)
+ return fuse2fs_iomap_begin_extent(ff, ino, inode, pos, count,
+ opflags, read);
+
+ return fuse2fs_iomap_begin_indirect(ff, ino, inode, pos, count,
+ opflags, read);
}
static int fuse2fs_iomap_begin_write(struct fuse2fs *ff, ext2_ino_t ino,
^ permalink raw reply related [flat|nested] 50+ messages in thread* [PATCH 06/19] fuse2fs: add extent dump function for debugging
2026-04-29 14:20 ` [PATCHSET v8 2/6] fuse2fs: use fuse iomap data paths for better file I/O performance Darrick J. Wong
` (4 preceding siblings ...)
2026-04-29 14:53 ` [PATCH 05/19] fuse2fs: implement directio file reads Darrick J. Wong
@ 2026-04-29 14:54 ` Darrick J. Wong
2026-04-29 14:54 ` [PATCH 07/19] fuse2fs: implement direct write support Darrick J. Wong
` (12 subsequent siblings)
18 siblings, 0 replies; 50+ messages in thread
From: Darrick J. Wong @ 2026-04-29 14:54 UTC (permalink / raw)
To: tytso
Cc: bernd, miklos, linux-ext4, neal, linux-fsdevel, fuse-devel,
joannelkoong
From: Darrick J. Wong <djwong@kernel.org>
Add a function to dump an inode's extent map for debugging purposes.
This helped debug a problem with generic/299 failing on 1k fsblock
filesystems:
--- a/tests/generic/299.out 2025-07-15 14:45:15.030113607 -0700
+++ b/tests/generic/299.out.bad 2025-07-16 19:33:50.889344998 -0700
@@ -3,3 +3,4 @@ QA output created by 299
Run fio with random aio-dio pattern
Start fallocate/truncate loop
+fio: io_u error on file /opt/direct_aio.0.0: Input/output error: write offset=2602827776, buflen=131072
(The cause of this was misuse of the libext2fs extent code)
Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
---
fuse4fs/fuse4fs.c | 70 +++++++++++++++++++++++++++++++++++++++++++++++++++++
misc/fuse2fs.c | 70 +++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 140 insertions(+)
diff --git a/fuse4fs/fuse4fs.c b/fuse4fs/fuse4fs.c
index a1d931aed8f393..1489be2104f2b2 100644
--- a/fuse4fs/fuse4fs.c
+++ b/fuse4fs/fuse4fs.c
@@ -917,6 +917,74 @@ static inline int fuse4fs_iomap_enabled(const struct fuse4fs *ff)
# define fuse4fs_iomap_enabled(...) (0)
#endif
+/*
+ * Debugging aid: print every extent record of an extent-mapped inode to
+ * stdout, followed by a running block total.
+ *
+ * @ino is the inode number; @inode may be NULL, in which case the inode is
+ * read via fuse4fs_read_inode(). @why is a caller-supplied tag that is
+ * printed on every line. Inodes without EXT4_EXTENTS_FL are skipped silently.
+ * Errors are reported through com_err() and are not returned to the caller.
+ */
+static inline void fuse4fs_dump_extents(struct fuse4fs *ff, ext2_ino_t ino,
+ struct ext2_inode_large *inode,
+ const char *why)
+{
+ ext2_filsys fs = ff->fs;
+ unsigned int nr = 0;
+ blk64_t blockcount = 0;
+ struct ext2_inode_large xinode;
+ struct ext2fs_extent extent;
+ ext2_extent_handle_t extents;
+ int op = EXT2_EXTENT_ROOT;
+ errcode_t retval;
+
+ if (!inode) {
+ inode = &xinode;
+
+ retval = fuse4fs_read_inode(fs, ino, inode);
+ if (retval) {
+ com_err(__func__, retval, _("reading ino %u"), ino);
+ return;
+ }
+ }
+
+ if (!(inode->i_flags & EXT4_EXTENTS_FL))
+ return;
+
+ printf("%s: %s ino=%u isize %llu iblocks %llu\n", __func__, why, ino,
+ EXT2_I_SIZE(inode),
+ (ext2fs_get_stat_i_blocks(fs, EXT2_INODE(inode)) * 512) /
+ fs->blocksize);
+ fflush(stdout);
+
+ retval = ext2fs_extent_open(fs, ino, &extents);
+ if (retval) {
+ com_err(__func__, retval, _("opening extents of ino \"%u\""),
+ ino);
+ return;
+ }
+
+ while ((retval = ext2fs_extent_get(extents, op, &extent)) == 0) {
+ op = EXT2_EXTENT_NEXT;
+
+ /* Interior nodes are visited twice; only report the first visit. */
+ if (extent.e_flags & EXT2_EXTENT_FLAGS_SECOND_VISIT)
+ continue;
+
+ printf("[%u]: %s ino=%u lblk 0x%llx pblk 0x%llx len 0x%x flags 0x%x\n",
+ nr++, why, ino, extent.e_lblk, extent.e_pblk,
+ extent.e_len, extent.e_flags);
+ fflush(stdout);
+ if (extent.e_flags & EXT2_EXTENT_FLAGS_LEAF)
+ blockcount += extent.e_len;
+ else
+ blockcount++;
+ }
+ /* Running off the end of the extent tree is the normal loop exit. */
+ if (retval == EXT2_ET_EXTENT_NO_NEXT)
+ retval = 0;
+ if (retval) {
+ com_err(__func__, retval, _("getting extents of ino %u"),
+ ino);
+ }
+ if (inode->i_file_acl)
+ blockcount++;
+ printf("%s: %s sum(e_len) %llu\n", __func__, why, blockcount);
+ fflush(stdout);
+
+ ext2fs_extent_free(extents);
+}
+
static void get_now(struct timespec *now)
{
#ifdef CLOCK_REALTIME
@@ -6444,6 +6512,8 @@ static void op_iomap_begin(fuse_req_t req, fuse_ino_t fino, uint64_t dontcare,
/* Not filling even the first byte will make the kernel unhappy. */
if (read.offset > pos || read.offset + read.length <= pos) {
+ if (ff->debug)
+ fuse4fs_dump_extents(ff, ino, &inode, "BAD DATA");
ret = translate_error(fs, ino, EXT2_ET_INODE_CORRUPTED);
goto out_unlock;
}
diff --git a/misc/fuse2fs.c b/misc/fuse2fs.c
index 739867fa41dd91..4b37bcde63d0f2 100644
--- a/misc/fuse2fs.c
+++ b/misc/fuse2fs.c
@@ -729,6 +729,74 @@ static inline int fuse2fs_iomap_enabled(const struct fuse2fs *ff)
# define fuse2fs_iomap_enabled(...) (0)
#endif
+/*
+ * Debugging aid: print every extent record of an extent-mapped inode to
+ * stdout, followed by a running block total.
+ *
+ * @ino is the inode number; @inode may be NULL, in which case the inode is
+ * read via fuse2fs_read_inode(). @why is a caller-supplied tag that is
+ * printed on every line. Inodes without EXT4_EXTENTS_FL are skipped silently.
+ * Errors are reported through com_err() and are not returned to the caller.
+ */
+static inline void fuse2fs_dump_extents(struct fuse2fs *ff, ext2_ino_t ino,
+ struct ext2_inode_large *inode,
+ const char *why)
+{
+ ext2_filsys fs = ff->fs;
+ unsigned int nr = 0;
+ blk64_t blockcount = 0;
+ struct ext2_inode_large xinode;
+ struct ext2fs_extent extent;
+ ext2_extent_handle_t extents;
+ int op = EXT2_EXTENT_ROOT;
+ errcode_t retval;
+
+ if (!inode) {
+ inode = &xinode;
+
+ retval = fuse2fs_read_inode(fs, ino, inode);
+ if (retval) {
+ com_err(__func__, retval, _("reading ino %u"), ino);
+ return;
+ }
+ }
+
+ if (!(inode->i_flags & EXT4_EXTENTS_FL))
+ return;
+
+ printf("%s: %s ino=%u isize %llu iblocks %llu\n", __func__, why, ino,
+ EXT2_I_SIZE(inode),
+ (ext2fs_get_stat_i_blocks(fs, EXT2_INODE(inode)) * 512) /
+ fs->blocksize);
+ fflush(stdout);
+
+ retval = ext2fs_extent_open(fs, ino, &extents);
+ if (retval) {
+ com_err(__func__, retval, _("opening extents of ino \"%u\""),
+ ino);
+ return;
+ }
+
+ while ((retval = ext2fs_extent_get(extents, op, &extent)) == 0) {
+ op = EXT2_EXTENT_NEXT;
+
+ /* Interior nodes are visited twice; only report the first visit. */
+ if (extent.e_flags & EXT2_EXTENT_FLAGS_SECOND_VISIT)
+ continue;
+
+ printf("[%u]: %s ino=%u lblk 0x%llx pblk 0x%llx len 0x%x flags 0x%x\n",
+ nr++, why, ino, extent.e_lblk, extent.e_pblk,
+ extent.e_len, extent.e_flags);
+ fflush(stdout);
+ if (extent.e_flags & EXT2_EXTENT_FLAGS_LEAF)
+ blockcount += extent.e_len;
+ else
+ blockcount++;
+ }
+ /* Running off the end of the extent tree is the normal loop exit. */
+ if (retval == EXT2_ET_EXTENT_NO_NEXT)
+ retval = 0;
+ if (retval) {
+ com_err(__func__, retval, _("getting extents of ino %u"),
+ ino);
+ }
+ if (inode->i_file_acl)
+ blockcount++;
+ printf("%s: %s sum(e_len) %llu\n", __func__, why, blockcount);
+ fflush(stdout);
+
+ ext2fs_extent_free(extents);
+}
+
static void get_now(struct timespec *now)
{
#ifdef CLOCK_REALTIME
@@ -5681,6 +5749,8 @@ static int op_iomap_begin(const char *path, uint64_t nodeid, uint64_t attr_ino,
/* Not filling even the first byte will make the kernel unhappy. */
if (read->offset > pos || read->offset + read->length <= pos) {
+ if (ff->debug)
+ fuse2fs_dump_extents(ff, attr_ino, &inode, "BAD DATA");
ret = translate_error(fs, attr_ino, EXT2_ET_INODE_CORRUPTED);
goto out_unlock;
}
^ permalink raw reply related [flat|nested] 50+ messages in thread* [PATCH 07/19] fuse2fs: implement direct write support
2026-04-29 14:20 ` [PATCHSET v8 2/6] fuse2fs: use fuse iomap data paths for better file I/O performance Darrick J. Wong
` (5 preceding siblings ...)
2026-04-29 14:54 ` [PATCH 06/19] fuse2fs: add extent dump function for debugging Darrick J. Wong
@ 2026-04-29 14:54 ` Darrick J. Wong
2026-04-29 14:54 ` [PATCH 08/19] fuse2fs: turn on iomap for pagecache IO Darrick J. Wong
` (11 subsequent siblings)
18 siblings, 0 replies; 50+ messages in thread
From: Darrick J. Wong @ 2026-04-29 14:54 UTC (permalink / raw)
To: tytso
Cc: bernd, miklos, linux-ext4, neal, linux-fsdevel, fuse-devel,
joannelkoong
From: Darrick J. Wong <djwong@kernel.org>
Wire up an iomap_begin method that can allocate into holes so that we
can do directio writes.
Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
---
fuse4fs/fuse4fs.c | 477 +++++++++++++++++++++++++++++++++++++++++++++++++++++
misc/fuse2fs.c | 471 ++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 942 insertions(+), 6 deletions(-)
diff --git a/fuse4fs/fuse4fs.c b/fuse4fs/fuse4fs.c
index 1489be2104f2b2..8b508de5b8cb65 100644
--- a/fuse4fs/fuse4fs.c
+++ b/fuse4fs/fuse4fs.c
@@ -6453,12 +6453,106 @@ static int fuse4fs_iomap_begin_read(struct fuse4fs *ff, ext2_ino_t ino,
opflags, read);
}
+static int fuse4fs_iomap_write_allocate(struct fuse4fs *ff, ext2_ino_t ino,
+ struct ext2_inode_large *inode,
+ off_t pos, uint64_t count,
+ uint32_t opflags,
+ struct fuse_file_iomap *read,
+ bool *dirty)
+{
+ ext2_filsys fs = ff->fs;
+ blk64_t startoff = FUSE4FS_B_TO_FSBT(ff, pos);
+ blk64_t stopoff = FUSE4FS_B_TO_FSB(ff, pos + count);
+ blk64_t old_iblocks;
+ errcode_t err;
+ int ret;
+
+ dbg_printf(ff,
+ "%s: ino=%d startoff 0x%llx blockcount 0x%llx\n",
+ __func__, ino, startoff, stopoff - startoff);
+
+ if (!fuse4fs_can_allocate(ff, stopoff - startoff))
+ return -ENOSPC;
+
+ old_iblocks = ext2fs_get_stat_i_blocks(fs, EXT2_INODE(inode));
+ err = ext2fs_fallocate(fs, EXT2_FALLOCATE_FORCE_UNINIT, ino,
+ EXT2_INODE(inode), ~0ULL, startoff,
+ stopoff - startoff);
+ if (err)
+ return translate_error(fs, ino, err);
+
+ /*
+ * New allocations for file data blocks on indirect mapped files are
+ * zeroed through the IO manager so we have to flush it to disk.
+ */
+ if (!(inode->i_flags & EXT4_EXTENTS_FL) &&
+ old_iblocks != ext2fs_get_stat_i_blocks(fs, EXT2_INODE(inode))) {
+ err = io_channel_flush(fs->io);
+ if (err)
+ return translate_error(fs, ino, err);
+ }
+
+ /* pick up the newly allocated mapping */
+ ret = fuse4fs_iomap_begin_read(ff, ino, inode, pos, count, opflags,
+ read);
+ if (ret)
+ return ret;
+
+ read->flags |= FUSE_IOMAP_F_DIRTY;
+ *dirty = true;
+ return 0;
+}
+
+static off_t fuse4fs_max_file_size(const struct fuse4fs *ff,
+ const struct ext2_inode_large *inode)
+{
+ ext2_filsys fs = ff->fs;
+ blk64_t addr_per_block, max_map_block;
+
+ if (inode->i_flags & EXT4_EXTENTS_FL) {
+ max_map_block = (1ULL << 32) - 1;
+ } else {
+ addr_per_block = fs->blocksize >> 2;
+ max_map_block = addr_per_block;
+ max_map_block += addr_per_block * addr_per_block;
+ max_map_block += addr_per_block * addr_per_block * addr_per_block;
+ max_map_block += 12;
+ }
+
+ return FUSE4FS_FSB_TO_B(ff, max_map_block) + (fs->blocksize - 1);
+}
+
static int fuse4fs_iomap_begin_write(struct fuse4fs *ff, ext2_ino_t ino,
struct ext2_inode_large *inode, off_t pos,
uint64_t count, uint32_t opflags,
- struct fuse_file_iomap *read)
+ struct fuse_file_iomap *read,
+ bool *dirty)
{
- return -ENOSYS;
+ off_t max_size = fuse4fs_max_file_size(ff, inode);
+ int ret;
+
+ if (!(opflags & FUSE_IOMAP_OP_DIRECT))
+ return -ENOSYS;
+
+ if (pos >= max_size)
+ return -EFBIG;
+
+ if (pos >= max_size - count)
+ count = max_size - pos;
+
+ ret = fuse4fs_iomap_begin_read(ff, ino, inode, pos, count, opflags,
+ read);
+ if (ret)
+ return ret;
+
+ if (fuse_iomap_need_write_allocate(opflags, read)) {
+ ret = fuse4fs_iomap_write_allocate(ff, ino, inode, pos, count,
+ opflags, read, dirty);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
}
static void op_iomap_begin(fuse_req_t req, fuse_ino_t fino, uint64_t dontcare,
@@ -6470,6 +6564,7 @@ static void op_iomap_begin(fuse_req_t req, fuse_ino_t fino, uint64_t dontcare,
ext2_filsys fs;
ext2_ino_t ino;
errcode_t err;
+ bool dirty = false;
int ret = 0;
FUSE4FS_CHECK_CONTEXT(req);
@@ -6493,7 +6588,7 @@ static void op_iomap_begin(fuse_req_t req, fuse_ino_t fino, uint64_t dontcare,
opflags, &read);
else if (fuse_iomap_is_write(opflags))
ret = fuse4fs_iomap_begin_write(ff, ino, &inode, pos, count,
- opflags, &read);
+ opflags, &read, &dirty);
else
ret = fuse4fs_iomap_begin_read(ff, ino, &inode, pos, count,
opflags, &read);
@@ -6518,6 +6613,14 @@ static void op_iomap_begin(fuse_req_t req, fuse_ino_t fino, uint64_t dontcare,
goto out_unlock;
}
+ if (dirty) {
+ err = fuse4fs_write_inode(fs, ino, &inode);
+ if (err) {
+ ret = translate_error(fs, ino, err);
+ goto out_unlock;
+ }
+ }
+
out_unlock:
fuse4fs_finish(ff, ret);
if (ret)
@@ -6667,6 +6770,373 @@ static void op_iomap_config(fuse_req_t req,
else
fuse_reply_iomap_config(req, &cfg);
}
+
+static inline bool fuse4fs_can_merge_mappings(const struct ext2fs_extent *left,
+ const struct ext2fs_extent *right)
+{
+ uint64_t max_len = (left->e_flags & EXT2_EXTENT_FLAGS_UNINIT) ?
+ EXT_UNINIT_MAX_LEN : EXT_INIT_MAX_LEN;
+
+ return left->e_lblk + left->e_len == right->e_lblk &&
+ left->e_pblk + left->e_len == right->e_pblk &&
+ (left->e_flags & EXT2_EXTENT_FLAGS_UNINIT) ==
+ (right->e_flags & EXT2_EXTENT_FLAGS_UNINIT) &&
+ (uint64_t)left->e_len + right->e_len <= max_len;
+}
+
+static int fuse4fs_try_merge_mappings(struct fuse4fs *ff, ext2_ino_t ino,
+ ext2_extent_handle_t handle,
+ blk64_t startoff)
+{
+ ext2_filsys fs = ff->fs;
+ struct ext2fs_extent left, right;
+ errcode_t err;
+
+ /* Look up the mappings before startoff */
+ err = fuse4fs_get_mapping_at(ff, handle, startoff - 1, &left);
+ if (err == EXT2_ET_EXTENT_NOT_FOUND)
+ return 0;
+ if (err)
+ return translate_error(fs, ino, err);
+
+ /* Look up the mapping at startoff */
+ err = fuse4fs_get_mapping_at(ff, handle, startoff, &right);
+ if (err == EXT2_ET_EXTENT_NOT_FOUND)
+ return 0;
+ if (err)
+ return translate_error(fs, ino, err);
+
+ /* Can we combine them? */
+ if (!fuse4fs_can_merge_mappings(&left, &right))
+ return 0;
+
+ /*
+ * Delete the mapping after startoff because libext2fs cannot handle
+ * overlapping mappings.
+ */
+ err = ext2fs_extent_delete(handle, 0);
+ DUMP_EXTENT(ff, "remover", startoff, err, &right);
+ if (err)
+ return translate_error(fs, ino, err);
+
+ err = ext2fs_extent_fix_parents(handle);
+ DUMP_EXTENT(ff, "fixremover", startoff, err, &right);
+ if (err)
+ return translate_error(fs, ino, err);
+
+ /* Move back and lengthen the mapping before startoff */
+ err = ext2fs_extent_goto(handle, left.e_lblk);
+ DUMP_EXTENT(ff, "movel", startoff - 1, err, &left);
+ if (err)
+ return translate_error(fs, ino, err);
+
+ left.e_len += right.e_len;
+ err = ext2fs_extent_replace(handle, 0, &left);
+ DUMP_EXTENT(ff, "replacel", startoff - 1, err, &left);
+ if (err)
+ return translate_error(fs, ino, err);
+
+ err = ext2fs_extent_fix_parents(handle);
+ DUMP_EXTENT(ff, "fixreplacel", startoff - 1, err, &left);
+ if (err)
+ return translate_error(fs, ino, err);
+
+ return 0;
+}
+
+static int fuse4fs_convert_unwritten_mapping(struct fuse4fs *ff,
+ ext2_ino_t ino,
+ struct ext2_inode_large *inode,
+ ext2_extent_handle_t handle,
+ blk64_t *cursor, blk64_t stopoff)
+{
+ ext2_filsys fs = ff->fs;
+ struct ext2fs_extent extent;
+ blk64_t startoff = *cursor;
+ errcode_t err;
+
+ /*
+ * Find the mapping at startoff. Note that we can find holes because
+ * the mapping data can change due to racing writes.
+ */
+ err = fuse4fs_get_mapping_at(ff, handle, startoff, &extent);
+ if (err == EXT2_ET_EXTENT_NOT_FOUND) {
+ /*
+ * If we didn't find any mappings at all then the file is
+ * completely sparse. There's nothing to convert.
+ */
+ *cursor = stopoff;
+ return 0;
+ }
+ if (err)
+ return translate_error(fs, ino, err);
+
+ /*
+ * The mapping is completely to the left of the range that we want.
+ * Let's see what's in the next extent, if there is one.
+ */
+ if (startoff >= extent.e_lblk + extent.e_len) {
+ /*
+ * Mapping ends to the left of the current position. Try to
+ * find the next mapping. If there is no next mapping, then
+ * we're done.
+ */
+ err = fuse4fs_get_next_mapping(ff, handle, startoff, &extent);
+ if (err == EXT2_ET_EXTENT_NOT_FOUND) {
+ *cursor = stopoff;
+ return 0;
+ }
+ if (err)
+ return translate_error(fs, ino, err);
+ }
+
+ /*
+ * The mapping is completely to the right of the range that we want,
+ * so we're done.
+ */
+ if (extent.e_lblk >= stopoff) {
+ *cursor = stopoff;
+ return 0;
+ }
+
+ /*
+ * At this point, we have a mapping that overlaps [startoff, stopoff).
+ * If the mapping is already written, move on to the next one.
+ */
+ if (!(extent.e_flags & EXT2_EXTENT_FLAGS_UNINIT))
+ goto next;
+
+ if (startoff > extent.e_lblk) {
+ struct ext2fs_extent newex = extent;
+
+ /*
+ * Unwritten mapping starts before startoff. Shorten
+ * the previous mapping...
+ */
+ newex.e_len = startoff - extent.e_lblk;
+ err = ext2fs_extent_replace(handle, 0, &newex);
+ DUMP_EXTENT(ff, "shortenp", startoff, err, &newex);
+ if (err)
+ return translate_error(fs, ino, err);
+
+ err = ext2fs_extent_fix_parents(handle);
+ DUMP_EXTENT(ff, "fixshortenp", startoff, err, &newex);
+ if (err)
+ return translate_error(fs, ino, err);
+
+ /* ...and create new written mapping at startoff. */
+ extent.e_len -= newex.e_len;
+ extent.e_lblk += newex.e_len;
+ extent.e_pblk += newex.e_len;
+ extent.e_flags = newex.e_flags & ~EXT2_EXTENT_FLAGS_UNINIT;
+
+ err = ext2fs_extent_insert(handle,
+ EXT2_EXTENT_INSERT_AFTER,
+ &extent);
+ DUMP_EXTENT(ff, "insertx", startoff, err, &extent);
+ if (err)
+ return translate_error(fs, ino, err);
+
+ err = ext2fs_extent_fix_parents(handle);
+ DUMP_EXTENT(ff, "fixinsertx", startoff, err, &extent);
+ if (err)
+ return translate_error(fs, ino, err);
+ }
+
+ if (extent.e_lblk + extent.e_len > stopoff) {
+ struct ext2fs_extent newex = extent;
+
+ /*
+ * Unwritten mapping ends after stopoff. Shorten the current
+ * mapping...
+ */
+ extent.e_len = stopoff - extent.e_lblk;
+ extent.e_flags &= ~EXT2_EXTENT_FLAGS_UNINIT;
+
+ err = ext2fs_extent_replace(handle, 0, &extent);
+ DUMP_EXTENT(ff, "shortenn", startoff, err, &extent);
+ if (err)
+ return translate_error(fs, ino, err);
+
+ err = ext2fs_extent_fix_parents(handle);
+ DUMP_EXTENT(ff, "fixshortenn", startoff, err, &extent);
+ if (err)
+ return translate_error(fs, ino, err);
+
+ /* ..and create a new unwritten mapping at stopoff. */
+ newex.e_pblk += extent.e_len;
+ newex.e_lblk += extent.e_len;
+ newex.e_len -= extent.e_len;
+ newex.e_flags |= EXT2_EXTENT_FLAGS_UNINIT;
+
+ err = ext2fs_extent_insert(handle,
+ EXT2_EXTENT_INSERT_AFTER,
+ &newex);
+ DUMP_EXTENT(ff, "insertn", startoff, err, &newex);
+ if (err)
+ return translate_error(fs, ino, err);
+
+ err = ext2fs_extent_fix_parents(handle);
+ DUMP_EXTENT(ff, "fixinsertn", startoff, err, &newex);
+ if (err)
+ return translate_error(fs, ino, err);
+ }
+
+ /* Still unwritten? Update the state. */
+ if (extent.e_flags & EXT2_EXTENT_FLAGS_UNINIT) {
+ extent.e_flags &= ~EXT2_EXTENT_FLAGS_UNINIT;
+
+ err = ext2fs_extent_replace(handle, 0, &extent);
+ DUMP_EXTENT(ff, "replacex", startoff, err, &extent);
+ if (err)
+ return translate_error(fs, ino, err);
+
+ err = ext2fs_extent_fix_parents(handle);
+ DUMP_EXTENT(ff, "fixreplacex", startoff, err, &extent);
+ if (err)
+ return translate_error(fs, ino, err);
+ }
+
+next:
+ /*
+ * Try to merge with the previous extent.
+ * NOTE(review): fuse4fs_try_merge_mappings() already returns a
+ * translate_error() result, so the error path below feeds that value
+ * through translate_error() a second time -- confirm this is intended.
+ */
+ if (startoff > 0) {
+ err = fuse4fs_try_merge_mappings(ff, ino, handle, startoff);
+ if (err)
+ return translate_error(fs, ino, err);
+ }
+
+ *cursor = extent.e_lblk + extent.e_len;
+ return 0;
+}
+
+static int fuse4fs_convert_unwritten_mappings(struct fuse4fs *ff,
+ ext2_ino_t ino,
+ struct ext2_inode_large *inode,
+ off_t pos, size_t written)
+{
+ ext2_extent_handle_t handle;
+ ext2_filsys fs = ff->fs;
+ blk64_t startoff = FUSE4FS_B_TO_FSBT(ff, pos);
+ const blk64_t stopoff = FUSE4FS_B_TO_FSB(ff, pos + written);
+ errcode_t err;
+ int ret;
+
+ err = ext2fs_extent_open2(fs, ino, EXT2_INODE(inode), &handle);
+ if (err)
+ return translate_error(fs, ino, err);
+
+ /* Walk every mapping in the range, converting them. */
+ while (startoff < stopoff) {
+ blk64_t old_startoff = startoff;
+
+ ret = fuse4fs_convert_unwritten_mapping(ff, ino, inode, handle,
+ &startoff, stopoff);
+ if (ret)
+ goto out_handle;
+ if (startoff <= old_startoff) {
+ /* Do not go backwards. */
+ ret = translate_error(fs, ino, EXT2_ET_INODE_CORRUPTED);
+ goto out_handle;
+ }
+ }
+
+ /* Try to merge the right edge */
+ ret = fuse4fs_try_merge_mappings(ff, ino, handle, stopoff);
+out_handle:
+ ext2fs_extent_free(handle);
+ return ret;
+}
+
+static void op_iomap_ioend(fuse_req_t req, fuse_ino_t fino, uint64_t dontcare,
+ off_t pos, size_t written, uint32_t ioendflags,
+ int error, uint32_t dev, uint64_t new_addr)
+{
+ struct fuse4fs *ff = fuse4fs_get(req);
+ struct ext2_inode_large inode;
+ ext2_filsys fs;
+ ext2_ino_t ino;
+ ext2_off64_t isize;
+ errcode_t err;
+ bool dirty = false;
+ off_t newsize = -1;
+ int ret = 0;
+
+ FUSE4FS_CHECK_CONTEXT(req);
+ FUSE4FS_CONVERT_FINO(req, &ino, fino);
+
+ dbg_printf(ff,
+ "%s: ino=%d pos=0x%llx written=0x%zx ioendflags=0x%x error=%d dev=%u new_addr=0x%llx\n",
+ __func__, ino,
+ (unsigned long long)pos,
+ written,
+ ioendflags,
+ error,
+ dev,
+ (unsigned long long)new_addr);
+
+ if (error) {
+ fuse_reply_err(req, -error);
+ return;
+ }
+
+ fs = fuse4fs_start(ff);
+
+ /* should never see these ioend types */
+ if (ioendflags & FUSE_IOMAP_IOEND_SHARED) {
+ ret = translate_error(fs, ino, EXT2_ET_FILESYSTEM_CORRUPTED);
+ goto out_unlock;
+ }
+
+ err = fuse4fs_read_inode(fs, ino, &inode);
+ if (err) {
+ ret = translate_error(fs, ino, err);
+ goto out_unlock;
+ }
+
+ if (ioendflags & FUSE_IOMAP_IOEND_UNWRITTEN) {
+ /* unwritten extents are only supported on extents files */
+ if (!(inode.i_flags & EXT4_EXTENTS_FL)) {
+ ret = translate_error(fs, ino,
+ EXT2_ET_FILESYSTEM_CORRUPTED);
+ goto out_unlock;
+ }
+
+ ret = fuse4fs_convert_unwritten_mappings(ff, ino, &inode,
+ pos, written);
+ if (ret)
+ goto out_unlock;
+
+ dirty = true;
+ }
+
+ isize = EXT2_I_SIZE(&inode);
+ if (pos + written > isize) {
+ err = ext2fs_inode_size_set(fs, EXT2_INODE(&inode),
+ pos + written);
+ if (err) {
+ ret = translate_error(fs, ino, err);
+ goto out_unlock;
+ }
+
+ dirty = true;
+ }
+
+ if (dirty) {
+ err = fuse4fs_write_inode(fs, ino, &inode);
+ if (err) {
+ ret = translate_error(fs, ino, err);
+ goto out_unlock;
+ }
+ }
+
+ newsize = EXT2_I_SIZE(&inode);
+out_unlock:
+ fuse4fs_finish(ff, ret);
+ if (ret)
+ fuse_reply_err(req, -ret);
+ else
+ fuse_reply_iomap_ioend(req, newsize);
+}
#endif /* HAVE_FUSE_IOMAP */
static struct fuse_lowlevel_ops fs_ops = {
@@ -6716,6 +7186,7 @@ static struct fuse_lowlevel_ops fs_ops = {
.iomap_begin = op_iomap_begin,
.iomap_end = op_iomap_end,
.iomap_config = op_iomap_config,
+ .iomap_ioend = op_iomap_ioend,
#endif /* HAVE_FUSE_IOMAP */
};
diff --git a/misc/fuse2fs.c b/misc/fuse2fs.c
index 4b37bcde63d0f2..67a5bc4c5cc986 100644
--- a/misc/fuse2fs.c
+++ b/misc/fuse2fs.c
@@ -5688,12 +5688,103 @@ static int fuse2fs_iomap_begin_read(struct fuse2fs *ff, ext2_ino_t ino,
opflags, read);
}
+/*
+ * Allocate blocks to back a write to the byte range starting at @pos for
+ * @count bytes, then report the resulting mapping in @read.
+ *
+ * The byte range is converted to filesystem block numbers with
+ * FUSE2FS_B_TO_FSBT()/FUSE2FS_B_TO_FSB(), checked against fs_can_allocate(),
+ * and allocated with ext2fs_fallocate() using EXT2_FALLOCATE_FORCE_UNINIT.
+ * The mapping is then looked up again with fuse2fs_iomap_begin_read(),
+ * FUSE_IOMAP_F_DIRTY is set on it, and *dirty is set to true.
+ *
+ * Returns 0 on success, -ENOSPC if fs_can_allocate() refuses the request, or
+ * an error code from translate_error() or fuse2fs_iomap_begin_read().
+ */
+static int fuse2fs_iomap_write_allocate(struct fuse2fs *ff, ext2_ino_t ino,
+					struct ext2_inode_large *inode, off_t pos,
+					uint64_t count, uint32_t opflags,
+					struct fuse_file_iomap *read, bool *dirty)
+{
+	ext2_filsys fs = ff->fs;
+	blk64_t startoff = FUSE2FS_B_TO_FSBT(ff, pos);
+	blk64_t stopoff = FUSE2FS_B_TO_FSB(ff, pos + count);
+	blk64_t old_iblocks;
+	errcode_t err;
+	int ret;
+
+	/* cast so that the arguments match %llx whatever blk64_t really is */
+	dbg_printf(ff, "%s: write_alloc ino=%u startoff 0x%llx blockcount 0x%llx\n",
+		   __func__, ino,
+		   (unsigned long long)startoff,
+		   (unsigned long long)(stopoff - startoff));
+
+	if (!fs_can_allocate(ff, stopoff - startoff))
+		return -ENOSPC;
+
+	/* remember i_blocks so that we can tell if anything was allocated */
+	old_iblocks = ext2fs_get_stat_i_blocks(fs, EXT2_INODE(inode));
+	err = ext2fs_fallocate(fs, EXT2_FALLOCATE_FORCE_UNINIT, ino,
+			       EXT2_INODE(inode), ~0ULL, startoff,
+			       stopoff - startoff);
+	if (err)
+		return translate_error(fs, ino, err);
+
+	/*
+	 * New allocations for file data blocks on indirect mapped files are
+	 * zeroed through the IO manager so we have to flush it to disk.
+	 */
+	if (!(inode->i_flags & EXT4_EXTENTS_FL) &&
+	    old_iblocks != ext2fs_get_stat_i_blocks(fs, EXT2_INODE(inode))) {
+		err = io_channel_flush(fs->io);
+		if (err)
+			return translate_error(fs, ino, err);
+	}
+
+	/* pick up the newly allocated mapping */
+	ret = fuse2fs_iomap_begin_read(ff, ino, inode, pos, count, opflags,
+				       read);
+	if (ret)
+		return ret;
+
+	read->flags |= FUSE_IOMAP_F_DIRTY;
+	*dirty = true;
+	return 0;
+}
+
+/*
+ * Compute the byte offset of the last byte of the highest logical block that
+ * this inode's mapping scheme can address.
+ *
+ * Files with EXT4_EXTENTS_FL use 2^32 - 1 as the highest logical block.
+ * Block-mapped files can address 12 direct blocks plus whatever the single,
+ * double and triple indirect blocks can point to.
+ */
+static off_t fuse2fs_max_file_size(const struct fuse2fs *ff,
+				   const struct ext2_inode_large *inode)
+{
+	ext2_filsys fs = ff->fs;
+	blk64_t addr_per_block, max_map_block;
+
+	if (inode->i_flags & EXT4_EXTENTS_FL) {
+		max_map_block = (1ULL << 32) - 1;
+	} else {
+		/* each indirect block holds blocksize / 4 block numbers */
+		addr_per_block = fs->blocksize >> 2;
+		max_map_block = addr_per_block;
+		max_map_block += addr_per_block * addr_per_block;
+		max_map_block += addr_per_block * addr_per_block * addr_per_block;
+		max_map_block += 12;
+
+		/*
+		 * The sum above is the number of addressable blocks; the
+		 * highest logical block number is one less than that.
+		 */
+		max_map_block--;
+	}
+
+	return FUSE2FS_FSB_TO_B(ff, max_map_block) + (fs->blocksize - 1);
+}
+
static int fuse2fs_iomap_begin_write(struct fuse2fs *ff, ext2_ino_t ino,
struct ext2_inode_large *inode, off_t pos,
uint64_t count, uint32_t opflags,
- struct fuse_file_iomap *read)
+ struct fuse_file_iomap *read,
+ bool *dirty)
{
- return -ENOSYS;
+ off_t max_size = fuse2fs_max_file_size(ff, inode);
+ int ret;
+
+ if (!(opflags & FUSE_IOMAP_OP_DIRECT))
+ return -ENOSYS;
+
+ if (pos >= max_size)
+ return -EFBIG;
+
+ if (pos >= max_size - count)
+ count = max_size - pos;
+
+ ret = fuse2fs_iomap_begin_read(ff, ino, inode, pos, count, opflags,
+ read);
+ if (ret)
+ return ret;
+
+ if (fuse_iomap_need_write_allocate(opflags, read)) {
+ ret = fuse2fs_iomap_write_allocate(ff, ino, inode, pos, count,
+ opflags, read, dirty);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
}
static int op_iomap_begin(const char *path, uint64_t nodeid, uint64_t attr_ino,
@@ -5705,6 +5796,7 @@ static int op_iomap_begin(const char *path, uint64_t nodeid, uint64_t attr_ino,
struct ext2_inode_large inode;
ext2_filsys fs;
errcode_t err;
+ bool dirty = false;
int ret = 0;
FUSE2FS_CHECK_CONTEXT(ff);
@@ -5730,7 +5822,7 @@ static int op_iomap_begin(const char *path, uint64_t nodeid, uint64_t attr_ino,
count, opflags, read);
else if (fuse_iomap_is_write(opflags))
ret = fuse2fs_iomap_begin_write(ff, attr_ino, &inode, pos,
- count, opflags, read);
+ count, opflags, read, &dirty);
else
ret = fuse2fs_iomap_begin_read(ff, attr_ino, &inode, pos,
count, opflags, read);
@@ -5755,6 +5847,14 @@ static int op_iomap_begin(const char *path, uint64_t nodeid, uint64_t attr_ino,
goto out_unlock;
}
+ if (dirty) {
+ err = fuse2fs_write_inode(fs, attr_ino, &inode);
+ if (err) {
+ ret = translate_error(fs, attr_ino, err);
+ goto out_unlock;
+ }
+ }
+
out_unlock:
fuse2fs_finish(ff, ret);
return ret;
@@ -5896,6 +5996,370 @@ static int op_iomap_config(const struct fuse_iomap_config_params *p,
fuse2fs_finish(ff, ret);
return ret;
}
+
+/*
+ * Decide whether two extent records can be combined into one.
+ *
+ * @right must start exactly where @left ends, both logically and physically,
+ * both must agree on EXT2_EXTENT_FLAGS_UNINIT, and the combined length must
+ * not exceed the maximum length for that kind of extent.
+ */
+static inline bool fuse2fs_can_merge_mappings(const struct ext2fs_extent *left,
+					      const struct ext2fs_extent *right)
+{
+	uint64_t longest;
+
+	/* logically adjacent? */
+	if (left->e_lblk + left->e_len != right->e_lblk)
+		return false;
+
+	/* physically adjacent? */
+	if (left->e_pblk + left->e_len != right->e_pblk)
+		return false;
+
+	/* same written/unwritten state? */
+	if ((left->e_flags & EXT2_EXTENT_FLAGS_UNINIT) !=
+	    (right->e_flags & EXT2_EXTENT_FLAGS_UNINIT))
+		return false;
+
+	if (left->e_flags & EXT2_EXTENT_FLAGS_UNINIT)
+		longest = EXT_UNINIT_MAX_LEN;
+	else
+		longest = EXT_INIT_MAX_LEN;
+
+	return (uint64_t)left->e_len + right->e_len <= longest;
+}
+
+/*
+ * Try to combine the mapping found at startoff - 1 with the mapping found at
+ * startoff into a single extent record.
+ *
+ * Nothing is changed if either lookup reports EXT2_ET_EXTENT_NOT_FOUND or if
+ * fuse2fs_can_merge_mappings() rejects the pair.  Otherwise the right mapping
+ * is deleted and the left mapping is lengthened to cover it.
+ *
+ * NOTE(review): startoff - 1 is computed without a check, so startoff == 0
+ * presumably wraps around; callers should not pass 0 -- confirm.
+ *
+ * Returns 0 (also when no merge took place) or the result of
+ * translate_error().
+ */
+static int fuse2fs_try_merge_mappings(struct fuse2fs *ff, ext2_ino_t ino,
+ ext2_extent_handle_t handle,
+ blk64_t startoff)
+{
+ ext2_filsys fs = ff->fs;
+ struct ext2fs_extent left, right;
+ errcode_t err;
+
+ /* Look up the mapping at startoff - 1; nothing there means no merge */
+ err = fuse2fs_get_mapping_at(ff, handle, startoff - 1, &left);
+ if (err == EXT2_ET_EXTENT_NOT_FOUND)
+ return 0;
+ if (err)
+ return translate_error(fs, ino, err);
+
+ /* Look up the mapping at startoff; nothing there means no merge */
+ err = fuse2fs_get_mapping_at(ff, handle, startoff, &right);
+ if (err == EXT2_ET_EXTENT_NOT_FOUND)
+ return 0;
+ if (err)
+ return translate_error(fs, ino, err);
+
+ /* Can we combine them? */
+ if (!fuse2fs_can_merge_mappings(&left, &right))
+ return 0;
+
+ /*
+ * Delete the mapping at startoff because libext2fs cannot handle
+ * overlapping mappings.
+ */
+ err = ext2fs_extent_delete(handle, 0);
+ DUMP_EXTENT(ff, "remover", startoff, err, &right);
+ if (err)
+ return translate_error(fs, ino, err);
+
+ err = ext2fs_extent_fix_parents(handle);
+ DUMP_EXTENT(ff, "fixremover", startoff, err, &right);
+ if (err)
+ return translate_error(fs, ino, err);
+
+ /* Move back and lengthen the mapping before startoff */
+ err = ext2fs_extent_goto(handle, left.e_lblk);
+ DUMP_EXTENT(ff, "movel", startoff - 1, err, &left);
+ if (err)
+ return translate_error(fs, ino, err);
+
+ /* the left record now also covers what the right record described */
+ left.e_len += right.e_len;
+ err = ext2fs_extent_replace(handle, 0, &left);
+ DUMP_EXTENT(ff, "replacel", startoff - 1, err, &left);
+ if (err)
+ return translate_error(fs, ino, err);
+
+ err = ext2fs_extent_fix_parents(handle);
+ DUMP_EXTENT(ff, "fixreplacel", startoff - 1, err, &left);
+ if (err)
+ return translate_error(fs, ino, err);
+
+ return 0;
+}
+
+/*
+ * Convert at most one unwritten mapping inside [*cursor, stopoff) to written.
+ *
+ * The mapping at *cursor (or the next one after it) is looked up.  If it is
+ * unwritten, the part of it that lies inside [*cursor, stopoff) is marked
+ * written, splitting the extent record at either end where necessary.
+ * Afterwards, if *cursor was not zero, an attempt is made to merge the
+ * mappings on either side of the original *cursor.
+ *
+ * On success 0 is returned and *cursor is moved to the end of the mapping
+ * that was examined, or to stopoff if there is nothing left to look at.
+ * Otherwise an error code from translate_error() or
+ * fuse2fs_try_merge_mappings() is returned.
+ *
+ * @inode is not referenced by this function.
+ */
+static int fuse2fs_convert_unwritten_mapping(struct fuse2fs *ff,
+					     ext2_ino_t ino,
+					     struct ext2_inode_large *inode,
+					     ext2_extent_handle_t handle,
+					     blk64_t *cursor, blk64_t stopoff)
+{
+	ext2_filsys fs = ff->fs;
+	struct ext2fs_extent extent;
+	blk64_t startoff = *cursor;
+	errcode_t err;
+	int ret;
+
+	/*
+	 * Find the mapping at startoff.  Note that we can find holes because
+	 * the mapping data can change due to racing writes.
+	 */
+	err = fuse2fs_get_mapping_at(ff, handle, startoff, &extent);
+	if (err == EXT2_ET_EXTENT_NOT_FOUND) {
+		/*
+		 * If we didn't find any mappings at all then the file is
+		 * completely sparse.  There's nothing to convert.
+		 */
+		*cursor = stopoff;
+		return 0;
+	}
+	if (err)
+		return translate_error(fs, ino, err);
+
+	/*
+	 * The mapping is completely to the left of the range that we want.
+	 * Let's see what's in the next extent, if there is one.
+	 */
+	if (startoff >= extent.e_lblk + extent.e_len) {
+		/*
+		 * Mapping ends to the left of the current position.  Try to
+		 * find the next mapping.  If there is no next mapping, then
+		 * we're done.
+		 */
+		err = fuse2fs_get_next_mapping(ff, handle, startoff, &extent);
+		if (err == EXT2_ET_EXTENT_NOT_FOUND) {
+			*cursor = stopoff;
+			return 0;
+		}
+		if (err)
+			return translate_error(fs, ino, err);
+	}
+
+	/*
+	 * The mapping is completely to the right of the range that we want,
+	 * so we're done.
+	 */
+	if (extent.e_lblk >= stopoff) {
+		*cursor = stopoff;
+		return 0;
+	}
+
+	/*
+	 * At this point, we have a mapping that overlaps [startoff, stopoff).
+	 * If the mapping is already written, move on to the next one.
+	 */
+	if (!(extent.e_flags & EXT2_EXTENT_FLAGS_UNINIT))
+		goto next;
+
+	if (startoff > extent.e_lblk) {
+		struct ext2fs_extent newex = extent;
+
+		/*
+		 * Unwritten mapping starts before startoff.  Shorten
+		 * the previous mapping...
+		 */
+		newex.e_len = startoff - extent.e_lblk;
+		err = ext2fs_extent_replace(handle, 0, &newex);
+		DUMP_EXTENT(ff, "shortenp", startoff, err, &newex);
+		if (err)
+			return translate_error(fs, ino, err);
+
+		err = ext2fs_extent_fix_parents(handle);
+		DUMP_EXTENT(ff, "fixshortenp", startoff, err, &newex);
+		if (err)
+			return translate_error(fs, ino, err);
+
+		/* ...and create new written mapping at startoff. */
+		extent.e_len -= newex.e_len;
+		extent.e_lblk += newex.e_len;
+		extent.e_pblk += newex.e_len;
+		extent.e_flags = newex.e_flags & ~EXT2_EXTENT_FLAGS_UNINIT;
+
+		err = ext2fs_extent_insert(handle,
+					   EXT2_EXTENT_INSERT_AFTER,
+					   &extent);
+		DUMP_EXTENT(ff, "insertx", startoff, err, &extent);
+		if (err)
+			return translate_error(fs, ino, err);
+
+		err = ext2fs_extent_fix_parents(handle);
+		DUMP_EXTENT(ff, "fixinsertx", startoff, err, &extent);
+		if (err)
+			return translate_error(fs, ino, err);
+	}
+
+	if (extent.e_lblk + extent.e_len > stopoff) {
+		struct ext2fs_extent newex = extent;
+
+		/*
+		 * Unwritten mapping ends after stopoff.  Shorten the current
+		 * mapping...
+		 */
+		extent.e_len = stopoff - extent.e_lblk;
+		extent.e_flags &= ~EXT2_EXTENT_FLAGS_UNINIT;
+
+		err = ext2fs_extent_replace(handle, 0, &extent);
+		DUMP_EXTENT(ff, "shortenn", startoff, err, &extent);
+		if (err)
+			return translate_error(fs, ino, err);
+
+		err = ext2fs_extent_fix_parents(handle);
+		DUMP_EXTENT(ff, "fixshortenn", startoff, err, &extent);
+		if (err)
+			return translate_error(fs, ino, err);
+
+		/* ...and create a new unwritten mapping at stopoff. */
+		newex.e_pblk += extent.e_len;
+		newex.e_lblk += extent.e_len;
+		newex.e_len -= extent.e_len;
+		newex.e_flags |= EXT2_EXTENT_FLAGS_UNINIT;
+
+		err = ext2fs_extent_insert(handle,
+					   EXT2_EXTENT_INSERT_AFTER,
+					   &newex);
+		DUMP_EXTENT(ff, "insertn", startoff, err, &newex);
+		if (err)
+			return translate_error(fs, ino, err);
+
+		err = ext2fs_extent_fix_parents(handle);
+		DUMP_EXTENT(ff, "fixinsertn", startoff, err, &newex);
+		if (err)
+			return translate_error(fs, ino, err);
+	}
+
+	/* Still unwritten?  Update the state. */
+	if (extent.e_flags & EXT2_EXTENT_FLAGS_UNINIT) {
+		extent.e_flags &= ~EXT2_EXTENT_FLAGS_UNINIT;
+
+		err = ext2fs_extent_replace(handle, 0, &extent);
+		DUMP_EXTENT(ff, "replacex", startoff, err, &extent);
+		if (err)
+			return translate_error(fs, ino, err);
+
+		err = ext2fs_extent_fix_parents(handle);
+		DUMP_EXTENT(ff, "fixreplacex", startoff, err, &extent);
+		if (err)
+			return translate_error(fs, ino, err);
+	}
+
+next:
+	/*
+	 * Try to merge with the previous extent.  The merge helper hands back
+	 * an int that has already been through translate_error(), so return
+	 * it as is instead of translating it a second time.
+	 */
+	if (startoff > 0) {
+		ret = fuse2fs_try_merge_mappings(ff, ino, handle, startoff);
+		if (ret)
+			return ret;
+	}
+
+	*cursor = extent.e_lblk + extent.e_len;
+	return 0;
+}
+
+/*
+ * Mark every unwritten mapping that backs the @written bytes starting at @pos
+ * as written.
+ *
+ * An extent handle is opened on the inode and the block range is walked one
+ * mapping at a time with fuse2fs_convert_unwritten_mapping().  If a step
+ * fails to move the cursor forward, the walk stops and
+ * EXT2_ET_INODE_CORRUPTED is reported via translate_error().  After the walk,
+ * a merge is attempted across the right edge of the range.
+ *
+ * Returns 0 on success or an error code from translate_error() or one of the
+ * helpers.  The extent handle is always freed before returning.
+ */
+static int fuse2fs_convert_unwritten_mappings(struct fuse2fs *ff,
+					      ext2_ino_t ino,
+					      struct ext2_inode_large *inode,
+					      off_t pos, size_t written)
+{
+	ext2_filsys fs = ff->fs;
+	ext2_extent_handle_t eh;
+	blk64_t cur_fsb = FUSE2FS_B_TO_FSBT(ff, pos);
+	const blk64_t end_fsb = FUSE2FS_B_TO_FSB(ff, pos + written);
+	errcode_t err;
+	int ret;
+
+	err = ext2fs_extent_open2(fs, ino, EXT2_INODE(inode), &eh);
+	if (err)
+		return translate_error(fs, ino, err);
+
+	/* Convert one mapping per iteration until the range is covered. */
+	while (cur_fsb < end_fsb) {
+		const blk64_t prev_fsb = cur_fsb;
+
+		ret = fuse2fs_convert_unwritten_mapping(ff, ino, inode, eh,
+							&cur_fsb, end_fsb);
+		if (ret)
+			goto out_free;
+
+		/* The cursor moved forward, so keep going. */
+		if (cur_fsb > prev_fsb)
+			continue;
+
+		/* Do not go backwards. */
+		ret = translate_error(fs, ino, EXT2_ET_INODE_CORRUPTED);
+		goto out_free;
+	}
+
+	/* Try to merge the right edge */
+	ret = fuse2fs_try_merge_mappings(ff, ino, eh, end_fsb);
+out_free:
+	ext2fs_extent_free(eh);
+	return ret;
+}
+
+/*
+ * Finish an iomap write: convert unwritten mappings and extend the file size.
+ *
+ * If @error is nonzero it is returned unchanged and the inode is not touched.
+ * An ioend carrying FUSE_IOMAP_IOEND_SHARED is rejected by reporting
+ * EXT2_ET_FILESYSTEM_CORRUPTED through translate_error().  For
+ * FUSE_IOMAP_IOEND_UNWRITTEN, the mappings behind the @written bytes at @pos
+ * are converted with fuse2fs_convert_unwritten_mappings(); this is refused
+ * unless the inode has EXT4_EXTENTS_FL set.  If pos + written is beyond the
+ * current inode size, the size is raised to pos + written.  The inode is
+ * written back only if one of those steps changed something.
+ *
+ * On success 0 is returned and *newsize receives the inode size.  *newsize is
+ * not written on failure.  @path, @nodeid, @dev and @new_addr are used only
+ * in the debug message.
+ */
+static int op_iomap_ioend(const char *path, uint64_t nodeid, uint64_t attr_ino,
+ off_t pos, size_t written, uint32_t ioendflags,
+ int error, uint32_t dev, uint64_t new_addr,
+ off_t *newsize)
+{
+ struct fuse2fs *ff = fuse2fs_get();
+ struct ext2_inode_large inode;
+ ext2_filsys fs;
+ errcode_t err;
+ ext2_off64_t isize;
+ bool dirty = false;
+ int ret = 0;
+
+ FUSE2FS_CHECK_CONTEXT(ff);
+
+ dbg_printf(ff,
+ "%s: path=%s nodeid=%llu attr_ino=%llu pos=0x%llx written=0x%zx ioendflags=0x%x error=%d dev=%u new_addr=%llu\n",
+ __func__, path,
+ (unsigned long long)nodeid,
+ (unsigned long long)attr_ino,
+ (unsigned long long)pos,
+ written,
+ ioendflags,
+ error,
+ dev,
+ (unsigned long long)new_addr);
+
+ fs = fuse2fs_start(ff);
+ /* the IO itself failed; hand the error back without touching the inode */
+ if (error) {
+ ret = error;
+ goto out_unlock;
+ }
+
+ /* should never see these ioend types */
+ if (ioendflags & FUSE_IOMAP_IOEND_SHARED) {
+ ret = translate_error(fs, attr_ino,
+ EXT2_ET_FILESYSTEM_CORRUPTED);
+ goto out_unlock;
+ }
+
+ err = fuse2fs_read_inode(fs, attr_ino, &inode);
+ if (err) {
+ ret = translate_error(fs, attr_ino, err);
+ goto out_unlock;
+ }
+
+ if (ioendflags & FUSE_IOMAP_IOEND_UNWRITTEN) {
+ /* unwritten extents are only supported on extents files */
+ if (!(inode.i_flags & EXT4_EXTENTS_FL)) {
+ ret = translate_error(fs, attr_ino,
+ EXT2_ET_FILESYSTEM_CORRUPTED);
+ goto out_unlock;
+ }
+
+ ret = fuse2fs_convert_unwritten_mappings(ff, attr_ino, &inode,
+ pos, written);
+ if (ret)
+ goto out_unlock;
+
+ dirty = true;
+ }
+
+ /* the write went past the recorded size, so grow the size to match */
+ isize = EXT2_I_SIZE(&inode);
+ if (pos + written > isize) {
+ err = ext2fs_inode_size_set(fs, EXT2_INODE(&inode),
+ pos + written);
+ if (err) {
+ ret = translate_error(fs, attr_ino, err);
+ goto out_unlock;
+ }
+
+ dirty = true;
+ }
+
+ /* skip the inode write if neither step above changed anything */
+ if (dirty) {
+ err = fuse2fs_write_inode(fs, attr_ino, &inode);
+ if (err) {
+ ret = translate_error(fs, attr_ino, err);
+ goto out_unlock;
+ }
+ }
+
+ *newsize = EXT2_I_SIZE(&inode);
+out_unlock:
+ fuse2fs_finish(ff, ret);
+ return ret;
+}
#endif /* HAVE_FUSE_IOMAP */
static struct fuse_operations fs_ops = {
@@ -5943,6 +6407,7 @@ static struct fuse_operations fs_ops = {
.iomap_begin = op_iomap_begin,
.iomap_end = op_iomap_end,
.iomap_config = op_iomap_config,
+ .iomap_ioend = op_iomap_ioend,
#endif /* HAVE_FUSE_IOMAP */
};
^ permalink raw reply related [flat|nested] 50+ messages in thread* [PATCH 08/19] fuse2fs: turn on iomap for pagecache IO
2026-04-29 14:20 ` [PATCHSET v8 2/6] fuse2fs: use fuse iomap data paths for better file I/O performance Darrick J. Wong
` (6 preceding siblings ...)
2026-04-29 14:54 ` [PATCH 07/19] fuse2fs: implement direct write support Darrick J. Wong
@ 2026-04-29 14:54 ` Darrick J. Wong
2026-04-29 14:54 ` [PATCH 09/19] fuse2fs: don't zero bytes in punch hole Darrick J. Wong
` (10 subsequent siblings)
18 siblings, 0 replies; 50+ messages in thread
From: Darrick J. Wong @ 2026-04-29 14:54 UTC (permalink / raw)
To: tytso
Cc: bernd, miklos, linux-ext4, neal, linux-fsdevel, fuse-devel,
joannelkoong
From: Darrick J. Wong <djwong@kernel.org>
Turn on iomap for pagecache IO to regular files.
Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
---
fuse4fs/fuse4fs.c | 61 +++++++++++++++++++++++++++++++++++++++++++++++------
misc/fuse2fs.c | 61 +++++++++++++++++++++++++++++++++++++++++++++++------
2 files changed, 108 insertions(+), 14 deletions(-)
diff --git a/fuse4fs/fuse4fs.c b/fuse4fs/fuse4fs.c
index 8b508de5b8cb65..fa82fda99ff687 100644
--- a/fuse4fs/fuse4fs.c
+++ b/fuse4fs/fuse4fs.c
@@ -6438,9 +6438,6 @@ static int fuse4fs_iomap_begin_read(struct fuse4fs *ff, ext2_ino_t ino,
uint64_t count, uint32_t opflags,
struct fuse_file_iomap *read)
{
- if (!(opflags & FUSE_IOMAP_OP_DIRECT))
- return -ENOSYS;
-
/* fall back to slow path for inline data reads */
if (inode->i_flags & EXT4_INLINE_DATA_FL)
return -ENOSYS;
@@ -6531,9 +6528,6 @@ static int fuse4fs_iomap_begin_write(struct fuse4fs *ff, ext2_ino_t ino,
off_t max_size = fuse4fs_max_file_size(ff, inode);
int ret;
- if (!(opflags & FUSE_IOMAP_OP_DIRECT))
- return -ENOSYS;
-
if (pos >= max_size)
return -EFBIG;
@@ -6629,12 +6623,51 @@ static void op_iomap_begin(fuse_req_t req, fuse_ino_t fino, uint64_t dontcare,
fuse_reply_iomap_begin(req, &read, NULL);
}
+/*
+ * Raise the size recorded in inode @ino to @newsize if it is currently
+ * smaller.
+ *
+ * The inode is read; if @newsize does not exceed the current size, 0 is
+ * returned and nothing is written.  Otherwise the new size is set and the
+ * inode is written back.  Returns 0 on success or the result of
+ * translate_error().
+ */
+static int fuse4fs_iomap_append_setsize(struct fuse4fs *ff, ext2_ino_t ino,
+ loff_t newsize)
+{
+ ext2_filsys fs = ff->fs;
+ struct ext2_inode_large inode;
+ ext2_off64_t isize;
+ errcode_t err;
+
+ dbg_printf(ff, "%s: ino=%u newsize=%llu\n", __func__, ino,
+ (unsigned long long)newsize);
+
+ err = fuse4fs_read_inode(fs, ino, &inode);
+ if (err)
+ return translate_error(fs, ino, err);
+
+ /* this helper only ever grows the file */
+ isize = EXT2_I_SIZE(&inode);
+ if (newsize <= isize)
+ return 0;
+
+ dbg_printf(ff, "%s: ino=%u oldsize=%llu newsize=%llu\n", __func__, ino,
+ (unsigned long long)isize,
+ (unsigned long long)newsize);
+
+ /*
+ * XXX cheesily update the ondisk size even though we only want to do
+ * the incore size until writeback happens
+ */
+ err = ext2fs_inode_size_set(fs, EXT2_INODE(&inode), newsize);
+ if (err)
+ return translate_error(fs, ino, err);
+
+ err = fuse4fs_write_inode(fs, ino, &inode);
+ if (err)
+ return translate_error(fs, ino, err);
+
+ return 0;
+}
+
static void op_iomap_end(fuse_req_t req, fuse_ino_t fino, uint64_t dontcare,
off_t pos, uint64_t count, uint32_t opflags,
ssize_t written, const struct fuse_file_iomap *iomap)
{
struct fuse4fs *ff = fuse4fs_get(req);
ext2_ino_t ino;
+ int ret = 0;
FUSE4FS_CHECK_CONTEXT(req);
FUSE4FS_CONVERT_FINO(req, &ino, fino);
@@ -6648,7 +6681,21 @@ static void op_iomap_end(fuse_req_t req, fuse_ino_t fino, uint64_t dontcare,
written,
iomap->flags);
- fuse_reply_err(req, 0);
+ fuse4fs_start(ff);
+
+ /* XXX is this really necessary? */
+ if ((opflags & FUSE_IOMAP_OP_WRITE) &&
+ !(opflags & FUSE_IOMAP_OP_DIRECT) &&
+ (iomap->flags & FUSE_IOMAP_F_SIZE_CHANGED) &&
+ written > 0) {
+ ret = fuse4fs_iomap_append_setsize(ff, ino, pos + written);
+ if (ret)
+ goto out_unlock;
+ }
+
+out_unlock:
+ fuse4fs_finish(ff, ret);
+ fuse_reply_err(req, -ret);
}
/*
diff --git a/misc/fuse2fs.c b/misc/fuse2fs.c
index 67a5bc4c5cc986..679406323df86f 100644
--- a/misc/fuse2fs.c
+++ b/misc/fuse2fs.c
@@ -5673,9 +5673,6 @@ static int fuse2fs_iomap_begin_read(struct fuse2fs *ff, ext2_ino_t ino,
uint64_t count, uint32_t opflags,
struct fuse_file_iomap *read)
{
- if (!(opflags & FUSE_IOMAP_OP_DIRECT))
- return -ENOSYS;
-
/* fall back to slow path for inline data reads */
if (inode->i_flags & EXT4_INLINE_DATA_FL)
return -ENOSYS;
@@ -5763,9 +5760,6 @@ static int fuse2fs_iomap_begin_write(struct fuse2fs *ff, ext2_ino_t ino,
off_t max_size = fuse2fs_max_file_size(ff, inode);
int ret;
- if (!(opflags & FUSE_IOMAP_OP_DIRECT))
- return -ENOSYS;
-
if (pos >= max_size)
return -EFBIG;
@@ -5860,11 +5854,50 @@ static int op_iomap_begin(const char *path, uint64_t nodeid, uint64_t attr_ino,
return ret;
}
+/*
+ * Raise the size recorded in inode @ino to @newsize if it is currently
+ * smaller.
+ *
+ * The inode is read; if @newsize does not exceed the current size, 0 is
+ * returned and nothing is written.  Otherwise the new size is set and the
+ * inode is written back.  Returns 0 on success or the result of
+ * translate_error().
+ */
+static int fuse2fs_iomap_append_setsize(struct fuse2fs *ff, ext2_ino_t ino,
+ loff_t newsize)
+{
+ ext2_filsys fs = ff->fs;
+ struct ext2_inode_large inode;
+ ext2_off64_t isize;
+ errcode_t err;
+
+ dbg_printf(ff, "%s: ino=%u newsize=%llu\n", __func__, ino,
+ (unsigned long long)newsize);
+
+ err = fuse2fs_read_inode(fs, ino, &inode);
+ if (err)
+ return translate_error(fs, ino, err);
+
+ /* this helper only ever grows the file */
+ isize = EXT2_I_SIZE(&inode);
+ if (newsize <= isize)
+ return 0;
+
+ dbg_printf(ff, "%s: ino=%u oldsize=%llu newsize=%llu\n", __func__, ino,
+ (unsigned long long)isize,
+ (unsigned long long)newsize);
+
+ /*
+ * XXX cheesily update the ondisk size even though we only want to do
+ * the incore size until writeback happens
+ */
+ err = ext2fs_inode_size_set(fs, EXT2_INODE(&inode), newsize);
+ if (err)
+ return translate_error(fs, ino, err);
+
+ err = fuse2fs_write_inode(fs, ino, &inode);
+ if (err)
+ return translate_error(fs, ino, err);
+
+ return 0;
+}
+
static int op_iomap_end(const char *path, uint64_t nodeid, uint64_t attr_ino,
off_t pos, uint64_t count, uint32_t opflags,
ssize_t written, const struct fuse_file_iomap *iomap)
{
struct fuse2fs *ff = fuse2fs_get();
+ int ret = 0;
FUSE2FS_CHECK_CONTEXT(ff);
@@ -5879,7 +5912,21 @@ static int op_iomap_end(const char *path, uint64_t nodeid, uint64_t attr_ino,
written,
iomap->flags);
- return 0;
+ fuse2fs_start(ff);
+
+ /* XXX is this really necessary? */
+ if ((opflags & FUSE_IOMAP_OP_WRITE) &&
+ !(opflags & FUSE_IOMAP_OP_DIRECT) &&
+ (iomap->flags & FUSE_IOMAP_F_SIZE_CHANGED) &&
+ written > 0) {
+ ret = fuse2fs_iomap_append_setsize(ff, attr_ino, pos + written);
+ if (ret)
+ goto out_unlock;
+ }
+
+out_unlock:
+ fuse2fs_finish(ff, ret);
+ return ret;
}
/*
^ permalink raw reply related [flat|nested] 50+ messages in thread* [PATCH 09/19] fuse2fs: don't zero bytes in punch hole
2026-04-29 14:20 ` [PATCHSET v8 2/6] fuse2fs: use fuse iomap data paths for better file I/O performance Darrick J. Wong
` (7 preceding siblings ...)
2026-04-29 14:54 ` [PATCH 08/19] fuse2fs: turn on iomap for pagecache IO Darrick J. Wong
@ 2026-04-29 14:54 ` Darrick J. Wong
2026-04-29 14:55 ` [PATCH 10/19] fuse2fs: don't do file data block IO when iomap is enabled Darrick J. Wong
` (9 subsequent siblings)
18 siblings, 0 replies; 50+ messages in thread
From: Darrick J. Wong @ 2026-04-29 14:54 UTC (permalink / raw)
To: tytso
Cc: bernd, miklos, linux-ext4, neal, linux-fsdevel, fuse-devel,
joannelkoong
From: Darrick J. Wong <djwong@kernel.org>
When iomap is in use for the pagecache, it will take care of zeroing the
unaligned parts of punched out regions so we don't have to do it
ourselves.
Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
---
fuse4fs/fuse4fs.c | 8 ++++++++
misc/fuse2fs.c | 9 +++++++++
2 files changed, 17 insertions(+)
diff --git a/fuse4fs/fuse4fs.c b/fuse4fs/fuse4fs.c
index fa82fda99ff687..40713b0d0d5e37 100644
--- a/fuse4fs/fuse4fs.c
+++ b/fuse4fs/fuse4fs.c
@@ -5868,6 +5868,10 @@ static errcode_t fuse4fs_zero_middle(struct fuse4fs *ff, ext2_ino_t ino,
int retflags;
errcode_t err;
+ /* the kernel does this for us in iomap mode */
+ if (fuse4fs_iomap_enabled(ff))
+ return 0;
+
if (!*buf) {
err = ext2fs_get_mem(fs->blocksize, buf);
if (err)
@@ -5904,6 +5908,10 @@ static errcode_t fuse4fs_zero_edge(struct fuse4fs *ff, ext2_ino_t ino,
off_t residue;
errcode_t err;
+ /* the kernel does this for us in iomap mode */
+ if (fuse4fs_iomap_enabled(ff))
+ return 0;
+
residue = FUSE4FS_OFF_IN_FSB(ff, offset);
if (residue == 0)
return 0;
diff --git a/misc/fuse2fs.c b/misc/fuse2fs.c
index 679406323df86f..a37851cdf30785 100644
--- a/misc/fuse2fs.c
+++ b/misc/fuse2fs.c
@@ -727,6 +727,7 @@ static inline int fuse2fs_iomap_enabled(const struct fuse2fs *ff)
}
#else
# define fuse2fs_iomap_enabled(...) (0)
+# define fuse2fs_iomap_enabled(...) (0)
#endif
static inline void fuse2fs_dump_extents(struct fuse2fs *ff, ext2_ino_t ino,
@@ -5104,6 +5105,10 @@ static errcode_t clean_block_middle(struct fuse2fs *ff, ext2_ino_t ino,
int retflags;
errcode_t err;
+ /* the kernel does this for us in iomap mode */
+ if (fuse2fs_iomap_enabled(ff))
+ return 0;
+
if (!*buf) {
err = ext2fs_get_mem(fs->blocksize, buf);
if (err)
@@ -5140,6 +5145,10 @@ static errcode_t clean_block_edge(struct fuse2fs *ff, ext2_ino_t ino,
off_t residue;
errcode_t err;
+ /* the kernel does this for us in iomap mode */
+ if (fuse2fs_iomap_enabled(ff))
+ return 0;
+
residue = FUSE2FS_OFF_IN_FSB(ff, offset);
if (residue == 0)
return 0;
^ permalink raw reply related [flat|nested] 50+ messages in thread* [PATCH 10/19] fuse2fs: don't do file data block IO when iomap is enabled
2026-04-29 14:20 ` [PATCHSET v8 2/6] fuse2fs: use fuse iomap data paths for better file I/O performance Darrick J. Wong
` (8 preceding siblings ...)
2026-04-29 14:54 ` [PATCH 09/19] fuse2fs: don't zero bytes in punch hole Darrick J. Wong
@ 2026-04-29 14:55 ` Darrick J. Wong
2026-04-29 14:55 ` [PATCH 11/19] fuse2fs: try to create loop device when ext4 device is a regular file Darrick J. Wong
` (8 subsequent siblings)
18 siblings, 0 replies; 50+ messages in thread
From: Darrick J. Wong @ 2026-04-29 14:55 UTC (permalink / raw)
To: tytso
Cc: bernd, miklos, linux-ext4, neal, linux-fsdevel, fuse-devel,
joannelkoong
From: Darrick J. Wong <djwong@kernel.org>
When iomap is in use for the page cache, the kernel will take care of
all the file data block IO for us, including zeroing of punched ranges
and post-EOF bytes. fuse2fs only needs to do IO for inline data.
Therefore, set the NOBLOCKIO ext2_file flag so that libext2fs will not
do any regular file IO to or from disk blocks at all.
Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
---
fuse4fs/fuse4fs.c | 11 +++++++-
misc/fuse2fs.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
2 files changed, 81 insertions(+), 2 deletions(-)
diff --git a/fuse4fs/fuse4fs.c b/fuse4fs/fuse4fs.c
index 40713b0d0d5e37..68f1f7c02df223 100644
--- a/fuse4fs/fuse4fs.c
+++ b/fuse4fs/fuse4fs.c
@@ -3933,9 +3933,14 @@ static int fuse4fs_truncate(struct fuse4fs *ff, ext2_ino_t ino, off_t new_size)
ext2_file_t file;
__u64 old_isize;
errcode_t err;
+ int flags = EXT2_FILE_WRITE;
int ret = 0;
- err = ext2fs_file_open(fs, ino, EXT2_FILE_WRITE, &file);
+ /* the kernel handles all eof zeroing for us in iomap mode */
+ if (fuse4fs_iomap_enabled(ff))
+ flags |= EXT2_FILE_NOBLOCKIO;
+
+ err = ext2fs_file_open(fs, ino, flags, &file);
if (err)
return translate_error(fs, ino, err);
@@ -4030,6 +4035,10 @@ static int fuse4fs_open_file(struct fuse4fs *ff, const struct fuse_ctx *ctxt,
if (linked)
check |= L_OK;
+ /* the kernel handles all block IO for us in iomap mode */
+ if (fuse4fs_iomap_enabled(ff))
+ file->open_flags |= EXT2_FILE_NOBLOCKIO;
+
/*
* If the caller wants to truncate the file, we need to ask for full
* write access even if the caller claims to be appending.
diff --git a/misc/fuse2fs.c b/misc/fuse2fs.c
index a37851cdf30785..f7653dc6c20c3f 100644
--- a/misc/fuse2fs.c
+++ b/misc/fuse2fs.c
@@ -3463,15 +3463,72 @@ static int fuse2fs_punch_posteof(struct fuse2fs *ff, ext2_ino_t ino,
return 0;
}
+/*
+ * Decide if file IO for this inode can use iomap.
+ *
+ * It turns out that libfuse creates internal node ids that have nothing to do
+ * with the ext2_ino_t that we give it. These internal node ids are what
+ * actually gets igetted in the kernel, which means that there can be multiple
+ * fuse_inode objects in the kernel for a single hardlinked ondisk ext2 inode.
+ *
+ * What this means, horrifyingly, is that on a fuse filesystem that supports
+ * hard links, the in-kernel i_rwsem does not protect against concurrent writes
+ * between files that point to the same inode. That in turn means that the
+ * file mode and size can get desynchronized between the multiple fuse_inode
+ * objects. This also means that we cannot cache iomaps in the kernel AT ALL
+ * because the caches will get out of sync, leading to WARN_ONs from the iomap
+ * zeroing code and probably data corruption after that.
+ *
+ * Therefore, libfuse won't let us create hardlinks of iomap files, and we must
+ * never turn on iomap for existing hardlinked files. Long term it means we
+ * have to find a way around this loss of functionality. fuse4fs gets around
+ * this by being a low level fuse driver and controlling the nodeids itself.
+ *
+ * Returns 0 for no, 1 for yes, or a negative errno.
+ */
+#ifdef HAVE_FUSE_IOMAP
+static int fuse2fs_file_uses_iomap(struct fuse2fs *ff, ext2_ino_t ino)
+{
+ struct stat statbuf;
+ int ret;
+
+ /* iomap is not enabled at all, so no file can use it */
+ if (!fuse2fs_iomap_enabled(ff))
+ return 0;
+
+ /* fill in a struct stat for this inode; failures are passed back as is */
+ ret = stat_inode(ff->fs, ino, &statbuf);
+ if (ret)
+ return ret;
+
+ /* iomap is on; ask libfuse whether this particular inode may use it */
+ return fuse_fs_can_enable_iomap(&statbuf);
+}
+#else
+/* built without HAVE_FUSE_IOMAP: the answer is always "no" */
+# define fuse2fs_file_uses_iomap(...) (0)
+#endif
+
static int fuse2fs_truncate(struct fuse2fs *ff, ext2_ino_t ino, off_t new_size)
{
ext2_filsys fs = ff->fs;
ext2_file_t file;
__u64 old_isize;
errcode_t err;
+ int flags = EXT2_FILE_WRITE;
int ret = 0;
- err = ext2fs_file_open(fs, ino, EXT2_FILE_WRITE, &file);
+ /* the kernel handles all eof zeroing for us in iomap mode */
+ ret = fuse2fs_file_uses_iomap(ff, ino);
+ switch (ret) {
+ case 0:
+ break;
+ case 1:
+ flags |= EXT2_FILE_NOBLOCKIO;
+ ret = 0;
+ break;
+ default:
+ return ret;
+ }
+
+ err = ext2fs_file_open(fs, ino, flags, &file);
if (err)
return translate_error(fs, ino, err);
@@ -3626,6 +3683,19 @@ static int __op_open(struct fuse2fs *ff, const char *path,
goto out;
}
+ /* the kernel handles all block IO for us in iomap mode */
+ ret = fuse2fs_file_uses_iomap(ff, file->ino);
+ switch (ret) {
+ case 0:
+ break;
+ case 1:
+ file->open_flags |= EXT2_FILE_NOBLOCKIO;
+ ret = 0;
+ break;
+ default:
+ goto out;
+ }
+
if (fp->flags & O_TRUNC) {
ret = fuse2fs_truncate(ff, file->ino, 0);
if (ret)
^ permalink raw reply related [flat|nested] 50+ messages in thread* [PATCH 11/19] fuse2fs: try to create loop device when ext4 device is a regular file
2026-04-29 14:20 ` [PATCHSET v8 2/6] fuse2fs: use fuse iomap data paths for better file I/O performance Darrick J. Wong
` (9 preceding siblings ...)
2026-04-29 14:55 ` [PATCH 10/19] fuse2fs: don't do file data block IO when iomap is enabled Darrick J. Wong
@ 2026-04-29 14:55 ` Darrick J. Wong
2026-04-29 14:55 ` [PATCH 12/19] fuse2fs: enable file IO to inline data files Darrick J. Wong
` (7 subsequent siblings)
18 siblings, 0 replies; 50+ messages in thread
From: Darrick J. Wong @ 2026-04-29 14:55 UTC (permalink / raw)
To: tytso
Cc: bernd, miklos, linux-ext4, neal, linux-fsdevel, fuse-devel,
joannelkoong
From: Darrick J. Wong <djwong@kernel.org>
If the filesystem device is a regular file, try to create a loop device
for it so that we can take advantage of iomap.
Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
---
configure | 41 +++++++++++++++++++
configure.ac | 23 +++++++++++
fuse4fs/fuse4fs.c | 116 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
lib/config.h.in | 3 +
misc/fuse2fs.c | 113 +++++++++++++++++++++++++++++++++++++++++++++++++++-
5 files changed, 292 insertions(+), 4 deletions(-)
diff --git a/configure b/configure
index 344c7af2ee48f8..ba1556b34257a6 100755
--- a/configure
+++ b/configure
@@ -14691,6 +14691,47 @@ printf "%s\n" "#define HAVE_FUSE_IOMAP 1" >>confdefs.h
fi
+if test -n "$have_fuse_iomap"; then
+ { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for fuse_loopdev.h in libfuse" >&5
+printf %s "checking for fuse_loopdev.h in libfuse... " >&6; }
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+ #define _GNU_SOURCE
+ #define _FILE_OFFSET_BITS 64
+ #define FUSE_USE_VERSION 399
+ #include <fuse_loopdev.h>
+
+int
+main (void)
+{
+
+
+ ;
+ return 0;
+}
+
+_ACEOF
+if ac_fn_c_try_link "$LINENO"
+then :
+ have_fuse_loopdev=yes
+ { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+printf "%s\n" "yes" >&6; }
+else case e in #(
+ e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; } ;;
+esac
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+if test -n "$have_fuse_loopdev"
+then
+
+printf "%s\n" "#define HAVE_FUSE_LOOPDEV 1" >>confdefs.h
+
+fi
+
have_fuse_lowlevel=
if test -n "$FUSE_USE_VERSION"
then
diff --git a/configure.ac b/configure.ac
index 8d85e9966877ea..8cfde4d85489e5 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1432,6 +1432,29 @@ then
AC_DEFINE(HAVE_FUSE_IOMAP, 1, [Define to 1 if fuse supports iomap])
fi
+dnl
+dnl Check if fuse library has fuse_loopdev.h, which it only gained after adding
+dnl iomap support.
+dnl
+if test -n "$have_fuse_iomap"; then
+ AC_MSG_CHECKING(for fuse_loopdev.h in libfuse)
+ AC_LINK_IFELSE(
+ [ AC_LANG_PROGRAM([[
+ #define _GNU_SOURCE
+ #define _FILE_OFFSET_BITS 64
+ #define FUSE_USE_VERSION 399
+ #include <fuse_loopdev.h>
+ ]], [[
+ ]])
+ ], have_fuse_loopdev=yes
+ AC_MSG_RESULT(yes),
+ AC_MSG_RESULT(no))
+fi
+if test -n "$have_fuse_loopdev"
+then
+ AC_DEFINE(HAVE_FUSE_LOOPDEV, 1, [Define to 1 if fuse supports loopdev operations])
+fi
+
dnl
dnl Check if the FUSE lowlevel library is supported
dnl
diff --git a/fuse4fs/fuse4fs.c b/fuse4fs/fuse4fs.c
index 68f1f7c02df223..3c3debb6f60ac7 100644
--- a/fuse4fs/fuse4fs.c
+++ b/fuse4fs/fuse4fs.c
@@ -27,6 +27,9 @@
#include <unistd.h>
#include <ctype.h>
#include <assert.h>
+#ifdef HAVE_FUSE_LOOPDEV
+# include <fuse_loopdev.h>
+#endif
#define FUSE_DARWIN_ENABLE_EXTENSIONS 0
#ifdef __SET_FOB_FOR_FUSE
# error Do not set magic value __SET_FOB_FOR_FUSE!!!!
@@ -262,6 +265,10 @@ struct fuse4fs {
pthread_mutex_t bfl;
char *device;
char *shortdev;
+#ifdef HAVE_FUSE_LOOPDEV
+ char *loop_device;
+ int loop_fd;
+#endif
/* options set by fuse_opt_parse must be of type int */
int ro;
@@ -285,6 +292,7 @@ struct fuse4fs {
enum fuse4fs_feature_toggle iomap_want;
enum fuse4fs_iomap_state iomap_state;
uint32_t iomap_dev;
+ uint64_t iomap_cap;
#endif
unsigned int blockmask;
unsigned long offset;
@@ -913,8 +921,23 @@ static inline int fuse4fs_iomap_enabled(const struct fuse4fs *ff)
{
return ff->iomap_state >= IOMAP_ENABLED;
}
+
+static inline void fuse4fs_discover_iomap(struct fuse4fs *ff)
+{
+ if (ff->iomap_want == FT_DISABLE)
+ return;
+
+ ff->iomap_cap = fuse_lowlevel_discover_iomap(-1);
+}
+
+static inline bool fuse4fs_can_iomap(const struct fuse4fs *ff)
+{
+ return ff->iomap_cap & FUSE_IOMAP_SUPPORT_FILEIO;
+}
#else
# define fuse4fs_iomap_enabled(...) (0)
+# define fuse4fs_discover_iomap(...) ((void)0)
+# define fuse4fs_can_iomap(...) (false)
#endif
static inline void fuse4fs_dump_extents(struct fuse4fs *ff, ext2_ino_t ino,
@@ -1584,6 +1607,76 @@ static void fuse4fs_release_lockfile(struct fuse4fs *ff)
free(ff->lockfile);
}
+#ifdef HAVE_FUSE_LOOPDEV
+static int fuse4fs_try_losetup(struct fuse4fs *ff, int flags)
+{
+ bool rw = flags & EXT2_FLAG_RW;
+ int dev_fd;
+ int ret;
+
+ /*
+ * Only transform a regular file into a loopdev for iomap, and only if
+ * the service helper isn't required to do that for us.
+ */
+ if (!fuse4fs_can_iomap(ff) || fuse4fs_is_service(ff))
+ return 0;
+
+ /* open the actual target device, see if it's a regular file */
+ dev_fd = open(ff->device, rw ? O_RDWR : O_RDONLY);
+ if (dev_fd < 0) {
+ err_printf(ff, "%s: %s\n", _("while opening fs"),
+ error_message(errno));
+ return -1;
+ }
+
+ ret = fuse_loopdev_setup(dev_fd, rw ? O_RDWR : O_RDONLY, ff->device, 5,
+ &ff->loop_fd, &ff->loop_device);
+ if (ret == -EBUSY) {
+ /*
+ * -EBUSY means there is already a loop device backed by this
+ * file, so report the file as in use. The code comes from ret;
+ * errno is not known to be set here. Ignore the other errors
+ * because we can otherwise handle a filesystem in a file.
+ */
+ err_printf(ff, "%s: %s\n", _("while opening fs loopdev"),
+ error_message(-ret));
+ close(dev_fd);
+ return -1;
+ }
+
+ close(dev_fd);
+ return 0;
+}
+
+static void fuse4fs_detach_losetup(struct fuse4fs *ff)
+{
+ if (ff->loop_fd >= 0)
+ close(ff->loop_fd);
+ ff->loop_fd = -1;
+}
+
+static void fuse4fs_undo_losetup(struct fuse4fs *ff)
+{
+ fuse4fs_detach_losetup(ff);
+ free(ff->loop_device);
+ ff->loop_device = NULL;
+}
+
+static inline const char *fuse4fs_device(const struct fuse4fs *ff)
+{
+ /*
+ * If we created a loop device for the file passed in, open that.
+ * Otherwise open the path the user gave us.
+ */
+ return ff->loop_device ? ff->loop_device : ff->device;
+}
+#else
+# define fuse4fs_try_losetup(...) (0)
+# define fuse4fs_detach_losetup(...) ((void)0)
+# define fuse4fs_undo_losetup(...) ((void)0)
+# define fuse4fs_device(ff) ((ff)->device)
+#endif
+
static void fuse4fs_unmount(struct fuse4fs *ff)
{
char uuid[UUID_STR_SIZE];
@@ -1607,6 +1700,7 @@ static void fuse4fs_unmount(struct fuse4fs *ff)
}
fuse4fs_service_close_bdev(ff);
+ fuse4fs_undo_losetup(ff);
if (ff->lockfile)
fuse4fs_release_lockfile(ff);
@@ -1620,6 +1714,8 @@ static errcode_t fuse4fs_open(struct fuse4fs *ff)
EXT2_FLAG_EXCLUSIVE | EXT2_FLAG_WRITE_FULL_SUPER;
errcode_t err;
+ fuse4fs_discover_iomap(ff);
+
if (ff->lockfile) {
err = fuse4fs_acquire_lockfile(ff);
if (err)
@@ -1632,6 +1728,12 @@ static errcode_t fuse4fs_open(struct fuse4fs *ff)
if (ff->directio)
flags |= EXT2_FLAG_DIRECT_IO;
+ dbg_printf(ff, "opening with flags=0x%x\n", flags);
+
+ err = fuse4fs_try_losetup(ff, flags);
+ if (err)
+ return err;
+
/*
* If the filesystem is stored on a block device, the _EXCLUSIVE flag
* causes libext2fs to try to open the block device with O_EXCL. If
@@ -1666,8 +1768,8 @@ static errcode_t fuse4fs_open(struct fuse4fs *ff)
if (fuse4fs_is_service(ff))
err = fuse4fs_service_openfs(ff, options, &flags);
else
- err = ext2fs_open2(ff->device, options, flags, 0, 0,
- unix_io_manager, &ff->fs);
+ err = ext2fs_open2(fuse4fs_device(ff), options, flags,
+ 0, 0, unix_io_manager, &ff->fs);
if ((err == EPERM || err == EACCES) &&
(!ff->ro || (flags & EXT2_FLAG_RW))) {
/*
@@ -1681,6 +1783,11 @@ static errcode_t fuse4fs_open(struct fuse4fs *ff)
flags &= ~EXT2_FLAG_RW;
ff->ro = 1;
+ fuse4fs_undo_losetup(ff);
+ err = fuse4fs_try_losetup(ff, flags);
+ if (err)
+ return err;
+
/* Force the loop to run once more */
err = -1;
}
@@ -2129,6 +2236,8 @@ static void op_init(void *userdata, struct fuse_conn_info *conn)
fuse4fs_iomap_enable(conn, ff);
conn->time_gran = 1;
+ fuse4fs_detach_losetup(ff);
+
if (ff->opstate == F4OP_WRITABLE)
fuse4fs_read_bitmaps(ff);
@@ -7737,6 +7846,9 @@ int main(int argc, char *argv[])
.iomap_want = FT_DEFAULT,
.iomap_state = IOMAP_UNKNOWN,
.iomap_dev = FUSE_IOMAP_DEV_NULL,
+#endif
+#ifdef HAVE_FUSE_LOOPDEV
+ .loop_fd = -1,
#endif
};
errcode_t err;
diff --git a/lib/config.h.in b/lib/config.h.in
index 58338cc926590e..96ed5479181a5b 100644
--- a/lib/config.h.in
+++ b/lib/config.h.in
@@ -151,6 +151,9 @@
/* Define to 1 if fuse supports iomap */
#undef HAVE_FUSE_IOMAP
+/* Define to 1 if fuse supports loopdev operations */
+#undef HAVE_FUSE_LOOPDEV
+
/* Define to 1 if fuse supports lowlevel API */
#undef HAVE_FUSE_LOWLEVEL
diff --git a/misc/fuse2fs.c b/misc/fuse2fs.c
index f7653dc6c20c3f..3c76eba683a10d 100644
--- a/misc/fuse2fs.c
+++ b/misc/fuse2fs.c
@@ -25,6 +25,9 @@
#include <sys/ioctl.h>
#include <unistd.h>
#include <ctype.h>
+#ifdef HAVE_FUSE_LOOPDEV
+# include <fuse_loopdev.h>
+#endif
#define FUSE_DARWIN_ENABLE_EXTENSIONS 0
#ifdef __SET_FOB_FOR_FUSE
# error Do not set magic value __SET_FOB_FOR_FUSE!!!!
@@ -245,6 +248,10 @@ struct fuse2fs {
pthread_mutex_t bfl;
char *device;
char *shortdev;
+#ifdef HAVE_FUSE_LOOPDEV
+ char *loop_device;
+ int loop_fd;
+#endif
/* options set by fuse_opt_parse must be of type int */
int ro;
@@ -268,6 +275,7 @@ struct fuse2fs {
enum fuse2fs_feature_toggle iomap_want;
enum fuse2fs_iomap_state iomap_state;
uint32_t iomap_dev;
+ uint64_t iomap_cap;
#endif
unsigned int blockmask;
unsigned long offset;
@@ -725,9 +733,23 @@ static inline int fuse2fs_iomap_enabled(const struct fuse2fs *ff)
{
return ff->iomap_state >= IOMAP_ENABLED;
}
+
+static inline void fuse2fs_discover_iomap(struct fuse2fs *ff)
+{
+ if (ff->iomap_want == FT_DISABLE)
+ return;
+
+ ff->iomap_cap = fuse_lowlevel_discover_iomap(-1);
+}
+
+static inline bool fuse2fs_can_iomap(const struct fuse2fs *ff)
+{
+ return ff->iomap_cap & FUSE_IOMAP_SUPPORT_FILEIO;
+}
#else
# define fuse2fs_iomap_enabled(...) (0)
-# define fuse2fs_iomap_enabled(...) (0)
+# define fuse2fs_discover_iomap(...) ((void)0)
+# define fuse2fs_can_iomap(...) (false)
#endif
static inline void fuse2fs_dump_extents(struct fuse2fs *ff, ext2_ino_t ino,
@@ -1201,6 +1223,73 @@ static void fuse2fs_release_lockfile(struct fuse2fs *ff)
free(ff->lockfile);
}
+#ifdef HAVE_FUSE_LOOPDEV
+static int fuse2fs_try_losetup(struct fuse2fs *ff, int flags)
+{
+ bool rw = flags & EXT2_FLAG_RW;
+ int dev_fd;
+ int ret;
+
+ /* Only transform a regular file into a loopdev for iomap */
+ if (!fuse2fs_can_iomap(ff))
+ return 0;
+
+ /* open the actual target device, see if it's a regular file */
+ dev_fd = open(ff->device, rw ? O_RDWR : O_RDONLY);
+ if (dev_fd < 0) {
+ err_printf(ff, "%s: %s\n", _("while opening fs"),
+ error_message(errno));
+ return -1;
+ }
+
+ ret = fuse_loopdev_setup(dev_fd, rw ? O_RDWR : O_RDONLY, ff->device, 5,
+ &ff->loop_fd, &ff->loop_device);
+ if (ret == -EBUSY) {
+ /*
+ * If the setup function returned EBUSY, there is already a
+ * loop device backed by this file. Report that the file is
+ * already in use. Ignore the other errors because we can
+ * otherwise handle filesystem in a file.
+ */
+ err_printf(ff, "%s: %s\n", _("while opening fs loopdev"),
+ error_message(-ret));
+ close(dev_fd);
+ return -1;
+ }
+
+ close(dev_fd);
+ return 0;
+}
+
+static void fuse2fs_detach_losetup(struct fuse2fs *ff)
+{
+ if (ff->loop_fd >= 0)
+ close(ff->loop_fd);
+ ff->loop_fd = -1;
+}
+
+static void fuse2fs_undo_losetup(struct fuse2fs *ff)
+{
+ fuse2fs_detach_losetup(ff);
+ free(ff->loop_device);
+ ff->loop_device = NULL;
+}
+
+static inline const char *fuse2fs_device(const struct fuse2fs *ff)
+{
+ /*
+ * If we created a loop device for the file passed in, open that.
+ * Otherwise open the path the user gave us.
+ */
+ return ff->loop_device ? ff->loop_device : ff->device;
+}
+#else
+# define fuse2fs_try_losetup(...) (0)
+# define fuse2fs_detach_losetup(...) ((void)0)
+# define fuse2fs_undo_losetup(...) ((void)0)
+# define fuse2fs_device(ff) ((ff)->device)
+#endif
+
static void fuse2fs_unmount(struct fuse2fs *ff)
{
char uuid[UUID_STR_SIZE];
@@ -1218,6 +1307,8 @@ static void fuse2fs_unmount(struct fuse2fs *ff)
uuid);
}
+ fuse2fs_undo_losetup(ff);
+
if (ff->lockfile)
fuse2fs_release_lockfile(ff);
}
@@ -1230,6 +1321,8 @@ static errcode_t fuse2fs_open(struct fuse2fs *ff)
EXT2_FLAG_EXCLUSIVE | EXT2_FLAG_WRITE_FULL_SUPER;
errcode_t err;
+ fuse2fs_discover_iomap(ff);
+
if (ff->lockfile) {
err = fuse2fs_acquire_lockfile(ff);
if (err)
@@ -1242,6 +1335,12 @@ static errcode_t fuse2fs_open(struct fuse2fs *ff)
if (ff->directio)
flags |= EXT2_FLAG_DIRECT_IO;
+ dbg_printf(ff, "opening with flags=0x%x\n", flags);
+
+ err = fuse2fs_try_losetup(ff, flags);
+ if (err)
+ return err;
+
/*
* If the filesystem is stored on a block device, the _EXCLUSIVE flag
* causes libext2fs to try to open the block device with O_EXCL. If
@@ -1273,7 +1372,7 @@ static errcode_t fuse2fs_open(struct fuse2fs *ff)
*/
deadline = init_deadline(FUSE2FS_OPEN_TIMEOUT);
do {
- err = ext2fs_open2(ff->device, options, flags, 0, 0,
+ err = ext2fs_open2(fuse2fs_device(ff), options, flags, 0, 0,
unix_io_manager, &ff->fs);
if ((err == EPERM || err == EACCES) &&
(!ff->ro || (flags & EXT2_FLAG_RW))) {
@@ -1288,6 +1387,11 @@ static errcode_t fuse2fs_open(struct fuse2fs *ff)
flags &= ~EXT2_FLAG_RW;
ff->ro = 1;
+ fuse2fs_undo_losetup(ff);
+ err = fuse2fs_try_losetup(ff, flags);
+ if (err)
+ return err;
+
/* Force the loop to run once more */
err = -1;
}
@@ -1744,6 +1848,8 @@ static void *op_init(struct fuse_conn_info *conn,
cfg->debug = 1;
cfg->nullpath_ok = 1;
+ fuse2fs_detach_losetup(ff);
+
if (ff->opstate == F2OP_WRITABLE)
fuse2fs_read_bitmaps(ff);
@@ -6852,6 +6958,9 @@ int main(int argc, char *argv[])
.iomap_want = FT_DEFAULT,
.iomap_state = IOMAP_UNKNOWN,
.iomap_dev = FUSE_IOMAP_DEV_NULL,
+#endif
+#ifdef HAVE_FUSE_LOOPDEV
+ .loop_fd = -1,
#endif
};
errcode_t err;
^ permalink raw reply related [flat|nested] 50+ messages in thread* [PATCH 12/19] fuse2fs: enable file IO to inline data files
2026-04-29 14:20 ` [PATCHSET v8 2/6] fuse2fs: use fuse iomap data paths for better file I/O performance Darrick J. Wong
` (10 preceding siblings ...)
2026-04-29 14:55 ` [PATCH 11/19] fuse2fs: try to create loop device when ext4 device is a regular file Darrick J. Wong
@ 2026-04-29 14:55 ` Darrick J. Wong
2026-04-29 14:56 ` [PATCH 13/19] fuse2fs: set iomap-related inode flags Darrick J. Wong
` (6 subsequent siblings)
18 siblings, 0 replies; 50+ messages in thread
From: Darrick J. Wong @ 2026-04-29 14:55 UTC (permalink / raw)
To: tytso
Cc: bernd, miklos, linux-ext4, neal, linux-fsdevel, fuse-devel,
joannelkoong
From: Darrick J. Wong <djwong@kernel.org>
Enable file reads and writes from inline data files.
Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
---
fuse4fs/fuse4fs.c | 3 ++-
misc/fuse2fs.c | 42 ++++++++++++++++++++++++++++++++++++++++--
2 files changed, 42 insertions(+), 3 deletions(-)
diff --git a/fuse4fs/fuse4fs.c b/fuse4fs/fuse4fs.c
index 3c3debb6f60ac7..e2421dda75475a 100644
--- a/fuse4fs/fuse4fs.c
+++ b/fuse4fs/fuse4fs.c
@@ -6566,7 +6566,8 @@ static int fuse4fs_iomap_begin_read(struct fuse4fs *ff, ext2_ino_t ino,
{
/* fall back to slow path for inline data reads */
if (inode->i_flags & EXT4_INLINE_DATA_FL)
- return -ENOSYS;
+ return fuse4fs_iomap_begin_inline(ff, ino, inode, pos, count,
+ read);
if (inode->i_flags & EXT4_EXTENTS_FL)
return fuse4fs_iomap_begin_extent(ff, ino, inode, pos, count,
diff --git a/misc/fuse2fs.c b/misc/fuse2fs.c
index 3c76eba683a10d..eecbf60a3360c6 100644
--- a/misc/fuse2fs.c
+++ b/misc/fuse2fs.c
@@ -1846,7 +1846,16 @@ static void *op_init(struct fuse_conn_info *conn,
cfg->use_ino = 1;
if (ff->debug)
cfg->debug = 1;
- cfg->nullpath_ok = 1;
+
+ /*
+ * Inline data file io depends on op_read/write being fed a path, so we
+ * have to slow everyone down to look up the path from the nodeid.
+ */
+ if (fuse2fs_iomap_enabled(ff) &&
+ ext2fs_has_feature_inline_data(ff->fs->super))
+ cfg->nullpath_ok = 0;
+ else
+ cfg->nullpath_ok = 1;
fuse2fs_detach_losetup(ff);
@@ -3840,6 +3849,9 @@ static int op_read(const char *path EXT2FS_ATTR((unused)), char *buf,
size_t len, off_t offset,
struct fuse_file_info *fp)
{
+ struct fuse2fs_file_handle fhurk = {
+ .magic = FUSE2FS_FILE_MAGIC,
+ };
struct fuse2fs *ff = fuse2fs_get();
struct fuse2fs_file_handle *fh = fuse2fs_get_handle(fp);
ext2_filsys fs;
@@ -3849,10 +3861,21 @@ static int op_read(const char *path EXT2FS_ATTR((unused)), char *buf,
int ret = 0;
FUSE2FS_CHECK_CONTEXT(ff);
+
+ if (!fh)
+ fh = &fhurk;
+
FUSE2FS_CHECK_HANDLE(ff, fh);
dbg_printf(ff, "%s: ino=%d off=0x%llx len=0x%zx\n", __func__, fh->ino,
(unsigned long long)offset, len);
fs = fuse2fs_start(ff);
+
+ if (fh == &fhurk) {
+ ret = fuse2fs_file_ino(ff, path, NULL, &fhurk.ino);
+ if (ret)
+ goto out;
+ }
+
err = ext2fs_file_open(fs, fh->ino, fh->open_flags, &efp);
if (err) {
ret = translate_error(fs, fh->ino, err);
@@ -3894,6 +3917,10 @@ static int op_write(const char *path EXT2FS_ATTR((unused)),
const char *buf, size_t len, off_t offset,
struct fuse_file_info *fp)
{
+ struct fuse2fs_file_handle fhurk = {
+ .magic = FUSE2FS_FILE_MAGIC,
+ .open_flags = EXT2_FILE_WRITE,
+ };
struct fuse2fs *ff = fuse2fs_get();
struct fuse2fs_file_handle *fh = fuse2fs_get_handle(fp);
ext2_filsys fs;
@@ -3903,6 +3930,10 @@ static int op_write(const char *path EXT2FS_ATTR((unused)),
int ret = 0;
FUSE2FS_CHECK_CONTEXT(ff);
+
+ if (!fh)
+ fh = &fhurk;
+
FUSE2FS_CHECK_HANDLE(ff, fh);
dbg_printf(ff, "%s: ino=%d off=0x%llx len=0x%zx\n", __func__, fh->ino,
(unsigned long long) offset, len);
@@ -3917,6 +3948,12 @@ static int op_write(const char *path EXT2FS_ATTR((unused)),
goto out;
}
+ if (fh == &fhurk) {
+ ret = fuse2fs_file_ino(ff, path, NULL, &fhurk.ino);
+ if (ret)
+ goto out;
+ }
+
err = ext2fs_file_open(fs, fh->ino, fh->open_flags, &efp);
if (err) {
ret = translate_error(fs, fh->ino, err);
@@ -5860,7 +5897,8 @@ static int fuse2fs_iomap_begin_read(struct fuse2fs *ff, ext2_ino_t ino,
{
/* fall back to slow path for inline data reads */
if (inode->i_flags & EXT4_INLINE_DATA_FL)
- return -ENOSYS;
+ return fuse2fs_iomap_begin_inline(ff, ino, inode, pos, count,
+ read);
if (inode->i_flags & EXT4_EXTENTS_FL)
return fuse2fs_iomap_begin_extent(ff, ino, inode, pos, count,
^ permalink raw reply related [flat|nested] 50+ messages in thread* [PATCH 13/19] fuse2fs: set iomap-related inode flags
2026-04-29 14:20 ` [PATCHSET v8 2/6] fuse2fs: use fuse iomap data paths for better file I/O performance Darrick J. Wong
` (11 preceding siblings ...)
2026-04-29 14:55 ` [PATCH 12/19] fuse2fs: enable file IO to inline data files Darrick J. Wong
@ 2026-04-29 14:56 ` Darrick J. Wong
2026-04-29 14:56 ` [PATCH 14/19] fuse2fs: configure block device block size Darrick J. Wong
` (5 subsequent siblings)
18 siblings, 0 replies; 50+ messages in thread
From: Darrick J. Wong @ 2026-04-29 14:56 UTC (permalink / raw)
To: tytso
Cc: bernd, miklos, linux-ext4, neal, linux-fsdevel, fuse-devel,
joannelkoong
From: Darrick J. Wong <djwong@kernel.org>
Set FUSE_IFLAG_* when we do a getattr, so that all files will have iomap
enabled.
Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
---
fuse4fs/fuse4fs.c | 46 +++++++++++++++++++++++++++++++++++-----------
misc/fuse2fs.c | 20 ++++++++++++++++++++
2 files changed, 55 insertions(+), 11 deletions(-)
diff --git a/fuse4fs/fuse4fs.c b/fuse4fs/fuse4fs.c
index e2421dda75475a..f9c905c0805d9e 100644
--- a/fuse4fs/fuse4fs.c
+++ b/fuse4fs/fuse4fs.c
@@ -2265,6 +2265,7 @@ static void op_init(void *userdata, struct fuse_conn_info *conn)
struct fuse4fs_stat {
struct fuse_entry_param entry;
+ unsigned int iflags;
};
static int fuse4fs_stat_inode(struct fuse4fs *ff, ext2_ino_t ino,
@@ -2330,9 +2331,29 @@ static int fuse4fs_stat_inode(struct fuse4fs *ff, ext2_ino_t ino,
entry->attr_timeout = FUSE4FS_ATTR_TIMEOUT;
entry->entry_timeout = FUSE4FS_ATTR_TIMEOUT;
+ fstat->iflags = 0;
+#ifdef HAVE_FUSE_IOMAP
+ if (fuse4fs_iomap_enabled(ff))
+ fstat->iflags |= FUSE_IFLAG_IOMAP | FUSE_IFLAG_EXCLUSIVE;
+#endif
+
return 0;
}
+#if FUSE_VERSION < FUSE_MAKE_VERSION(3, 99)
+#define fuse_reply_entry_iflags(req, entry, iflags) \
+ fuse_reply_entry((req), (entry))
+
+#define fuse_reply_attr_iflags(req, entry, iflags, timeout) \
+ fuse_reply_attr((req), (entry), (timeout))
+
+#define fuse_add_direntry_plus_iflags(req, buf, sz, name, iflags, entry, dirpos) \
+ fuse_add_direntry_plus((req), (buf), (sz), (name), (entry), (dirpos))
+
+#define fuse_reply_create_iflags(req, entry, iflags, fp) \
+ fuse_reply_create((req), (entry), (fp))
+#endif
+
static void op_lookup(fuse_req_t req, fuse_ino_t fino, const char *name)
{
struct fuse4fs_stat fstat;
@@ -2363,7 +2384,7 @@ static void op_lookup(fuse_req_t req, fuse_ino_t fino, const char *name)
if (ret)
fuse_reply_err(req, -ret);
else
- fuse_reply_entry(req, &fstat.entry);
+ fuse_reply_entry_iflags(req, &fstat.entry, fstat.iflags);
}
static void op_getattr(fuse_req_t req, fuse_ino_t fino,
@@ -2383,8 +2404,8 @@ static void op_getattr(fuse_req_t req, fuse_ino_t fino,
if (ret)
fuse_reply_err(req, -ret);
else
- fuse_reply_attr(req, &fstat.entry.attr,
- fstat.entry.attr_timeout);
+ fuse_reply_attr_iflags(req, &fstat.entry.attr, fstat.iflags,
+ fstat.entry.attr_timeout);
}
static void op_readlink(fuse_req_t req, fuse_ino_t fino)
@@ -2662,7 +2683,7 @@ static void fuse4fs_reply_entry(fuse_req_t req, ext2_ino_t ino,
return;
}
- fuse_reply_entry(req, &fstat.entry);
+ fuse_reply_entry_iflags(req, &fstat.entry, fstat.iflags);
}
static void op_mknod(fuse_req_t req, fuse_ino_t fino, const char *name,
@@ -4990,10 +5011,13 @@ static int op_readdir_iter(ext2_ino_t dir EXT2FS_ATTR((unused)),
namebuf[dirent->name_len & 0xFF] = 0;
if (i->readdirplus) {
- entrysize = fuse_add_direntry_plus(i->req, i->buf + i->bufused,
- i->bufsz - i->bufused,
- namebuf, &fstat.entry,
- i->dirpos);
+ entrysize = fuse_add_direntry_plus_iflags(i->req,
+ i->buf + i->bufused,
+ i->bufsz - i->bufused,
+ namebuf,
+ fstat.iflags,
+ &fstat.entry,
+ i->dirpos);
} else {
entrysize = fuse_add_direntry(i->req, i->buf + i->bufused,
i->bufsz - i->bufused, namebuf,
@@ -5218,7 +5242,7 @@ static void op_create(fuse_req_t req, fuse_ino_t fino, const char *name,
if (ret)
fuse_reply_err(req, -ret);
else
- fuse_reply_create(req, &fstat.entry, fp);
+ fuse_reply_create_iflags(req, &fstat.entry, fstat.iflags, fp);
}
#if FUSE_VERSION >= FUSE_MAKE_VERSION(3, 17)
@@ -5417,8 +5441,8 @@ static void op_setattr(fuse_req_t req, fuse_ino_t fino, struct stat *attr,
if (ret)
fuse_reply_err(req, -ret);
else
- fuse_reply_attr(req, &fstat.entry.attr,
- fstat.entry.attr_timeout);
+ fuse_reply_attr_iflags(req, &fstat.entry.attr, fstat.iflags,
+ fstat.entry.attr_timeout);
}
#define FUSE4FS_MODIFIABLE_IFLAGS \
diff --git a/misc/fuse2fs.c b/misc/fuse2fs.c
index eecbf60a3360c6..c6472a1c45506f 100644
--- a/misc/fuse2fs.c
+++ b/misc/fuse2fs.c
@@ -1987,6 +1987,23 @@ static int op_getattr(const char *path, struct stat *statbuf,
return ret;
}
+#if FUSE_VERSION >= FUSE_MAKE_VERSION(3, 99)
+static int op_getattr_iflags(const char *path, struct stat *statbuf,
+ unsigned int *iflags, struct fuse_file_info *fi)
+{
+ int ret = op_getattr(path, statbuf, fi);
+
+ if (ret)
+ return ret;
+
+ if (fuse_fs_can_enable_iomap(statbuf))
+ *iflags |= FUSE_IFLAG_IOMAP | FUSE_IFLAG_EXCLUSIVE;
+
+ return 0;
+}
+#endif
+
+
static int op_readlink(const char *path, char *buf, size_t len)
{
struct fuse2fs *ff = fuse2fs_get();
@@ -6673,6 +6690,9 @@ static struct fuse_operations fs_ops = {
#ifdef SUPPORT_FALLOCATE
.fallocate = op_fallocate,
#endif
+#if FUSE_VERSION >= FUSE_MAKE_VERSION(3, 99)
+ .getattr_iflags = op_getattr_iflags,
+#endif
#ifdef HAVE_FUSE_IOMAP
.iomap_begin = op_iomap_begin,
.iomap_end = op_iomap_end,
^ permalink raw reply related [flat|nested] 50+ messages in thread* [PATCH 14/19] fuse2fs: configure block device block size
2026-04-29 14:20 ` [PATCHSET v8 2/6] fuse2fs: use fuse iomap data paths for better file I/O performance Darrick J. Wong
` (12 preceding siblings ...)
2026-04-29 14:56 ` [PATCH 13/19] fuse2fs: set iomap-related inode flags Darrick J. Wong
@ 2026-04-29 14:56 ` Darrick J. Wong
2026-04-29 14:56 ` [PATCH 15/19] fuse4fs: separate invalidation Darrick J. Wong
` (4 subsequent siblings)
18 siblings, 0 replies; 50+ messages in thread
From: Darrick J. Wong @ 2026-04-29 14:56 UTC (permalink / raw)
To: tytso
Cc: bernd, miklos, linux-ext4, neal, linux-fsdevel, fuse-devel,
joannelkoong
From: Darrick J. Wong <djwong@kernel.org>
Set the blocksize of the block device to the filesystem blocksize.
This prevents the bdev pagecache from caching file data blocks that
iomap will read and write directly. Cache duplication is dangerous.
Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
---
fuse4fs/fuse4fs.c | 43 +++++++++++++++++++++++++++++++++++++++++++
misc/fuse2fs.c | 43 +++++++++++++++++++++++++++++++++++++++++++
2 files changed, 86 insertions(+)
diff --git a/fuse4fs/fuse4fs.c b/fuse4fs/fuse4fs.c
index f9c905c0805d9e..e92a85da0115ca 100644
--- a/fuse4fs/fuse4fs.c
+++ b/fuse4fs/fuse4fs.c
@@ -6891,6 +6891,45 @@ static off_t fuse4fs_max_size(struct fuse4fs *ff, off_t upper_limit)
return res;
}
+/*
+ * Set the block device's blocksize to the fs blocksize. Returns 0 if the bdev
+ * blocksize ends up no larger than the fs blocksize, or -EIO otherwise.
+ * This is required to avoid creating uptodate bdev pagecache that aliases file
+ * data blocks because iomap reads and writes directly to file data blocks.
+ */
+static int fuse4fs_set_bdev_blocksize(struct fuse4fs *ff, int fd)
+{
+ int blocksize = ff->fs->blocksize;
+ int set_error;
+ int ret;
+
+ ret = ioctl(fd, BLKBSZSET, &blocksize);
+ if (!ret)
+ return 0;
+
+ /*
+ * Save the original errno so we can report that if the block device
+ * blocksize isn't set in an agreeable way.
+ */
+ set_error = errno;
+
+ ret = ioctl(fd, BLKBSZGET, &blocksize);
+ if (ret)
+ goto out_bad;
+
+ /* Pretend that BLKBSZSET rejected our proposed block size */
+ if (blocksize > ff->fs->blocksize) {
+ set_error = EINVAL;
+ goto out_bad;
+ }
+
+ return 0;
+out_bad:
+ err_printf(ff, "%s: cannot set blocksize %u: %s\n", __func__,
+ ff->fs->blocksize, strerror(set_error));
+ return -EIO;
+}
+
static int fuse4fs_iomap_config_devices(struct fuse4fs *ff)
{
errcode_t err;
@@ -6901,6 +6940,10 @@ static int fuse4fs_iomap_config_devices(struct fuse4fs *ff)
if (err)
return translate_error(ff->fs, 0, err);
+ ret = fuse4fs_set_bdev_blocksize(ff, fd);
+ if (ret)
+ return ret;
+
ret = fuse_lowlevel_iomap_device_add(ff->fuse, fd, 0);
if (ret < 0) {
dbg_printf(ff, "%s: cannot register iomap dev fd=%d, err=%d\n",
diff --git a/misc/fuse2fs.c b/misc/fuse2fs.c
index c6472a1c45506f..c922c7fb45d311 100644
--- a/misc/fuse2fs.c
+++ b/misc/fuse2fs.c
@@ -6211,6 +6211,45 @@ static off_t fuse2fs_max_size(struct fuse2fs *ff, off_t upper_limit)
return res;
}
+/*
+ * Set the block device's blocksize to the fs blocksize. Returns 0 if the bdev
+ * blocksize ends up no larger than the fs blocksize, or -EIO otherwise.
+ * This is required to avoid creating uptodate bdev pagecache that aliases file
+ * data blocks because iomap reads and writes directly to file data blocks.
+ */
+static int fuse2fs_set_bdev_blocksize(struct fuse2fs *ff, int fd)
+{
+ int blocksize = ff->fs->blocksize;
+ int set_error;
+ int ret;
+
+ ret = ioctl(fd, BLKBSZSET, &blocksize);
+ if (!ret)
+ return 0;
+
+ /*
+ * Save the original errno so we can report that if the block device
+ * blocksize isn't set in an agreeable way.
+ */
+ set_error = errno;
+
+ ret = ioctl(fd, BLKBSZGET, &blocksize);
+ if (ret)
+ goto out_bad;
+
+ /* Pretend that BLKBSZSET rejected our proposed block size */
+ if (blocksize > ff->fs->blocksize) {
+ set_error = EINVAL;
+ goto out_bad;
+ }
+
+ return 0;
+out_bad:
+ err_printf(ff, "%s: cannot set blocksize %u: %s\n", __func__,
+ ff->fs->blocksize, strerror(set_error));
+ return -EIO;
+}
+
static int fuse2fs_iomap_config_devices(struct fuse2fs *ff)
{
errcode_t err;
@@ -6221,6 +6260,10 @@ static int fuse2fs_iomap_config_devices(struct fuse2fs *ff)
if (err)
return translate_error(ff->fs, 0, err);
+ ret = fuse2fs_set_bdev_blocksize(ff, fd);
+ if (ret)
+ return ret;
+
ret = fuse_fs_iomap_device_add(fd, 0);
if (ret < 0) {
dbg_printf(ff, "%s: cannot register iomap dev fd=%d, err=%d\n",
^ permalink raw reply related [flat|nested] 50+ messages in thread* [PATCH 15/19] fuse4fs: separate invalidation
2026-04-29 14:20 ` [PATCHSET v8 2/6] fuse2fs: use fuse iomap data paths for better file I/O performance Darrick J. Wong
` (13 preceding siblings ...)
2026-04-29 14:56 ` [PATCH 14/19] fuse2fs: configure block device block size Darrick J. Wong
@ 2026-04-29 14:56 ` Darrick J. Wong
2026-04-29 14:56 ` [PATCH 16/19] fuse2fs: implement statx Darrick J. Wong
` (3 subsequent siblings)
18 siblings, 0 replies; 50+ messages in thread
From: Darrick J. Wong @ 2026-04-29 14:56 UTC (permalink / raw)
To: tytso
Cc: bernd, miklos, linux-ext4, neal, linux-fsdevel, fuse-devel,
joannelkoong
From: Darrick J. Wong <djwong@kernel.org>
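Use the new libfuse iomap device invalidation call to invalidate the block device page cache for blocks as they are freed.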
Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
---
fuse4fs/fuse4fs.c | 61 +++++++++++++++++++++++++++++++++++++++++++++++++++++
misc/fuse2fs.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 121 insertions(+)
diff --git a/fuse4fs/fuse4fs.c b/fuse4fs/fuse4fs.c
index e92a85da0115ca..6016e23c511ac1 100644
--- a/fuse4fs/fuse4fs.c
+++ b/fuse4fs/fuse4fs.c
@@ -293,6 +293,9 @@ struct fuse4fs {
enum fuse4fs_iomap_state iomap_state;
uint32_t iomap_dev;
uint64_t iomap_cap;
+ void (*old_alloc_stats)(ext2_filsys fs, blk64_t blk, int inuse);
+ void (*old_alloc_stats_range)(ext2_filsys fs, blk64_t blk, blk_t num,
+ int inuse);
#endif
unsigned int blockmask;
unsigned long offset;
@@ -6958,6 +6961,51 @@ static int fuse4fs_iomap_config_devices(struct fuse4fs *ff)
return 0;
}
+static void fuse4fs_invalidate_bdev(struct fuse4fs *ff, blk64_t blk, blk_t num)
+{
+ off_t offset = FUSE4FS_FSB_TO_B(ff, blk);
+ off_t length = FUSE4FS_FSB_TO_B(ff, num);
+ int ret;
+
+ ret = fuse_lowlevel_iomap_device_invalidate(ff->fuse, ff->iomap_dev,
+ offset, length);
+ if (!ret)
+ return;
+
+ if (num == 1)
+ err_printf(ff, "%s %llu: %s\n",
+ _("error invalidating block"),
+ (unsigned long long)blk,
+ strerror(ret));
+ else
+ err_printf(ff, "%s %llu-%llu: %s\n",
+ _("error invalidating blocks"),
+ (unsigned long long)blk,
+ (unsigned long long)blk + num - 1,
+ strerror(ret));
+}
+
+static void fuse4fs_alloc_stats(ext2_filsys fs, blk64_t blk, int inuse)
+{
+ struct fuse4fs *ff = fs->priv_data;
+
+ if (inuse < 0)
+ fuse4fs_invalidate_bdev(ff, blk, 1);
+ if (ff->old_alloc_stats)
+ ff->old_alloc_stats(fs, blk, inuse);
+}
+
+static void fuse4fs_alloc_stats_range(ext2_filsys fs, blk64_t blk, blk_t num,
+ int inuse)
+{
+ struct fuse4fs *ff = fs->priv_data;
+
+ if (inuse < 0)
+ fuse4fs_invalidate_bdev(ff, blk, num);
+ if (ff->old_alloc_stats_range)
+ ff->old_alloc_stats_range(fs, blk, num, inuse);
+}
+
static void op_iomap_config(fuse_req_t req,
const struct fuse_iomap_config_params *p,
size_t psize)
@@ -7004,6 +7052,19 @@ static void op_iomap_config(fuse_req_t req,
if (ret)
goto out_unlock;
+ /*
+ * If we let iomap do all file block IO, then we need to watch for
+ * freed blocks so that we can invalidate any page cache that might
+ * get written to the block device.
+ */
+ if (fuse4fs_iomap_enabled(ff)) {
+ ext2fs_set_block_alloc_stats_callback(ff->fs,
+ fuse4fs_alloc_stats, &ff->old_alloc_stats);
+ ext2fs_set_block_alloc_stats_range_callback(ff->fs,
+ fuse4fs_alloc_stats_range,
+ &ff->old_alloc_stats_range);
+ }
+
out_unlock:
fuse4fs_finish(ff, ret);
if (ret)
diff --git a/misc/fuse2fs.c b/misc/fuse2fs.c
index c922c7fb45d311..138346fcc4517f 100644
--- a/misc/fuse2fs.c
+++ b/misc/fuse2fs.c
@@ -276,6 +276,9 @@ struct fuse2fs {
enum fuse2fs_iomap_state iomap_state;
uint32_t iomap_dev;
uint64_t iomap_cap;
+ void (*old_alloc_stats)(ext2_filsys fs, blk64_t blk, int inuse);
+ void (*old_alloc_stats_range)(ext2_filsys fs, blk64_t blk, blk_t num,
+ int inuse);
#endif
unsigned int blockmask;
unsigned long offset;
@@ -6278,6 +6281,50 @@ static int fuse2fs_iomap_config_devices(struct fuse2fs *ff)
return 0;
}
+static void fuse2fs_invalidate_bdev(struct fuse2fs *ff, blk64_t blk, blk_t num)
+{
+ off_t offset = FUSE2FS_FSB_TO_B(ff, blk);
+ off_t length = FUSE2FS_FSB_TO_B(ff, num);
+ int ret;
+
+ ret = fuse_fs_iomap_device_invalidate(ff->iomap_dev, offset, length);
+ if (!ret)
+ return;
+
+ if (num == 1)
+ err_printf(ff, "%s %llu: %s\n",
+ _("error invalidating block"),
+ (unsigned long long)blk,
+ strerror(ret));
+ else
+ err_printf(ff, "%s %llu-%llu: %s\n",
+ _("error invalidating blocks"),
+ (unsigned long long)blk,
+ (unsigned long long)blk + num - 1,
+ strerror(ret));
+}
+
+static void fuse2fs_alloc_stats(ext2_filsys fs, blk64_t blk, int inuse)
+{
+ struct fuse2fs *ff = fs->priv_data;
+
+ if (inuse < 0)
+ fuse2fs_invalidate_bdev(ff, blk, 1);
+ if (ff->old_alloc_stats)
+ ff->old_alloc_stats(fs, blk, inuse);
+}
+
+static void fuse2fs_alloc_stats_range(ext2_filsys fs, blk64_t blk, blk_t num,
+ int inuse)
+{
+ struct fuse2fs *ff = fs->priv_data;
+
+ if (inuse < 0)
+ fuse2fs_invalidate_bdev(ff, blk, num);
+ if (ff->old_alloc_stats_range)
+ ff->old_alloc_stats_range(fs, blk, num, inuse);
+}
+
static int op_iomap_config(const struct fuse_iomap_config_params *p,
size_t psize, struct fuse_iomap_config *cfg)
{
@@ -6322,6 +6369,19 @@ static int op_iomap_config(const struct fuse_iomap_config_params *p,
if (ret)
goto out_unlock;
+ /*
+ * If we let iomap do all file block IO, then we need to watch for
+ * freed blocks so that we can invalidate any page cache that might
+ * get written to the block device.
+ */
+ if (fuse2fs_iomap_enabled(ff)) {
+ ext2fs_set_block_alloc_stats_callback(ff->fs,
+ fuse2fs_alloc_stats, &ff->old_alloc_stats);
+ ext2fs_set_block_alloc_stats_range_callback(ff->fs,
+ fuse2fs_alloc_stats_range,
+ &ff->old_alloc_stats_range);
+ }
+
out_unlock:
fuse2fs_finish(ff, ret);
return ret;
^ permalink raw reply related [flat|nested] 50+ messages in thread* [PATCH 16/19] fuse2fs: implement statx
2026-04-29 14:20 ` [PATCHSET v8 2/6] fuse2fs: use fuse iomap data paths for better file I/O performance Darrick J. Wong
` (14 preceding siblings ...)
2026-04-29 14:56 ` [PATCH 15/19] fuse4fs: separate invalidation Darrick J. Wong
@ 2026-04-29 14:56 ` Darrick J. Wong
2026-04-29 14:57 ` [PATCH 17/19] fuse2fs: enable atomic writes Darrick J. Wong
` (2 subsequent siblings)
18 siblings, 0 replies; 50+ messages in thread
From: Darrick J. Wong @ 2026-04-29 14:56 UTC (permalink / raw)
To: tytso
Cc: bernd, miklos, linux-ext4, neal, linux-fsdevel, fuse-devel,
joannelkoong
From: Darrick J. Wong <djwong@kernel.org>
Implement statx.
Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
---
fuse4fs/fuse4fs.c | 136 +++++++++++++++++++++++++++++++++++++++++++++++++++++
misc/fuse2fs.c | 131 +++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 267 insertions(+)
diff --git a/fuse4fs/fuse4fs.c b/fuse4fs/fuse4fs.c
index 6016e23c511ac1..8d994fe490e914 100644
--- a/fuse4fs/fuse4fs.c
+++ b/fuse4fs/fuse4fs.c
@@ -24,6 +24,7 @@
#include <sys/xattr.h>
#endif
#include <sys/ioctl.h>
+#include <sys/sysmacros.h>
#include <unistd.h>
#include <ctype.h>
#include <assert.h>
@@ -2411,6 +2412,138 @@ static void op_getattr(fuse_req_t req, fuse_ino_t fino,
fstat.entry.attr_timeout);
}
+#if FUSE_VERSION >= FUSE_MAKE_VERSION(3, 18) && defined(STATX_BASIC_STATS)
+static inline void fuse4fs_set_statx_attr(struct statx *stx,
+ uint64_t statx_flag, int set)
+{
+ if (set)
+ stx->stx_attributes |= statx_flag;
+ stx->stx_attributes_mask |= statx_flag;
+}
+
+static void fuse4fs_statx_directio(struct fuse4fs *ff, struct statx *stx)
+{
+ struct statx devx;
+ errcode_t err;
+ int fd;
+
+ err = io_channel_get_fd(ff->fs->io, &fd);
+ if (err)
+ return;
+
+ err = statx(fd, "", AT_EMPTY_PATH, STATX_DIOALIGN, &devx);
+ if (err)
+ return;
+ if (!(devx.stx_mask & STATX_DIOALIGN))
+ return;
+
+ stx->stx_mask |= STATX_DIOALIGN;
+ stx->stx_dio_mem_align = devx.stx_dio_mem_align;
+ stx->stx_dio_offset_align = devx.stx_dio_offset_align;
+}
+
+static int fuse4fs_statx(struct fuse4fs *ff, ext2_ino_t ino, int statx_mask,
+ struct statx *stx)
+{
+ struct ext2_inode_large inode;
+ ext2_filsys fs = ff->fs;
+ dev_t fakedev = 0;
+ errcode_t err;
+ struct timespec tv;
+
+ err = fuse4fs_read_inode(fs, ino, &inode);
+ if (err)
+ return translate_error(fs, ino, err);
+
+ memcpy(&fakedev, fs->super->s_uuid, sizeof(fakedev)); /* fake device number: first sizeof(dev_t) bytes of the fs UUID */
+ stx->stx_mask = STATX_BASIC_STATS;
+ stx->stx_dev_major = major(fakedev);
+ stx->stx_dev_minor = minor(fakedev);
+ stx->stx_ino = ino;
+ stx->stx_mode = inode.i_mode;
+ stx->stx_nlink = inode.i_links_count;
+ stx->stx_uid = inode_uid(inode);
+ stx->stx_gid = inode_gid(inode);
+ stx->stx_size = EXT2_I_SIZE(&inode);
+ stx->stx_blksize = fs->blocksize;
+ stx->stx_blocks = ext2fs_get_stat_i_blocks(fs,
+ EXT2_INODE(&inode));
+ EXT4_INODE_GET_XTIME(i_atime, &tv, &inode);
+ stx->stx_atime.tv_sec = tv.tv_sec;
+ stx->stx_atime.tv_nsec = tv.tv_nsec;
+
+ EXT4_INODE_GET_XTIME(i_mtime, &tv, &inode);
+ stx->stx_mtime.tv_sec = tv.tv_sec;
+ stx->stx_mtime.tv_nsec = tv.tv_nsec;
+
+ EXT4_INODE_GET_XTIME(i_ctime, &tv, &inode);
+ stx->stx_ctime.tv_sec = tv.tv_sec;
+ stx->stx_ctime.tv_nsec = tv.tv_nsec;
+
+ if (EXT4_FITS_IN_INODE(&inode, i_crtime)) {
+ stx->stx_mask |= STATX_BTIME;
+ EXT4_INODE_GET_XTIME(i_crtime, &tv, &inode);
+ stx->stx_btime.tv_sec = tv.tv_sec;
+ stx->stx_btime.tv_nsec = tv.tv_nsec;
+ }
+
+ dbg_printf(ff, "%s: ino=%d atime=%lld.%d mtime=%lld.%d ctime=%lld.%d btime=%lld.%d\n",
+ __func__, ino,
+ (long long int)stx->stx_atime.tv_sec, stx->stx_atime.tv_nsec,
+ (long long int)stx->stx_mtime.tv_sec, stx->stx_mtime.tv_nsec,
+ (long long int)stx->stx_ctime.tv_sec, stx->stx_ctime.tv_nsec,
+ (long long int)stx->stx_btime.tv_sec, stx->stx_btime.tv_nsec);
+
+ if (LINUX_S_ISCHR(inode.i_mode) ||
+ LINUX_S_ISBLK(inode.i_mode)) {
+ if (inode.i_block[0]) { /* device number is taken from i_block[0], or i_block[1] when that is zero */
+ stx->stx_rdev_major = major(inode.i_block[0]);
+ stx->stx_rdev_minor = minor(inode.i_block[0]);
+ } else {
+ stx->stx_rdev_major = major(inode.i_block[1]);
+ stx->stx_rdev_minor = minor(inode.i_block[1]);
+ }
+ }
+
+ fuse4fs_set_statx_attr(stx, STATX_ATTR_COMPRESSED,
+ inode.i_flags & EXT2_COMPR_FL);
+ fuse4fs_set_statx_attr(stx, STATX_ATTR_IMMUTABLE,
+ inode.i_flags & EXT2_IMMUTABLE_FL);
+ fuse4fs_set_statx_attr(stx, STATX_ATTR_APPEND,
+ inode.i_flags & EXT2_APPEND_FL);
+ fuse4fs_set_statx_attr(stx, STATX_ATTR_NODUMP,
+ inode.i_flags & EXT2_NODUMP_FL);
+
+ fuse4fs_statx_directio(ff, stx);
+
+ return 0;
+}
+
+static void op_statx(fuse_req_t req, fuse_ino_t fino, int flags, int mask,
+ struct fuse_file_info *fi)
+{
+ struct statx stx = { };
+ struct fuse4fs *ff = fuse4fs_get(req);
+ ext2_ino_t ino;
+ int ret = 0;
+
+ FUSE4FS_CHECK_CONTEXT(req);
+ FUSE4FS_CONVERT_FINO(req, &ino, fino);
+ fuse4fs_start(ff);
+ ret = fuse4fs_statx(ff, ino, mask, &stx);
+ if (ret)
+ goto out;
+out:
+ fuse4fs_finish(ff, ret);
+ if (ret)
+ fuse_reply_err(req, -ret);
+ else
+ fuse_reply_statx(req, 0, &stx, FUSE4FS_ATTR_TIMEOUT);
+}
+#else
+# define op_statx NULL
+#endif
+
static void op_readlink(fuse_req_t req, fuse_ino_t fino)
{
struct ext2_inode inode;
@@ -7484,6 +7617,9 @@ static struct fuse_lowlevel_ops fs_ops = {
#ifdef SUPPORT_FALLOCATE
.fallocate = op_fallocate,
#endif
+#if FUSE_VERSION >= FUSE_MAKE_VERSION(3, 18)
+ .statx = op_statx,
+#endif
#ifdef HAVE_FUSE_IOMAP
.iomap_begin = op_iomap_begin,
.iomap_end = op_iomap_end,
diff --git a/misc/fuse2fs.c b/misc/fuse2fs.c
index 138346fcc4517f..f9e8fca096ec2c 100644
--- a/misc/fuse2fs.c
+++ b/misc/fuse2fs.c
@@ -23,6 +23,7 @@
#include <sys/xattr.h>
#endif
#include <sys/ioctl.h>
+#include <sys/sysmacros.h>
#include <unistd.h>
#include <ctype.h>
#ifdef HAVE_FUSE_LOOPDEV
@@ -2006,6 +2007,133 @@ static int op_getattr_iflags(const char *path, struct stat *statbuf,
}
#endif
+#if FUSE_VERSION >= FUSE_MAKE_VERSION(3, 18) && defined(STATX_BASIC_STATS)
+static inline void fuse2fs_set_statx_attr(struct statx *stx,
+ uint64_t statx_flag, int set)
+{
+ if (set)
+ stx->stx_attributes |= statx_flag;
+ stx->stx_attributes_mask |= statx_flag;
+}
+
+static void fuse2fs_statx_directio(struct fuse2fs *ff, struct statx *stx)
+{
+ struct statx devx;
+ errcode_t err;
+ int fd;
+
+ err = io_channel_get_fd(ff->fs->io, &fd);
+ if (err)
+ return;
+
+ err = statx(fd, "", AT_EMPTY_PATH, STATX_DIOALIGN, &devx);
+ if (err)
+ return;
+ if (!(devx.stx_mask & STATX_DIOALIGN))
+ return;
+
+ stx->stx_mask |= STATX_DIOALIGN;
+ stx->stx_dio_mem_align = devx.stx_dio_mem_align;
+ stx->stx_dio_offset_align = devx.stx_dio_offset_align;
+}
+
+static int fuse2fs_statx(struct fuse2fs *ff, ext2_ino_t ino, int statx_mask,
+ struct statx *stx)
+{
+ struct ext2_inode_large inode;
+ ext2_filsys fs = ff->fs;
+ dev_t fakedev = 0;
+ errcode_t err;
+ struct timespec tv;
+
+ err = fuse2fs_read_inode(fs, ino, &inode);
+ if (err)
+ return translate_error(fs, ino, err);
+
+ memcpy(&fakedev, fs->super->s_uuid, sizeof(fakedev)); /* fake device number: first sizeof(dev_t) bytes of the fs UUID */
+ stx->stx_mask = STATX_BASIC_STATS;
+ stx->stx_dev_major = major(fakedev);
+ stx->stx_dev_minor = minor(fakedev);
+ stx->stx_ino = ino;
+ stx->stx_mode = inode.i_mode;
+ stx->stx_nlink = inode.i_links_count;
+ stx->stx_uid = inode_uid(inode);
+ stx->stx_gid = inode_gid(inode);
+ stx->stx_size = EXT2_I_SIZE(&inode);
+ stx->stx_blksize = fs->blocksize;
+ stx->stx_blocks = ext2fs_get_stat_i_blocks(fs,
+ EXT2_INODE(&inode));
+ EXT4_INODE_GET_XTIME(i_atime, &tv, &inode);
+ stx->stx_atime.tv_sec = tv.tv_sec;
+ stx->stx_atime.tv_nsec = tv.tv_nsec;
+
+ EXT4_INODE_GET_XTIME(i_mtime, &tv, &inode);
+ stx->stx_mtime.tv_sec = tv.tv_sec;
+ stx->stx_mtime.tv_nsec = tv.tv_nsec;
+
+ EXT4_INODE_GET_XTIME(i_ctime, &tv, &inode);
+ stx->stx_ctime.tv_sec = tv.tv_sec;
+ stx->stx_ctime.tv_nsec = tv.tv_nsec;
+
+ if (EXT4_FITS_IN_INODE(&inode, i_crtime)) {
+ stx->stx_mask |= STATX_BTIME;
+ EXT4_INODE_GET_XTIME(i_crtime, &tv, &inode);
+ stx->stx_btime.tv_sec = tv.tv_sec;
+ stx->stx_btime.tv_nsec = tv.tv_nsec;
+ }
+
+ dbg_printf(ff, "%s: ino=%d atime=%lld.%d mtime=%lld.%d ctime=%lld.%d btime=%lld.%d\n",
+ __func__, ino,
+ (long long int)stx->stx_atime.tv_sec, stx->stx_atime.tv_nsec,
+ (long long int)stx->stx_mtime.tv_sec, stx->stx_mtime.tv_nsec,
+ (long long int)stx->stx_ctime.tv_sec, stx->stx_ctime.tv_nsec,
+ (long long int)stx->stx_btime.tv_sec, stx->stx_btime.tv_nsec);
+
+ if (LINUX_S_ISCHR(inode.i_mode) ||
+ LINUX_S_ISBLK(inode.i_mode)) {
+ if (inode.i_block[0]) { /* device number is taken from i_block[0], or i_block[1] when that is zero */
+ stx->stx_rdev_major = major(inode.i_block[0]);
+ stx->stx_rdev_minor = minor(inode.i_block[0]);
+ } else {
+ stx->stx_rdev_major = major(inode.i_block[1]);
+ stx->stx_rdev_minor = minor(inode.i_block[1]);
+ }
+ }
+
+ fuse2fs_set_statx_attr(stx, STATX_ATTR_COMPRESSED,
+ inode.i_flags & EXT2_COMPR_FL);
+ fuse2fs_set_statx_attr(stx, STATX_ATTR_IMMUTABLE,
+ inode.i_flags & EXT2_IMMUTABLE_FL);
+ fuse2fs_set_statx_attr(stx, STATX_ATTR_APPEND,
+ inode.i_flags & EXT2_APPEND_FL);
+ fuse2fs_set_statx_attr(stx, STATX_ATTR_NODUMP,
+ inode.i_flags & EXT2_NODUMP_FL);
+
+ fuse2fs_statx_directio(ff, stx);
+
+ return 0;
+}
+
+static int op_statx(const char *path, int statx_flags, int statx_mask,
+ struct statx *stx, struct fuse_file_info *fi)
+{
+ struct fuse2fs *ff = fuse2fs_get();
+ ext2_ino_t ino;
+ int ret = 0;
+
+ FUSE2FS_CHECK_CONTEXT(ff);
+ fuse2fs_start(ff);
+ ret = fuse2fs_file_ino(ff, path, fi, &ino);
+ if (ret)
+ goto out;
+ ret = fuse2fs_statx(ff, ino, statx_mask, stx);
+out:
+ fuse2fs_finish(ff, ret);
+ return ret;
+}
+#else
+# define op_statx NULL
+#endif
static int op_readlink(const char *path, char *buf, size_t len)
{
@@ -6793,6 +6921,9 @@ static struct fuse_operations fs_ops = {
#ifdef SUPPORT_FALLOCATE
.fallocate = op_fallocate,
#endif
+#if FUSE_VERSION >= FUSE_MAKE_VERSION(3, 18)
+ .statx = op_statx,
+#endif
#if FUSE_VERSION >= FUSE_MAKE_VERSION(3, 99)
.getattr_iflags = op_getattr_iflags,
#endif
^ permalink raw reply related [flat|nested] 50+ messages in thread* [PATCH 17/19] fuse2fs: enable atomic writes
2026-04-29 14:20 ` [PATCHSET v8 2/6] fuse2fs: use fuse iomap data paths for better file I/O performance Darrick J. Wong
` (15 preceding siblings ...)
2026-04-29 14:56 ` [PATCH 16/19] fuse2fs: implement statx Darrick J. Wong
@ 2026-04-29 14:57 ` Darrick J. Wong
2026-04-29 14:57 ` [PATCH 18/19] fuse4fs: disable fs reclaim and write throttling Darrick J. Wong
2026-04-29 14:57 ` [PATCH 19/19] fuse2fs: implement freeze and shutdown requests Darrick J. Wong
18 siblings, 0 replies; 50+ messages in thread
From: Darrick J. Wong @ 2026-04-29 14:57 UTC (permalink / raw)
To: tytso
Cc: bernd, miklos, linux-ext4, neal, linux-fsdevel, fuse-devel,
joannelkoong
From: Darrick J. Wong <djwong@kernel.org>
Advertise the single-fsblock atomic write capability that iomap can do.
Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
---
fuse4fs/fuse4fs.c | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++
misc/fuse2fs.c | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
2 files changed, 134 insertions(+), 2 deletions(-)
diff --git a/fuse4fs/fuse4fs.c b/fuse4fs/fuse4fs.c
index 8d994fe490e914..49708bdf7b655d 100644
--- a/fuse4fs/fuse4fs.c
+++ b/fuse4fs/fuse4fs.c
@@ -297,6 +297,9 @@ struct fuse4fs {
void (*old_alloc_stats)(ext2_filsys fs, blk64_t blk, int inuse);
void (*old_alloc_stats_range)(ext2_filsys fs, blk64_t blk, blk_t num,
int inuse);
+#ifdef STATX_WRITE_ATOMIC
+ unsigned int awu_min, awu_max;
+#endif
#endif
unsigned int blockmask;
unsigned long offset;
@@ -938,10 +941,22 @@ static inline bool fuse4fs_can_iomap(const struct fuse4fs *ff)
{
return ff->iomap_cap & FUSE_IOMAP_SUPPORT_FILEIO;
}
+
+static inline bool fuse4fs_iomap_supports_hw_atomic(const struct fuse4fs *ff)
+{
+ return fuse4fs_iomap_enabled(ff) &&
+ (ff->iomap_cap & FUSE_IOMAP_SUPPORT_ATOMIC) &&
+#ifdef STATX_WRITE_ATOMIC
+ ff->awu_max > 0 && ff->awu_min > 0;
+#else
+ 0;
+#endif
+}
#else
# define fuse4fs_iomap_enabled(...) (0)
# define fuse4fs_discover_iomap(...) ((void)0)
# define fuse4fs_can_iomap(...) (false)
+# define fuse4fs_iomap_supports_hw_atomic(...) (0)
#endif
static inline void fuse4fs_dump_extents(struct fuse4fs *ff, ext2_ino_t ino,
@@ -2337,8 +2352,12 @@ static int fuse4fs_stat_inode(struct fuse4fs *ff, ext2_ino_t ino,
fstat->iflags = 0;
#ifdef HAVE_FUSE_IOMAP
- if (fuse4fs_iomap_enabled(ff))
+ if (fuse4fs_iomap_enabled(ff)) {
fstat->iflags |= FUSE_IFLAG_IOMAP | FUSE_IFLAG_EXCLUSIVE;
+
+ if (fuse4fs_iomap_supports_hw_atomic(ff))
+ fstat->iflags |= FUSE_IFLAG_ATOMIC;
+ }
#endif
return 0;
@@ -2516,6 +2535,15 @@ static int fuse4fs_statx(struct fuse4fs *ff, ext2_ino_t ino, int statx_mask,
fuse4fs_statx_directio(ff, stx);
+#ifdef STATX_WRITE_ATOMIC
+ if (fuse4fs_iomap_supports_hw_atomic(ff)) {
+ stx->stx_mask |= STATX_WRITE_ATOMIC;
+ stx->stx_atomic_write_unit_min = ff->awu_min;
+ stx->stx_atomic_write_unit_max = ff->awu_max;
+ stx->stx_atomic_write_segments_max = 1;
+ }
+#endif
+
return 0;
}
@@ -6902,6 +6930,9 @@ static void op_iomap_begin(fuse_req_t req, fuse_ino_t fino, uint64_t dontcare,
}
}
+ if (opflags & FUSE_IOMAP_OP_ATOMIC)
+ read.flags |= FUSE_IOMAP_F_ATOMIC_BIO;
+
out_unlock:
fuse4fs_finish(ff, ret);
if (ret)
@@ -7066,6 +7097,38 @@ static int fuse4fs_set_bdev_blocksize(struct fuse4fs *ff, int fd)
return -EIO;
}
+#ifdef STATX_WRITE_ATOMIC
+static void fuse4fs_configure_atomic_write(struct fuse4fs *ff, int bdev_fd)
+{
+ struct statx devx;
+ unsigned int awu_min, awu_max;
+ int ret;
+
+ if (!ext2fs_has_feature_extents(ff->fs->super))
+ return;
+
+ ret = statx(bdev_fd, "", AT_EMPTY_PATH, STATX_WRITE_ATOMIC, &devx);
+ if (ret)
+ return;
+ if (!(devx.stx_mask & STATX_WRITE_ATOMIC))
+ return;
+
+ awu_min = max(ff->fs->blocksize, devx.stx_atomic_write_unit_min);
+ awu_max = min(ff->fs->blocksize, devx.stx_atomic_write_unit_max);
+ if (awu_min > awu_max)
+ return;
+
+ log_printf(ff, "%s awu_min: %u, awu_max: %u\n",
+ _("Supports (experimental) DIO atomic writes"),
+ awu_min, awu_max);
+
+ ff->awu_min = awu_min;
+ ff->awu_max = awu_max;
+}
+#else
+# define fuse4fs_configure_atomic_write(...) ((void)0)
+#endif
+
static int fuse4fs_iomap_config_devices(struct fuse4fs *ff)
{
errcode_t err;
@@ -7090,6 +7153,8 @@ static int fuse4fs_iomap_config_devices(struct fuse4fs *ff)
dbg_printf(ff, "%s: registered iomap dev fd=%d iomap_dev=%u\n",
__func__, fd, ff->iomap_dev);
+ fuse4fs_configure_atomic_write(ff, fd);
+
ff->iomap_dev = ret;
return 0;
}
diff --git a/misc/fuse2fs.c b/misc/fuse2fs.c
index f9e8fca096ec2c..fe45ffa86823b0 100644
--- a/misc/fuse2fs.c
+++ b/misc/fuse2fs.c
@@ -280,6 +280,9 @@ struct fuse2fs {
void (*old_alloc_stats)(ext2_filsys fs, blk64_t blk, int inuse);
void (*old_alloc_stats_range)(ext2_filsys fs, blk64_t blk, blk_t num,
int inuse);
+#ifdef STATX_WRITE_ATOMIC
+ unsigned int awu_min, awu_max;
+#endif
#endif
unsigned int blockmask;
unsigned long offset;
@@ -750,10 +753,22 @@ static inline bool fuse2fs_can_iomap(const struct fuse2fs *ff)
{
return ff->iomap_cap & FUSE_IOMAP_SUPPORT_FILEIO;
}
+
+static inline bool fuse2fs_iomap_supports_hw_atomic(const struct fuse2fs *ff)
+{
+ return fuse2fs_iomap_enabled(ff) &&
+ (ff->iomap_cap & FUSE_IOMAP_SUPPORT_ATOMIC) &&
+#ifdef STATX_WRITE_ATOMIC
+ ff->awu_max > 0 && ff->awu_min > 0;
+#else
+ 0;
+#endif
+}
#else
# define fuse2fs_iomap_enabled(...) (0)
# define fuse2fs_discover_iomap(...) ((void)0)
# define fuse2fs_can_iomap(...) (false)
+# define fuse2fs_iomap_supports_hw_atomic(...) (0)
#endif
static inline void fuse2fs_dump_extents(struct fuse2fs *ff, ext2_ino_t ino,
@@ -1995,14 +2010,19 @@ static int op_getattr(const char *path, struct stat *statbuf,
static int op_getattr_iflags(const char *path, struct stat *statbuf,
unsigned int *iflags, struct fuse_file_info *fi)
{
+ struct fuse2fs *ff = fuse2fs_get();
int ret = op_getattr(path, statbuf, fi);
if (ret)
return ret;
- if (fuse_fs_can_enable_iomap(statbuf))
+ if (fuse_fs_can_enable_iomap(statbuf)) {
*iflags |= FUSE_IFLAG_IOMAP | FUSE_IFLAG_EXCLUSIVE;
+ if (fuse2fs_iomap_supports_hw_atomic(ff))
+ *iflags |= FUSE_IFLAG_ATOMIC;
+ }
+
return 0;
}
#endif
@@ -2111,6 +2131,16 @@ static int fuse2fs_statx(struct fuse2fs *ff, ext2_ino_t ino, int statx_mask,
fuse2fs_statx_directio(ff, stx);
+#ifdef STATX_WRITE_ATOMIC
+ if (fuse_fs_can_enable_iomapx(stx) &&
+ fuse2fs_iomap_supports_hw_atomic(ff)) {
+ stx->stx_mask |= STATX_WRITE_ATOMIC;
+ stx->stx_atomic_write_unit_min = ff->awu_min;
+ stx->stx_atomic_write_unit_max = ff->awu_max;
+ stx->stx_atomic_write_segments_max = 1;
+ }
+#endif
+
return 0;
}
@@ -6220,6 +6250,9 @@ static int op_iomap_begin(const char *path, uint64_t nodeid, uint64_t attr_ino,
}
}
+ if (opflags & FUSE_IOMAP_OP_ATOMIC)
+ read->flags |= FUSE_IOMAP_F_ATOMIC_BIO;
+
out_unlock:
fuse2fs_finish(ff, ret);
return ret;
@@ -6381,6 +6414,38 @@ static int fuse2fs_set_bdev_blocksize(struct fuse2fs *ff, int fd)
return -EIO;
}
+#ifdef STATX_WRITE_ATOMIC
+static void fuse2fs_configure_atomic_write(struct fuse2fs *ff, int bdev_fd)
+{
+ struct statx devx;
+ unsigned int awu_min, awu_max;
+ int ret;
+
+ if (!ext2fs_has_feature_extents(ff->fs->super))
+ return;
+
+ ret = statx(bdev_fd, "", AT_EMPTY_PATH, STATX_WRITE_ATOMIC, &devx);
+ if (ret)
+ return;
+ if (!(devx.stx_mask & STATX_WRITE_ATOMIC))
+ return;
+
+ awu_min = max(ff->fs->blocksize, devx.stx_atomic_write_unit_min);
+ awu_max = min(ff->fs->blocksize, devx.stx_atomic_write_unit_max);
+ if (awu_min > awu_max)
+ return;
+
+ log_printf(ff, "%s awu_min: %u, awu_max: %u\n",
+ _("Supports (experimental) DIO atomic writes"),
+ awu_min, awu_max);
+
+ ff->awu_min = awu_min;
+ ff->awu_max = awu_max;
+}
+#else
+# define fuse2fs_configure_atomic_write(...) ((void)0)
+#endif
+
static int fuse2fs_iomap_config_devices(struct fuse2fs *ff)
{
errcode_t err;
@@ -6405,6 +6470,8 @@ static int fuse2fs_iomap_config_devices(struct fuse2fs *ff)
dbg_printf(ff, "%s: registered iomap dev fd=%d iomap_dev=%u\n",
__func__, fd, ff->iomap_dev);
+ fuse2fs_configure_atomic_write(ff, fd);
+
ff->iomap_dev = ret;
return 0;
}
^ permalink raw reply related [flat|nested] 50+ messages in thread* [PATCH 18/19] fuse4fs: disable fs reclaim and write throttling
2026-04-29 14:20 ` [PATCHSET v8 2/6] fuse2fs: use fuse iomap data paths for better file I/O performance Darrick J. Wong
` (16 preceding siblings ...)
2026-04-29 14:57 ` [PATCH 17/19] fuse2fs: enable atomic writes Darrick J. Wong
@ 2026-04-29 14:57 ` Darrick J. Wong
2026-04-29 14:57 ` [PATCH 19/19] fuse2fs: implement freeze and shutdown requests Darrick J. Wong
18 siblings, 0 replies; 50+ messages in thread
From: Darrick J. Wong @ 2026-04-29 14:57 UTC (permalink / raw)
To: tytso
Cc: bernd, miklos, linux-ext4, neal, linux-fsdevel, fuse-devel,
joannelkoong
From: Darrick J. Wong <djwong@kernel.org>
Ask the kernel if we can disable fs reclaim and write throttling.
Disabling fs reclaim prevents livelocks where the fuse server can
allocate memory, fault into the kernel, and then the allocation tries to
initiate writeback by calling back into the same fuse server.
Disabling BDI write throttling means that writeback won't be throttled
by metadata writes to the filesystem.
Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
---
fuse4fs/fuse4fs.c | 36 ++++++++++++++++++++++++++++++++++--
1 file changed, 34 insertions(+), 2 deletions(-)
diff --git a/fuse4fs/fuse4fs.c b/fuse4fs/fuse4fs.c
index 49708bdf7b655d..6ea2d30772ae5a 100644
--- a/fuse4fs/fuse4fs.c
+++ b/fuse4fs/fuse4fs.c
@@ -7969,6 +7969,19 @@ static void try_set_io_flusher(struct fuse4fs *ff)
#endif
}
+/* Undo try_set_io_flusher */
+static void try_clear_io_flusher(struct fuse4fs *ff)
+{
+#ifdef HAVE_PR_SET_IO_FLUSHER
+ /*
+ * zero ret means it's already clear, negative means we can't even
+ * look at the value so don't bother clearing it
+ */
+ if (prctl(PR_GET_IO_FLUSHER, 0, 0, 0, 0) > 0)
+ prctl(PR_SET_IO_FLUSHER, 0, 0, 0, 0);
+#endif
+}
+
/* Try to adjust the OOM score so that we don't get killed */
static void try_adjust_oom_score(struct fuse4fs *ff)
{
@@ -8022,6 +8035,23 @@ static int fuse4fs_event_loop(struct fuse4fs *ff,
struct fuse_loop_config *loop_config,
const struct fuse_cmdline_opts *opts)
{
+ bool clear_io_flusher = false;
+ int ret;
+
+ /*
+ * Try to set ourselves up with fs reclaim disabled to prevent
+ * recursive reclaim and throttling. This must be done before starting
+ * the worker threads so that they inherit the process flags.
+ */
+ ret = fuse_lowlevel_disable_fsreclaim(ff->fuse, 1);
+ if (ret) {
+ err_printf(ff, "%s: %s.\n",
+ _("Could not register as FS flusher thread"),
+ strerror(-ret));
+ try_set_io_flusher(ff);
+ clear_io_flusher = true;
+ }
+
/*
* Since there's a Big Kernel Lock around all the libext2fs code, we
* only need to start four threads -- one to decode a request, another
@@ -8032,7 +8062,10 @@ static int fuse4fs_event_loop(struct fuse4fs *ff,
fuse_loop_cfg_set_idle_threads(loop_config, opts->max_idle_threads);
fuse_loop_cfg_set_max_threads(loop_config, 4);
- return fuse_session_loop_mt(ff->fuse, loop_config) == 0 ? 0 : 8;
+ ret = fuse_session_loop_mt(ff->fuse, loop_config) == 0 ? 0 : 8;
+ if (clear_io_flusher)
+ try_clear_io_flusher(ff);
+ return ret;
}
#ifdef HAVE_FUSE4FS_SERVICE
@@ -8251,7 +8284,6 @@ int main(int argc, char *argv[])
}
}
- try_set_io_flusher(&fctx);
try_adjust_oom_score(&fctx);
/* Will we allow users to allocate every last block? */
^ permalink raw reply related [flat|nested] 50+ messages in thread* [PATCH 19/19] fuse2fs: implement freeze and shutdown requests
2026-04-29 14:20 ` [PATCHSET v8 2/6] fuse2fs: use fuse iomap data paths for better file I/O performance Darrick J. Wong
` (17 preceding siblings ...)
2026-04-29 14:57 ` [PATCH 18/19] fuse4fs: disable fs reclaim and write throttling Darrick J. Wong
@ 2026-04-29 14:57 ` Darrick J. Wong
18 siblings, 0 replies; 50+ messages in thread
From: Darrick J. Wong @ 2026-04-29 14:57 UTC (permalink / raw)
To: tytso
Cc: bernd, miklos, linux-ext4, neal, linux-fsdevel, fuse-devel,
joannelkoong
From: Darrick J. Wong <djwong@kernel.org>
Handle freezing and shutting down the filesystem if requested.
Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
---
fuse4fs/fuse4fs.c | 91 +++++++++++++++++++++++++++++++++++++++++++++++++++++
misc/fuse2fs.c | 84 +++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 175 insertions(+)
diff --git a/fuse4fs/fuse4fs.c b/fuse4fs/fuse4fs.c
index 6ea2d30772ae5a..46f81e2066b044 100644
--- a/fuse4fs/fuse4fs.c
+++ b/fuse4fs/fuse4fs.c
@@ -240,6 +240,7 @@ struct fuse4fs_file_handle {
enum fuse4fs_opstate {
F4OP_READONLY,
+ F4OP_WRITABLE_FROZEN,
F4OP_WRITABLE,
F4OP_SHUTDOWN,
};
@@ -6388,6 +6389,91 @@ static void op_fallocate(fuse_req_t req, fuse_ino_t fino EXT2FS_ATTR((unused)),
}
#endif /* SUPPORT_FALLOCATE */
+#if FUSE_VERSION >= FUSE_MAKE_VERSION(3, 99)
+static void op_freezefs(fuse_req_t req, fuse_ino_t ino, uint64_t unlinked)
+{
+ struct fuse4fs *ff = fuse4fs_get(req);
+ ext2_filsys fs;
+ errcode_t err;
+ int ret = 0;
+
+ FUSE4FS_CHECK_CONTEXT(req);
+ fs = fuse4fs_start(ff);
+
+ if (ff->opstate == F4OP_WRITABLE) {
+ if (fs->super->s_error_count)
+ fs->super->s_state |= EXT2_ERROR_FS;
+ else if (!unlinked)
+ fs->super->s_state |= EXT2_VALID_FS;
+ ext2fs_mark_super_dirty(fs);
+ err = ext2fs_set_gdt_csum(fs);
+ if (err) {
+ ret = translate_error(fs, 0, err);
+ goto out_unlock;
+ }
+
+ err = ext2fs_flush2(fs, 0);
+ if (err) {
+ ret = translate_error(fs, 0, err);
+ goto out_unlock;
+ }
+
+ ff->opstate = F4OP_WRITABLE_FROZEN;
+ }
+
+out_unlock:
+ fs->super->s_state &= ~EXT2_VALID_FS;
+ fuse4fs_finish(ff, ret);
+ fuse_reply_err(req, -ret);
+}
+
+static void op_unfreezefs(fuse_req_t req, fuse_ino_t ino)
+{
+ struct fuse4fs *ff = fuse4fs_get(req);
+ ext2_filsys fs;
+ errcode_t err;
+ int ret = 0;
+
+ FUSE4FS_CHECK_CONTEXT(req);
+ fs = fuse4fs_start(ff);
+
+ if (ff->opstate == F4OP_WRITABLE_FROZEN) {
+ if (fs->super->s_error_count)
+ fs->super->s_state |= EXT2_ERROR_FS;
+ fs->super->s_state &= ~EXT2_VALID_FS;
+ ext2fs_mark_super_dirty(fs);
+ err = ext2fs_set_gdt_csum(fs);
+ if (err) {
+ ret = translate_error(fs, 0, err);
+ goto out_unlock;
+ }
+
+ err = ext2fs_flush2(fs, 0);
+ if (err) {
+ ret = translate_error(fs, 0, err);
+ goto out_unlock;
+ }
+
+ ff->opstate = F4OP_WRITABLE;
+ }
+
+out_unlock:
+ fuse4fs_finish(ff, ret);
+ fuse_reply_err(req, -ret);
+}
+
+static void op_shutdownfs(fuse_req_t req, fuse_ino_t ino, uint64_t flags)
+{
+ const struct fuse_ctx *ctxt = fuse_req_ctx(req);
+ struct fuse4fs *ff = fuse4fs_get(req);
+ int ret;
+
+ ret = ioctl_shutdown(ff, ctxt, NULL, NULL, 0);
+
+ fuse_reply_err(req, -ret);
+}
+#endif
+
#ifdef HAVE_FUSE_IOMAP
static void fuse4fs_iomap_hole(struct fuse4fs *ff, struct fuse_file_iomap *iomap,
off_t pos, uint64_t count)
@@ -7685,6 +7771,11 @@ static struct fuse_lowlevel_ops fs_ops = {
#if FUSE_VERSION >= FUSE_MAKE_VERSION(3, 18)
.statx = op_statx,
#endif
+#if FUSE_VERSION >= FUSE_MAKE_VERSION(3, 99)
+ .freezefs = op_freezefs,
+ .unfreezefs = op_unfreezefs,
+ .shutdownfs = op_shutdownfs,
+#endif
#ifdef HAVE_FUSE_IOMAP
.iomap_begin = op_iomap_begin,
.iomap_end = op_iomap_end,
diff --git a/misc/fuse2fs.c b/misc/fuse2fs.c
index fe45ffa86823b0..16b010fd28d4b5 100644
--- a/misc/fuse2fs.c
+++ b/misc/fuse2fs.c
@@ -223,6 +223,7 @@ struct fuse2fs_file_handle {
enum fuse2fs_opstate {
F2OP_READONLY,
+ F2OP_WRITABLE_FROZEN,
F2OP_WRITABLE,
F2OP_SHUTDOWN,
};
@@ -5709,6 +5710,86 @@ static int op_fallocate(const char *path EXT2FS_ATTR((unused)), int mode,
}
#endif /* SUPPORT_FALLOCATE */
+#if FUSE_VERSION >= FUSE_MAKE_VERSION(3, 99)
+static int op_freezefs(const char *path, uint64_t unlinked)
+{
+ struct fuse2fs *ff = fuse2fs_get();
+ ext2_filsys fs;
+ errcode_t err;
+ int ret = 0;
+
+ FUSE2FS_CHECK_CONTEXT(ff);
+ fs = fuse2fs_start(ff);
+
+ if (ff->opstate == F2OP_WRITABLE) {
+ if (fs->super->s_error_count)
+ fs->super->s_state |= EXT2_ERROR_FS;
+ else if (!unlinked)
+ fs->super->s_state |= EXT2_VALID_FS;
+ ext2fs_mark_super_dirty(fs);
+ err = ext2fs_set_gdt_csum(fs);
+ if (err) {
+ ret = translate_error(fs, 0, err);
+ goto out_unlock;
+ }
+
+ err = ext2fs_flush2(fs, 0);
+ if (err) {
+ ret = translate_error(fs, 0, err);
+ goto out_unlock;
+ }
+
+ ff->opstate = F2OP_WRITABLE_FROZEN;
+ }
+
+out_unlock:
+ fs->super->s_state &= ~EXT2_VALID_FS;
+ fuse2fs_finish(ff, ret);
+ return ret;
+}
+
+static int op_unfreezefs(const char *path)
+{
+ struct fuse2fs *ff = fuse2fs_get();
+ ext2_filsys fs;
+ errcode_t err;
+ int ret = 0;
+
+ FUSE2FS_CHECK_CONTEXT(ff);
+ fs = fuse2fs_start(ff);
+
+ if (ff->opstate == F2OP_WRITABLE_FROZEN) { /* nothing to do unless a freeze is in effect */
+ if (fs->super->s_error_count)
+ fs->super->s_state |= EXT2_ERROR_FS;
+ fs->super->s_state &= ~EXT2_VALID_FS; /* unconditional, as in fuse4fs op_unfreezefs */
+ ext2fs_mark_super_dirty(fs);
+ err = ext2fs_set_gdt_csum(fs);
+ if (err) {
+ ret = translate_error(fs, 0, err);
+ goto out_unlock;
+ }
+ err = ext2fs_flush2(fs, 0);
+ if (err) {
+ ret = translate_error(fs, 0, err);
+ goto out_unlock;
+ }
+
+ ff->opstate = F2OP_WRITABLE; /* only reached when the flush succeeded */
+ }
+
+out_unlock:
+ fuse2fs_finish(ff, ret);
+ return ret;
+}
+
+static int op_shutdownfs(const char *path, uint64_t flags)
+{
+ struct fuse2fs *ff = fuse2fs_get();
+
+ return ioctl_shutdown(ff, NULL, NULL);
+}
+#endif
+
#ifdef HAVE_FUSE_IOMAP
static void fuse2fs_iomap_hole(struct fuse2fs *ff, struct fuse_file_iomap *iomap,
off_t pos, uint64_t count)
@@ -6993,6 +7074,9 @@ static struct fuse_operations fs_ops = {
#endif
#if FUSE_VERSION >= FUSE_MAKE_VERSION(3, 99)
.getattr_iflags = op_getattr_iflags,
+ .freezefs = op_freezefs,
+ .unfreezefs = op_unfreezefs,
+ .shutdownfs = op_shutdownfs,
#endif
#ifdef HAVE_FUSE_IOMAP
.iomap_begin = op_iomap_begin,
^ permalink raw reply related [flat|nested] 50+ messages in thread