From: "Darrick J. Wong" <djwong@kernel.org>
To: tytso@mit.edu
Cc: John@groves.net, bernd@bsbernd.com,
linux-fsdevel@vger.kernel.org, linux-ext4@vger.kernel.org,
miklos@szeredi.hu, joannelkoong@gmail.com, neal@gompa.dev
Subject: [PATCH 03/19] fuse2fs: implement iomap configuration
Date: Wed, 20 Aug 2025 18:16:29 -0700 [thread overview]
Message-ID: <175573713783.21970.2762866490407826816.stgit@frogsfrogsfrogs> (raw)
In-Reply-To: <175573713645.21970.9783397720493472605.stgit@frogsfrogsfrogs>
From: Darrick J. Wong <djwong@kernel.org>
Upload the filesystem geometry to the kernel when asked.
Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
---
misc/fuse2fs.c | 96 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--
misc/fuse4fs.c | 96 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--
2 files changed, 186 insertions(+), 6 deletions(-)
diff --git a/misc/fuse2fs.c b/misc/fuse2fs.c
index c63acd7a0ed155..5b17aadc006560 100644
--- a/misc/fuse2fs.c
+++ b/misc/fuse2fs.c
@@ -201,6 +201,10 @@ static inline uint64_t round_down(uint64_t b, unsigned int align)
# define FL_ZERO_RANGE_FLAG (0)
#endif
+#ifndef NSEC_PER_SEC
+# define NSEC_PER_SEC (1000000000L) /* nanoseconds in one second */
+#endif
+
errcode_t ext2fs_run_ext3_journal(ext2_filsys *fs);
const char *err_shortdev;
@@ -655,9 +659,9 @@ static int update_atime(ext2_filsys fs, ext2_ino_t ino)
EXT4_INODE_GET_XTIME(i_mtime, &mtime, pinode);
get_now(&now);
- datime = atime.tv_sec + ((double)atime.tv_nsec / 1000000000);
- dmtime = mtime.tv_sec + ((double)mtime.tv_nsec / 1000000000);
- dnow = now.tv_sec + ((double)now.tv_nsec / 1000000000);
+ datime = atime.tv_sec + ((double)atime.tv_nsec / NSEC_PER_SEC);
+ dmtime = mtime.tv_sec + ((double)mtime.tv_nsec / NSEC_PER_SEC);
+ dnow = now.tv_sec + ((double)now.tv_nsec / NSEC_PER_SEC);
/*
* If atime is newer than mtime and atime hasn't been updated in thirty
@@ -5440,6 +5444,91 @@ static int op_iomap_end(const char *path, uint64_t nodeid, uint64_t attr_ino,
return 0;
}
+
+/*
+ * Maximal extent format file size.
+ *
+ * The resulting logical blkno at s_maxbytes must fit in our on-disk
+ * extent format containers, within a sector_t, and within i_blocks
+ * in the vfs. The ext4 inode has 48 bits of i_block in fsblock units,
+ * so that won't be a limiting factor.
+ *
+ * However, there is another limiting factor. We store extents in the form
+ * of starting block and length, hence the resulting length of the extent
+ * covering the maximum file size must fit into the on-disk format
+ * containers as well. Given that the length is always one unit bigger than
+ * the max unit (because we count 0 as well), we have to lower s_maxbytes
+ * by one fs block.
+ *
+ * Note, this does *not* consider any metadata overhead for vfs i_blocks.
+ *
+ * ff supplies the superblock (ff->fs->super) and ff->blocklog, which is
+ * used as a shift count (presumably log2 of the fs block size).
+ * upper_limit is the cap passed in by the caller. The return value is
+ * ((2^32 - 1) << ff->blocklog), lowered to the effective cap if that is
+ * smaller.
+ *
+ * NOTE(review): when the huge_file feature is absent, upper_limit is
+ * overwritten rather than clamped, so the caller's value is ignored in
+ * that case -- confirm this is intended.
+ */
+static off_t fuse2fs_max_size(struct fuse2fs *ff, off_t upper_limit)
+{
+ off_t res;
+
+ if (!ext2fs_has_feature_huge_file(ff->fs->super)) {
+ upper_limit = (1LL << 32) - 1;
+
+ /* total blocks in file system block size */
+ upper_limit >>= (ff->blocklog - 9);
+ upper_limit <<= ff->blocklog;
+ }
+
+ /*
+ * 32-bit extent-start container, ee_block. We lower the maxbytes
+ * by one fs block, so ee_len can cover the extent of maximum file
+ * size
+ */
+ res = (1LL << 32) - 1;
+ res <<= ff->blocklog;
+
+ /* Sanity check against vm- & vfs- imposed limits */
+ if (res > upper_limit)
+ res = upper_limit;
+
+ return res;
+}
+
+static int op_iomap_config(uint64_t flags, off_t maxbytes,
+ struct fuse_iomap_config *cfg)
+{
+ struct fuse2fs *ff = fuse2fs_get();
+ ext2_filsys fs;
+
+ FUSE2FS_CHECK_CONTEXT(ff);
+
+ dbg_printf(ff, "%s: flags=0x%llx maxbytes=0x%llx\n", __func__,
+ (unsigned long long)flags,
+ (unsigned long long)maxbytes); /* flags is only logged, never acted on */
+ fs = fuse2fs_start(ff);
+
+ cfg->flags |= FUSE_IOMAP_CONFIG_UUID; /* report the superblock UUID */
+ memcpy(cfg->s_uuid, fs->super->s_uuid, sizeof(cfg->s_uuid));
+ cfg->s_uuid_len = sizeof(fs->super->s_uuid); /* NOTE(review): the copy
+ * above is sized by cfg->s_uuid while this length is sized by the
+ * superblock field; confirm the two arrays are the same size.
+ */
+
+ cfg->flags |= FUSE_IOMAP_CONFIG_BLOCKSIZE;
+ cfg->s_blocksize = FUSE2FS_FSB_TO_B(ff, 1); /* presumably bytes in one fs block */
+
+ /*
+ * If the inode is large enough to house i_[acm]time_extra then we
+ * can turn on nanosecond timestamps; i_crtime was the next field added
+ * after i_atime_extra. Otherwise report a granularity of one second
+ * and the non-extra timestamp maximum.
+ */
+ cfg->flags |= FUSE_IOMAP_CONFIG_TIME;
+ if (fs->super->s_inode_size >=
+ offsetof(struct ext2_inode_large, i_crtime)) {
+ cfg->s_time_gran = 1;
+ cfg->s_time_max = EXT4_EXTRA_TIMESTAMP_MAX;
+ } else {
+ cfg->s_time_gran = NSEC_PER_SEC;
+ cfg->s_time_max = EXT4_NON_EXTRA_TIMESTAMP_MAX;
+ }
+ cfg->s_time_min = EXT4_TIMESTAMP_MIN;
+
+ cfg->flags |= FUSE_IOMAP_CONFIG_MAXBYTES;
+ cfg->s_maxbytes = fuse2fs_max_size(ff, maxbytes); /* maxbytes is passed as the upper limit */
+
+ fuse2fs_finish(ff, 0);
+ return 0;
+}
#endif /* HAVE_FUSE_IOMAP */
static struct fuse_operations fs_ops = {
@@ -5505,6 +5594,7 @@ static struct fuse_operations fs_ops = {
#ifdef HAVE_FUSE_IOMAP
.iomap_begin = op_iomap_begin,
.iomap_end = op_iomap_end,
+ .iomap_config = op_iomap_config,
#endif /* HAVE_FUSE_IOMAP */
};
diff --git a/misc/fuse4fs.c b/misc/fuse4fs.c
index 2bc25ff37055d5..5876af19387c96 100644
--- a/misc/fuse4fs.c
+++ b/misc/fuse4fs.c
@@ -196,6 +196,10 @@ static inline uint64_t round_down(uint64_t b, unsigned int align)
# define FL_ZERO_RANGE_FLAG (0)
#endif
+#ifndef NSEC_PER_SEC
+# define NSEC_PER_SEC (1000000000L) /* nanoseconds in one second */
+#endif
+
errcode_t ext2fs_run_ext3_journal(ext2_filsys *fs);
const char *err_shortdev;
@@ -808,9 +812,9 @@ static int update_atime(ext2_filsys fs, ext2_ino_t ino)
EXT4_INODE_GET_XTIME(i_mtime, &mtime, pinode);
get_now(&now);
- datime = atime.tv_sec + ((double)atime.tv_nsec / 1000000000);
- dmtime = mtime.tv_sec + ((double)mtime.tv_nsec / 1000000000);
- dnow = now.tv_sec + ((double)now.tv_nsec / 1000000000);
+ datime = atime.tv_sec + ((double)atime.tv_nsec / NSEC_PER_SEC);
+ dmtime = mtime.tv_sec + ((double)mtime.tv_nsec / NSEC_PER_SEC);
+ dnow = now.tv_sec + ((double)now.tv_nsec / NSEC_PER_SEC);
/*
* If atime is newer than mtime and atime hasn't been updated in thirty
@@ -5850,6 +5854,91 @@ static void op_iomap_end(fuse_req_t req, fuse_ino_t fino, uint64_t dontcare,
fuse_reply_err(req, 0);
}
+
+/*
+ * Maximal extent format file size.
+ *
+ * The resulting logical blkno at s_maxbytes must fit in our on-disk
+ * extent format containers, within a sector_t, and within i_blocks
+ * in the vfs. The ext4 inode has 48 bits of i_block in fsblock units,
+ * so that won't be a limiting factor.
+ *
+ * However, there is another limiting factor. We store extents in the form
+ * of starting block and length, hence the resulting length of the extent
+ * covering the maximum file size must fit into the on-disk format
+ * containers as well. Given that the length is always one unit bigger than
+ * the max unit (because we count 0 as well), we have to lower s_maxbytes
+ * by one fs block.
+ *
+ * Note, this does *not* consider any metadata overhead for vfs i_blocks.
+ *
+ * ff supplies the superblock (ff->fs->super) and ff->blocklog, which is
+ * used as a shift count (presumably log2 of the fs block size).
+ * upper_limit is the cap passed in by the caller. The return value is
+ * ((2^32 - 1) << ff->blocklog), lowered to the effective cap if that is
+ * smaller.
+ *
+ * NOTE(review): when the huge_file feature is absent, upper_limit is
+ * overwritten rather than clamped, so the caller's value is ignored in
+ * that case -- confirm this is intended.
+ */
+static off_t fuse4fs_max_size(struct fuse4fs *ff, off_t upper_limit)
+{
+ off_t res;
+
+ if (!ext2fs_has_feature_huge_file(ff->fs->super)) {
+ upper_limit = (1LL << 32) - 1;
+
+ /* total blocks in file system block size */
+ upper_limit >>= (ff->blocklog - 9);
+ upper_limit <<= ff->blocklog;
+ }
+
+ /*
+ * 32-bit extent-start container, ee_block. We lower the maxbytes
+ * by one fs block, so ee_len can cover the extent of maximum file
+ * size
+ */
+ res = (1LL << 32) - 1;
+ res <<= ff->blocklog;
+
+ /* Sanity check against vm- & vfs- imposed limits */
+ if (res > upper_limit)
+ res = upper_limit;
+
+ return res;
+}
+
+static void op_iomap_config(fuse_req_t req, uint64_t flags, uint64_t maxbytes)
+{
+ struct fuse_iomap_config cfg = { }; /* reply starts out zero-initialized */
+ struct fuse4fs *ff = fuse4fs_get(req);
+ ext2_filsys fs;
+
+ FUSE4FS_CHECK_CONTEXT(req);
+
+ dbg_printf(ff, "%s: flags=0x%llx maxbytes=0x%llx\n", __func__,
+ (unsigned long long)flags,
+ (unsigned long long)maxbytes); /* flags is only logged, never acted on */
+ fs = fuse4fs_start(ff);
+
+ cfg.flags |= FUSE_IOMAP_CONFIG_UUID; /* report the superblock UUID */
+ memcpy(cfg.s_uuid, fs->super->s_uuid, sizeof(cfg.s_uuid));
+ cfg.s_uuid_len = sizeof(fs->super->s_uuid); /* NOTE(review): the copy
+ * above is sized by cfg.s_uuid while this length is sized by the
+ * superblock field; confirm the two arrays are the same size.
+ */
+
+ cfg.flags |= FUSE_IOMAP_CONFIG_BLOCKSIZE;
+ cfg.s_blocksize = FUSE4FS_FSB_TO_B(ff, 1); /* presumably bytes in one fs block */
+
+ /*
+ * If the inode is large enough to house i_[acm]time_extra then we
+ * can turn on nanosecond timestamps; i_crtime was the next field added
+ * after i_atime_extra. Otherwise report a granularity of one second
+ * and the non-extra timestamp maximum.
+ */
+ cfg.flags |= FUSE_IOMAP_CONFIG_TIME;
+ if (fs->super->s_inode_size >=
+ offsetof(struct ext2_inode_large, i_crtime)) {
+ cfg.s_time_gran = 1;
+ cfg.s_time_max = EXT4_EXTRA_TIMESTAMP_MAX;
+ } else {
+ cfg.s_time_gran = NSEC_PER_SEC;
+ cfg.s_time_max = EXT4_NON_EXTRA_TIMESTAMP_MAX;
+ }
+ cfg.s_time_min = EXT4_TIMESTAMP_MIN;
+
+ cfg.flags |= FUSE_IOMAP_CONFIG_MAXBYTES;
+ cfg.s_maxbytes = fuse4fs_max_size(ff, maxbytes); /* maxbytes is passed as the upper limit */
+
+ fuse4fs_finish(ff, 0);
+ fuse_reply_iomap_config(req, &cfg); /* send the filled-in config back for this request */
+}
#endif /* HAVE_FUSE_IOMAP */
static struct fuse_lowlevel_ops fs_ops = {
@@ -5898,6 +5987,7 @@ static struct fuse_lowlevel_ops fs_ops = {
#ifdef HAVE_FUSE_IOMAP
.iomap_begin = op_iomap_begin,
.iomap_end = op_iomap_end,
+ .iomap_config = op_iomap_config,
#endif /* HAVE_FUSE_IOMAP */
};
next prev parent reply other threads:[~2025-08-21 1:16 UTC|newest]
Thread overview: 74+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-08-21 0:37 [RFC v4] fuse: use fs-iomap for better performance so we can containerize ext4 Darrick J. Wong
2025-08-21 0:49 ` [PATCHSET RFC v4 1/6] fuse4fs: fork a low level fuse server Darrick J. Wong
2025-08-21 1:08 ` [PATCH 01/20] fuse2fs: port fuse2fs to lowlevel libfuse API Darrick J. Wong
2025-08-21 1:08 ` [PATCH 02/20] fuse4fs: drop fuse 2.x support code Darrick J. Wong
2025-08-21 1:08 ` [PATCH 03/20] fuse4fs: namespace some helpers Darrick J. Wong
2025-08-21 1:08 ` [PATCH 04/20] fuse4fs: convert to low level API Darrick J. Wong
2025-08-21 1:09 ` [PATCH 05/20] libsupport: port the kernel list.h to libsupport Darrick J. Wong
2025-08-21 1:09 ` [PATCH 06/20] libsupport: add a cache Darrick J. Wong
2025-08-21 1:09 ` [PATCH 07/20] cache: disable debugging Darrick J. Wong
2025-08-21 1:09 ` [PATCH 08/20] cache: use modern list iterator macros Darrick J. Wong
2025-08-21 1:10 ` [PATCH 09/20] cache: embed struct cache in the owner Darrick J. Wong
2025-08-21 1:10 ` [PATCH 10/20] cache: pass cache pointer to callbacks Darrick J. Wong
2025-08-21 1:10 ` [PATCH 11/20] cache: pass a private data pointer through cache_walk Darrick J. Wong
2025-08-21 1:11 ` [PATCH 12/20] cache: add a helper to grab a new refcount for a cache_node Darrick J. Wong
2025-08-21 1:11 ` [PATCH 13/20] cache: return results of a cache flush Darrick J. Wong
2025-08-21 1:11 ` [PATCH 14/20] cache: add a "get only if incore" flag to cache_node_get Darrick J. Wong
2025-08-21 1:11 ` [PATCH 15/20] cache: support gradual expansion Darrick J. Wong
2025-08-21 1:12 ` [PATCH 16/20] cache: implement automatic shrinking Darrick J. Wong
2025-08-21 1:12 ` [PATCH 17/20] fuse4fs: add cache to track open files Darrick J. Wong
2025-08-21 1:12 ` [PATCH 18/20] fuse4fs: use the orphaned inode list Darrick J. Wong
2025-08-21 1:12 ` [PATCH 19/20] fuse4fs: implement FUSE_TMPFILE Darrick J. Wong
2025-08-21 1:13 ` [PATCH 20/20] fuse4fs: create incore reverse orphan list Darrick J. Wong
2025-08-21 0:49 ` [PATCHSET RFC v4 2/6] libext2fs: refactoring for fuse2fs iomap support Darrick J. Wong
2025-08-21 1:13 ` [PATCH 01/10] libext2fs: make it possible to extract the fd from an IO manager Darrick J. Wong
2025-08-21 1:13 ` [PATCH 02/10] libext2fs: always fsync the device when flushing the cache Darrick J. Wong
2025-08-21 1:13 ` [PATCH 03/10] libext2fs: always fsync the device when closing the unix IO manager Darrick J. Wong
2025-08-21 1:14 ` [PATCH 04/10] libext2fs: only fsync the unix fd if we wrote to the device Darrick J. Wong
2025-08-21 1:14 ` [PATCH 05/10] libext2fs: invalidate cached blocks when freeing them Darrick J. Wong
2025-08-21 1:14 ` [PATCH 06/10] libext2fs: only flush affected blocks in unix_write_byte Darrick J. Wong
2025-08-21 1:14 ` [PATCH 07/10] libext2fs: allow unix_write_byte when the write would be aligned Darrick J. Wong
2025-08-21 1:15 ` [PATCH 08/10] libext2fs: allow clients to ask to write full superblocks Darrick J. Wong
2025-08-21 1:15 ` [PATCH 09/10] libext2fs: allow callers to disallow I/O to file data blocks Darrick J. Wong
2025-08-21 1:15 ` [PATCH 10/10] libext2fs: add posix advisory locking to the unix IO manager Darrick J. Wong
2025-08-21 0:49 ` [PATCHSET RFC v4 3/6] fuse2fs: use fuse iomap data paths for better file I/O performance Darrick J. Wong
2025-08-21 1:15 ` [PATCH 01/19] fuse2fs: implement bare minimum iomap for file mapping reporting Darrick J. Wong
2025-08-21 1:16 ` [PATCH 02/19] fuse2fs: add iomap= mount option Darrick J. Wong
2025-08-21 1:16 ` Darrick J. Wong [this message]
2025-08-21 1:16 ` [PATCH 04/19] fuse2fs: register block devices for use with iomap Darrick J. Wong
2025-08-21 1:17 ` [PATCH 05/19] fuse2fs: implement directio file reads Darrick J. Wong
2025-08-21 1:17 ` [PATCH 06/19] fuse2fs: add extent dump function for debugging Darrick J. Wong
2025-08-21 1:17 ` [PATCH 07/19] fuse2fs: implement direct write support Darrick J. Wong
2025-08-21 1:17 ` [PATCH 08/19] fuse2fs: turn on iomap for pagecache IO Darrick J. Wong
2025-08-21 1:18 ` [PATCH 09/19] fuse2fs: don't zero bytes in punch hole Darrick J. Wong
2025-08-21 1:18 ` [PATCH 10/19] fuse2fs: don't do file data block IO when iomap is enabled Darrick J. Wong
2025-08-21 1:18 ` [PATCH 11/19] fuse2fs: avoid fuseblk mode if fuse-iomap support is likely Darrick J. Wong
2025-08-21 1:18 ` [PATCH 12/19] fuse2fs: enable file IO to inline data files Darrick J. Wong
2025-08-21 1:19 ` [PATCH 13/19] fuse2fs: set iomap-related inode flags Darrick J. Wong
2025-08-21 1:19 ` [PATCH 14/19] fuse2fs: add strictatime/lazytime mount options Darrick J. Wong
2025-08-21 1:19 ` [PATCH 15/19] fuse2fs: configure block device block size Darrick J. Wong
2025-08-21 1:19 ` [PATCH 16/19] fuse4fs: don't use inode number translation when possible Darrick J. Wong
2025-08-21 1:20 ` [PATCH 17/19] fuse4fs: separate invalidation Darrick J. Wong
2025-08-21 1:20 ` [PATCH 18/19] fuse2fs: implement statx Darrick J. Wong
2025-08-21 1:20 ` [PATCH 19/19] fuse2fs: enable atomic writes Darrick J. Wong
2025-08-21 0:50 ` [PATCHSET RFC v4 4/6] fuse2fs: use fuse iomap data paths for better file I/O performance Darrick J. Wong
2025-08-21 1:20 ` [PATCH 1/2] fuse2fs: enable caching of iomaps Darrick J. Wong
2025-08-21 1:21 ` [PATCH 2/2] fuse2fs: be smarter about caching iomaps Darrick J. Wong
2025-08-21 0:50 ` [PATCHSET RFC v4 5/6] fuse2fs: handle timestamps and ACLs correctly when iomap is enabled Darrick J. Wong
2025-08-21 1:21 ` [PATCH 1/8] fuse2fs: skip permission checking on utimens " Darrick J. Wong
2025-08-21 1:21 ` [PATCH 2/8] fuse2fs: let the kernel tell us about acl/mode updates Darrick J. Wong
2025-08-21 1:21 ` [PATCH 3/8] fuse2fs: better debugging for file mode updates Darrick J. Wong
2025-08-21 1:22 ` [PATCH 4/8] fuse2fs: debug timestamp updates Darrick J. Wong
2025-08-21 1:22 ` [PATCH 5/8] fuse2fs: use coarse timestamps for iomap mode Darrick J. Wong
2025-08-21 1:22 ` [PATCH 6/8] fuse2fs: add tracing for retrieving timestamps Darrick J. Wong
2025-08-21 1:23 ` [PATCH 7/8] fuse2fs: enable syncfs Darrick J. Wong
2025-08-21 1:23 ` [PATCH 8/8] fuse2fs: skip the gdt write in op_destroy if syncfs is working Darrick J. Wong
2025-08-21 0:50 ` [PATCHSET RFC v4 6/6] fuse2fs: improve block and inode caching Darrick J. Wong
2025-08-21 1:23 ` [PATCH 1/6] libsupport: add caching IO manager Darrick J. Wong
2025-08-21 1:23 ` [PATCH 2/6] iocache: add the actual buffer cache Darrick J. Wong
2025-08-21 1:24 ` [PATCH 3/6] iocache: bump buffer mru priority every 50 accesses Darrick J. Wong
2025-08-21 1:24 ` [PATCH 4/6] fuse2fs: enable caching IO manager Darrick J. Wong
2025-08-21 1:24 ` [PATCH 5/6] fuse2fs: increase inode cache size Darrick J. Wong
2025-08-21 1:24 ` [PATCH 6/6] libext2fs: improve caching for inodes Darrick J. Wong
-- strict thread matches above, loose matches on Subject: below --
2026-02-23 23:04 [PATCHSET v7 1/8] fuse2fs: use fuse iomap data paths for better file I/O performance Darrick J. Wong
2026-02-23 23:36 ` [PATCH 03/19] fuse2fs: implement iomap configuration Darrick J. Wong
2026-04-29 14:20 [PATCHSET v8 2/6] fuse2fs: use fuse iomap data paths for better file I/O performance Darrick J. Wong
2026-04-29 14:53 ` [PATCH 03/19] fuse2fs: implement iomap configuration Darrick J. Wong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=175573713783.21970.2762866490407826816.stgit@frogsfrogsfrogs \
--to=djwong@kernel.org \
--cc=John@groves.net \
--cc=bernd@bsbernd.com \
--cc=joannelkoong@gmail.com \
--cc=linux-ext4@vger.kernel.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=miklos@szeredi.hu \
--cc=neal@gompa.dev \
--cc=tytso@mit.edu \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox