public inbox for linux-ext4@vger.kernel.org
 help / color / mirror / Atom feed
From: "Darrick J. Wong" <djwong@kernel.org>
To: tytso@mit.edu
Cc: John@groves.net, bernd@bsbernd.com,
	linux-fsdevel@vger.kernel.org, linux-ext4@vger.kernel.org,
	miklos@szeredi.hu, joannelkoong@gmail.com, neal@gompa.dev
Subject: [PATCH 03/19] fuse2fs: implement iomap configuration
Date: Wed, 20 Aug 2025 18:16:29 -0700	[thread overview]
Message-ID: <175573713783.21970.2762866490407826816.stgit@frogsfrogsfrogs> (raw)
In-Reply-To: <175573713645.21970.9783397720493472605.stgit@frogsfrogsfrogs>

From: Darrick J. Wong <djwong@kernel.org>

Upload the filesystem geometry to the kernel when asked.

Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
---
 misc/fuse2fs.c |   96 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 misc/fuse4fs.c |   96 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 186 insertions(+), 6 deletions(-)


diff --git a/misc/fuse2fs.c b/misc/fuse2fs.c
index c63acd7a0ed155..5b17aadc006560 100644
--- a/misc/fuse2fs.c
+++ b/misc/fuse2fs.c
@@ -201,6 +201,10 @@ static inline uint64_t round_down(uint64_t b, unsigned int align)
 # define FL_ZERO_RANGE_FLAG (0)
 #endif
 
+#ifndef NSEC_PER_SEC
+# define NSEC_PER_SEC	(1000000000L)
+#endif
+
 errcode_t ext2fs_run_ext3_journal(ext2_filsys *fs);
 
 const char *err_shortdev;
@@ -655,9 +659,9 @@ static int update_atime(ext2_filsys fs, ext2_ino_t ino)
 	EXT4_INODE_GET_XTIME(i_mtime, &mtime, pinode);
 	get_now(&now);
 
-	datime = atime.tv_sec + ((double)atime.tv_nsec / 1000000000);
-	dmtime = mtime.tv_sec + ((double)mtime.tv_nsec / 1000000000);
-	dnow = now.tv_sec + ((double)now.tv_nsec / 1000000000);
+	datime = atime.tv_sec + ((double)atime.tv_nsec / NSEC_PER_SEC);
+	dmtime = mtime.tv_sec + ((double)mtime.tv_nsec / NSEC_PER_SEC);
+	dnow = now.tv_sec + ((double)now.tv_nsec / NSEC_PER_SEC);
 
 	/*
 	 * If atime is newer than mtime and atime hasn't been updated in thirty
@@ -5440,6 +5444,91 @@ static int op_iomap_end(const char *path, uint64_t nodeid, uint64_t attr_ino,
 
 	return 0;
 }
+
+/*
+ * Maximal extent format file size: returns the byte limit for s_maxbytes.
+ * Resulting logical blkno at s_maxbytes must fit in our on-disk
+ * extent format containers, within a sector_t, and within i_blocks
+ * in the vfs.  ext4 inode has 48 bits of i_block in fsblock units,
+ * so that won't be a limiting factor.
+ *
+ * However, there is another limiting factor. We store extents in the form
+ * of starting block and length, hence the resulting length of the extent
+ * covering maximum file size must fit into on-disk format containers as
+ * well. Given that the length is always one unit bigger than the max unit
+ * (because we count 0 as well) we have to lower s_maxbytes by one fs block.
+ *
+ * Note, this does *not* consider any metadata overhead for vfs i_blocks.
+ */
+static off_t fuse2fs_max_size(struct fuse2fs *ff, off_t upper_limit)
+{
+	off_t res;
+
+	if (!ext2fs_has_feature_huge_file(ff->fs->super)) {
+		upper_limit = (1LL << 32) - 1;
+
+		/* total blocks in fs block size; caller's limit is replaced */
+		upper_limit >>= (ff->blocklog - 9);
+		upper_limit <<= ff->blocklog;
+	}
+
+	/*
+	 * 32-bit extent-start container, ee_block. We lower the maxbytes
+	 * by one fs block, so ee_len can cover the extent of maximum file
+	 * size
+	 */
+	res = (1LL << 32) - 1;
+	res <<= ff->blocklog;
+
+	/* Clamp to upper_limit, i.e. the vm- & vfs- imposed limits */
+	if (res > upper_limit)
+		res = upper_limit;
+
+	return res;
+}
+
+static int op_iomap_config(uint64_t flags, off_t maxbytes,
+			   struct fuse_iomap_config *cfg)
+{
+	struct fuse2fs *ff = fuse2fs_get();
+	ext2_filsys fs;
+
+	FUSE2FS_CHECK_CONTEXT(ff);
+
+	dbg_printf(ff, "%s: flags=0x%llx maxbytes=0x%llx\n", __func__,
+		   (unsigned long long)flags,
+		   (unsigned long long)maxbytes);
+	fs = fuse2fs_start(ff);
+
+	cfg->flags |= FUSE_IOMAP_CONFIG_UUID;
+	memcpy(cfg->s_uuid, fs->super->s_uuid, sizeof(cfg->s_uuid));
+	cfg->s_uuid_len = sizeof(fs->super->s_uuid); /* NOTE(review): memcpy copied sizeof(cfg->s_uuid); confirm equal */
+
+	cfg->flags |= FUSE_IOMAP_CONFIG_BLOCKSIZE;
+	cfg->s_blocksize = FUSE2FS_FSB_TO_B(ff, 1);
+
+	/*
+	 * If the inode is large enough to house i_[acm]time_extra then we
+	 * can turn on nanosecond timestamps; i_crtime was the next field added
+	 * after i_atime_extra.
+	 */
+	cfg->flags |= FUSE_IOMAP_CONFIG_TIME;
+	if (fs->super->s_inode_size >=
+	    offsetof(struct ext2_inode_large, i_crtime)) {
+		cfg->s_time_gran = 1;
+		cfg->s_time_max = EXT4_EXTRA_TIMESTAMP_MAX;
+	} else {
+		cfg->s_time_gran = NSEC_PER_SEC;
+		cfg->s_time_max = EXT4_NON_EXTRA_TIMESTAMP_MAX;
+	}
+	cfg->s_time_min = EXT4_TIMESTAMP_MIN;
+
+	cfg->flags |= FUSE_IOMAP_CONFIG_MAXBYTES;
+	cfg->s_maxbytes = fuse2fs_max_size(ff, maxbytes);
+
+	fuse2fs_finish(ff, 0);
+	return 0;
+}
 #endif /* HAVE_FUSE_IOMAP */
 
 static struct fuse_operations fs_ops = {
@@ -5505,6 +5594,7 @@ static struct fuse_operations fs_ops = {
 #ifdef HAVE_FUSE_IOMAP
 	.iomap_begin = op_iomap_begin,
 	.iomap_end = op_iomap_end,
+	.iomap_config = op_iomap_config,
 #endif /* HAVE_FUSE_IOMAP */
 };
 
diff --git a/misc/fuse4fs.c b/misc/fuse4fs.c
index 2bc25ff37055d5..5876af19387c96 100644
--- a/misc/fuse4fs.c
+++ b/misc/fuse4fs.c
@@ -196,6 +196,10 @@ static inline uint64_t round_down(uint64_t b, unsigned int align)
 # define FL_ZERO_RANGE_FLAG (0)
 #endif
 
+#ifndef NSEC_PER_SEC
+# define NSEC_PER_SEC	(1000000000L)
+#endif
+
 errcode_t ext2fs_run_ext3_journal(ext2_filsys *fs);
 
 const char *err_shortdev;
@@ -808,9 +812,9 @@ static int update_atime(ext2_filsys fs, ext2_ino_t ino)
 	EXT4_INODE_GET_XTIME(i_mtime, &mtime, pinode);
 	get_now(&now);
 
-	datime = atime.tv_sec + ((double)atime.tv_nsec / 1000000000);
-	dmtime = mtime.tv_sec + ((double)mtime.tv_nsec / 1000000000);
-	dnow = now.tv_sec + ((double)now.tv_nsec / 1000000000);
+	datime = atime.tv_sec + ((double)atime.tv_nsec / NSEC_PER_SEC);
+	dmtime = mtime.tv_sec + ((double)mtime.tv_nsec / NSEC_PER_SEC);
+	dnow = now.tv_sec + ((double)now.tv_nsec / NSEC_PER_SEC);
 
 	/*
 	 * If atime is newer than mtime and atime hasn't been updated in thirty
@@ -5850,6 +5854,91 @@ static void op_iomap_end(fuse_req_t req, fuse_ino_t fino, uint64_t dontcare,
 
 	fuse_reply_err(req, 0);
 }
+
+/*
+ * Maximal extent format file size: returns the byte limit for s_maxbytes.
+ * Resulting logical blkno at s_maxbytes must fit in our on-disk
+ * extent format containers, within a sector_t, and within i_blocks
+ * in the vfs.  ext4 inode has 48 bits of i_block in fsblock units,
+ * so that won't be a limiting factor.
+ *
+ * However, there is another limiting factor. We store extents in the form
+ * of starting block and length, hence the resulting length of the extent
+ * covering maximum file size must fit into on-disk format containers as
+ * well. Given that the length is always one unit bigger than the max unit
+ * (because we count 0 as well) we have to lower s_maxbytes by one fs block.
+ *
+ * Note, this does *not* consider any metadata overhead for vfs i_blocks.
+ */
+static off_t fuse4fs_max_size(struct fuse4fs *ff, off_t upper_limit)
+{
+	off_t res;
+
+	if (!ext2fs_has_feature_huge_file(ff->fs->super)) {
+		upper_limit = (1LL << 32) - 1;
+
+		/* total blocks in fs block size; caller's limit is replaced */
+		upper_limit >>= (ff->blocklog - 9);
+		upper_limit <<= ff->blocklog;
+	}
+
+	/*
+	 * 32-bit extent-start container, ee_block. We lower the maxbytes
+	 * by one fs block, so ee_len can cover the extent of maximum file
+	 * size
+	 */
+	res = (1LL << 32) - 1;
+	res <<= ff->blocklog;
+
+	/* Clamp to upper_limit, i.e. the vm- & vfs- imposed limits */
+	if (res > upper_limit)
+		res = upper_limit;
+
+	return res;
+}
+
+static void op_iomap_config(fuse_req_t req, uint64_t flags, uint64_t maxbytes)
+{
+	struct fuse_iomap_config cfg = { };
+	struct fuse4fs *ff = fuse4fs_get(req);
+	ext2_filsys fs;
+
+	FUSE4FS_CHECK_CONTEXT(req);
+
+	dbg_printf(ff, "%s: flags=0x%llx maxbytes=0x%llx\n", __func__,
+		   (unsigned long long)flags,
+		   (unsigned long long)maxbytes);
+	fs = fuse4fs_start(ff);
+
+	cfg.flags |= FUSE_IOMAP_CONFIG_UUID;
+	memcpy(cfg.s_uuid, fs->super->s_uuid, sizeof(cfg.s_uuid));
+	cfg.s_uuid_len = sizeof(fs->super->s_uuid); /* NOTE(review): memcpy copied sizeof(cfg.s_uuid); confirm equal */
+
+	cfg.flags |= FUSE_IOMAP_CONFIG_BLOCKSIZE;
+	cfg.s_blocksize = FUSE4FS_FSB_TO_B(ff, 1);
+
+	/*
+	 * If the inode is large enough to house i_[acm]time_extra then we
+	 * can turn on nanosecond timestamps; i_crtime was the next field added
+	 * after i_atime_extra.
+	 */
+	cfg.flags |= FUSE_IOMAP_CONFIG_TIME;
+	if (fs->super->s_inode_size >=
+	    offsetof(struct ext2_inode_large, i_crtime)) {
+		cfg.s_time_gran = 1;
+		cfg.s_time_max = EXT4_EXTRA_TIMESTAMP_MAX;
+	} else {
+		cfg.s_time_gran = NSEC_PER_SEC;
+		cfg.s_time_max = EXT4_NON_EXTRA_TIMESTAMP_MAX;
+	}
+	cfg.s_time_min = EXT4_TIMESTAMP_MIN;
+
+	cfg.flags |= FUSE_IOMAP_CONFIG_MAXBYTES;
+	cfg.s_maxbytes = fuse4fs_max_size(ff, maxbytes);
+
+	fuse4fs_finish(ff, 0);
+	fuse_reply_iomap_config(req, &cfg);
+}
 #endif /* HAVE_FUSE_IOMAP */
 
 static struct fuse_lowlevel_ops fs_ops = {
@@ -5898,6 +5987,7 @@ static struct fuse_lowlevel_ops fs_ops = {
 #ifdef HAVE_FUSE_IOMAP
 	.iomap_begin = op_iomap_begin,
 	.iomap_end = op_iomap_end,
+	.iomap_config = op_iomap_config,
 #endif /* HAVE_FUSE_IOMAP */
 };
 


  parent reply	other threads:[~2025-08-21  1:16 UTC|newest]

Thread overview: 74+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-08-21  0:37 [RFC v4] fuse: use fs-iomap for better performance so we can containerize ext4 Darrick J. Wong
2025-08-21  0:49 ` [PATCHSET RFC v4 1/6] fuse4fs: fork a low level fuse server Darrick J. Wong
2025-08-21  1:08   ` [PATCH 01/20] fuse2fs: port fuse2fs to lowlevel libfuse API Darrick J. Wong
2025-08-21  1:08   ` [PATCH 02/20] fuse4fs: drop fuse 2.x support code Darrick J. Wong
2025-08-21  1:08   ` [PATCH 03/20] fuse4fs: namespace some helpers Darrick J. Wong
2025-08-21  1:08   ` [PATCH 04/20] fuse4fs: convert to low level API Darrick J. Wong
2025-08-21  1:09   ` [PATCH 05/20] libsupport: port the kernel list.h to libsupport Darrick J. Wong
2025-08-21  1:09   ` [PATCH 06/20] libsupport: add a cache Darrick J. Wong
2025-08-21  1:09   ` [PATCH 07/20] cache: disable debugging Darrick J. Wong
2025-08-21  1:09   ` [PATCH 08/20] cache: use modern list iterator macros Darrick J. Wong
2025-08-21  1:10   ` [PATCH 09/20] cache: embed struct cache in the owner Darrick J. Wong
2025-08-21  1:10   ` [PATCH 10/20] cache: pass cache pointer to callbacks Darrick J. Wong
2025-08-21  1:10   ` [PATCH 11/20] cache: pass a private data pointer through cache_walk Darrick J. Wong
2025-08-21  1:11   ` [PATCH 12/20] cache: add a helper to grab a new refcount for a cache_node Darrick J. Wong
2025-08-21  1:11   ` [PATCH 13/20] cache: return results of a cache flush Darrick J. Wong
2025-08-21  1:11   ` [PATCH 14/20] cache: add a "get only if incore" flag to cache_node_get Darrick J. Wong
2025-08-21  1:11   ` [PATCH 15/20] cache: support gradual expansion Darrick J. Wong
2025-08-21  1:12   ` [PATCH 16/20] cache: implement automatic shrinking Darrick J. Wong
2025-08-21  1:12   ` [PATCH 17/20] fuse4fs: add cache to track open files Darrick J. Wong
2025-08-21  1:12   ` [PATCH 18/20] fuse4fs: use the orphaned inode list Darrick J. Wong
2025-08-21  1:12   ` [PATCH 19/20] fuse4fs: implement FUSE_TMPFILE Darrick J. Wong
2025-08-21  1:13   ` [PATCH 20/20] fuse4fs: create incore reverse orphan list Darrick J. Wong
2025-08-21  0:49 ` [PATCHSET RFC v4 2/6] libext2fs: refactoring for fuse2fs iomap support Darrick J. Wong
2025-08-21  1:13   ` [PATCH 01/10] libext2fs: make it possible to extract the fd from an IO manager Darrick J. Wong
2025-08-21  1:13   ` [PATCH 02/10] libext2fs: always fsync the device when flushing the cache Darrick J. Wong
2025-08-21  1:13   ` [PATCH 03/10] libext2fs: always fsync the device when closing the unix IO manager Darrick J. Wong
2025-08-21  1:14   ` [PATCH 04/10] libext2fs: only fsync the unix fd if we wrote to the device Darrick J. Wong
2025-08-21  1:14   ` [PATCH 05/10] libext2fs: invalidate cached blocks when freeing them Darrick J. Wong
2025-08-21  1:14   ` [PATCH 06/10] libext2fs: only flush affected blocks in unix_write_byte Darrick J. Wong
2025-08-21  1:14   ` [PATCH 07/10] libext2fs: allow unix_write_byte when the write would be aligned Darrick J. Wong
2025-08-21  1:15   ` [PATCH 08/10] libext2fs: allow clients to ask to write full superblocks Darrick J. Wong
2025-08-21  1:15   ` [PATCH 09/10] libext2fs: allow callers to disallow I/O to file data blocks Darrick J. Wong
2025-08-21  1:15   ` [PATCH 10/10] libext2fs: add posix advisory locking to the unix IO manager Darrick J. Wong
2025-08-21  0:49 ` [PATCHSET RFC v4 3/6] fuse2fs: use fuse iomap data paths for better file I/O performance Darrick J. Wong
2025-08-21  1:15   ` [PATCH 01/19] fuse2fs: implement bare minimum iomap for file mapping reporting Darrick J. Wong
2025-08-21  1:16   ` [PATCH 02/19] fuse2fs: add iomap= mount option Darrick J. Wong
2025-08-21  1:16   ` Darrick J. Wong [this message]
2025-08-21  1:16   ` [PATCH 04/19] fuse2fs: register block devices for use with iomap Darrick J. Wong
2025-08-21  1:17   ` [PATCH 05/19] fuse2fs: implement directio file reads Darrick J. Wong
2025-08-21  1:17   ` [PATCH 06/19] fuse2fs: add extent dump function for debugging Darrick J. Wong
2025-08-21  1:17   ` [PATCH 07/19] fuse2fs: implement direct write support Darrick J. Wong
2025-08-21  1:17   ` [PATCH 08/19] fuse2fs: turn on iomap for pagecache IO Darrick J. Wong
2025-08-21  1:18   ` [PATCH 09/19] fuse2fs: don't zero bytes in punch hole Darrick J. Wong
2025-08-21  1:18   ` [PATCH 10/19] fuse2fs: don't do file data block IO when iomap is enabled Darrick J. Wong
2025-08-21  1:18   ` [PATCH 11/19] fuse2fs: avoid fuseblk mode if fuse-iomap support is likely Darrick J. Wong
2025-08-21  1:18   ` [PATCH 12/19] fuse2fs: enable file IO to inline data files Darrick J. Wong
2025-08-21  1:19   ` [PATCH 13/19] fuse2fs: set iomap-related inode flags Darrick J. Wong
2025-08-21  1:19   ` [PATCH 14/19] fuse2fs: add strictatime/lazytime mount options Darrick J. Wong
2025-08-21  1:19   ` [PATCH 15/19] fuse2fs: configure block device block size Darrick J. Wong
2025-08-21  1:19   ` [PATCH 16/19] fuse4fs: don't use inode number translation when possible Darrick J. Wong
2025-08-21  1:20   ` [PATCH 17/19] fuse4fs: separate invalidation Darrick J. Wong
2025-08-21  1:20   ` [PATCH 18/19] fuse2fs: implement statx Darrick J. Wong
2025-08-21  1:20   ` [PATCH 19/19] fuse2fs: enable atomic writes Darrick J. Wong
2025-08-21  0:50 ` [PATCHSET RFC v4 4/6] fuse2fs: use fuse iomap data paths for better file I/O performance Darrick J. Wong
2025-08-21  1:20   ` [PATCH 1/2] fuse2fs: enable caching of iomaps Darrick J. Wong
2025-08-21  1:21   ` [PATCH 2/2] fuse2fs: be smarter about caching iomaps Darrick J. Wong
2025-08-21  0:50 ` [PATCHSET RFC v4 5/6] fuse2fs: handle timestamps and ACLs correctly when iomap is enabled Darrick J. Wong
2025-08-21  1:21   ` [PATCH 1/8] fuse2fs: skip permission checking on utimens " Darrick J. Wong
2025-08-21  1:21   ` [PATCH 2/8] fuse2fs: let the kernel tell us about acl/mode updates Darrick J. Wong
2025-08-21  1:21   ` [PATCH 3/8] fuse2fs: better debugging for file mode updates Darrick J. Wong
2025-08-21  1:22   ` [PATCH 4/8] fuse2fs: debug timestamp updates Darrick J. Wong
2025-08-21  1:22   ` [PATCH 5/8] fuse2fs: use coarse timestamps for iomap mode Darrick J. Wong
2025-08-21  1:22   ` [PATCH 6/8] fuse2fs: add tracing for retrieving timestamps Darrick J. Wong
2025-08-21  1:23   ` [PATCH 7/8] fuse2fs: enable syncfs Darrick J. Wong
2025-08-21  1:23   ` [PATCH 8/8] fuse2fs: skip the gdt write in op_destroy if syncfs is working Darrick J. Wong
2025-08-21  0:50 ` [PATCHSET RFC v4 6/6] fuse2fs: improve block and inode caching Darrick J. Wong
2025-08-21  1:23   ` [PATCH 1/6] libsupport: add caching IO manager Darrick J. Wong
2025-08-21  1:23   ` [PATCH 2/6] iocache: add the actual buffer cache Darrick J. Wong
2025-08-21  1:24   ` [PATCH 3/6] iocache: bump buffer mru priority every 50 accesses Darrick J. Wong
2025-08-21  1:24   ` [PATCH 4/6] fuse2fs: enable caching IO manager Darrick J. Wong
2025-08-21  1:24   ` [PATCH 5/6] fuse2fs: increase inode cache size Darrick J. Wong
2025-08-21  1:24   ` [PATCH 6/6] libext2fs: improve caching for inodes Darrick J. Wong
  -- strict thread matches above, loose matches on Subject: below --
2026-02-23 23:04 [PATCHSET v7 1/8] fuse2fs: use fuse iomap data paths for better file I/O performance Darrick J. Wong
2026-02-23 23:36 ` [PATCH 03/19] fuse2fs: implement iomap configuration Darrick J. Wong
2026-04-29 14:20 [PATCHSET v8 2/6] fuse2fs: use fuse iomap data paths for better file I/O performance Darrick J. Wong
2026-04-29 14:53 ` [PATCH 03/19] fuse2fs: implement iomap configuration Darrick J. Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=175573713783.21970.2762866490407826816.stgit@frogsfrogsfrogs \
    --to=djwong@kernel.org \
    --cc=John@groves.net \
    --cc=bernd@bsbernd.com \
    --cc=joannelkoong@gmail.com \
    --cc=linux-ext4@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=miklos@szeredi.hu \
    --cc=neal@gompa.dev \
    --cc=tytso@mit.edu \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox