* [PATCH v13 09/15] cifs: Implement fileattr_get for case sensitivity
From: Chuck Lever @ 2026-05-02 14:20 UTC (permalink / raw)
To: Al Viro, Christian Brauner, Jan Kara
Cc: linux-fsdevel, linux-ext4, linux-xfs, linux-cifs, linux-nfs,
linux-api, linux-f2fs-devel, hirofumi, linkinjeon, sj1557.seo,
yuezhang.mo, almaz.alexandrovich, slava, glaubitz, frank.li,
tytso, adilger.kernel, cem, sfrench, pc, ronniesahlberg, sprasad,
trondmy, anna, jaegeuk, chao, hansg, senozhatsky, Chuck Lever,
Steve French, Roland Mainz
In-Reply-To: <20260502-case-sensitivity-v13-0-aa853140311f@oracle.com>
From: Chuck Lever <chuck.lever@oracle.com>
Upper layers such as NFSD need a way to query whether a filesystem
handles filenames in a case-sensitive manner. Report CIFS/SMB case
handling behavior via FS_XFLAG_CASEFOLD and
FS_XFLAG_CASENONPRESERVING.
The authoritative source is the server itself: at mount time CIFS
issues QueryFSInfo(FS_ATTRIBUTE_INFORMATION) and caches the reply
on the tcon. That reply carries FILE_CASE_SENSITIVE_SEARCH and
FILE_CASE_PRESERVED_NAMES, which reflect whatever case handling
the share actually implements after SMB3.1.1 POSIX extensions
negotiation. Translating those two bits into the VFS flags lets
cifs_fileattr_get report what the server advertises rather than
what the client was asked to pretend.
QueryFSInfo is best-effort; the mount completes even if the server
does not answer. MaxPathNameComponentLength is zero in that case
and is used as the "no reply received" sentinel. When no reply is
available, fall back to the nocase mount option so that the reported
behavior agrees with the dentry comparison operations installed on
the superblock.
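The callback is registered on cifs_dir_inode_ops so that NFSD,
ksmbd, and other consumers querying case handling against a
directory get a definitive answer, and on cifs_file_inode_ops to
preserve FS_COMPR_FL reporting on regular files. The case-handling
flags themselves are reported only for directories, matching the
UAPI definition of FS_CASEFOLD_FL as a folder attribute; for
non-directories the handler returns after the FS_COMPR_FL check.
cifs_set_ops()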
also installs cifs_namespace_inode_operations on DFS referral
directories that carry IS_AUTOMOUNT; register the same callback
there so the answer does not depend on whether the directory is
a referral point.
Registering fileattr_get routes FS_IOC_GETFLAGS through
vfs_fileattr_get() and short-circuits the syscall's fallback to
cifs_ioctl(). That fallback invoked CIFSGetExtAttr() under
CONFIG_CIFS_POSIX and CONFIG_CIFS_ALLOW_INSECURE_LEGACY on servers
advertising CIFS_UNIX_EXTATTR_CAP, surfacing the SMB1 Unix-extension
immutable, append, and nodump bits. cifs_fileattr_get carries over
only FS_COMPR_FL from cached cifsAttrs; the SMB1 extattr fetch is
not reproduced. SMB1 is deprecated, and preserving a path tied to
an insecure legacy dialect does not justify acquiring a netfid
from within a dentry-only callback.
Acked-by: Steve French <stfrench@microsoft.com>
Reviewed-by: Roland Mainz <roland.mainz@nrubsig.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
fs/smb/client/cifsfs.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++
fs/smb/client/cifsfs.h | 3 +++
fs/smb/client/namespace.c | 1 +
3 files changed, 57 insertions(+)
diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c
index 2025739f070a..6c113ae7fdd3 100644
--- a/fs/smb/client/cifsfs.c
+++ b/fs/smb/client/cifsfs.c
@@ -30,6 +30,7 @@
#include <linux/xattr.h>
#include <linux/mm.h>
#include <linux/key-type.h>
+#include <linux/fileattr.h>
#include <uapi/linux/magic.h>
#include <net/ipv6.h>
#include "cifsfs.h"
@@ -1199,6 +1200,56 @@ struct file_system_type smb3_fs_type = {
MODULE_ALIAS_FS("smb3");
MODULE_ALIAS("smb3");
+int cifs_fileattr_get(struct dentry *dentry, struct file_kattr *fa)
+{
+ struct cifs_sb_info *cifs_sb = CIFS_SB(dentry->d_sb);
+ struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
+ struct inode *inode = d_inode(dentry);
+ u32 attrs;
+
+ /* Preserve FS_COMPR_FL previously reported by cifs_ioctl(). */
+ if (CIFS_I(inode)->cifsAttrs & ATTR_COMPRESSED)
+ fa->flags |= FS_COMPR_FL;
+
+ /*
+ * FS_CASEFOLD_FL is defined by UAPI as a folder attribute,
+ * and userspace tools (e.g., lsattr) display it only on
+ * directories. Confine the case-handling bits to directories
+ * to match that convention; for non-directories the share's
+ * case semantics are still discoverable through the parent.
+ */
+ if (!S_ISDIR(inode->i_mode))
+ return 0;
+
+ /*
+ * The server's FS_ATTRIBUTE_INFORMATION response, cached on
+ * the tcon at mount, reflects the share's case-handling
+ * semantics after any POSIX extensions negotiation. Prefer
+ * it over the client-local nocase mount option, which only
+ * governs dentry comparison on this superblock.
+ *
+ * QueryFSInfo is best-effort at mount; when it did not
+ * populate fsAttrInfo, MaxPathNameComponentLength remains
+ * zero. In that case fall back to nocase so the reporting
+ * matches the comparison behavior installed on the sb.
+ */
+ if (le32_to_cpu(tcon->fsAttrInfo.MaxPathNameComponentLength) == 0) {
+ if (tcon->nocase) {
+ fa->fsx_xflags |= FS_XFLAG_CASEFOLD;
+ fa->flags |= FS_CASEFOLD_FL;
+ }
+ return 0;
+ }
+ attrs = le32_to_cpu(tcon->fsAttrInfo.Attributes);
+ if (!(attrs & FILE_CASE_SENSITIVE_SEARCH)) {
+ fa->fsx_xflags |= FS_XFLAG_CASEFOLD;
+ fa->flags |= FS_CASEFOLD_FL;
+ }
+ if (!(attrs & FILE_CASE_PRESERVED_NAMES))
+ fa->fsx_xflags |= FS_XFLAG_CASENONPRESERVING;
+ return 0;
+}
+
const struct inode_operations cifs_dir_inode_ops = {
.create = cifs_create,
.atomic_open = cifs_atomic_open,
@@ -1217,6 +1268,7 @@ const struct inode_operations cifs_dir_inode_ops = {
.listxattr = cifs_listxattr,
.get_acl = cifs_get_acl,
.set_acl = cifs_set_acl,
+ .fileattr_get = cifs_fileattr_get,
};
const struct inode_operations cifs_file_inode_ops = {
@@ -1227,6 +1279,7 @@ const struct inode_operations cifs_file_inode_ops = {
.fiemap = cifs_fiemap,
.get_acl = cifs_get_acl,
.set_acl = cifs_set_acl,
+ .fileattr_get = cifs_fileattr_get,
};
const char *cifs_get_link(struct dentry *dentry, struct inode *inode,
diff --git a/fs/smb/client/cifsfs.h b/fs/smb/client/cifsfs.h
index 7370b38da938..5f0d459d1a89 100644
--- a/fs/smb/client/cifsfs.h
+++ b/fs/smb/client/cifsfs.h
@@ -89,6 +89,9 @@ extern const struct inode_operations cifs_file_inode_ops;
extern const struct inode_operations cifs_symlink_inode_ops;
extern const struct inode_operations cifs_namespace_inode_operations;
+struct file_kattr;
+int cifs_fileattr_get(struct dentry *dentry, struct file_kattr *fa);
+
/* Functions related to files and directories */
extern const struct netfs_request_ops cifs_req_ops;
diff --git a/fs/smb/client/namespace.c b/fs/smb/client/namespace.c
index 52a520349cb7..52a51b032fae 100644
--- a/fs/smb/client/namespace.c
+++ b/fs/smb/client/namespace.c
@@ -294,4 +294,5 @@ struct vfsmount *cifs_d_automount(struct path *path)
}
const struct inode_operations cifs_namespace_inode_operations = {
+ .fileattr_get = cifs_fileattr_get,
};
--
2.53.0
^ permalink raw reply related
* [PATCH v13 08/15] xfs: Report case sensitivity in fileattr_get
From: Chuck Lever @ 2026-05-02 14:20 UTC (permalink / raw)
To: Al Viro, Christian Brauner, Jan Kara
Cc: linux-fsdevel, linux-ext4, linux-xfs, linux-cifs, linux-nfs,
linux-api, linux-f2fs-devel, hirofumi, linkinjeon, sj1557.seo,
yuezhang.mo, almaz.alexandrovich, slava, glaubitz, frank.li,
tytso, adilger.kernel, cem, sfrench, pc, ronniesahlberg, sprasad,
trondmy, anna, jaegeuk, chao, hansg, senozhatsky, Chuck Lever,
Roland Mainz
In-Reply-To: <20260502-case-sensitivity-v13-0-aa853140311f@oracle.com>
From: Chuck Lever <chuck.lever@oracle.com>
Upper layers such as NFSD need to query whether a filesystem
is case-sensitive. Add FS_XFLAG_CASEFOLD to xfs_ip2xflags()
when the filesystem is formatted with the ASCIICI feature
flag. This serves both FS_IOC_FSGETXATTR (via xfs_fill_fsxattr()
in xfs_fileattr_get()) and XFS_IOC_BULKSTAT (which populates
bs_xflags directly from xfs_ip2xflags()), so bulkstat consumers
and per-inode queries see a consistent view of the filesystem's
case-folding behavior.
FS_XFLAG_CASEFOLD is read-only: FS_XFLAG_RDONLY_MASK ensures
FS_IOC_FSSETXATTR strips it, and xfs_flags2diflags() has no
clause for CASEFOLD so the on-disk diflags are unaffected.
The legacy FS_IOC_SETFLAGS path in xfs_fileattr_set() also
allows FS_CASEFOLD_FL through its allowlist on ASCIICI
filesystems so that a chattr read-modify-write cycle does
not fail with EOPNOTSUPP.
XFS always preserves case. XFS is case-sensitive by default,
but supports ASCII case-insensitive lookups when formatted
with the ASCIICI feature flag.
Reviewed-by: Roland Mainz <roland.mainz@nrubsig.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
fs/xfs/libxfs/xfs_inode_util.c | 2 ++
fs/xfs/xfs_ioctl.c | 20 +++++++++++++++++---
2 files changed, 19 insertions(+), 3 deletions(-)
diff --git a/fs/xfs/libxfs/xfs_inode_util.c b/fs/xfs/libxfs/xfs_inode_util.c
index 551fa51befb6..82be54b6f8d3 100644
--- a/fs/xfs/libxfs/xfs_inode_util.c
+++ b/fs/xfs/libxfs/xfs_inode_util.c
@@ -130,6 +130,8 @@ xfs_ip2xflags(
if (xfs_inode_has_attr_fork(ip))
flags |= FS_XFLAG_HASATTR;
+ if (xfs_has_asciici(ip->i_mount))
+ flags |= FS_XFLAG_CASEFOLD;
return flags;
}
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index ed9b4846c05f..f8216f74679f 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -755,9 +755,23 @@ xfs_fileattr_set(
trace_xfs_ioctl_setattr(ip);
if (!fa->fsx_valid) {
- if (fa->flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL |
- FS_NOATIME_FL | FS_NODUMP_FL |
- FS_SYNC_FL | FS_DAX_FL | FS_PROJINHERIT_FL))
+ unsigned int allowed = FS_IMMUTABLE_FL | FS_APPEND_FL |
+ FS_NOATIME_FL | FS_NODUMP_FL |
+ FS_SYNC_FL | FS_DAX_FL |
+ FS_PROJINHERIT_FL;
+
+ /*
+ * FS_CASEFOLD_FL reflects the ASCIICI superblock feature,
+ * a read-only property. Accept it as a no-op so chattr's
+ * RMW round-trip succeeds; reject any attempt to enable
+ * it on a non-ASCIICI filesystem. xfs_flags2diflags()
+ * has no clause for CASEFOLD, so the bit is dropped from
+ * the on-disk diflags regardless.
+ */
+ if (xfs_has_asciici(mp))
+ allowed |= FS_CASEFOLD_FL;
+
+ if (fa->flags & ~allowed)
return -EOPNOTSUPP;
}
--
2.53.0
^ permalink raw reply related
* [PATCH v13 07/15] hfsplus: Report case sensitivity in fileattr_get
From: Chuck Lever @ 2026-05-02 14:20 UTC (permalink / raw)
To: Al Viro, Christian Brauner, Jan Kara
Cc: linux-fsdevel, linux-ext4, linux-xfs, linux-cifs, linux-nfs,
linux-api, linux-f2fs-devel, hirofumi, linkinjeon, sj1557.seo,
yuezhang.mo, almaz.alexandrovich, slava, glaubitz, frank.li,
tytso, adilger.kernel, cem, sfrench, pc, ronniesahlberg, sprasad,
trondmy, anna, jaegeuk, chao, hansg, senozhatsky, Chuck Lever,
Roland Mainz
In-Reply-To: <20260502-case-sensitivity-v13-0-aa853140311f@oracle.com>
From: Chuck Lever <chuck.lever@oracle.com>
Add case sensitivity reporting to the existing hfsplus_fileattr_get()
function via the FS_XFLAG_CASEFOLD flag. HFS+ always preserves case
at rest.
Case sensitivity depends on how the volume was formatted: HFSX
volumes may be either case-sensitive or case-insensitive, indicated
by the HFSPLUS_SB_CASEFOLD superblock flag.
FS_XFLAG_CASEFOLD is read-only: FS_XFLAG_RDONLY_MASK ensures
FS_IOC_FSSETXATTR strips it. The legacy FS_IOC_SETFLAGS path in
hfsplus_fileattr_set() also allows FS_CASEFOLD_FL through its
allowlist on case-insensitive volumes so that a chattr
read-modify-write cycle does not fail with EOPNOTSUPP.
Reviewed-by: Viacheslav Dubeyko <slava@dubeyko.com>
Reviewed-by: Roland Mainz <roland.mainz@nrubsig.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
fs/hfsplus/inode.c | 16 +++++++++++++++-
1 file changed, 15 insertions(+), 1 deletion(-)
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index d05891ec492e..5565c14b4bf6 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -740,6 +740,7 @@ int hfsplus_fileattr_get(struct dentry *dentry, struct file_kattr *fa)
{
struct inode *inode = d_inode(dentry);
struct hfsplus_inode_info *hip = HFSPLUS_I(inode);
+ struct hfsplus_sb_info *sbi = HFSPLUS_SB(inode->i_sb);
unsigned int flags = 0;
if (inode->i_flags & S_IMMUTABLE)
@@ -748,6 +749,8 @@ int hfsplus_fileattr_get(struct dentry *dentry, struct file_kattr *fa)
flags |= FS_APPEND_FL;
if (hip->userflags & HFSPLUS_FLG_NODUMP)
flags |= FS_NODUMP_FL;
+ if (test_bit(HFSPLUS_SB_CASEFOLD, &sbi->flags))
+ flags |= FS_CASEFOLD_FL;
fileattr_fill_flags(fa, flags);
@@ -759,13 +762,24 @@ int hfsplus_fileattr_set(struct mnt_idmap *idmap,
{
struct inode *inode = d_inode(dentry);
struct hfsplus_inode_info *hip = HFSPLUS_I(inode);
+ struct hfsplus_sb_info *sbi = HFSPLUS_SB(inode->i_sb);
+ unsigned int allowed = FS_IMMUTABLE_FL | FS_APPEND_FL | FS_NODUMP_FL;
unsigned int new_fl = 0;
if (fileattr_has_fsx(fa))
return -EOPNOTSUPP;
+ /*
+ * FS_CASEFOLD_FL reflects HFSPLUS_SB_CASEFOLD, a mount-time
+ * property. Accept it as a no-op so chattr's RMW round-trip
+ * succeeds; reject any attempt to enable it on a volume that
+ * was not formatted case-insensitive.
+ */
+ if (test_bit(HFSPLUS_SB_CASEFOLD, &sbi->flags))
+ allowed |= FS_CASEFOLD_FL;
+
/* don't silently ignore unsupported ext2 flags */
- if (fa->flags & ~(FS_IMMUTABLE_FL|FS_APPEND_FL|FS_NODUMP_FL))
+ if (fa->flags & ~allowed)
return -EOPNOTSUPP;
if (fa->flags & FS_IMMUTABLE_FL)
--
2.53.0
^ permalink raw reply related
* [PATCH v13 06/15] hfs: Implement fileattr_get for case sensitivity
From: Chuck Lever @ 2026-05-02 14:20 UTC (permalink / raw)
To: Al Viro, Christian Brauner, Jan Kara
Cc: linux-fsdevel, linux-ext4, linux-xfs, linux-cifs, linux-nfs,
linux-api, linux-f2fs-devel, hirofumi, linkinjeon, sj1557.seo,
yuezhang.mo, almaz.alexandrovich, slava, glaubitz, frank.li,
tytso, adilger.kernel, cem, sfrench, pc, ronniesahlberg, sprasad,
trondmy, anna, jaegeuk, chao, hansg, senozhatsky, Chuck Lever,
Roland Mainz
In-Reply-To: <20260502-case-sensitivity-v13-0-aa853140311f@oracle.com>
From: Chuck Lever <chuck.lever@oracle.com>
Report HFS case sensitivity behavior via the FS_XFLAG_CASEFOLD
flag. HFS is always case-insensitive (using Mac OS Roman case
folding) and always preserves case at rest.
Reviewed-by: Viacheslav Dubeyko <slava@dubeyko.com>
Reviewed-by: Roland Mainz <roland.mainz@nrubsig.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
fs/hfs/dir.c | 1 +
fs/hfs/hfs_fs.h | 2 ++
fs/hfs/inode.c | 14 ++++++++++++++
3 files changed, 17 insertions(+)
diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c
index f5e7efe924e7..c4c6e1623f55 100644
--- a/fs/hfs/dir.c
+++ b/fs/hfs/dir.c
@@ -328,4 +328,5 @@ const struct inode_operations hfs_dir_inode_operations = {
.rmdir = hfs_remove,
.rename = hfs_rename,
.setattr = hfs_inode_setattr,
+ .fileattr_get = hfs_fileattr_get,
};
diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h
index ac0e83f77a0f..1b23448c9a48 100644
--- a/fs/hfs/hfs_fs.h
+++ b/fs/hfs/hfs_fs.h
@@ -177,6 +177,8 @@ extern int hfs_get_block(struct inode *inode, sector_t block,
extern const struct address_space_operations hfs_aops;
extern const struct address_space_operations hfs_btree_aops;
+struct file_kattr;
+int hfs_fileattr_get(struct dentry *dentry, struct file_kattr *fa);
int hfs_write_begin(const struct kiocb *iocb, struct address_space *mapping,
loff_t pos, unsigned int len, struct folio **foliop,
void **fsdata);
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 89b33a9d46d5..f41cc261684d 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -18,6 +18,7 @@
#include <linux/uio.h>
#include <linux/xattr.h>
#include <linux/blkdev.h>
+#include <linux/fileattr.h>
#include "hfs_fs.h"
#include "btree.h"
@@ -699,6 +700,18 @@ static int hfs_file_fsync(struct file *filp, loff_t start, loff_t end,
return ret;
}
+int hfs_fileattr_get(struct dentry *dentry, struct file_kattr *fa)
+{
+ /*
+ * HFS compares filenames using Mac OS Roman case folding, so
+ * lookup is always case-insensitive. Names are stored on disk
+ * with case intact; CASENONPRESERVING stays clear.
+ */
+ fa->fsx_xflags |= FS_XFLAG_CASEFOLD;
+ fa->flags |= FS_CASEFOLD_FL;
+ return 0;
+}
+
static const struct file_operations hfs_file_operations = {
.llseek = generic_file_llseek,
.read_iter = generic_file_read_iter,
@@ -715,4 +728,5 @@ static const struct inode_operations hfs_file_inode_operations = {
.lookup = hfs_file_lookup,
.setattr = hfs_inode_setattr,
.listxattr = generic_listxattr,
+ .fileattr_get = hfs_fileattr_get,
};
--
2.53.0
^ permalink raw reply related
* [PATCH v13 05/15] ntfs3: Implement fileattr_get for case sensitivity
From: Chuck Lever @ 2026-05-02 14:20 UTC (permalink / raw)
To: Al Viro, Christian Brauner, Jan Kara
Cc: linux-fsdevel, linux-ext4, linux-xfs, linux-cifs, linux-nfs,
linux-api, linux-f2fs-devel, hirofumi, linkinjeon, sj1557.seo,
yuezhang.mo, almaz.alexandrovich, slava, glaubitz, frank.li,
tytso, adilger.kernel, cem, sfrench, pc, ronniesahlberg, sprasad,
trondmy, anna, jaegeuk, chao, hansg, senozhatsky, Chuck Lever,
Roland Mainz
In-Reply-To: <20260502-case-sensitivity-v13-0-aa853140311f@oracle.com>
From: Chuck Lever <chuck.lever@oracle.com>
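Report NTFS case sensitivity behavior via the FS_XFLAG_CASEFOLD
flag. Lookups are reported as case-insensitive only when the volume
is mounted with the "nocase" option; otherwise they are
case-sensitive. NTFS always preserves case at rest, so
FS_XFLAG_CASENONPRESERVING is never set. The new handler also
reports S_IMMUTABLE inodes via FS_IMMUTABLE_FL and
FS_XFLAG_IMMUTABLE.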
Reviewed-by: Roland Mainz <roland.mainz@nrubsig.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
fs/ntfs3/file.c | 29 +++++++++++++++++++++++++++++
fs/ntfs3/inode.c | 1 +
fs/ntfs3/namei.c | 2 ++
fs/ntfs3/ntfs_fs.h | 1 +
4 files changed, 33 insertions(+)
diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c
index b041639ab406..ad9350d7fc3f 100644
--- a/fs/ntfs3/file.c
+++ b/fs/ntfs3/file.c
@@ -180,6 +180,34 @@ long ntfs_compat_ioctl(struct file *filp, u32 cmd, unsigned long arg)
}
#endif
+/*
+ * ntfs_fileattr_get - inode_operations::fileattr_get
+ */
+int ntfs_fileattr_get(struct dentry *dentry, struct file_kattr *fa)
+{
+ struct inode *inode = d_inode(dentry);
+ struct ntfs_sb_info *sbi = inode->i_sb->s_fs_info;
+
+ /* Avoid any operation if inode is bad. */
+ if (unlikely(is_bad_ni(ntfs_i(inode))))
+ return -EINVAL;
+
+ /*
+ * NTFS preserves case (the default). Case sensitivity depends on
+ * mount options: with "nocase", NTFS is case-insensitive;
+ * otherwise it is case-sensitive.
+ */
+ if (sbi->options->nocase) {
+ fa->fsx_xflags |= FS_XFLAG_CASEFOLD;
+ fa->flags |= FS_CASEFOLD_FL;
+ }
+ if (inode->i_flags & S_IMMUTABLE) {
+ fa->fsx_xflags |= FS_XFLAG_IMMUTABLE;
+ fa->flags |= FS_IMMUTABLE_FL;
+ }
+ return 0;
+}
+
/*
* ntfs_getattr - inode_operations::getattr
*/
@@ -1547,6 +1575,7 @@ const struct inode_operations ntfs_file_inode_operations = {
.get_acl = ntfs_get_acl,
.set_acl = ntfs_set_acl,
.fiemap = ntfs_fiemap,
+ .fileattr_get = ntfs_fileattr_get,
};
const struct file_operations ntfs_file_operations = {
diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c
index 42af1abe17f8..a5ff04c2efd3 100644
--- a/fs/ntfs3/inode.c
+++ b/fs/ntfs3/inode.c
@@ -2095,6 +2095,7 @@ const struct inode_operations ntfs_link_inode_operations = {
.get_link = ntfs_get_link,
.setattr = ntfs_setattr,
.listxattr = ntfs_listxattr,
+ .fileattr_get = ntfs_fileattr_get,
};
const struct address_space_operations ntfs_aops = {
diff --git a/fs/ntfs3/namei.c b/fs/ntfs3/namei.c
index b2af8f695e60..eb241d7796ba 100644
--- a/fs/ntfs3/namei.c
+++ b/fs/ntfs3/namei.c
@@ -518,6 +518,7 @@ const struct inode_operations ntfs_dir_inode_operations = {
.getattr = ntfs_getattr,
.listxattr = ntfs_listxattr,
.fiemap = ntfs_fiemap,
+ .fileattr_get = ntfs_fileattr_get,
};
const struct inode_operations ntfs_special_inode_operations = {
@@ -526,6 +527,7 @@ const struct inode_operations ntfs_special_inode_operations = {
.listxattr = ntfs_listxattr,
.get_acl = ntfs_get_acl,
.set_acl = ntfs_set_acl,
+ .fileattr_get = ntfs_fileattr_get,
};
const struct dentry_operations ntfs_dentry_ops = {
diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h
index bbf3b6a1dcbe..41db22d652c4 100644
--- a/fs/ntfs3/ntfs_fs.h
+++ b/fs/ntfs3/ntfs_fs.h
@@ -529,6 +529,7 @@ bool dir_is_empty(struct inode *dir);
extern const struct file_operations ntfs_dir_operations;
/* Globals from file.c */
+int ntfs_fileattr_get(struct dentry *dentry, struct file_kattr *fa);
int ntfs_getattr(struct mnt_idmap *idmap, const struct path *path,
struct kstat *stat, u32 request_mask, u32 flags);
int ntfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
--
2.53.0
^ permalink raw reply related
* [PATCH v13 04/15] exfat: Implement fileattr_get for case sensitivity
From: Chuck Lever @ 2026-05-02 14:20 UTC (permalink / raw)
To: Al Viro, Christian Brauner, Jan Kara
Cc: linux-fsdevel, linux-ext4, linux-xfs, linux-cifs, linux-nfs,
linux-api, linux-f2fs-devel, hirofumi, linkinjeon, sj1557.seo,
yuezhang.mo, almaz.alexandrovich, slava, glaubitz, frank.li,
tytso, adilger.kernel, cem, sfrench, pc, ronniesahlberg, sprasad,
trondmy, anna, jaegeuk, chao, hansg, senozhatsky, Chuck Lever,
Roland Mainz
In-Reply-To: <20260502-case-sensitivity-v13-0-aa853140311f@oracle.com>
From: Chuck Lever <chuck.lever@oracle.com>
Report exFAT's case sensitivity behavior via the FS_XFLAG_CASEFOLD
flag. exFAT compares names through the volume's upcase table; in
practice that table folds case, and case is preserved at rest.
Acked-by: Namjae Jeon <linkinjeon@kernel.org>
Reviewed-by: Roland Mainz <roland.mainz@nrubsig.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
fs/exfat/exfat_fs.h | 2 ++
fs/exfat/file.c | 18 ++++++++++++++++--
fs/exfat/namei.c | 1 +
3 files changed, 19 insertions(+), 2 deletions(-)
diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h
index 89ef5368277f..aff4dcd4e75a 100644
--- a/fs/exfat/exfat_fs.h
+++ b/fs/exfat/exfat_fs.h
@@ -496,6 +496,8 @@ int exfat_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
int exfat_getattr(struct mnt_idmap *idmap, const struct path *path,
struct kstat *stat, unsigned int request_mask,
unsigned int query_flags);
+struct file_kattr;
+int exfat_fileattr_get(struct dentry *dentry, struct file_kattr *fa);
int exfat_file_fsync(struct file *file, loff_t start, loff_t end, int datasync);
long exfat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
long exfat_compat_ioctl(struct file *filp, unsigned int cmd,
diff --git a/fs/exfat/file.c b/fs/exfat/file.c
index 354bdcfe4abc..91e5511945d1 100644
--- a/fs/exfat/file.c
+++ b/fs/exfat/file.c
@@ -14,6 +14,7 @@
#include <linux/writeback.h>
#include <linux/filelock.h>
#include <linux/falloc.h>
+#include <linux/fileattr.h>
#include "exfat_raw.h"
#include "exfat_fs.h"
@@ -323,6 +324,18 @@ int exfat_getattr(struct mnt_idmap *idmap, const struct path *path,
return 0;
}
+int exfat_fileattr_get(struct dentry *dentry, struct file_kattr *fa)
+{
+ /*
+ * exFAT compares filenames through an upcase table, so lookup
+ * is always case-insensitive. Long names are stored in UTF-16
+ * with case intact; CASENONPRESERVING stays clear.
+ */
+ fa->fsx_xflags |= FS_XFLAG_CASEFOLD;
+ fa->flags |= FS_CASEFOLD_FL;
+ return 0;
+}
+
int exfat_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
struct iattr *attr)
{
@@ -817,6 +830,7 @@ const struct file_operations exfat_file_operations = {
};
const struct inode_operations exfat_file_inode_operations = {
- .setattr = exfat_setattr,
- .getattr = exfat_getattr,
+ .setattr = exfat_setattr,
+ .getattr = exfat_getattr,
+ .fileattr_get = exfat_fileattr_get,
};
diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c
index 2c5636634b4a..94002e43db08 100644
--- a/fs/exfat/namei.c
+++ b/fs/exfat/namei.c
@@ -1311,4 +1311,5 @@ const struct inode_operations exfat_dir_inode_operations = {
.rename = exfat_rename,
.setattr = exfat_setattr,
.getattr = exfat_getattr,
+ .fileattr_get = exfat_fileattr_get,
};
--
2.53.0
^ permalink raw reply related
* [PATCH v13 03/15] fat: Implement fileattr_get for case sensitivity
From: Chuck Lever @ 2026-05-02 14:20 UTC (permalink / raw)
To: Al Viro, Christian Brauner, Jan Kara
Cc: linux-fsdevel, linux-ext4, linux-xfs, linux-cifs, linux-nfs,
linux-api, linux-f2fs-devel, hirofumi, linkinjeon, sj1557.seo,
yuezhang.mo, almaz.alexandrovich, slava, glaubitz, frank.li,
tytso, adilger.kernel, cem, sfrench, pc, ronniesahlberg, sprasad,
trondmy, anna, jaegeuk, chao, hansg, senozhatsky, Chuck Lever,
Roland Mainz
In-Reply-To: <20260502-case-sensitivity-v13-0-aa853140311f@oracle.com>
From: Chuck Lever <chuck.lever@oracle.com>
Report FAT's case sensitivity behavior via the FS_XFLAG_CASEFOLD
and FS_XFLAG_CASENONPRESERVING flags. FAT filesystems are
case-insensitive by default.
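VFAT becomes case-sensitive when mounted with 'check=strict'. MSDOS
instead supports a 'nocase' mount option that enables case-sensitive
behavior. Check the option that applies to the mounted variant when
reporting case sensitivity.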
VFAT long filename entries preserve case; without VFAT, only
uppercased 8.3 short names are stored. MSDOS with 'nocase' also
preserves case since the name-formatting code skips upcasing when
'nocase' is set. Check both options when reporting case preservation.
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Roland Mainz <roland.mainz@nrubsig.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
fs/fat/fat.h | 3 +++
fs/fat/file.c | 36 ++++++++++++++++++++++++++++++++++++
fs/fat/namei_msdos.c | 1 +
fs/fat/namei_vfat.c | 1 +
4 files changed, 41 insertions(+)
diff --git a/fs/fat/fat.h b/fs/fat/fat.h
index 5a58f0bf8ce8..99ed9228a677 100644
--- a/fs/fat/fat.h
+++ b/fs/fat/fat.h
@@ -10,6 +10,8 @@
#include <linux/fs_context.h>
#include <linux/fs_parser.h>
+struct file_kattr;
+
/*
* vfat shortname flags
*/
@@ -408,6 +410,7 @@ extern void fat_truncate_blocks(struct inode *inode, loff_t offset);
extern int fat_getattr(struct mnt_idmap *idmap,
const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int flags);
+int fat_fileattr_get(struct dentry *dentry, struct file_kattr *fa);
extern int fat_file_fsync(struct file *file, loff_t start, loff_t end,
int datasync);
diff --git a/fs/fat/file.c b/fs/fat/file.c
index becccdd2e501..37e7049b4c8c 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -17,6 +17,7 @@
#include <linux/fsnotify.h>
#include <linux/security.h>
#include <linux/falloc.h>
+#include <linux/fileattr.h>
#include "fat.h"
static long fat_fallocate(struct file *file, int mode,
@@ -398,6 +399,40 @@ void fat_truncate_blocks(struct inode *inode, loff_t offset)
fat_flush_inodes(inode->i_sb, inode, NULL);
}
+int fat_fileattr_get(struct dentry *dentry, struct file_kattr *fa)
+{
+ struct msdos_sb_info *sbi = MSDOS_SB(dentry->d_sb);
+ bool case_sensitive;
+
+ /*
+ * FAT filesystems are case-insensitive by default. VFAT
+ * becomes case-sensitive when mounted with 'check=strict',
+ * which installs vfat_dentry_ops. MSDOS has no such option;
+ * its 'nocase' mount option selects case-sensitive matching.
+ *
+ * VFAT long filename entries preserve case. Without VFAT, only
+ * uppercased 8.3 short names are stored. MSDOS with 'nocase'
+ * also preserves case.
+ */
+ if (sbi->options.isvfat)
+ case_sensitive = sbi->options.name_check == 's';
+ else
+ case_sensitive = sbi->options.nocase;
+
+ if (!case_sensitive) {
+ fa->fsx_xflags |= FS_XFLAG_CASEFOLD;
+ fa->flags |= FS_CASEFOLD_FL;
+ if (!sbi->options.isvfat)
+ fa->fsx_xflags |= FS_XFLAG_CASENONPRESERVING;
+ }
+ if (d_inode(dentry)->i_flags & S_IMMUTABLE) {
+ fa->fsx_xflags |= FS_XFLAG_IMMUTABLE;
+ fa->flags |= FS_IMMUTABLE_FL;
+ }
+ return 0;
+}
+EXPORT_SYMBOL_GPL(fat_fileattr_get);
+
int fat_getattr(struct mnt_idmap *idmap, const struct path *path,
struct kstat *stat, u32 request_mask, unsigned int flags)
{
@@ -575,5 +610,6 @@ EXPORT_SYMBOL_GPL(fat_setattr);
const struct inode_operations fat_file_inode_operations = {
.setattr = fat_setattr,
.getattr = fat_getattr,
+ .fileattr_get = fat_fileattr_get,
.update_time = fat_update_time,
};
diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c
index 4cc65f330fb7..0fd2971ad4b1 100644
--- a/fs/fat/namei_msdos.c
+++ b/fs/fat/namei_msdos.c
@@ -644,6 +644,7 @@ static const struct inode_operations msdos_dir_inode_operations = {
.rename = msdos_rename,
.setattr = fat_setattr,
.getattr = fat_getattr,
+ .fileattr_get = fat_fileattr_get,
.update_time = fat_update_time,
};
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index 918b3756674c..e909447873e3 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -1185,6 +1185,7 @@ static const struct inode_operations vfat_dir_inode_operations = {
.rename = vfat_rename2,
.setattr = fat_setattr,
.getattr = fat_getattr,
+ .fileattr_get = fat_fileattr_get,
.update_time = fat_update_time,
};
--
2.53.0
^ permalink raw reply related
* [PATCH v13 02/15] fs: Add case sensitivity flags to file_kattr
From: Chuck Lever @ 2026-05-02 14:20 UTC (permalink / raw)
To: Al Viro, Christian Brauner, Jan Kara
Cc: linux-fsdevel, linux-ext4, linux-xfs, linux-cifs, linux-nfs,
linux-api, linux-f2fs-devel, hirofumi, linkinjeon, sj1557.seo,
yuezhang.mo, almaz.alexandrovich, slava, glaubitz, frank.li,
tytso, adilger.kernel, cem, sfrench, pc, ronniesahlberg, sprasad,
trondmy, anna, jaegeuk, chao, hansg, senozhatsky, Chuck Lever,
Darrick J. Wong, Roland Mainz
In-Reply-To: <20260502-case-sensitivity-v13-0-aa853140311f@oracle.com>
From: Chuck Lever <chuck.lever@oracle.com>
Enable upper layers such as NFSD to retrieve case sensitivity
information from file systems by adding FS_XFLAG_CASEFOLD and
FS_XFLAG_CASENONPRESERVING flags.
Filesystems report case-insensitive or case-nonpreserving behavior
by setting these flags directly in fa->fsx_xflags. The default
(flags unset) indicates POSIX semantics: case-sensitive and
case-preserving. Both flags are added to FS_XFLAG_RDONLY_MASK so
FS_IOC_FSSETXATTR silently strips them, keeping the new xflags
strictly a reporting interface. Callers that want to toggle
casefolding continue to use FS_IOC_SETFLAGS with FS_CASEFOLD_FL,
the established UAPI on filesystems that support the operation
(ext4 and f2fs on empty directories).
Case sensitivity information is exported to userspace via the
fsx_xflags field of the FS_IOC_FSGETXATTR ioctl (struct fsxattr)
and the fa_xflags field of the file_getattr() system call
(struct file_attr).
Reviewed-by: "Darrick J. Wong" <djwong@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Roland Mainz <roland.mainz@nrubsig.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
fs/file_attr.c | 4 ++++
include/linux/fileattr.h | 3 ++-
include/uapi/linux/fs.h | 7 +++++++
3 files changed, 13 insertions(+), 1 deletion(-)
diff --git a/fs/file_attr.c b/fs/file_attr.c
index f429da66a317..bfb00d256dd5 100644
--- a/fs/file_attr.c
+++ b/fs/file_attr.c
@@ -37,6 +37,8 @@ void fileattr_fill_xflags(struct file_kattr *fa, u32 xflags)
fa->flags |= FS_PROJINHERIT_FL;
if (fa->fsx_xflags & FS_XFLAG_VERITY)
fa->flags |= FS_VERITY_FL;
+ if (fa->fsx_xflags & FS_XFLAG_CASEFOLD)
+ fa->flags |= FS_CASEFOLD_FL;
}
EXPORT_SYMBOL(fileattr_fill_xflags);
@@ -67,6 +69,8 @@ void fileattr_fill_flags(struct file_kattr *fa, u32 flags)
fa->fsx_xflags |= FS_XFLAG_PROJINHERIT;
if (fa->flags & FS_VERITY_FL)
fa->fsx_xflags |= FS_XFLAG_VERITY;
+ if (fa->flags & FS_CASEFOLD_FL)
+ fa->fsx_xflags |= FS_XFLAG_CASEFOLD;
}
EXPORT_SYMBOL(fileattr_fill_flags);
diff --git a/include/linux/fileattr.h b/include/linux/fileattr.h
index 3780904a63a6..58044b598016 100644
--- a/include/linux/fileattr.h
+++ b/include/linux/fileattr.h
@@ -16,7 +16,8 @@
/* Read-only inode flags */
#define FS_XFLAG_RDONLY_MASK \
- (FS_XFLAG_PREALLOC | FS_XFLAG_HASATTR | FS_XFLAG_VERITY)
+ (FS_XFLAG_PREALLOC | FS_XFLAG_HASATTR | FS_XFLAG_VERITY | \
+ FS_XFLAG_CASEFOLD | FS_XFLAG_CASENONPRESERVING)
/* Flags to indicate valid value of fsx_ fields */
#define FS_XFLAG_VALUES_MASK \
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index 13f71202845e..2ea4c81df08f 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -254,6 +254,13 @@ struct file_attr {
#define FS_XFLAG_DAX 0x00008000 /* use DAX for IO */
#define FS_XFLAG_COWEXTSIZE 0x00010000 /* CoW extent size allocator hint */
#define FS_XFLAG_VERITY 0x00020000 /* fs-verity enabled */
+/*
+ * Case handling flags (read-only, cannot be set via ioctl).
+ * Default (neither set) indicates POSIX semantics: case-sensitive
+ * lookups and case-preserving storage.
+ */
+#define FS_XFLAG_CASEFOLD 0x00040000 /* case-insensitive lookups */
+#define FS_XFLAG_CASENONPRESERVING 0x00080000 /* case not preserved */
#define FS_XFLAG_HASATTR 0x80000000 /* no DIFLAG for this */
/* the read-only stuff doesn't really belong here, but any other place is
--
2.53.0
^ permalink raw reply related
* [PATCH v13 01/15] fs: Move file_kattr initialization to callers
From: Chuck Lever @ 2026-05-02 14:20 UTC (permalink / raw)
To: Al Viro, Christian Brauner, Jan Kara
Cc: linux-fsdevel, linux-ext4, linux-xfs, linux-cifs, linux-nfs,
linux-api, linux-f2fs-devel, hirofumi, linkinjeon, sj1557.seo,
yuezhang.mo, almaz.alexandrovich, slava, glaubitz, frank.li,
tytso, adilger.kernel, cem, sfrench, pc, ronniesahlberg, sprasad,
trondmy, anna, jaegeuk, chao, hansg, senozhatsky, Chuck Lever,
Darrick J. Wong, Roland Mainz
In-Reply-To: <20260502-case-sensitivity-v13-0-aa853140311f@oracle.com>
From: Chuck Lever <chuck.lever@oracle.com>
fileattr_fill_xflags() and fileattr_fill_flags() memset the
entire file_kattr struct before populating select fields, so
callers cannot pre-set fields in fa->fsx_xflags without having
their values clobbered. Darrick Wong noted that a function
named "fill_xflags" touching more than xflags forces callers
to know implementation details beyond its apparent scope.
Drop the memset from both fill functions and initialize at the
entry points instead: ioctl_setflags(), ioctl_fssetxattr(),
the file_setattr() syscall, and xfs_ioc_fsgetxattra() now
declare fa with an aggregate initializer. ioctl_getflags(),
ioctl_fsgetxattr(), and the file_getattr() syscall already
aggregate-initialize fa to pass flags_valid/fsx_valid hints
into vfs_fileattr_get().
Subsequent patches rely on this so that ->fileattr_get()
handlers can set case-sensitivity flags (FS_XFLAG_CASEFOLD,
FS_XFLAG_CASENONPRESERVING) in fa->fsx_xflags before the fill
functions run.
Suggested-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Roland Mainz <roland.mainz@nrubsig.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
fs/file_attr.c | 12 ++++--------
fs/xfs/xfs_ioctl.c | 2 +-
2 files changed, 5 insertions(+), 9 deletions(-)
diff --git a/fs/file_attr.c b/fs/file_attr.c
index da983e105d70..f429da66a317 100644
--- a/fs/file_attr.c
+++ b/fs/file_attr.c
@@ -15,12 +15,10 @@
* @fa: fileattr pointer
* @xflags: FS_XFLAG_* flags
*
- * Set ->fsx_xflags, ->fsx_valid and ->flags (translated xflags). All
- * other fields are zeroed.
+ * Set ->fsx_xflags, ->fsx_valid and ->flags (translated xflags).
*/
void fileattr_fill_xflags(struct file_kattr *fa, u32 xflags)
{
- memset(fa, 0, sizeof(*fa));
fa->fsx_valid = true;
fa->fsx_xflags = xflags;
if (fa->fsx_xflags & FS_XFLAG_IMMUTABLE)
@@ -48,11 +46,9 @@ EXPORT_SYMBOL(fileattr_fill_xflags);
* @flags: FS_*_FL flags
*
* Set ->flags, ->flags_valid and ->fsx_xflags (translated flags).
- * All other fields are zeroed.
*/
void fileattr_fill_flags(struct file_kattr *fa, u32 flags)
{
- memset(fa, 0, sizeof(*fa));
fa->flags_valid = true;
fa->flags = flags;
if (fa->flags & FS_SYNC_FL)
@@ -325,7 +321,7 @@ int ioctl_setflags(struct file *file, unsigned int __user *argp)
{
struct mnt_idmap *idmap = file_mnt_idmap(file);
struct dentry *dentry = file->f_path.dentry;
- struct file_kattr fa;
+ struct file_kattr fa = {};
unsigned int flags;
int err;
@@ -357,7 +353,7 @@ int ioctl_fssetxattr(struct file *file, void __user *argp)
{
struct mnt_idmap *idmap = file_mnt_idmap(file);
struct dentry *dentry = file->f_path.dentry;
- struct file_kattr fa;
+ struct file_kattr fa = {};
int err;
err = copy_fsxattr_from_user(&fa, argp);
@@ -431,7 +427,7 @@ SYSCALL_DEFINE5(file_setattr, int, dfd, const char __user *, filename,
struct path filepath __free(path_put) = {};
unsigned int lookup_flags = 0;
struct file_attr fattr;
- struct file_kattr fa;
+ struct file_kattr fa = {};
int error;
BUILD_BUG_ON(sizeof(struct file_attr) < FILE_ATTR_SIZE_VER0);
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 46e234863644..ed9b4846c05f 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -517,7 +517,7 @@ xfs_ioc_fsgetxattra(
xfs_inode_t *ip,
void __user *arg)
{
- struct file_kattr fa;
+ struct file_kattr fa = {};
xfs_ilock(ip, XFS_ILOCK_SHARED);
xfs_fill_fsxattr(ip, XFS_ATTR_FORK, &fa);
--
2.53.0
^ permalink raw reply related
* [PATCH v13 00/15] Exposing case folding behavior
From: Chuck Lever @ 2026-05-02 14:20 UTC (permalink / raw)
To: Al Viro, Christian Brauner, Jan Kara
Cc: linux-fsdevel, linux-ext4, linux-xfs, linux-cifs, linux-nfs,
linux-api, linux-f2fs-devel, hirofumi, linkinjeon, sj1557.seo,
yuezhang.mo, almaz.alexandrovich, slava, glaubitz, frank.li,
tytso, adilger.kernel, cem, sfrench, pc, ronniesahlberg, sprasad,
trondmy, anna, jaegeuk, chao, hansg, senozhatsky, Chuck Lever,
Darrick J. Wong, Roland Mainz, Steve French
Following on from:
https://lore.kernel.org/linux-nfs/20251021-zypressen-bazillus-545a44af57fd@brauner/T/#m0ba197d75b7921d994cf284f3cef3a62abb11aaa
I'm attempting to implement enough support in the Linux VFS to
enable file services like NFSD and ksmbd (and user space
equivalents) to provide the actual status of case folding support
in local file systems. The default behavior for local file systems
not explicitly supported in this series is to reflect the usual
POSIX behaviors:
case-insensitive = false
case-nonpreserving = false
The case-insensitivity and case-nonpreserving booleans can be
consumed immediately by NFSD. These two attributes have been part of
the NFSv3 and NFSv4 protocols for decades, in order to support NFS
client implementations on non-POSIX systems.
Support for user space file servers is why this series exposes case
folding information via a user-space API. I don't know of any other
category of user-space application that requires access to case
folding info.
The Linux NFS community has a growing interest in supporting NFS
clients on Windows and MacOS platforms, where file name behavior does
not align with traditional POSIX semantics.
One example of a Windows-based NFS client is [1]. This client
implementation explicitly requires servers to report
FATTR4_WORD0_CASE_INSENSITIVE = TRUE for proper operation, a hard
requirement for Windows client interoperability because Windows
applications expect case-insensitive behavior. When an NFS client
knows the server is case-insensitive, it can avoid issuing multiple
LOOKUP/READDIR requests to search for case variants, and applications
like Win32 programs work correctly without manual workarounds or
code changes.
Even the Linux client can take advantage of this information. Trond
merged patches 4 years ago [2] that introduce support for case
insensitivity, in support of the Hammerspace NFS server. In
particular, when a client detects a case-insensitive NFS share,
negative dentry caching must be disabled (a lookup for "FILE.TXT"
failing shouldn't cache a negative entry when "file.txt" exists)
and directory change invalidation must clear all cached case-folded
file name variants.
Hammerspace servers and several other NFS server implementations
operate in multi-protocol environments, where a single file service
instance caters to both NFS and SMB clients. In those cases, things
work more smoothly for everyone when the NFS client can see and adapt
to the case folding behavior that SMB users rely on and expect. NFSD
needs to support the case-insensitivity and case-nonpreserving
booleans properly in order to participate as a first-class citizen
in such environments.
[1] https://github.com/kofemann/ms-nfs41-client
[2] https://patchwork.kernel.org/project/linux-nfs/cover/20211217203658.439352-1-trondmy@kernel.org/
---
Changes since v12:
- Address findings from sashiko (gemini-3.1):
- cifs: Restrict case-handling flags to directories per UAPI
- nfs: Clear case caps before PATHCONF so a failed reply
does not retain stale bits from the prior probe
- nfsd: Document the parent-resolution corner cases of
nfsd_get_case_info() (single-file exports, disconnected
dentries, hardlinks) in the v3 and v4 commit messages
Changes since v11:
- isofs: Wire .fileattr_get only on directory inodes, since
NFSD and ksmbd query casefolding on directories (Jan Kara)
- xfs, hfsplus: Drop the FS_CASEFOLD_FL fileattr_get mask;
admit the bit through fileattr_set's allowlist instead
- Address findings from sashiko(gemini-3) and gpt-5.5:
- cifs: Wire .fileattr_get on cifs_namespace_inode_operations
so DFS referral / automount directories report case handling
- fat, ntfs3: Fill FS_IMMUTABLE_FL in fileattr_get
- hfsplus: Hide FS_CASEFOLD_FL from the legacy flags view so
chattr round-trips do not hit the setflags whitelist
- nfs: Clear NFS_CAP_CASE_INSENSITIVE and
NFS_CAP_CASE_NONPRESERVING before re-OR'ing in the v3 and
v4 probe paths so re-probe / TSM does not retain stale caps
- nfsd: Switch nfsd_get_case_info() to errno return so
v3 PATHCONF and v4 GETATTR can apply version-appropriate
policy on failure
- nfsd: Use dget_parent() in v4 case-attr probe to keep
the parent dentry referenced across the query
- isofs: Report FS_XFLAG_CASENONPRESERVING for map=n/map=a
Changes since v10:
- cifs: Source case-handling flags from the server's cached
FS_ATTRIBUTE_INFORMATION reply instead of the nocase mount
option, with a nocase fallback when the reply is absent
- Address findings from sashiko(gemini-3) and gpt-5.5:
- nfs: Skip pathconf case bits on NFSv4 (set via FATTR4_CASE_*
instead)
- xfs: Hide FS_CASEFOLD_FL from the legacy flags view so
chattr round-trips do not hit the setflags whitelist
- ext4, f2fs: Drop redundant fileattr_get patches; the
FS_CASEFOLD_FL translation in fileattr_fill_flags() already
reports FS_XFLAG_CASEFOLD for casefolded directories
- nfsd: Report FATTR4_HOMOGENEOUS = FALSE when the exported
filesystem has a Unicode encoding, since per-directory
casefold makes the fs-scoped case attributes inhomogeneous
- nfsd: Document in nfsd_get_case_info() why -ENOIOCTLCMD and
-ENOTTY are swallowed while other errors propagate
- fat: Honor vfat 'check=strict' when reporting FS_XFLAG_CASEFOLD
- Set FS_CASEFOLD_FL so FS_IOC_GETFLAGS reflects case-insensitive
mount
- isofs: Register fileattr_get on regular file and symlink inodes,
not just directories
- nfsd: Query NFSv4 FATTR4_CASE_* from the parent directory for
non-directory objects, since casefold lives on the directory
Changes since v9:
- nfs: always probe PATHCONF for case caps. Default to case-
preserving when the server does not report case_preserving
- nfsd, ksmbd: tolerate -ENOTTY from vfs_fileattr_get() so
overlayfs exports on backing filesystems without fileattr_get
do not fail the RPC
- xfs: map FS_XFLAG_CASEFOLD inside xfs_ip2xflags() so BULKSTAT
and FS_IOC_FSGETXATTR report the flag consistently
- vboxsf: reject a short host reply to SHFL_INFO_VOLUME before
trusting volinfo.properties.case_sensitive
Changes since v8:
- Rebase on v7.0-rc1
Changes since v7:
- Split file_attr initialization changes into a separate patch
Changes since v6:
- Remove the memset from vfs_fileattr_get
Changes since v5:
- Finish the conversion to FS_XFLAGs
- NFSv4 GETATTR now clears the attr mask bit if nfsd_get_case_info()
fails
Changes since v4:
- Observe the MSDOS "nocase" mount option
- Define new FS_XFLAGs for the user API
Changes since v3:
- Change fa->case_preserving to fa_case_nonpreserving
- VFAT is case preserving
- Make new fields available to user space
Changes since v2:
- Remove unicode labels
- Replace vfs_get_case_info
- Add support for several more local file system implementations
- Add support for in-kernel SMB server
Changes since RFC:
- Use file_getattr instead of statx
- Postpone exposing Unicode version until later
- Support NTFS and ext4 in addition to FAT
- Support NFSv4 fattr4 in addition to NFSv3 PATHCONF
---
Chuck Lever (15):
fs: Move file_kattr initialization to callers
fs: Add case sensitivity flags to file_kattr
fat: Implement fileattr_get for case sensitivity
exfat: Implement fileattr_get for case sensitivity
ntfs3: Implement fileattr_get for case sensitivity
hfs: Implement fileattr_get for case sensitivity
hfsplus: Report case sensitivity in fileattr_get
xfs: Report case sensitivity in fileattr_get
cifs: Implement fileattr_get for case sensitivity
nfs: Implement fileattr_get for case sensitivity
vboxsf: Implement fileattr_get for case sensitivity
isofs: Implement fileattr_get for case sensitivity
nfsd: Report export case-folding via NFSv3 PATHCONF
nfsd: Implement NFSv4 FATTR4_CASE_INSENSITIVE and FATTR4_CASE_PRESERVING
ksmbd: Report filesystem case sensitivity via FS_ATTRIBUTE_INFORMATION
fs/exfat/exfat_fs.h | 2 ++
fs/exfat/file.c | 18 +++++++++--
fs/exfat/namei.c | 1 +
fs/fat/fat.h | 3 ++
fs/fat/file.c | 36 +++++++++++++++++++++
fs/fat/namei_msdos.c | 1 +
fs/fat/namei_vfat.c | 1 +
fs/file_attr.c | 16 +++++-----
fs/hfs/dir.c | 1 +
fs/hfs/hfs_fs.h | 2 ++
fs/hfs/inode.c | 14 ++++++++
fs/hfsplus/inode.c | 16 +++++++++-
fs/isofs/dir.c | 16 ++++++++++
fs/isofs/isofs.h | 3 ++
fs/nfs/client.c | 21 ++++++++----
fs/nfs/inode.c | 15 +++++++++
fs/nfs/internal.h | 3 ++
fs/nfs/namespace.c | 2 ++
fs/nfs/nfs3proc.c | 2 ++
fs/nfs/nfs3xdr.c | 7 ++--
fs/nfs/nfs4proc.c | 10 ++++--
fs/nfs/proc.c | 3 ++
fs/nfs/symlink.c | 3 ++
fs/nfsd/nfs3proc.c | 36 ++++++++++++++++-----
fs/nfsd/nfs4xdr.c | 52 ++++++++++++++++++++++++++++--
fs/nfsd/vfs.c | 72 ++++++++++++++++++++++++++++++++++++++++++
fs/nfsd/vfs.h | 3 ++
fs/nfsd/xdr3.h | 4 +--
fs/ntfs3/file.c | 29 +++++++++++++++++
fs/ntfs3/inode.c | 1 +
fs/ntfs3/namei.c | 2 ++
fs/ntfs3/ntfs_fs.h | 1 +
fs/smb/client/cifsfs.c | 53 +++++++++++++++++++++++++++++++
fs/smb/client/cifsfs.h | 3 ++
fs/smb/client/namespace.c | 1 +
fs/smb/server/smb2pdu.c | 30 ++++++++++++++----
fs/vboxsf/dir.c | 1 +
fs/vboxsf/file.c | 6 ++--
fs/vboxsf/super.c | 7 ++++
fs/vboxsf/utils.c | 30 ++++++++++++++++++
fs/vboxsf/vfsmod.h | 6 ++++
fs/xfs/libxfs/xfs_inode_util.c | 2 ++
fs/xfs/xfs_ioctl.c | 22 ++++++++++---
include/linux/fileattr.h | 3 +-
include/linux/nfs_fs_sb.h | 2 +-
include/linux/nfs_xdr.h | 2 ++
include/uapi/linux/fs.h | 7 ++++
47 files changed, 522 insertions(+), 49 deletions(-)
---
base-commit: 6596a02b207886e9e00bb0161c7fd59fea53c081
change-id: 20260422-case-sensitivity-5cbffc8f1558
Best regards,
--
Chuck Lever <chuck.lever@oracle.com>
^ permalink raw reply
* Re: [PATCH 02/13] mount_service: add systemd socket service mounting helper
From: Darrick J. Wong @ 2026-05-01 17:35 UTC (permalink / raw)
To: bernd; +Cc: linux-fsdevel, fuse-devel, linux-ext4, miklos, neal, joannelkoong
In-Reply-To: <177758363568.1314717.5220084842430554136.stgit@frogsfrogsfrogs>
On Thu, Apr 30, 2026 at 02:15:32PM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <djwong@kernel.org>
>
> Create a mount helper program that can start a fuse server that runs as
> a socket-based systemd service, and a new libfuse module to wrap all the
> details of communicating between the mount helper and the containerized
> fuse server.
>
> This enables untrusted ext4 mounts via systemd service containers, which
> avoids the problem of malicious filesystems compromising the integrity
> of the running kernel through memory corruption.
>
> In theory this could also be supported via inetd and clones, though the
> author hasn't found one that supports AF_UNIX sockets.
>
> Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
> ---
> include/fuse_service.h | 243 ++++
> include/fuse_service_priv.h | 160 ++
> lib/mount_common_i.h | 3
> util/mount_service.h | 40 +
> .github/workflows/install-ubuntu-dependencies.sh | 4
> doc/fuservicemount3.8 | 24
> doc/meson.build | 3
> include/meson.build | 4
> lib/fuse_service.c | 1233 +++++++++++++++++++
> lib/fuse_service_stub.c | 106 ++
> lib/fuse_versionscript | 17
> lib/helper.c | 51 +
> lib/meson.build | 17
> lib/mount.c | 12
> meson.build | 34 +
> meson_options.txt | 9
> util/fuservicemount.c | 18
> util/meson.build | 9
> util/mount_service.c | 1427 ++++++++++++++++++++++
> 19 files changed, 3412 insertions(+), 2 deletions(-)
> create mode 100644 include/fuse_service.h
> create mode 100644 include/fuse_service_priv.h
> create mode 100644 util/mount_service.h
> create mode 100644 doc/fuservicemount3.8
> create mode 100644 lib/fuse_service.c
> create mode 100644 lib/fuse_service_stub.c
> create mode 100644 util/fuservicemount.c
> create mode 100644 util/mount_service.c
>
>
> diff --git a/include/fuse_service.h b/include/fuse_service.h
> new file mode 100644
> index 00000000000000..7e4c204e7a70bf
> --- /dev/null
> +++ b/include/fuse_service.h
> @@ -0,0 +1,243 @@
> +/*
> + * FUSE: Filesystem in Userspace
> + * Copyright (C) 2025-2026 Oracle.
> + * Author: Darrick J. Wong <djwong@kernel.org>
> + *
> + * This program can be distributed under the terms of the GNU LGPLv2.
> + * See the file LGPL2.txt.
> + */
> +#ifndef FUSE_SERVICE_H_
> +#define FUSE_SERVICE_H_
> +
> +/** @file
> + *
> + * Low level API
> + *
> + * IMPORTANT: you should define FUSE_USE_VERSION before including this
> + * header. To use the newest API define it to 319 (recommended for any
> + * new application).
> + */
> +
> +#ifndef FUSE_USE_VERSION
> +#error FUSE_USE_VERSION not defined
> +#endif
> +
> +#include "fuse_common.h"
> +
> +#ifdef __cplusplus
> +extern "C" {
> +#endif
> +
> +#if FUSE_MAKE_VERSION(3, 19) <= FUSE_USE_VERSION
> +
> +struct fuse_service;
> +
> +/**
> + * Accept a socket created by mount.service for information exchange.
> + *
> + * @param sfp pointer to pointer to a service context. The pointer will always
> + * be initialized by this function; use fuse_service_accepted to
> + * find out if the fuse server is actually running as a service.
> + * @return 0 on success, or negative errno on failure
> + */
> +int fuse_service_accept(struct fuse_service **sfp);
> +
> +/**
> + * Has the fuse server accepted a service context?
> + *
> + * @param sf service context
> + * @return true if it has, false if not
> + */
> +static inline bool fuse_service_accepted(struct fuse_service *sf)
> +{
> + return sf != NULL;
> +}
> +
> +/**
> + * Will the mount service helper accept the allow_other option?
> + *
> + * @param sf service context
> + * @return true if it will, false if not
> + */
> +bool fuse_service_can_allow_other(struct fuse_service *sf);
> +
> +/**
> + * Release all resources associated with the service context.
> + *
> + * @param sf service context
> + */
> +void fuse_service_release(struct fuse_service *sf);
> +
> +/**
> + * Destroy a service context and release all resources
> + *
> + * @param sfp pointer to pointer to a service context
> + */
> +void fuse_service_destroy(struct fuse_service **sfp);
> +
> +/**
> + * Append the command line arguments from the mount service helper to an
> + * existing fuse_args structure. The fuse_args should have been initialized
> + * with the argc and argv passed to main().
> + *
> + * @param sf service context
> + * @param args arguments to modify (input+output)
> + * @return 0 on success, or negative errno on failure
> + */
> +int fuse_service_append_args(struct fuse_service *sf, struct fuse_args *args);
> +
> +/**
> + * Generate the effective fuse server command line from the args structure.
> + * The args structure should be the outcome from fuse_service_append_args.
> + * The resulting string is suitable for setproctitle and must be freed by the
> + * caller.
> + *
> + * @param argc argument count passed to main()
> + * @param argv argument vector passed to main()
> + * @param args fuse args structure
> + * @return effective command line string, or NULL
> + */
> +char *fuse_service_cmdline(int argc, char *argv[], struct fuse_args *args);
> +
> +struct fuse_cmdline_opts;
> +
> +/**
> + * Utility function to parse common options for simple file systems
> + * using the low-level API. A help text that describes the available
> + * options can be printed with `fuse_cmdline_help`. A single
> + * non-option argument is treated as the mountpoint. Multiple
> + * non-option arguments will result in an error.
> + *
> + * If neither -o subtype= or -o fsname= options are given, a new
> + * subtype option will be added and set to the basename of the program
> + * (the fsname will remain unset, and then defaults to "fuse").
> + *
> + * Known options will be removed from *args*, unknown options will
> + * remain. The mountpoint will not be checked here; that is the job of
> + * mount.service.
> + *
> + * @param args argument vector (input+output)
> + * @param opts output argument for parsed options
> + * @return 0 on success, -1 on failure
> + */
> +int fuse_service_parse_cmdline_opts(struct fuse_args *args,
> + struct fuse_cmdline_opts *opts);
> +
> +/**
> + * Don't complain if this file cannot be opened.
> + */
> +#define FUSE_SERVICE_REQUEST_FILE_QUIET (1U << 0)
> +
> +/**
> + * Ask the mount.service helper to open a file on behalf of the fuse server.
> + *
> + * @param sf service context
> + * @param path the path to file
> + * @param open_flags O_ flags
> + * @param create_mode mode with which to create the file
> + * @param request_flags set of FUSE_SERVICE_REQUEST_* flags
> + * @return 0 on success, or negative errno on failure
> + */
> +int fuse_service_request_file(struct fuse_service *sf, const char *path,
> + int open_flags, mode_t create_mode,
> + unsigned int request_flags);
> +
> +/**
> + * Ask the mount.service helper to open a block device on behalf of the fuse
> + * server.
> + *
> + * @param sf service context
> + * @param path the path to file
> + * @param open_flags O_ flags
> + * @param create_mode mode with which to create the file
> + * @param request_flags set of FUSE_SERVICE_REQUEST_* flags
> + * @param block_size set the block device block size to this value
> + * @return 0 on success, or negative errno on failure
> + */
> +int fuse_service_request_blockdev(struct fuse_service *sf, const char *path,
> + int open_flags, mode_t create_mode,
> + unsigned int request_flags,
> + unsigned int block_size);
> +
> +/**
> + * Receive a file previously requested.
> + *
> + * @param sf service context
> + * @param path to file
> + * @param fdp pointer to file descriptor, which will be set to a non-negative file
> + * descriptor value on success, or negative errno on failure
> + * @return 0 on success, or negative errno on socket communication failure
> + */
> +int fuse_service_receive_file(struct fuse_service *sf,
> + const char *path, int *fdp);
> +
> +/**
> + * Prevent the mount.service server from sending us any more open files.
> + *
> + * @param sf service context
> + * @return 0 on success, or negative errno on failure
> + */
> +int fuse_service_finish_file_requests(struct fuse_service *sf);
> +
> +/**
> + * Require that the filesystem mount point have the expected file format
> + * (S_IFDIR/S_IFREG). Can be overridden when calling
> + * fuse_service_session_mount.
> + *
> + * @param sf service context
> + * @param expected_fmt expected mode (S_IFDIR/S_IFREG) for mount point, or 0
> + * to skip checks
> + */
> +void fuse_service_expect_mount_format(struct fuse_service *sf,
> + mode_t expected_fmt);
> +
> +/**
> + * Bind a FUSE file system to the fuse session inside a fuse service process,
> + * then ask the mount.service helper to mount the filesystem for us. The fuse
> + * client will begin sending requests to the fuse server immediately after
> + * this. Do not call fuse_daemonize() when running as a fuse service.
> + *
> + * @param sf service context
> + * @param se fuse session
> + * @param expected_fmt expected mode (S_IFDIR/S_IFREG) for mount point, or 0
> + * to skip checks
> + * @param opts command line options
> + * @return 0 on success, or negative errno on failure
> + */
> +int fuse_service_session_mount(struct fuse_service *sf, struct fuse_session *se,
> + mode_t expected_fmt,
> + struct fuse_cmdline_opts *opts);
> +
> +/**
> + * Ask the mount helper to unmount the filesystem.
> + *
> + * @param sf service context
> + * @return 0 on success, or negative errno on failure
> + */
> +int fuse_service_session_unmount(struct fuse_service *sf);
> +
> +/**
> + * Bid farewell to the mount.service helper. It is still necessary to call
> + * fuse_service_destroy after this.
> + *
> + * @param sf service context
> + * @param exitcode fuse server process exit status
> + * @return 0 on success, or negative errno on failure
> + */
> +int fuse_service_send_goodbye(struct fuse_service *sf, int exitcode);
> +
> +/**
> + * Exit routine for a fuse server running as a systemd service.
> + *
> + * @param ret 0 for success, nonzero for service failure.
> + * @return a value to be passed to exit() or returned from main
> + */
> +int fuse_service_exit(int ret);
> +
> +#endif /* FUSE_USE_VERSION >= FUSE_MAKE_VERSION(3, 19) */
> +
> +#ifdef __cplusplus
> +}
> +#endif
> +
> +#endif /* FUSE_SERVICE_H_ */
> diff --git a/include/fuse_service_priv.h b/include/fuse_service_priv.h
> new file mode 100644
> index 00000000000000..a3773d90c7db7e
> --- /dev/null
> +++ b/include/fuse_service_priv.h
> @@ -0,0 +1,160 @@
> +/*
> + * FUSE: Filesystem in Userspace
> + * Copyright (C) 2025-2026 Oracle.
> + * Author: Darrick J. Wong <djwong@kernel.org>
> + *
> + * This program can be distributed under the terms of the GNU LGPLv2.
> + * See the file LGPL2.txt.
> + */
> +#ifndef FUSE_SERVICE_PRIV_H_
> +#define FUSE_SERVICE_PRIV_H_
> +
> +/* All numeric fields are network order (big-endian) when going across the socket */
> +
> +struct fuse_service_memfd_arg {
> + uint32_t pos;
> + uint32_t len;
> +};
> +
> +struct fuse_service_memfd_argv {
> + uint32_t magic;
> + uint32_t argc;
> +};
> +
> +#define FUSE_SERVICE_MAX_CMD_SIZE (65536)
> +
> +#define FUSE_SERVICE_ARGS_MAGIC 0x41524753 /* ARGS */
> +
> +/* mount.service sends a hello to the server and it replies */
> +#define FUSE_SERVICE_HELLO_CMD 0x53414654 /* SAFT */
> +#define FUSE_SERVICE_HELLO_REPLY 0x4c415354 /* LAST */
> +
> +/* fuse servers send commands to mount.service */
> +#define FUSE_SERVICE_OPEN_CMD 0x4f50454e /* OPEN */
> +#define FUSE_SERVICE_OPEN_BDEV_CMD 0x42444556 /* BDEV */
> +#define FUSE_SERVICE_FSOPEN_CMD 0x54595045 /* TYPE */
> +#define FUSE_SERVICE_SOURCE_CMD 0x4e414d45 /* NAME */
> +#define FUSE_SERVICE_MNTOPTS_CMD 0x4f505453 /* OPTS */
> +#define FUSE_SERVICE_MNTPT_CMD 0x4d4e5450 /* MNTP */
> +#define FUSE_SERVICE_MOUNT_CMD 0x444f4954 /* DOIT */
> +#define FUSE_SERVICE_UNMOUNT_CMD 0x554d4e54 /* UMNT */
> +#define FUSE_SERVICE_BYE_CMD 0x42594545 /* BYEE */
> +
> +/* mount.service sends replies to the fuse server */
> +#define FUSE_SERVICE_OPEN_REPLY 0x46494c45 /* FILE */
> +#define FUSE_SERVICE_SIMPLE_REPLY 0x5245504c /* REPL */
> +
> +struct fuse_service_packet {
> + uint32_t magic; /* FUSE_SERVICE_*_{CMD,REPLY} */
> +};
> +
> +#define FUSE_SERVICE_PROTO (1)
> +#define FUSE_SERVICE_MIN_PROTO (1)
> +#define FUSE_SERVICE_MAX_PROTO (1)
> +
> +#define FUSE_SERVICE_FLAG_ALLOW_OTHER (1U << 0)
> +
> +#define FUSE_SERVICE_FLAGS (FUSE_SERVICE_FLAG_ALLOW_OTHER)
> +
> +struct fuse_service_hello {
> + struct fuse_service_packet p;
> + uint16_t min_version;
> + uint16_t max_version;
> + uint32_t flags;
> +};
> +
> +static inline bool check_null_endbyte(const void *p, size_t psz)
> +{
> + return *((const char *)p + psz - 1) == 0;
> +}
> +
> +struct fuse_service_hello_reply {
> + struct fuse_service_packet p;
> + uint16_t version;
> + uint16_t padding;
> +};
> +
> +struct fuse_service_simple_reply {
> + struct fuse_service_packet p;
> + uint32_t error; /* positive errno */
> +};
> +
> +struct fuse_service_requested_file {
> + struct fuse_service_packet p;
> + uint32_t error; /* positive errno */
> + char path[];
> +};
> +
> +static inline size_t sizeof_fuse_service_requested_file(size_t pathlen)
> +{
> + return sizeof(struct fuse_service_requested_file) + pathlen + 1;
> +}
> +
> +#define FUSE_SERVICE_FSOPEN_FUSEBLK (1U << 0)
> +#define FUSE_SERVICE_FSOPEN_FLAGS (FUSE_SERVICE_FSOPEN_FUSEBLK)
> +
> +struct fuse_service_fsopen_command {
> + struct fuse_service_packet p;
> + uint32_t fsopen_flags;
> +};
> +
> +#define FUSE_SERVICE_OPEN_QUIET (1U << 0)
> +#define FUSE_SERVICE_OPEN_FLAGS (FUSE_SERVICE_OPEN_QUIET)
> +
> +struct fuse_service_open_command {
> + struct fuse_service_packet p;
> + uint32_t open_flags;
> + uint32_t create_mode;
> + uint32_t request_flags;
> + uint32_t block_size;
> + char path[];
> +};
> +
> +static inline size_t sizeof_fuse_service_open_command(size_t pathlen)
> +{
> + return sizeof(struct fuse_service_open_command) + pathlen + 1;
> +}
> +
> +struct fuse_service_string_command {
> + struct fuse_service_packet p;
> + char value[];
> +};
> +
> +static inline size_t sizeof_fuse_service_string_command(size_t len)
> +{
> + return sizeof(struct fuse_service_string_command) + len + 1;
> +}
> +
> +struct fuse_service_mountpoint_command {
> + struct fuse_service_packet p;
> + uint16_t expected_fmt;
> + uint16_t padding;
> + char value[];
> +};
> +
> +static inline size_t sizeof_fuse_service_mountpoint_command(size_t len)
> +{
> + return sizeof(struct fuse_service_mountpoint_command) + len + 1;
> +}
> +
> +struct fuse_service_bye_command {
> + struct fuse_service_packet p;
> + uint32_t exitcode;
> +};
> +
> +struct fuse_service_mount_command {
> + struct fuse_service_packet p;
> + uint32_t ms_flags;
> +};
> +
> +struct fuse_service_unmount_command {
> + struct fuse_service_packet p;
> +};
> +
> +int fuse_parse_cmdline_service(struct fuse_args *args,
> + struct fuse_cmdline_opts *opts);
> +
> +#define FUSE_SERVICE_ARGV "argv"
> +#define FUSE_SERVICE_FUSEDEV "fusedev"
> +
> +#endif /* FUSE_SERVICE_PRIV_H_ */
> diff --git a/lib/mount_common_i.h b/lib/mount_common_i.h
> index 6bcb055ff1c23f..631dff3e6f8aaf 100644
> --- a/lib/mount_common_i.h
> +++ b/lib/mount_common_i.h
> @@ -14,5 +14,8 @@ struct mount_opts;
>
> char *fuse_mnt_build_source(const struct mount_opts *mo);
> char *fuse_mnt_build_type(const struct mount_opts *mo);
> +char *fuse_mnt_kernel_opts(const struct mount_opts *mo);
> +unsigned int fuse_mnt_flags(const struct mount_opts *mo);
> +
>
> #endif /* FUSE_MOUNT_COMMON_I_H_ */
> diff --git a/util/mount_service.h b/util/mount_service.h
> new file mode 100644
> index 00000000000000..a0b952a15dacf3
> --- /dev/null
> +++ b/util/mount_service.h
> @@ -0,0 +1,40 @@
> +/*
> + * FUSE: Filesystem in Userspace
> + * Copyright (C) 2025-2026 Oracle.
> + * Author: Darrick J. Wong <djwong@kernel.org>
> + *
> + * This program can be distributed under the terms of the GNU GPLv2.
> + * See the file GPL2.txt.
> + */
> +#ifndef MOUNT_SERVICE_H_
> +#define MOUNT_SERVICE_H_
> +
> +/**
> + * Magic value that means that we couldn't connect to the mount service,
> + * so the caller should try to fall back to traditional means.
> + */
> +#define MOUNT_SERVICE_FALLBACK_NEEDED (2)
> +
> +/**
> + * Connect to a fuse service socket and try to mount the filesystem as
> + * specified with the CLI arguments.
> + *
> + * @argc argument count
> + * @argv vector of argument strings
> + * @return EXIT_SUCCESS for success, EXIT_FAILURE if mount fails, or
> + * MOUNT_SERVICE_FALLBACK_NEEDED if no service is available.
> + */
> +int mount_service_main(int argc, char *argv[]);
> +
> +/**
> + * Return the fuse filesystem subtype from a full fuse filesystem type
> + * specification. IOWs, fuse.Y -> Y; fuseblk.Z -> Z; or A -> A. The returned
> + * pointer is within the caller's string. The subtype must not contain a path
> + * separator.
> + *
> + * @param fstype full fuse filesystem type
> + * @return fuse subtype
> + */
> +const char *mount_service_subtype(const char *fstype);
> +
> +#endif /* MOUNT_SERVICE_H_ */
> diff --git a/.github/workflows/install-ubuntu-dependencies.sh b/.github/workflows/install-ubuntu-dependencies.sh
> index 0eb7e610729b7c..9f6e69701438f3 100755
> --- a/.github/workflows/install-ubuntu-dependencies.sh
> +++ b/.github/workflows/install-ubuntu-dependencies.sh
> @@ -15,6 +15,8 @@ PACKAGES_CORE=(
> pkg-config
> python3
> python3-pip
> + libsystemd-dev
> + systemd-dev
> )
>
> PACKAGES_FULL=(
> @@ -31,6 +33,8 @@ PACKAGES_FULL=(
> libudev-dev:i386
> pkg-config:i386
> python3-pytest
> + libsystemd-dev
> + systemd-dev
> )
>
> PACKAGES_CODECHECKER=(
> diff --git a/doc/fuservicemount3.8 b/doc/fuservicemount3.8
> new file mode 100644
> index 00000000000000..e45d6a89c8b81a
> --- /dev/null
> +++ b/doc/fuservicemount3.8
> @@ -0,0 +1,24 @@
> +.TH fuservicemount3 "8"
> +.SH NAME
> +fuservicemount3 \- mount a FUSE filesystem that runs as a system socket service
> +.SH SYNOPSIS
> +.B fuservicemount3
> +.B source
> +.B mountpoint
> +.BI -t " fstype"
> +[
> +.I options
> +]
> +.SH DESCRIPTION
> +Mount a filesystem using a FUSE server that runs as a socket service.
> +These servers can be contained using the platform's service management
> +framework.
> +.SH "AUTHORS"
> +.LP
> +The author of the fuse socket service code is Darrick J. Wong <djwong@kernel.org>.
> +Debian GNU/Linux distribution.
> +.SH SEE ALSO
> +.BR fusermount3 (1)
> +.BR fusermount (1)
> +.BR mount (8)
> +.BR fuse (4)
> diff --git a/doc/meson.build b/doc/meson.build
> index db3e0b26f71975..c105cf3471fdf4 100644
> --- a/doc/meson.build
> +++ b/doc/meson.build
> @@ -2,3 +2,6 @@ if not platform.endswith('bsd') and platform != 'dragonfly'
> install_man('fusermount3.1', 'mount.fuse3.8')
> endif
>
> +if private_cfg.get('HAVE_SERVICEMOUNT', false)
> + install_man('fuservicemount3.8')
> +endif
> diff --git a/include/meson.build b/include/meson.build
> index bf671977a5a6a9..da51180f87eea2 100644
> --- a/include/meson.build
> +++ b/include/meson.build
> @@ -1,4 +1,8 @@
> libfuse_headers = [ 'fuse.h', 'fuse_common.h', 'fuse_lowlevel.h',
> 'fuse_opt.h', 'cuse_lowlevel.h', 'fuse_log.h' ]
>
> +if private_cfg.get('HAVE_SERVICEMOUNT', false)
> + libfuse_headers += [ 'fuse_service.h' ]
> +endif
> +
> install_headers(libfuse_headers, subdir: 'fuse3')
> diff --git a/lib/fuse_service.c b/lib/fuse_service.c
> new file mode 100644
> index 00000000000000..ef512c76120a0f
> --- /dev/null
> +++ b/lib/fuse_service.c
> @@ -0,0 +1,1233 @@
> +/*
> + * FUSE: Filesystem in Userspace
> + * Copyright (C) 2025-2026 Oracle.
> + * Author: Darrick J. Wong <djwong@kernel.org>
> + *
> + * Library functions to support fuse servers that can be run as "safe" systemd
> + * containers.
> + *
> + * This program can be distributed under the terms of the GNU LGPLv2.
> + * See the file LGPL2.txt
> + */
> +
> +#define _GNU_SOURCE
> +#include <stdint.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <stdio.h>
> +#include <errno.h>
> +#include <sys/socket.h>
> +#include <sys/un.h>
> +#include <unistd.h>
> +#include <sys/stat.h>
> +#include <fcntl.h>
> +#include <systemd/sd-daemon.h>
> +#include <arpa/inet.h>
> +#include <limits.h>
> +
> +#include "fuse_config.h"
> +#include "fuse_i.h"
> +#include "fuse_service_priv.h"
> +#include "fuse_service.h"
> +#include "mount_common_i.h"
> +
> +/*
> + * State for one fuse server's connection to the mount service helper: the
> + * socket itself plus the file descriptors received over it.
> + */
> +struct fuse_service {
> + /* expected file format of the mount point */
> + mode_t expected_fmt;
> +
> + /* socket fd */
> + int sockfd;
> +
> + /* /dev/fuse device */
> + int fusedevfd;
> +
> + /* memfd for cli arguments */
> + int argvfd;
> +
> + /* do we own fusedevfd?  cleared once the fuse session has taken it */
> + bool owns_fusedevfd;
> +
> + /* can we use allow_other?  set from the helper's hello flags */
> + bool allow_other;
> +};
> +
> +static int __recv_fd(struct fuse_service *sf,
> + struct fuse_service_requested_file *buf,
> + ssize_t bufsize, int *fdp)
> +{
> + struct iovec iov = {
> + .iov_base = buf,
> + .iov_len = bufsize,
> + };
> + union {
> + struct cmsghdr cmsghdr;
> + char control[CMSG_SPACE(sizeof(int))];
> + } cmsgu = { };
> + struct msghdr msg = {
> + .msg_iov = &iov,
> + .msg_iovlen = 1,
> + .msg_control = cmsgu.control,
> +
> + /*
> + * Do not include padding at the end of the control buffer,
> + * because we don't want to receive fds that we weren't
> + * expecting.
> + */
> + .msg_controllen = CMSG_LEN(sizeof(int)),
> + };
> + struct cmsghdr *cmsg;
> + ssize_t size;
> +
> + /*
> + * A kernel LSM could decide to deny the fd transfer by writing a
> + * negative number (== invalid fd) into the cmsg buffer instead of
The kernel doesn't actually do this, so we only need to initialize the
buffer to all 1s (i.e. an fd value of -1) to protect against the kernel
not writing to the cmsg buffer at all -- which it shouldn't do without
also setting MSG_CTRUNC or returning an error, but who knows.
> + * installing the fd. Set the initial fd value to -1 to signal an
> + * invalid fd in case the kernel doesn't even set the cmsg buffer.
> + * It shouldn't do that, but we absolutely don't want a zero here.
> + */
> + memset(cmsgu.control, -1, sizeof(cmsgu.control));
> +
> + size = recvmsg(sf->sockfd, &msg, MSG_TRUNC | MSG_CMSG_CLOEXEC);
> + if (size < 0) {
> + int error = errno;
> +
> + fuse_log(FUSE_LOG_ERR, "fuse: service file reply: %s\n",
> + strerror(error));
> + return -error;
> + }
> + if (size > bufsize ||
> + size < offsetof(struct fuse_service_requested_file, path)) {
> + fuse_log(FUSE_LOG_ERR, "fuse: wrong service file reply size %zd, expected %zd\n",
> + size, bufsize);
> + return -EBADMSG;
> + }
> +
> + if (msg.msg_flags & MSG_CTRUNC) {
> + /* SMACK does this */
> + fuse_log(FUSE_LOG_ERR,
> +"fuse: service file reply control data truncated; did an LSM deny SCM_RIGHTS?\n");
> + return -EBADMSG;
> + }
> +
> + cmsg = CMSG_FIRSTHDR(&msg);
> + if (!cmsg) {
> + /* no control message means mount.service sent us an error */
> + return 0;
> + }
> + if (cmsg->cmsg_len != CMSG_LEN(sizeof(int))) {
> + fuse_log(FUSE_LOG_ERR,
> + "fuse: wrong service file reply control data size %zd, expected %zd\n",
> + cmsg->cmsg_len, CMSG_LEN(sizeof(int)));
> + return -EBADMSG;
> + }
> + if (cmsg->cmsg_level != SOL_SOCKET || cmsg->cmsg_type != SCM_RIGHTS) {
> + fuse_log(FUSE_LOG_ERR,
> +"fuse: wrong service file reply control data level %d type %d, expected %d and %d\n",
> + cmsg->cmsg_level, cmsg->cmsg_type, SOL_SOCKET,
> + SCM_RIGHTS);
> + return -EBADMSG;
> + }
> +
> + memcpy(fdp, (int *)CMSG_DATA(cmsg), sizeof(int));
> + return 0;
> +}
> +
> +/*
> + * Send @len bytes at @ptr to the service socket as a single message.
> + * Returns whatever sendmsg() returns: the byte count, or -1 with errno set.
> + */
> +static ssize_t __send_packet(struct fuse_service *sf, void *ptr, size_t len)
> +{
> +	struct iovec vec;
> +	struct msghdr hdr;
> +
> +	memset(&hdr, 0, sizeof(hdr));
> +	vec.iov_base = ptr;
> +	vec.iov_len = len;
> +	hdr.msg_iov = &vec;
> +	hdr.msg_iovlen = 1;
> +
> +	return sendmsg(sf->sockfd, &hdr, MSG_EOR | MSG_NOSIGNAL);
> +}
> +
> +/*
> + * Receive one message from the service socket into @ptr (at most @len bytes).
> + * Returns whatever recvmsg() returns; MSG_TRUNC is passed so that callers can
> + * compare the result against the size they expected.
> + */
> +static ssize_t __recv_packet(struct fuse_service *sf, void *ptr, size_t len)
> +{
> +	struct iovec vec;
> +	struct msghdr hdr;
> +
> +	memset(&hdr, 0, sizeof(hdr));
> +	vec.iov_base = ptr;
> +	vec.iov_len = len;
> +	hdr.msg_iov = &vec;
> +	hdr.msg_iovlen = 1;
> +
> +	return recvmsg(sf->sockfd, &hdr, MSG_TRUNC);
> +}
> +
> +int fuse_service_receive_file(struct fuse_service *sf, const char *path,
> + int *fdp)
> +{
> + struct fuse_service_requested_file *req;
> + const size_t req_sz = sizeof_fuse_service_requested_file(strlen(path));
> + int fd = -ENOENT;
> + int ret;
> +
> + *fdp = -ENOENT;
> +
> + req = calloc(1, req_sz + 1);
> + if (!req) {
> + int error = errno;
> +
> + fuse_log(FUSE_LOG_ERR, "fuse: alloc service file reply: %s\n",
> + strerror(error));
> + return -error;
> + }
> +
> + ret = __recv_fd(sf, req, req_sz, &fd);
> + if (ret)
> + goto out_req;
> +
> + if (fd < 0) {
> + /* The kernel might have given us an errno instead of an fd */
> + fuse_log(FUSE_LOG_ERR, "fuse: service fd transfer failed: %s\n",
> + strerror(-fd));
> + ret = fd;
> + goto out_req;
> + }
Codex points out that this is nearly redundant with the "fd == -ENOENT"
check below and could be combined. Worse, it actually breaks the two
failure reporting cases below, so...
> +
> + if (ntohl(req->p.magic) != FUSE_SERVICE_OPEN_REPLY) {
> + fuse_log(FUSE_LOG_ERR, "fuse: service file reply contains wrong magic!\n");
> + ret = -EBADMSG;
> + goto out_close;
> + }
> + if (strcmp(req->path, path)) {
> + fuse_log(FUSE_LOG_ERR, "fuse: `%s': not the requested service file, got `%s'\n",
> + path, req->path);
> + ret = -EBADMSG;
> + goto out_close;
> + }
> +
> + if (req->error) {
> + *fdp = -ntohl(req->error);
> + goto out_close;
> + }
> +
> + if (fd == -ENOENT)
> + fuse_log(FUSE_LOG_ERR, "fuse: did not receive `%s' but no error?\n",
> + path);
...I'll compress both clauses into:

	if (fd < 0) {
		fuse_log(FUSE_LOG_ERR,
 "fuse: did not receive valid fd for `%s' but no error?\n",
			 path);
		goto out_req;
	}

--D
> +
> + *fdp = fd;
> + goto out_req;
> +
> +out_close:
> + close(fd);
> +out_req:
> + free(req);
> + return ret;
> +}
> +
> +#define FUSE_SERVICE_REQUEST_FILE_FLAGS (FUSE_SERVICE_REQUEST_FILE_QUIET)
> +
> +/*
> + * Build an open command for @path and send it over the service socket.
> + * A block-device format selects the BDEV variant of the command and carries
> + * @block_size; anything else sends the plain open command.  This only sends
> + * the request; it does not wait for the reply.
> + *
> + * Returns 0 once the command was sent, -EINVAL for unknown request flags, or
> + * a negative errno if allocation or sending failed.
> + */
> +static int fuse_service_request_path(struct fuse_service *sf, const char *path,
> +				     mode_t expected_fmt, int open_flags,
> +				     mode_t create_mode,
> +				     unsigned int request_flags,
> +				     unsigned int block_size)
> +{
> +	const size_t cmdsz = sizeof_fuse_service_open_command(strlen(path));
> +	const int want_bdev = S_ISBLK(expected_fmt);
> +	struct fuse_service_open_command *cmd;
> +	unsigned int rqflags;
> +	int ret = 0;
> +
> +	if (request_flags & ~FUSE_SERVICE_REQUEST_FILE_FLAGS) {
> +		fuse_log(FUSE_LOG_ERR, "fuse: invalid fuse service file request flags 0x%x\n",
> +			 request_flags);
> +		return -EINVAL;
> +	}
> +
> +	/* Translate the library-level flag into its wire-protocol counterpart */
> +	rqflags = (request_flags & FUSE_SERVICE_REQUEST_FILE_QUIET) ?
> +			FUSE_SERVICE_OPEN_QUIET : 0;
> +
> +	cmd = calloc(1, cmdsz);
> +	if (!cmd) {
> +		ret = -errno;
> +		fuse_log(FUSE_LOG_ERR, "fuse: alloc service file request: %s\n",
> +			 strerror(-ret));
> +		return ret;
> +	}
> +
> +	if (want_bdev) {
> +		cmd->p.magic = htonl(FUSE_SERVICE_OPEN_BDEV_CMD);
> +		cmd->block_size = htonl(block_size);
> +	} else {
> +		cmd->p.magic = htonl(FUSE_SERVICE_OPEN_CMD);
> +	}
> +	cmd->open_flags = htonl(open_flags);
> +	cmd->create_mode = htonl(create_mode);
> +	cmd->request_flags = htonl(rqflags);
> +	strcpy(cmd->path, path);
> +
> +	if (__send_packet(sf, cmd, cmdsz) < 0) {
> +		ret = -errno;
> +		fuse_log(FUSE_LOG_ERR, "fuse: request service file: %s\n",
> +			 strerror(-ret));
> +	}
> +
> +	free(cmd);
> +	return ret;
> +}
> +
> +/*
> + * Ask the mount service helper to open @path as a regular file.  This is
> + * fuse_service_request_path() with S_IFREG and no block size.
> + */
> +int fuse_service_request_file(struct fuse_service *sf, const char *path,
> + int open_flags, mode_t create_mode,
> + unsigned int request_flags)
> +{
> + return fuse_service_request_path(sf, path, S_IFREG, open_flags,
> + create_mode, request_flags, 0);
> +}
> +
> +/*
> + * Ask the mount service helper to open @path as a block device, passing
> + * @block_size along.  This is fuse_service_request_path() with S_IFBLK.
> + */
> +int fuse_service_request_blockdev(struct fuse_service *sf, const char *path,
> + int open_flags, mode_t create_mode,
> + unsigned int request_flags,
> + unsigned int block_size)
> +{
> + return fuse_service_request_path(sf, path, S_IFBLK, open_flags,
> + create_mode, request_flags,
> + block_size);
> +}
> +
> +/*
> + * Send the goodbye command carrying @exitcode, then shut down and close the
> + * service socket.  Does nothing if the socket is already closed.
> + *
> + * Returns 0 on success (or if already closed), or a negative errno if the
> + * command could not be sent, in which case the socket is left open.
> + */
> +int fuse_service_send_goodbye(struct fuse_service *sf, int exitcode)
> +{
> +	struct fuse_service_bye_command bye;
> +
> +	/* already gone? */
> +	if (sf->sockfd < 0)
> +		return 0;
> +
> +	memset(&bye, 0, sizeof(bye));
> +	bye.p.magic = htonl(FUSE_SERVICE_BYE_CMD);
> +	bye.exitcode = htonl(exitcode);
> +
> +	if (__send_packet(sf, &bye, sizeof(bye)) < 0) {
> +		const int err = errno;
> +
> +		fuse_log(FUSE_LOG_ERR, "fuse: send service goodbye: %s\n",
> +			 strerror(err));
> +		return -err;
> +	}
> +
> +	shutdown(sf->sockfd, SHUT_RDWR);
> +	close(sf->sockfd);
> +	sf->sockfd = -1;
> +	return 0;
> +}
> +
> +/*
> + * Work out how many listening fds were handed to this process, based on the
> + * LISTEN_PID and LISTEN_FDS environment variables.
> + *
> + * Returns the fd count, or 0 if the variables are missing, malformed, out of
> + * range, or LISTEN_PID does not name this process.  Nothing is logged.
> + */
> +static int count_listen_fds(void)
> +{
> +	const char *fds_str = getenv("LISTEN_FDS");
> +	const char *pid_str = getenv("LISTEN_PID");
> +	char *end;
> +	long val;
> +
> +	/*
> +	 * Without both variables we are not running as a system socket
> +	 * service, so back out quietly.
> +	 */
> +	if (!fds_str || !pid_str)
> +		return 0;
> +
> +	/* LISTEN_PID must parse cleanly and must be our own pid */
> +	errno = 0;
> +	val = strtol(pid_str, &end, 10);
> +	if (errno != 0 || *end != '\0')
> +		return 0;
> +	if (val != getpid())
> +		return 0;
> +
> +	/* LISTEN_FDS must parse cleanly and fit in a non-negative int */
> +	errno = 0;
> +	val = strtol(fds_str, &end, 10);
> +	if (errno != 0 || *end != '\0')
> +		return 0;
> +	if (val < 0 || val > INT_MAX)
> +		return 0;
> +
> +	return val;
> +}
> +
> +/*
> + * Check that the socket's send buffer can hold an open command carrying a
> + * PATH_MAX-long path.
> + *
> + * Returns 0 if the buffer is large enough or its size cannot be determined,
> + * or -ENOBUFS if it is known to be too small.
> + */
> +static int check_sendbuf_size(int sockfd)
> +{
> +	const size_t needed = sizeof_fuse_service_open_command(PATH_MAX);
> +	int current = -1;
> +	socklen_t optlen = sizeof(current);
> +
> +	/*
> +	 * Failing to query the limit is not fatal: huge open commands are
> +	 * unlikely, and an oversized sendmsg would fail on its own anyway.
> +	 */
> +	if (getsockopt(sockfd, SOL_SOCKET, SO_SNDBUF, &current, &optlen) != 0 ||
> +	    current < 0)
> +		return 0;
> +
> +	if ((size_t)current < needed) {
> +		fuse_log(FUSE_LOG_ERR, "max socket send buffer is %d, need at least %zu.\n",
> +			 current, needed);
> +		return -ENOBUFS;
> +	}
> +
> +	return 0;
> +}
> +
> +/*
> + * Validate the single fd passed to us by the service manager and return it.
> + *
> + * The fd at SD_LISTEN_FDS_START must be an AF_UNIX socket whose send buffer
> + * is large enough for the biggest open command.
> + *
> + * @nr_fds number of fds reported by count_listen_fds(); must be exactly 1
> + * Returns SD_LISTEN_FDS_START on success or a negative errno.
> + */
> +static int find_socket_fd(int nr_fds)
> +{
> +	struct stat stbuf;
> +	struct sockaddr_un urk = { };
> +	socklen_t urklen = sizeof(urk);
> +	int ret;
> +
> +	if (nr_fds != 1) {
> +		fuse_log(FUSE_LOG_ERR, "fuse: can only handle 1 service socket, got %d.\n",
> +			 nr_fds);
> +		return -E2BIG;
> +	}
> +
> +	ret = fstat(SD_LISTEN_FDS_START, &stbuf);
> +	if (ret) {
> +		int error = errno;
> +
> +		fuse_log(FUSE_LOG_ERR, "fuse: service socket: %s\n",
> +			 strerror(error));
> +		return -error;
> +	}
> +
> +	if (!S_ISSOCK(stbuf.st_mode)) {
> +		fuse_log(FUSE_LOG_ERR, "fuse: expected service fd %d to be a socket\n",
> +			 SD_LISTEN_FDS_START);
> +		return -ENOTSOCK;
> +	}
> +
> +	ret = getsockname(SD_LISTEN_FDS_START, (struct sockaddr *)&urk,
> +			  &urklen);
> +	if (ret < 0) {
> +		int error = errno;
> +
> +		fuse_log(FUSE_LOG_ERR, "fuse: service socket family: %s\n",
> +			 strerror(error));
> +		return -error;
> +	}
> +
> +	/*
> +	 * getsockname() only ever returns 0 or -1.  Truncation is reported
> +	 * through the updated length: if the real address is larger than a
> +	 * sockaddr_un, urklen comes back bigger than the buffer we supplied,
> +	 * so it's obviously not an AF_UNIX socket.
> +	 *
> +	 * If the address fit but the family isn't AF_UNIX, reject it too.
> +	 */
> +	if (urklen > sizeof(urk) || urk.sun_family != AF_UNIX) {
> +		fuse_log(FUSE_LOG_ERR, "fuse: service socket is not AF_UNIX\n");
> +		return -EAFNOSUPPORT;
> +	}
> +
> +	ret = check_sendbuf_size(SD_LISTEN_FDS_START);
> +	if (ret)
> +		return ret;
> +
> +	return SD_LISTEN_FDS_START;
> +}
> +
> +/*
> + * Perform the opening handshake on the service socket: receive the hello
> + * packet, validate its magic, protocol version range and flags, remember
> + * whether allow_other was granted, and send our hello reply.
> + *
> + * Returns 0 on success or a negative errno.
> + */
> +static int negotiate_hello(struct fuse_service *sf)
> +{
> +	struct fuse_service_hello hello = { };
> +	struct fuse_service_hello_reply reply = {
> +		.p.magic = htonl(FUSE_SERVICE_HELLO_REPLY),
> +		.version = htons(FUSE_SERVICE_PROTO),
> +	};
> +	uint32_t flags;
> +	ssize_t size;
> +
> +	size = __recv_packet(sf, &hello, sizeof(hello));
> +	if (size < 0) {
> +		int error = errno;
> +
> +		fuse_log(FUSE_LOG_ERR, "fuse: receive service hello: %s\n",
> +			 strerror(error));
> +		return -error;
> +	}
> +	if (size != sizeof(hello)) {
> +		fuse_log(FUSE_LOG_ERR, "fuse: wrong service hello size %zd, expected %zd\n",
> +			 size, sizeof(hello));
> +		return -EBADMSG;
> +	}
> +
> +	if (ntohl(hello.p.magic) != FUSE_SERVICE_HELLO_CMD) {
> +		fuse_log(FUSE_LOG_ERR, "fuse: service server did not send hello command\n");
> +		return -EBADMSG;
> +	}
> +
> +	if (ntohs(hello.min_version) < FUSE_SERVICE_MIN_PROTO) {
> +		fuse_log(FUSE_LOG_ERR, "fuse: unsupported min service protocol version %u\n",
> +			 ntohs(hello.min_version));
> +		return -EOPNOTSUPP;
> +	}
> +
> +	if (ntohs(hello.max_version) > FUSE_SERVICE_MAX_PROTO) {
> +		/* report the field that actually failed the check */
> +		fuse_log(FUSE_LOG_ERR, "fuse: unsupported max service protocol version %u\n",
> +			 ntohs(hello.max_version));
> +		return -EOPNOTSUPP;
> +	}
> +
> +	flags = ntohl(hello.flags);
> +	if (flags & ~FUSE_SERVICE_FLAGS) {
> +		/* use fuse_log like every other error path in this file */
> +		fuse_log(FUSE_LOG_ERR, "fuse: invalid hello flags: 0x%x\n",
> +			 flags & ~FUSE_SERVICE_FLAGS);
> +		return -EINVAL;
> +	}
> +
> +	if (flags & FUSE_SERVICE_FLAG_ALLOW_OTHER)
> +		sf->allow_other = true;
> +
> +	size = __send_packet(sf, &reply, sizeof(reply));
> +	if (size < 0) {
> +		int error = errno;
> +
> +		fuse_log(FUSE_LOG_ERR, "fuse: service hello reply: %s\n",
> +			 strerror(error));
> +		return -error;
> +	}
> +
> +	return 0;
> +}
> +
> +/*
> + * Set up a fuse_service from the socket handed to this process, if any.
> + *
> + * On return *sfp is NULL unless everything succeeded.  If no listening fds
> + * were passed in the environment, this returns 0 with *sfp still NULL so the
> + * caller can tell that it is not running as a service.  Otherwise the socket
> + * is validated and marked close-on-exec, the hello handshake is performed,
> + * and the argv file and fuse device fds are received.
> + *
> + * Returns 0 or a negative errno.
> + */
> +int fuse_service_accept(struct fuse_service **sfp)
> +{
> + struct fuse_service *sf;
> + int nr_fds;
> + int sockfd;
> + int flags;
> + int ret = 0;
> +
> + *sfp = NULL;
> +
> + nr_fds = count_listen_fds();
> + if (nr_fds == 0)
> + return 0;
> +
> + /* Find the socket that connects us to mount.service */
> + sockfd = find_socket_fd(nr_fds);
> + if (sockfd < 0)
> + return sockfd;
> +
> + /* Make sure the socket is not inherited across exec */
> + flags = fcntl(sockfd, F_GETFD);
> + if (flags < 0) {
> + int error = errno;
> +
> + fuse_log(FUSE_LOG_ERR, "fuse: service socket getfd: %s\n",
> + strerror(error));
> + return -error;
> + }
> +
> + if (!(flags & FD_CLOEXEC)) {
> + ret = fcntl(sockfd, F_SETFD, flags | FD_CLOEXEC);
> + if (ret) {
> + int error = errno;
> +
> + fuse_log(FUSE_LOG_ERR, "fuse: service socket set cloexec: %s\n",
> + strerror(error));
> + return -error;
> + }
> + }
> +
> + sf = calloc(1, sizeof(struct fuse_service));
> + if (!sf) {
> + int error = errno;
> +
> + fuse_log(FUSE_LOG_ERR, "fuse: service alloc: %s\n",
> + strerror(error));
> + return -error;
> + }
> + sf->sockfd = sockfd;
> +
> + /*
> + * NOTE(review): a handshake failure jumps to out_sf, which frees sf
> + * but leaves sockfd open, unlike the later failure paths that go
> + * through out_sockfd.  Confirm this is intended.
> + */
> + ret = negotiate_hello(sf);
> + if (ret)
> + goto out_sf;
> +
> + /* Receive the two critical files: the argv memfd and the fuse device */
> + ret = fuse_service_receive_file(sf, FUSE_SERVICE_ARGV, &sf->argvfd);
> + if (ret < 0)
> + goto out_sockfd;
> + if (sf->argvfd < 0) {
> + /* the transfer worked, but a negative errno came back instead of an fd */
> + fuse_log(FUSE_LOG_ERR, "fuse: service mount options file: %s\n",
> + strerror(-sf->argvfd));
> + ret = sf->argvfd;
> + goto out_sockfd;
> + }
> +
> + ret = fuse_service_receive_file(sf, FUSE_SERVICE_FUSEDEV,
> + &sf->fusedevfd);
> + if (ret < 0)
> + goto out_argvfd;
> + if (sf->fusedevfd < 0) {
> + fuse_log(FUSE_LOG_ERR, "fuse: service fuse device: %s\n",
> + strerror(-sf->fusedevfd));
> + ret = sf->fusedevfd;
> + goto out_argvfd;
> + }
> +
> + /* we hold the fuse device until a session mount takes it over */
> + sf->owns_fusedevfd = true;
> + *sfp = sf;
> + return 0;
> +
> +out_argvfd:
> + close(sf->argvfd);
> +out_sockfd:
> + shutdown(sf->sockfd, SHUT_RDWR);
> + close(sf->sockfd);
> +out_sf:
> + free(sf);
> + return ret;
> +}
> +
> +/* Report whether the hello handshake granted use of allow_other. */
> +bool fuse_service_can_allow_other(struct fuse_service *sf)
> +{
> + return sf->allow_other;
> +}
> +
> +/*
> + * Merge the arguments that the mount helper stored in the argv memfd into
> + * @existing_args.
> + *
> + * The resulting vector is: the helper's first argument (the fs type) as
> + * argv[0], then the fuse server's own arguments from index 1 onwards, then
> + * the rest of the helper's arguments.  On success @existing_args is replaced
> + * by the new, allocated vector and the argv memfd is closed.
> + *
> + * Returns 0 on success or a negative errno.  If @existing_args was allocated,
> + * a failure may leave some of its strings moved out (set to NULL) and freed.
> + */
> +int fuse_service_append_args(struct fuse_service *sf,
> +			     struct fuse_args *existing_args)
> +{
> +	struct fuse_service_memfd_argv memfd_args = { };
> +	struct fuse_args new_args = {
> +		.allocated = 1,
> +	};
> +	char *str = NULL;
> +	off_t memfd_pos = 0;
> +	ssize_t received;
> +	unsigned int i;
> +	int ret;
> +
> +	/* Figure out how many arguments we're getting from the mount helper. */
> +	received = pread(sf->argvfd, &memfd_args, sizeof(memfd_args), 0);
> +	if (received < 0) {
> +		int error = errno;
> +
> +		fuse_log(FUSE_LOG_ERR, "fuse: service args file: %s\n",
> +			 strerror(error));
> +		return -error;
> +	}
> +	if ((size_t)received < sizeof(memfd_args)) {
> +		fuse_log(FUSE_LOG_ERR, "fuse: service args file length unreadable\n");
> +		return -EBADMSG;
> +	}
> +	if (ntohl(memfd_args.magic) != FUSE_SERVICE_ARGS_MAGIC) {
> +		fuse_log(FUSE_LOG_ERR, "fuse: service args file corrupt\n");
> +		return -EBADMSG;
> +	}
> +	/* The header is stored in network byte order; convert to host order */
> +	memfd_args.magic = ntohl(memfd_args.magic);
> +	memfd_args.argc = ntohl(memfd_args.argc);
> +	memfd_pos += sizeof(memfd_args);
> +
> +	/* Allocate a new array of argv string pointers */
> +	new_args.argv = calloc(memfd_args.argc + existing_args->argc,
> +			       sizeof(char *));
> +	if (!new_args.argv) {
> +		int error = errno;
> +
> +		fuse_log(FUSE_LOG_ERR, "fuse: service new args: %s\n",
> +			 strerror(error));
> +		return -error;
> +	}
> +
> +	/*
> +	 * Copy the fuse server's CLI arguments.  We'll leave new_args.argv[0]
> +	 * unset for now, because we'll set it in the next step with the fstype
> +	 * that the mount helper sent us.
> +	 */
> +	new_args.argc++;
> +	for (i = 1; i < existing_args->argc; i++) {
> +		if (existing_args->allocated) {
> +			/* steal the string rather than copying it */
> +			new_args.argv[new_args.argc] = existing_args->argv[i];
> +			existing_args->argv[i] = NULL;
> +		} else {
> +			char *dup = strdup(existing_args->argv[i]);
> +
> +			if (!dup) {
> +				int error = errno;
> +
> +				fuse_log(FUSE_LOG_ERR,
> +					 "fuse: service duplicate existing args: %s\n",
> +					 strerror(error));
> +				ret = -error;
> +				goto out_new_args;
> +			}
> +
> +			new_args.argv[new_args.argc] = dup;
> +		}
> +
> +		new_args.argc++;
> +	}
> +
> +	/* Copy the rest of the arguments from the helper */
> +	for (i = 0; i < memfd_args.argc; i++) {
> +		struct fuse_service_memfd_arg memfd_arg = { };
> +
> +		/* Read argv iovec */
> +		received = pread(sf->argvfd, &memfd_arg, sizeof(memfd_arg),
> +				 memfd_pos);
> +		if (received < 0) {
> +			int error = errno;
> +
> +			fuse_log(FUSE_LOG_ERR, "fuse: service args file iovec read: %s\n",
> +				 strerror(error));
> +			ret = -error;
> +			goto out_new_args;
> +		}
> +		if ((size_t)received < sizeof(struct fuse_service_memfd_arg)) {
> +			fuse_log(FUSE_LOG_ERR,
> +				 "fuse: service args file argv[%u] iovec short read %zd\n",
> +				 i, received);
> +			ret = -EBADMSG;
> +			goto out_new_args;
> +		}
> +		memfd_arg.pos = ntohl(memfd_arg.pos);
> +		memfd_arg.len = ntohl(memfd_arg.len);
> +		memfd_pos += sizeof(memfd_arg);
> +
> +		/* read arg string from file; the extra byte keeps it terminated */
> +		str = calloc(1, memfd_arg.len + 1);
> +		if (!str) {
> +			int error = errno;
> +
> +			fuse_log(FUSE_LOG_ERR, "fuse: service arg alloc: %s\n",
> +				 strerror(error));
> +			ret = -error;
> +			goto out_new_args;
> +		}
> +
> +		received = pread(sf->argvfd, str, memfd_arg.len, memfd_arg.pos);
> +		if (received < 0) {
> +			int error = errno;
> +
> +			fuse_log(FUSE_LOG_ERR, "fuse: service args file read: %s\n",
> +				 strerror(error));
> +			ret = -error;
> +			goto out_str;
> +		}
> +		if ((size_t)received < memfd_arg.len) {
> +			fuse_log(FUSE_LOG_ERR, "fuse: service args file argv[%u] short read %zd\n",
> +				 i, received);
> +			ret = -EBADMSG;
> +			goto out_str;
> +		}
> +
> +		/* move string into the args structure */
> +		if (i == 0) {
> +			/* the first argument is the fs type */
> +			new_args.argv[0] = str;
> +		} else {
> +			new_args.argv[new_args.argc] = str;
> +			new_args.argc++;
> +		}
> +		str = NULL;
> +	}
> +
> +	/* drop existing args, move new args to existing args */
> +	fuse_opt_free_args(existing_args);
> +	memcpy(existing_args, &new_args, sizeof(*existing_args));
> +
> +	close(sf->argvfd);
> +	sf->argvfd = -1;
> +
> +	return 0;
> +
> +out_str:
> +	free(str);
> +out_new_args:
> +	fuse_opt_free_args(&new_args);
> +	return ret;
> +}
> +
> +#ifdef SO_PASSRIGHTS
> +/*
> + * Tell the kernel that we are done receiving file descriptors on the service
> + * socket by turning SO_PASSRIGHTS off.
> + *
> + * Returns 0 on success, or if the option is not recognized (ENOPROTOOPT);
> + * otherwise a negative errno.
> + */
> +int fuse_service_finish_file_requests(struct fuse_service *sf)
> +{
> + int zero = 0;
> + int ret;
> +
> + /*
> + * Don't let a malicious mount helper send us more fds. If the kernel
> + * doesn't know about this new(ish) option that's ok, we'll trust the
> + * servicemount helper.
> + */
> + ret = setsockopt(sf->sockfd, SOL_SOCKET, SO_PASSRIGHTS, &zero,
> + sizeof(zero));
> + if (ret && errno == ENOPROTOOPT)
> + ret = 0;
> + if (ret) {
> + int error = errno;
> +
> + fuse_log(FUSE_LOG_ERR, "fuse: disabling fd passing: %s\n",
> + strerror(error));
> + return -error;
> + }
> +
> + return 0;
> +}
> +#else
> +/* SO_PASSRIGHTS is not defined at build time, so there is nothing to do. */
> +int fuse_service_finish_file_requests(struct fuse_service *sf)
> +{
> + (void)sf;
> + return 0;
> +}
> +#endif
> +
> +/*
> + * Send the fsopen command and wait for the simple reply.  An @fstype that
> + * begins with "fuseblk" sets the FUSEBLK flag in the command.
> + *
> + * Returns 0 if a well-formed reply arrived, with the reply's error code
> + * stored in *errorp; otherwise a negative errno.
> + */
> +static int send_fsopen(struct fuse_service *sf, const char *fstype,
> +		       int *errorp)
> +{
> +	struct fuse_service_fsopen_command cmd = {
> +		.p.magic = htonl(FUSE_SERVICE_FSOPEN_CMD),
> +	};
> +	struct fuse_service_simple_reply reply = { };
> +	ssize_t nbytes;
> +	int err;
> +
> +	if (strncmp(fstype, "fuseblk", 7) == 0)
> +		cmd.fsopen_flags |= htonl(FUSE_SERVICE_FSOPEN_FUSEBLK);
> +
> +	nbytes = __send_packet(sf, &cmd, sizeof(cmd));
> +	if (nbytes < 0) {
> +		err = errno;
> +		fuse_log(FUSE_LOG_ERR, "fuse: send service fsopen command: %s\n",
> +			 strerror(err));
> +		return -err;
> +	}
> +
> +	nbytes = __recv_packet(sf, &reply, sizeof(reply));
> +	if (nbytes < 0) {
> +		err = errno;
> +		fuse_log(FUSE_LOG_ERR, "fuse: service fsopen reply: %s\n",
> +			 strerror(err));
> +		return -err;
> +	}
> +
> +	if (nbytes != sizeof(reply)) {
> +		fuse_log(FUSE_LOG_ERR, "fuse: wrong service fsopen reply size %zd, expected %zd\n",
> +			 nbytes, sizeof(reply));
> +		return -EBADMSG;
> +	}
> +
> +	if (ntohl(reply.p.magic) != FUSE_SERVICE_SIMPLE_REPLY) {
> +		fuse_log(FUSE_LOG_ERR, "fuse: service fsopen reply contains wrong magic!\n");
> +		return -EBADMSG;
> +	}
> +
> +	*errorp = ntohl(reply.error);
> +	return 0;
> +}
> +
> +/*
> + * Send a command that carries a single string @value and wait for the simple
> + * reply.
> + *
> + * @command host-order magic of the command to send
> + * Returns 0 if a well-formed reply arrived, with the reply's error code
> + * stored in *errorp; otherwise a negative errno.
> + */
> +static int send_string(struct fuse_service *sf, uint32_t command,
> +		       const char *value, int *errorp)
> +{
> +	struct fuse_service_simple_reply reply = { };
> +	struct fuse_service_string_command *cmd;
> +	const size_t cmdsz = sizeof_fuse_service_string_command(strlen(value));
> +	ssize_t size;
> +
> +	cmd = calloc(1, cmdsz);
> +	if (!cmd) {
> +		int error = errno;
> +
> +		fuse_log(FUSE_LOG_ERR, "fuse: alloc service string send: %s\n",
> +			 strerror(error));
> +		return -error;
> +	}
> +	cmd->p.magic = htonl(command);
> +	strcpy(cmd->value, value);
> +
> +	size = __send_packet(sf, cmd, cmdsz);
> +	if (size < 0) {
> +		int error = errno;
> +
> +		/* don't leak the command buffer when the send fails */
> +		free(cmd);
> +		fuse_log(FUSE_LOG_ERR, "fuse: send service string: %s\n",
> +			 strerror(error));
> +		return -error;
> +	}
> +	free(cmd);
> +
> +	size = __recv_packet(sf, &reply, sizeof(reply));
> +	if (size < 0) {
> +		int error = errno;
> +
> +		fuse_log(FUSE_LOG_ERR, "fuse: service string reply: %s\n",
> +			 strerror(error));
> +		return -error;
> +	}
> +	if (size != sizeof(reply)) {
> +		fuse_log(FUSE_LOG_ERR, "fuse: wrong service string reply size %zd, expected %zd\n",
> +			 size, sizeof(reply));
> +		return -EBADMSG;
> +	}
> +
> +	if (ntohl(reply.p.magic) != FUSE_SERVICE_SIMPLE_REPLY) {
> +		fuse_log(FUSE_LOG_ERR, "fuse: service string reply contains wrong magic!\n");
> +		return -EBADMSG;
> +	}
> +
> +	*errorp = ntohl(reply.error);
> +	return 0;
> +}
> +
> +/*
> + * Send the mountpoint command, carrying the mountpoint path @value and the
> + * file format that the mountpoint is expected to have, then wait for the
> + * simple reply.
> + *
> + * Returns 0 if a well-formed reply arrived, with the reply's error code
> + * stored in *errorp; otherwise a negative errno.
> + */
> +static int send_mountpoint(struct fuse_service *sf, mode_t expected_fmt,
> +			   const char *value, int *errorp)
> +{
> +	struct fuse_service_simple_reply reply = { };
> +	struct fuse_service_mountpoint_command *cmd;
> +	const size_t cmdsz =
> +		sizeof_fuse_service_mountpoint_command(strlen(value));
> +	ssize_t size;
> +
> +	cmd = calloc(1, cmdsz);
> +	if (!cmd) {
> +		int error = errno;
> +
> +		fuse_log(FUSE_LOG_ERR, "fuse: alloc service mountpoint send: %s\n",
> +			 strerror(error));
> +		return -error;
> +	}
> +	cmd->p.magic = htonl(FUSE_SERVICE_MNTPT_CMD);
> +	cmd->expected_fmt = htons(expected_fmt);
> +	strcpy(cmd->value, value);
> +
> +	size = __send_packet(sf, cmd, cmdsz);
> +	if (size < 0) {
> +		int error = errno;
> +
> +		/* don't leak the command buffer when the send fails */
> +		free(cmd);
> +		fuse_log(FUSE_LOG_ERR, "fuse: send service mountpoint: %s\n",
> +			 strerror(error));
> +		return -error;
> +	}
> +	free(cmd);
> +
> +	size = __recv_packet(sf, &reply, sizeof(reply));
> +	if (size < 0) {
> +		int error = errno;
> +
> +		fuse_log(FUSE_LOG_ERR, "fuse: service mountpoint reply: %s\n",
> +			 strerror(error));
> +		return -error;
> +	}
> +	if (size != sizeof(reply)) {
> +		fuse_log(FUSE_LOG_ERR,
> +			 "fuse: wrong service mountpoint reply size %zd, expected %zd\n",
> +			 size, sizeof(reply));
> +		return -EBADMSG;
> +	}
> +
> +	if (ntohl(reply.p.magic) != FUSE_SERVICE_SIMPLE_REPLY) {
> +		fuse_log(FUSE_LOG_ERR, "fuse: service mountpoint reply contains wrong magic!\n");
> +		return -EBADMSG;
> +	}
> +
> +	*errorp = ntohl(reply.error);
> +	return 0;
> +}
> +
> +/*
> + * Send the mount command with the given mount flags and wait for the simple
> + * reply.
> + *
> + * Returns 0 if a well-formed reply arrived, with the reply's error code
> + * stored in *errorp; otherwise a negative errno.
> + */
> +static int send_mount(struct fuse_service *sf, unsigned int ms_flags,
> +		      int *errorp)
> +{
> +	struct fuse_service_mount_command cmd = {
> +		.p.magic = htonl(FUSE_SERVICE_MOUNT_CMD),
> +		.ms_flags = htonl(ms_flags),
> +	};
> +	struct fuse_service_simple_reply reply = { };
> +	ssize_t nbytes;
> +	int err;
> +
> +	nbytes = __send_packet(sf, &cmd, sizeof(cmd));
> +	if (nbytes < 0) {
> +		err = errno;
> +		fuse_log(FUSE_LOG_ERR, "fuse: send service mount command: %s\n",
> +			 strerror(err));
> +		return -err;
> +	}
> +
> +	nbytes = __recv_packet(sf, &reply, sizeof(reply));
> +	if (nbytes < 0) {
> +		err = errno;
> +		fuse_log(FUSE_LOG_ERR, "fuse: service mount reply: %s\n",
> +			 strerror(err));
> +		return -err;
> +	}
> +
> +	if (nbytes != sizeof(reply)) {
> +		fuse_log(FUSE_LOG_ERR, "fuse: wrong service mount reply size %zd, expected %zd\n",
> +			 nbytes, sizeof(reply));
> +		return -EBADMSG;
> +	}
> +
> +	if (ntohl(reply.p.magic) != FUSE_SERVICE_SIMPLE_REPLY) {
> +		fuse_log(FUSE_LOG_ERR, "fuse: service mount reply contains wrong magic!\n");
> +		return -EBADMSG;
> +	}
> +
> +	*errorp = ntohl(reply.error);
> +	return 0;
> +}
> +
> +/*
> + * Record the file format that the mountpoint is expected to have.
> + * fuse_service_session_mount() falls back to this value when it is called
> + * with an expected_fmt of zero.
> + */
> +void fuse_service_expect_mount_format(struct fuse_service *sf,
> + mode_t expected_fmt)
> +{
> + sf->expected_fmt = expected_fmt;
> +}
> +
> +/*
> + * Mount the fuse session through the mount service helper.
> + *
> + * The session is first attached to the fuse device fd that was received
> + * earlier, then the helper is driven through the fsopen, source, mountpoint,
> + * (optional) mount options and mount commands in that order.  Each step stops
> + * at the first transport failure or error code in a reply.
> + *
> + * On success opts->foreground is forced to 1 and the working directory is
> + * changed to "/".
> + *
> + * Returns 0 on success or a negative errno.
> + */
> +int fuse_service_session_mount(struct fuse_service *sf, struct fuse_session *se,
> + mode_t expected_fmt,
> + struct fuse_cmdline_opts *opts)
> +{
> + char *fstype = fuse_mnt_build_type(se->mo);
> + char *source = fuse_mnt_build_source(se->mo);
> + char *mntopts = fuse_mnt_kernel_opts(se->mo);
> + char path[32];
> + int ret;
> + int error = 0;
> +
> + /* mntopts is deliberately not checked here; it is optional below */
> + if (!fstype || !source) {
> + fuse_log(FUSE_LOG_ERR, "fuse: cannot allocate service strings\n");
> + ret = -ENOMEM;
> + goto out_strings;
> + }
> +
> + /* zero means "use whatever fuse_service_expect_mount_format() recorded" */
> + if (!expected_fmt)
> + expected_fmt = sf->expected_fmt;
> +
> + /*
> + * The fuse session takes the fusedev fd if this succeeds. It is
> + * required to use the "/dev/fd/XX" format.
> + */
> + snprintf(path, sizeof(path), "/dev/fd/%d", sf->fusedevfd);
> + errno = 0;
> + ret = fuse_session_mount(se, path);
> + if (ret) {
> + /* Try to return richer errors than fuse_session_mount's -1 */
> + ret = errno ? -errno : -EINVAL;
> + goto out_strings;
> + }
> + /* from here on the session, not us, is responsible for the fd */
> + sf->owns_fusedevfd = false;
> +
> + ret = send_fsopen(sf, fstype, &error);
> + if (ret)
> + goto out_strings;
> + if (error) {
> + fuse_log(FUSE_LOG_ERR, "fuse: service fsopen: %s\n",
> + strerror(error));
> + ret = -error;
> + goto out_strings;
> + }
> +
> + ret = send_string(sf, FUSE_SERVICE_SOURCE_CMD, source, &error);
> + if (ret)
> + goto out_strings;
> + if (error) {
> + fuse_log(FUSE_LOG_ERR, "fuse: service fs source: %s\n",
> + strerror(error));
> + ret = -error;
> + goto out_strings;
> + }
> +
> + ret = send_mountpoint(sf, expected_fmt, opts->mountpoint, &error);
> + if (ret)
> + goto out_strings;
> + if (error) {
> + fuse_log(FUSE_LOG_ERR, "fuse: service fs mountpoint: %s\n",
> + strerror(error));
> + ret = -error;
> + goto out_strings;
> + }
> +
> + /* mount options are only sent when there are any */
> + if (mntopts) {
> + ret = send_string(sf, FUSE_SERVICE_MNTOPTS_CMD, mntopts,
> + &error);
> + if (ret)
> + goto out_strings;
> + if (error) {
> + fuse_log(FUSE_LOG_ERR, "fuse: service fs mount options: %s\n",
> + strerror(error));
> + ret = -error;
> + goto out_strings;
> + }
> + }
> +
> + ret = send_mount(sf, fuse_mnt_flags(se->mo), &error);
> + if (ret)
> + goto out_strings;
> + if (error) {
> + fuse_log(FUSE_LOG_ERR, "fuse: service mount: %s\n",
> + strerror(error));
> + ret = -error;
> + goto out_strings;
> + }
> +
> + /*
> + * foreground mode is needed so that systemd actually tracks the
> + * service correctly and doesn't try to kill it; and so that
> + * stdout/stderr don't get zapped. Change to the root directory so
> + * that the caller needn't call fuse_daemonize().
> + */
> + opts->foreground = 1;
> + (void)chdir("/");
> +
> + /* ret is 0 here on the success path; failures jump straight to the label */
> +out_strings:
> + free(mntopts);
> + free(source);
> + free(fstype);
> + return ret;
> +}
> +
> +/*
> + * Send the unmount command and wait for the simple reply.  Does nothing if
> + * the service socket is already closed.
> + *
> + * Returns 0 on success (or if already closed), or a negative errno taken
> + * from a transport failure, a malformed reply, or the reply's error code.
> + */
> +int fuse_service_session_unmount(struct fuse_service *sf)
> +{
> +	struct fuse_service_simple_reply reply = { };
> +	struct fuse_service_unmount_command cmd;
> +	ssize_t nbytes;
> +	int err;
> +
> +	/* already gone? */
> +	if (sf->sockfd < 0)
> +		return 0;
> +
> +	memset(&cmd, 0, sizeof(cmd));
> +	cmd.p.magic = htonl(FUSE_SERVICE_UNMOUNT_CMD);
> +
> +	nbytes = __send_packet(sf, &cmd, sizeof(cmd));
> +	if (nbytes < 0) {
> +		err = errno;
> +		fuse_log(FUSE_LOG_ERR, "fuse: send service unmount: %s\n",
> +			 strerror(err));
> +		return -err;
> +	}
> +
> +	nbytes = __recv_packet(sf, &reply, sizeof(reply));
> +	if (nbytes < 0) {
> +		err = errno;
> +		fuse_log(FUSE_LOG_ERR, "fuse: service unmount reply: %s\n",
> +			 strerror(err));
> +		return -err;
> +	}
> +
> +	if (nbytes != sizeof(reply)) {
> +		fuse_log(FUSE_LOG_ERR, "fuse: wrong service unmount reply size %zd, expected %zd\n",
> +			 nbytes, sizeof(reply));
> +		return -EBADMSG;
> +	}
> +
> +	if (ntohl(reply.p.magic) != FUSE_SERVICE_SIMPLE_REPLY) {
> +		fuse_log(FUSE_LOG_ERR, "fuse: service unmount reply contains wrong magic!\n");
> +		return -EBADMSG;
> +	}
> +
> +	if (!reply.error)
> +		return 0;
> +
> +	err = ntohl(reply.error);
> +	fuse_log(FUSE_LOG_ERR, "fuse: service unmount: %s\n",
> +		 strerror(err));
> +	return -err;
> +}
> +
> +/*
> + * Close every file descriptor held by @sf and mark each one as closed.  The
> + * fuse device is only closed if we still own it.  The structure itself is
> + * not freed.
> + */
> +void fuse_service_release(struct fuse_service *sf)
> +{
> +	if (sf->owns_fusedevfd) {
> +		close(sf->fusedevfd);
> +		sf->owns_fusedevfd = false;
> +	}
> +	sf->fusedevfd = -1;
> +
> +	close(sf->argvfd);
> +	sf->argvfd = -1;
> +
> +	shutdown(sf->sockfd, SHUT_RDWR);
> +	close(sf->sockfd);
> +	sf->sockfd = -1;
> +}
> +
> +/*
> + * Release and free the service object that *sfp points to, if there is one,
> + * and always leave *sfp set to NULL.
> + */
> +void fuse_service_destroy(struct fuse_service **sfp)
> +{
> +	if (*sfp != NULL) {
> +		struct fuse_service *victim = *sfp;
> +
> +		fuse_service_release(victim);
> +		free(victim);
> +	}
> +
> +	*sfp = NULL;
> +}
> +
> +/*
> + * Build a single command line string of the form
> + *
> + *	argv0 -t alleged_fstype [all other options...]
> + *
> + * where argv0 is argv[0] if argc > 0 and args->argv[0] otherwise, and the
> + * remainder is every element of args->argv joined by single spaces.
> + *
> + * Returns a heap-allocated string that the caller must free(), or NULL if
> + * args holds no arguments or memory cannot be allocated.
> + */
> +char *fuse_service_cmdline(int argc, char *argv[], struct fuse_args *args)
> +{
> +	const char *argv0;
> +	char *cmdline, *dst;
> +	size_t len, left;
> +	int i, n;
> +
> +	/*
> +	 * The alleged fstype comes from args->argv[0], so there must be at
> +	 * least one entry.  Rejecting negative counts here also keeps the
> +	 * loops below from walking off the end of args->argv.
> +	 */
> +	if (args->argc <= 0)
> +		return NULL;
> +
> +	/* Try to preserve argv[0] */
> +	argv0 = argc > 0 ? argv[0] : args->argv[0];
> +
> +	/* "argv0" + " -t" + (" " + arg) per argument + NUL terminator */
> +	len = strlen(argv0) + 3 + 1;
> +	for (i = 0; i < args->argc; i++)
> +		len += 1 + strlen(args->argv[i]);
> +
> +	cmdline = calloc(1, len);
> +	if (!cmdline)
> +		return NULL;
> +	dst = cmdline;
> +	left = len;
> +
> +	n = snprintf(dst, left, "%s -t", argv0);
> +	if (n < 0 || (size_t)n >= left)
> +		goto fail;
> +	dst += n;
> +	left -= n;
> +
> +	for (i = 0; i < args->argc; i++) {
> +		n = snprintf(dst, left, " %s", args->argv[i]);
> +		if (n < 0 || (size_t)n >= left)
> +			goto fail;
> +		dst += n;
> +		left -= n;
> +	}
> +
> +	return cmdline;
> +
> +fail:
> +	free(cmdline);
> +	return NULL;
> +}
> +
> +/*
> + * Public entry point for parsing the command line of a fuse service; the
> + * work is done by fuse_parse_cmdline_service() and its result is returned
> + * unchanged.
> + */
> +int fuse_service_parse_cmdline_opts(struct fuse_args *args,
> +				    struct fuse_cmdline_opts *opts)
> +{
> +	int rc = fuse_parse_cmdline_service(args, opts);
> +
> +	return rc;
> +}
> +
> +/*
> + * Translate a fuse service result into a process exit code, after pausing
> + * long enough for the logs to be attributed to the service.
> + *
> + * Returns EXIT_SUCCESS if ret is zero and EXIT_FAILURE otherwise.
> + */
> +int fuse_service_exit(int ret)
> +{
> +	/*
> +	 * journald uses the pid to tie our log messages to the systemd
> +	 * service, so linger for 2 seconds before exiting.  Capturing all of
> +	 * the messages matters most when the service fails, because failure
> +	 * analysis tools gather log messages by service name for reporting.
> +	 */
> +	sleep(2);
> +
> +	/*
> +	 * When run as a service, the exit code must follow the LSB init
> +	 * script action error guidelines: every error collapses to 1
> +	 * ("generic or unspecified error", LSB 5.0 section 22.2), and the
> +	 * admin is expected to scan the log for what actually happened.
> +	 */
> +	if (ret == 0)
> +		return EXIT_SUCCESS;
> +
> +	return EXIT_FAILURE;
> +}
> diff --git a/lib/fuse_service_stub.c b/lib/fuse_service_stub.c
> new file mode 100644
> index 00000000000000..d34df3891a6e31
> --- /dev/null
> +++ b/lib/fuse_service_stub.c
> @@ -0,0 +1,106 @@
> +/*
> + * FUSE: Filesystem in Userspace
> + * Copyright (C) 2025-2026 Oracle.
> + * Author: Darrick J. Wong <djwong@kernel.org>
> + *
> + * Stub functions for platforms where we cannot have fuse servers run as "safe"
> + * systemd containers.
> + *
> + * This program can be distributed under the terms of the GNU LGPLv2.
> + * See the file LGPL2.txt
> + */
> +
> +/* we don't use any parameters at all */
> +#pragma GCC diagnostic ignored "-Wunused-parameter"
> +
> +#define _GNU_SOURCE
> +#include <errno.h>
> +
> +#include "fuse_config.h"
> +#include "fuse_i.h"
> +#include "fuse_service.h"
> +
> +/* Stub: receiving files from a mount helper is unavailable; always fails. */
> +int fuse_service_receive_file(struct fuse_service *sf, const char *path,
> + int *fdp)
> +{
> + return -EOPNOTSUPP;
> +}
> +
> +/* Stub: requesting files from a mount helper is unavailable; always fails. */
> +int fuse_service_request_file(struct fuse_service *sf, const char *path,
> + int open_flags, mode_t create_mode,
> + unsigned int request_flags)
> +{
> + return -EOPNOTSUPP;
> +}
> +
> +/* Stub: requesting block devices is unavailable; always fails. */
> +int fuse_service_request_blockdev(struct fuse_service *sf, const char *path,
> + int open_flags, mode_t create_mode,
> + unsigned int request_flags,
> + unsigned int block_size)
> +{
> + return -EOPNOTSUPP;
> +}
> +
> +/* Stub: there is no service connection to say goodbye on; always fails. */
> +int fuse_service_send_goodbye(struct fuse_service *sf, int error)
> +{
> + return -EOPNOTSUPP;
> +}
> +
> +/*
> + * Stub: never produces a service object.  Sets *sfp to NULL and returns 0,
> + * i.e. it reports "no service connection" rather than an error.
> + *
> + * NOTE(review): fuse_versionscript in this patch also exports
> + * fuse_service_can_allow_other, but this stub file does not define it --
> + * confirm whether a stub is needed for builds without service support.
> + */
> +int fuse_service_accept(struct fuse_service **sfp)
> +{
> + *sfp = NULL;
> + return 0;
> +}
> +
> +/* Stub: there are no service arguments to append; always fails. */
> +int fuse_service_append_args(struct fuse_service *sf,
> + struct fuse_args *existing_args)
> +{
> + return -EOPNOTSUPP;
> +}
> +
> +/* Stub: never builds a command line string; always returns NULL. */
> +char *fuse_service_cmdline(int argc, char *argv[], struct fuse_args *args)
> +{
> + return NULL;
> +}
> +
> +/* Stub: there are no file requests to finish; always fails. */
> +int fuse_service_finish_file_requests(struct fuse_service *sf)
> +{
> + return -EOPNOTSUPP;
> +}
> +
> +/* Stub: intentionally does nothing. */
> +void fuse_service_expect_mount_format(struct fuse_service *sf,
> + mode_t expected_fmt)
> +{
> +}
> +
> +/* Stub: mounting through a service connection is unavailable; always fails. */
> +int fuse_service_session_mount(struct fuse_service *sf, struct fuse_session *se,
> + mode_t expected_fmt,
> + struct fuse_cmdline_opts *opts)
> +{
> + return -EOPNOTSUPP;
> +}
> +
> +/* Stub: unmounting through a service connection is unavailable; always fails. */
> +int fuse_service_session_unmount(struct fuse_service *sf)
> +{
> + return -EOPNOTSUPP;
> +}
> +
> +/* Stub: there is nothing to release; intentionally does nothing. */
> +void fuse_service_release(struct fuse_service *sf)
> +{
> +}
> +
> +/* Stub: nothing is freed; only clears the caller's pointer. */
> +void fuse_service_destroy(struct fuse_service **sfp)
> +{
> + *sfp = NULL;
> +}
> +
> +/* Stub: no parsing is performed; always returns -1 and leaves opts untouched. */
> +int fuse_service_parse_cmdline_opts(struct fuse_args *args,
> + struct fuse_cmdline_opts *opts)
> +{
> + return -1;
> +}
> +
> +/* Stub: returns ret unchanged, without any delay or exit code mapping. */
> +int fuse_service_exit(int ret)
> +{
> + return ret;
> +}
> diff --git a/lib/fuse_versionscript b/lib/fuse_versionscript
> index cce09610316f4b..f34dc959a1d1e1 100644
> --- a/lib/fuse_versionscript
> +++ b/lib/fuse_versionscript
> @@ -227,6 +227,23 @@ FUSE_3.19 {
> fuse_session_start_teardown_watchdog;
> fuse_session_stop_teardown_watchdog;
> fuse_lowlevel_notify_prune;
> +
> + fuse_service_accept;
> + fuse_service_append_args;
> + fuse_service_can_allow_other;
> + fuse_service_cmdline;
> + fuse_service_destroy;
> + fuse_service_exit;
> + fuse_service_expect_mount_format;
> + fuse_service_finish_file_requests;
> + fuse_service_parse_cmdline_opts;
> + fuse_service_receive_file;
> + fuse_service_release;
> + fuse_service_request_file;
> + fuse_service_request_blockdev;
> + fuse_service_send_goodbye;
> + fuse_service_session_mount;
> + fuse_service_session_unmount;
> } FUSE_3.18;
>
> # Local Variables:
> diff --git a/lib/helper.c b/lib/helper.c
> index 74906fdcbd76d9..819b9a6e4d243c 100644
> --- a/lib/helper.c
> +++ b/lib/helper.c
> @@ -26,6 +26,11 @@
> #include <errno.h>
> #include <sys/param.h>
>
> +#ifdef HAVE_SERVICEMOUNT
> +# include <linux/types.h>
> +# include "fuse_service_priv.h"
> +#endif
> +
> #define FUSE_HELPER_OPT(t, p) \
> { t, offsetof(struct fuse_cmdline_opts, p), 1 }
>
> @@ -228,6 +233,52 @@ int fuse_parse_cmdline_312(struct fuse_args *args,
> return 0;
> }
>
> +#ifdef HAVE_SERVICEMOUNT
> +/*
> + * fuse_opt_parse() callback used when parsing a service command line.
> + *
> + * The first non-option argument is recorded as the mountpoint; any further
> + * non-option argument is reported as invalid and fails the parse (-1).
> + * Everything else is passed through by returning 1.
> + */
> +static int fuse_helper_opt_proc_service(void *data, const char *arg, int key,
> +					struct fuse_args *outargs)
> +{
> +	struct fuse_cmdline_opts *opts = data;
> +
> +	(void) outargs;
> +
> +	/* Pass through unknown options */
> +	if (key != FUSE_OPT_KEY_NONOPT)
> +		return 1;
> +
> +	if (opts->mountpoint) {
> +		fuse_log(FUSE_LOG_ERR, "fuse: invalid argument `%s'\n", arg);
> +		return -1;
> +	}
> +
> +	return fuse_opt_add_opt(&opts->mountpoint, arg);
> +}
> +
> +/*
> + * Parse the command line of a fuse service into *opts.
> + *
> + * *opts is cleared and given the default thread limits before parsing.
> + * Returns 0 on success and -1 if option parsing or adding the default
> + * subtype fails.
> + */
> +int fuse_parse_cmdline_service(struct fuse_args *args,
> +			       struct fuse_cmdline_opts *opts)
> +{
> +	memset(opts, 0, sizeof(*opts));
> +
> +	opts->max_threads = 10;
> +	opts->max_idle_threads = UINT_MAX; /* new default in fuse version 3.12 */
> +
> +	if (fuse_opt_parse(args, opts, fuse_helper_opts,
> +			   fuse_helper_opt_proc_service) == -1)
> +		return -1;
> +
> +	/*
> +	 * *Linux*: if neither -o subtype nor -o fsname are specified,
> +	 * set subtype to program's basename.
> +	 * *FreeBSD*: if fsname is not specified, set to program's
> +	 * basename.
> +	 */
> +	if (!opts->nodefault_subtype &&
> +	    add_default_subtype(args->argv[0], args) == -1)
> +		return -1;
> +
> +	return 0;
> +}
> +#endif
> +
> /**
> * struct fuse_cmdline_opts got extended in libfuse-3.12
> */
> diff --git a/lib/meson.build b/lib/meson.build
> index fcd95741c9d374..d9a902f74b558f 100644
> --- a/lib/meson.build
> +++ b/lib/meson.build
> @@ -10,6 +10,12 @@ else
> libfuse_sources += [ 'mount_bsd.c' ]
> endif
>
> +if private_cfg.get('HAVE_SERVICEMOUNT', false)
> + libfuse_sources += [ 'fuse_service.c' ]
> +else
> + libfuse_sources += [ 'fuse_service_stub.c' ]
> +endif
> +
> deps = [ thread_dep ]
> if private_cfg.get('HAVE_ICONV')
> libfuse_sources += [ 'modules/iconv.c' ]
> @@ -49,18 +55,25 @@ libfuse = library('fuse3',
> dependencies: deps,
> install: true,
> link_depends: 'fuse_versionscript',
> - c_args: [ '-DFUSE_USE_VERSION=317',
> + c_args: [ '-DFUSE_USE_VERSION=319',
> '-DFUSERMOUNT_DIR="@0@"'.format(fusermount_path) ],
> link_args: ['-Wl,--version-script,' + meson.current_source_dir()
> + '/fuse_versionscript' ])
>
> +vars = []
> +if private_cfg.get('HAVE_SERVICEMOUNT', false)
> + service_socket_dir = private_cfg.get_unquoted('FUSE_SERVICE_SOCKET_DIR', '')
> + vars += ['service_socket_dir=' + service_socket_dir]
> + vars += ['service_socket_perms=' + service_socket_perms]
> +endif
> pkg = import('pkgconfig')
> pkg.generate(libraries: [ libfuse, '-lpthread' ],
> libraries_private: '-ldl',
> version: meson.project_version(),
> name: 'fuse3',
> description: 'Filesystem in Userspace',
> - subdirs: 'fuse3')
> + subdirs: 'fuse3',
> + variables: vars)
>
> libfuse_dep = declare_dependency(include_directories: include_dirs,
> link_with: libfuse, dependencies: deps)
> diff --git a/lib/mount.c b/lib/mount.c
> index 2397c3fb2aa26b..952d8899dcf218 100644
> --- a/lib/mount.c
> +++ b/lib/mount.c
> @@ -750,3 +750,15 @@ char *fuse_mnt_build_type(const struct mount_opts *mo)
>
> return type;
> }
> +
> +/*
> + * Return a strdup()'d copy of the kernel mount options, or NULL if none are
> + * set (or if the copy cannot be allocated).  The caller owns the result.
> + */
> +char *fuse_mnt_kernel_opts(const struct mount_opts *mo)
> +{
> +	return mo->kernel_opts ? strdup(mo->kernel_opts) : NULL;
> +}
> +
> +/* Accessor: return the flags field of the mount options. */
> +unsigned int fuse_mnt_flags(const struct mount_opts *mo)
> +{
> + return mo->flags;
> +}
> diff --git a/meson.build b/meson.build
> index 80c5f1dc0bd356..66425a0d4cc16f 100644
> --- a/meson.build
> +++ b/meson.build
> @@ -69,6 +69,16 @@ args_default = [ '-D_GNU_SOURCE' ]
> #
> private_cfg = configuration_data()
> private_cfg.set_quoted('PACKAGE_VERSION', meson.project_version())
> +service_socket_dir = get_option('service-socket-dir')
> +service_socket_perms = get_option('service-socket-perms')
> +if service_socket_dir == ''
> + service_socket_dir = '/run/filesystems'
> +endif
> +if service_socket_perms == ''
> + service_socket_perms = '0220'
> +endif
> +private_cfg.set_quoted('FUSE_SERVICE_SOCKET_DIR', service_socket_dir)
> +private_cfg.set('FUSE_SERVICE_SOCKET_PERMS', service_socket_perms)
>
> # Test for presence of some functions
> test_funcs = [ 'fork', 'fstatat', 'openat', 'readlinkat', 'pipe2',
> @@ -118,6 +128,13 @@ special_funcs = {
> return -1;
> }
> }
> + ''',
> + 'systemd_headers': '''
> + #include <systemd/sd-daemon.h>
> +
> + int main(int argc, char *argv[]) {
> + return SD_LISTEN_FDS_START;
> + }
> '''
> }
>
> @@ -180,6 +197,23 @@ if get_option('enable-io-uring') and liburing.found() and libnuma.found()
> endif
> endif
>
> +# Check for systemd support
> +systemd_system_unit_dir = get_option('systemd-system-unit-dir')
> +if systemd_system_unit_dir == ''
> + systemd = dependency('systemd', required: false)
> + if systemd.found()
> + systemd_system_unit_dir = systemd.get_variable(pkgconfig: 'systemd_system_unit_dir')
> + endif
> +endif
> +
> +if systemd_system_unit_dir == '' or private_cfg.get('HAVE_SYSTEMD_HEADERS', false) == false
> + warning('systemd service support will not be built')
> +else
> + private_cfg.set_quoted('SYSTEMD_SYSTEM_UNIT_DIR', systemd_system_unit_dir)
> + private_cfg.set('HAVE_SYSTEMD', true)
> + private_cfg.set('HAVE_SERVICEMOUNT', true)
> +endif
> +
> #
> # Compiler configuration
> #
> diff --git a/meson_options.txt b/meson_options.txt
> index c1f8fe69467184..193a74c96d0676 100644
> --- a/meson_options.txt
> +++ b/meson_options.txt
> @@ -27,3 +27,12 @@ option('enable-usdt', type : 'boolean', value : false,
>
> option('enable-io-uring', type: 'boolean', value: true,
> description: 'Enable fuse-over-io-uring support')
> +
> +option('service-socket-dir', type : 'string', value : '',
> + description: 'Where to install fuse server sockets (if empty, /run/filesystems)')
> +
> +option('service-socket-perms', type : 'string', value : '',
> + description: 'Default fuse server socket permissions (if empty, 0220)')
> +
> +option('systemd-system-unit-dir', type : 'string', value : '',
> + description: 'Where to install systemd unit files (if empty, query pkg-config(1))')
> diff --git a/util/fuservicemount.c b/util/fuservicemount.c
> new file mode 100644
> index 00000000000000..9c694a4290f94e
> --- /dev/null
> +++ b/util/fuservicemount.c
> @@ -0,0 +1,18 @@
> +/*
> + * FUSE: Filesystem in Userspace
> + * Copyright (C) 2025-2026 Oracle.
> + * Author: Darrick J. Wong <djwong@kernel.org>
> + *
> + * This program can be distributed under the terms of the GNU GPLv2.
> + * See the file GPL2.txt.
> + *
> + * This program wraps the mounting of FUSE filesystems that run in systemd
> + */
> +#define _GNU_SOURCE
> +#include "fuse_config.h"
> +#include "mount_service.h"
> +
> +/* Program entry point: hand the command line to mount_service_main(). */
> +int main(int argc, char *argv[])
> +{
> + return mount_service_main(argc, argv);
> +}
> diff --git a/util/meson.build b/util/meson.build
> index 0e4b1cce95377e..04ea5ac201340d 100644
> --- a/util/meson.build
> +++ b/util/meson.build
> @@ -6,6 +6,15 @@ executable('fusermount3', ['fusermount.c', '../lib/mount_util.c', '../lib/util.c
> install_dir: get_option('bindir'),
> c_args: '-DFUSE_CONF="@0@"'.format(fuseconf_path))
>
> +if private_cfg.get('HAVE_SERVICEMOUNT', false)
> + executable('fuservicemount3', ['mount_service.c', 'fuservicemount.c', '../lib/mount_util.c'],
> + include_directories: include_dirs,
> + link_with: [ libfuse ],
> + install: true,
> + install_dir: get_option('sbindir'),
> + c_args: '-DFUSE_USE_VERSION=319')
> +endif
> +
> executable('mount.fuse3', ['mount.fuse.c'],
> include_directories: include_dirs,
> link_with: [ libfuse ],
> diff --git a/util/mount_service.c b/util/mount_service.c
> new file mode 100644
> index 00000000000000..a43ff79c7bfb6f
> --- /dev/null
> +++ b/util/mount_service.c
> @@ -0,0 +1,1427 @@
> +/*
> + * FUSE: Filesystem in Userspace
> + * Copyright (C) 2025-2026 Oracle.
> + * Author: Darrick J. Wong <djwong@kernel.org>
> + *
> + * This program can be distributed under the terms of the GNU GPLv2.
> + * See the file GPL2.txt.
> + *
> + * This program does the mounting of FUSE filesystems that run in systemd
> + */
> +#define _GNU_SOURCE
> +#include "fuse_config.h"
> +#include <stdint.h>
> +#include <string.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <unistd.h>
> +#include <errno.h>
> +#include <fcntl.h>
> +#include <stdbool.h>
> +#include <limits.h>
> +#include <arpa/inet.h>
> +#include <sys/socket.h>
> +#include <sys/un.h>
> +#include <sys/mman.h>
> +#include <sys/mount.h>
> +#include <sys/stat.h>
> +#include <sys/ioctl.h>
> +#include <linux/fs.h>
> +
> +#include "mount_util.h"
> +#include "util.h"
> +#include "fuse_i.h"
> +#include "fuse_service_priv.h"
> +#include "mount_service.h"
> +
> +/*
> + * State carried through one run of the mount helper.  The file descriptor
> + * fields are set to -1 by mount_service_init() until they are opened.
> + */
> +struct mount_service {
> + /* prefix for printing error messages */
> + const char *msgtag;
> +
> + /* fuse subtype based on -t cli argument */
> + char *subtype;
> +
> + /* source argument to mount() */
> + char *source;
> +
> + /* target argument (aka mountpoint) to mount() */
> + char *mountpoint;
> +
> + /* mountpoint that we pass to mount() */
> + char *real_mountpoint;
> +
> + /* resolved path to mountpoint that we use for mtab updates */
> + char *resv_mountpoint;
> +
> + /* mount options */
> + char *mntopts;
> +
> + /* socket fd */
> + int sockfd;
> +
> + /* /dev/fuse device */
> + int fusedevfd;
> +
> + /* memfd for cli arguments */
> + int argvfd;
> +
> + /* fd for mount point */
> + int mountfd;
> +
> + /* did we actually mount successfully? */
> + bool mounted;
> +
> + /* has the fsopen command already been submitted? */
> + bool fsopened;
> +
> + /* is this a fuseblk mount? */
> + bool fuseblk;
> +};
> +
> +/*
> + * Send a reply packet together with one file descriptor.
> + *
> + * The packet body is req (req_sz bytes); fd travels alongside it as
> + * SCM_RIGHTS ancillary data.  Returns the result of sendmsg(), or -1 with
> + * errno set to EINVAL if no control message header is available.
> + */
> +static ssize_t __send_fd(struct mount_service *mo,
> +			 struct fuse_service_requested_file *req,
> +			 size_t req_sz, int fd)
> +{
> +	union {
> +		struct cmsghdr cmsghdr;
> +		char control[CMSG_SPACE(sizeof(int))];
> +	} cmsgu;
> +	struct iovec iov = {
> +		.iov_base = req,
> +		.iov_len = req_sz,
> +	};
> +	struct msghdr msg = {
> +		.msg_iov = &iov,
> +		.msg_iovlen = 1,
> +		.msg_control = cmsgu.control,
> +		.msg_controllen = sizeof(cmsgu.control),
> +	};
> +	struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
> +
> +	if (!cmsg) {
> +		errno = EINVAL;
> +		return -1;
> +	}
> +
> +	memset(&cmsgu, 0, sizeof(cmsgu));
> +	cmsg->cmsg_len = CMSG_LEN(sizeof(int));
> +	cmsg->cmsg_level = SOL_SOCKET;
> +	cmsg->cmsg_type = SCM_RIGHTS;
> +
> +	/*
> +	 * CMSG_DATA() is not guaranteed to be suitably aligned for an int, so
> +	 * copy the descriptor in rather than storing through a cast pointer.
> +	 */
> +	memcpy(CMSG_DATA(cmsg), &fd, sizeof(fd));
> +
> +	return sendmsg(mo->sockfd, &msg, MSG_EOR | MSG_NOSIGNAL);
> +}
> +
> +/*
> + * Send len bytes at ptr as one record on the service socket.  Returns the
> + * result of sendmsg().
> + */
> +static ssize_t __send_packet(struct mount_service *mo, void *ptr, size_t len)
> +{
> +	struct iovec payload;
> +	struct msghdr hdr;
> +
> +	memset(&hdr, 0, sizeof(hdr));
> +	payload.iov_base = ptr;
> +	payload.iov_len = len;
> +	hdr.msg_iov = &payload;
> +	hdr.msg_iovlen = 1;
> +
> +	return sendmsg(mo->sockfd, &hdr, MSG_EOR | MSG_NOSIGNAL);
> +}
> +
> +/*
> + * Peek at the next packet on the service socket using an empty buffer.
> + * MSG_PEEK leaves the packet queued; MSG_TRUNC asks recvmsg() to report the
> + * packet's real length, which callers use to size the receive buffer.
> + */
> +static ssize_t __recv_packet_size(struct mount_service *mo)
> +{
> +	struct iovec empty;
> +	struct msghdr hdr;
> +
> +	memset(&empty, 0, sizeof(empty));
> +	memset(&hdr, 0, sizeof(hdr));
> +	hdr.msg_iov = &empty;
> +	hdr.msg_iovlen = 1;
> +
> +	return recvmsg(mo->sockfd, &hdr, MSG_PEEK | MSG_TRUNC);
> +}
> +
> +/*
> + * Receive one packet from the service socket into ptr (at most len bytes).
> + * MSG_TRUNC is passed so that the return value can be compared against the
> + * expected size by the caller.  Returns the result of recvmsg().
> + */
> +static ssize_t __recv_packet(struct mount_service *mo, void *ptr, size_t len)
> +{
> +	struct iovec payload;
> +	struct msghdr hdr;
> +
> +	memset(&hdr, 0, sizeof(hdr));
> +	payload.iov_base = ptr;
> +	payload.iov_len = len;
> +	hdr.msg_iov = &payload;
> +	hdr.msg_iovlen = 1;
> +
> +	return recvmsg(mo->sockfd, &hdr, MSG_TRUNC);
> +}
> +
> +/*
> + * Filter out the subtype of the filesystem (e.g. fuse.Y[.Z] -> Y[.Z]). The
> + * fuse server determines if it's appropriate to set the "blockdev" mount
> + * option (aka fuseblk).
> + */
> +/*
> + * Return the subtype portion of fstype: a leading "fuse." or "fuseblk." is
> + * skipped, anything else is returned as is.  The result points into fstype.
> + * Returns NULL (after printing a message) if the subtype contains a '/'.
> + */
> +const char *mount_service_subtype(const char *fstype)
> +{
> +	static const char fuse_prefix[] = "fuse.";
> +	static const char fuseblk_prefix[] = "fuseblk.";
> +	const char *subtype = fstype;
> +
> +	if (strncmp(fstype, fuse_prefix, sizeof(fuse_prefix) - 1) == 0)
> +		subtype += sizeof(fuse_prefix) - 1;
> +	else if (strncmp(fstype, fuseblk_prefix,
> +			 sizeof(fuseblk_prefix) - 1) == 0)
> +		subtype += sizeof(fuseblk_prefix) - 1;
> +
> +	if (strchr(subtype, '/') == NULL)
> +		return subtype;
> +
> +	fprintf(stderr,
> +		"%s: fs subtype cannot contain path separators\n",
> +		fstype);
> +	return NULL;
> +}
> +
> +/*
> + * Initialize the mount state: mark every fd as closed and derive the fuse
> + * subtype from the argument that follows the first "-t" on the command line.
> + *
> + * Returns 0 on success, -1 if no filesystem type was given, the subtype is
> + * invalid, or memory cannot be allocated.
> + */
> +static int mount_service_init(struct mount_service *mo, int argc, char *argv[])
> +{
> +	const char *fstype = NULL;
> +	const char *subtype;
> +	int idx;
> +
> +	mo->mountfd = -1;
> +	mo->fusedevfd = -1;
> +	mo->argvfd = -1;
> +	mo->sockfd = -1;
> +
> +	/* a trailing "-t" with nothing after it does not count */
> +	for (idx = 0; idx + 1 < argc; idx++) {
> +		if (strcmp(argv[idx], "-t") == 0) {
> +			fstype = argv[idx + 1];
> +			break;
> +		}
> +	}
> +	if (fstype == NULL) {
> +		fprintf(stderr, "%s: cannot determine filesystem type.\n",
> +			mo->msgtag);
> +		return -1;
> +	}
> +
> +	subtype = mount_service_subtype(fstype);
> +	if (subtype == NULL)
> +		return -1;
> +
> +	mo->subtype = strdup(subtype);
> +	if (mo->subtype != NULL)
> +		return 0;
> +
> +	fprintf(stderr, "%s: cannot alloc memory for fs subtype: %s\n",
> +		mo->msgtag, strerror(errno));
> +	return -1;
> +}
> +
> +#ifdef SO_PASSRIGHTS
> +/*
> + * Turn off SO_PASSRIGHTS on sockfd.  Returns 0 on success, -1 (after
> + * printing a message) if setsockopt() fails.
> + */
> +static int try_drop_passrights(struct mount_service *mo, int sockfd)
> +{
> +	int disable = 0;
> +
> +	/*
> +	 * Refuse any fds sent to us over this socket.  We don't trust the
> +	 * fuse server not to pollute our fd namespace, so shut that door now.
> +	 */
> +	if (setsockopt(sockfd, SOL_SOCKET, SO_PASSRIGHTS, &disable,
> +		       sizeof(disable)) == 0)
> +		return 0;
> +
> +	fprintf(stderr, "%s: disabling fd passing: %s\n",
> +		mo->msgtag, strerror(errno));
> +	return -1;
> +}
> +#else
> +# define try_drop_passrights(...) (0)
> +#endif
> +
> +/*
> + * Check that the socket's send buffer can hold an open command carrying a
> + * PATH_MAX-sized path.
> + *
> + * Returns 0 if it can (or if the size cannot be determined), and
> + * MOUNT_SERVICE_FALLBACK_NEEDED if the buffer is too small.
> + */
> +static int check_sendbuf_size(struct mount_service *mo, int sockfd)
> +{
> +	const size_t needed = sizeof_fuse_service_open_command(PATH_MAX);
> +	int avail = -1;
> +	socklen_t avail_len = sizeof(avail);
> +
> +	/*
> +	 * Failing to query the maximum send buffer length is not fatal.  Huge
> +	 * open commands are unlikely, and if one does get sent, the sendmsg
> +	 * call will report the failure at that point.
> +	 */
> +	if (getsockopt(sockfd, SOL_SOCKET, SO_SNDBUF, &avail, &avail_len) != 0)
> +		return 0;
> +	if (avail < 0)
> +		return 0;
> +
> +	if (avail < needed) {
> +		fprintf(stderr, "%s: max socket send buffer is %d, need at least %zu.\n",
> +			mo->msgtag, avail, needed);
> +		return MOUNT_SERVICE_FALLBACK_NEEDED;
> +	}
> +
> +	return 0;
> +}
> +
> +/*
> + * Connect to the service socket named after mo->subtype inside
> + * FUSE_SERVICE_SOCKET_DIR.
> + *
> + * On success the connected socket is stored in mo->sockfd and 0 is returned.
> + * Returns MOUNT_SERVICE_FALLBACK_NEEDED if the send buffer is too small or
> + * nothing is listening at the socket path (ENOENT/ECONNREFUSED), and -1 for
> + * any other failure.  The socket is closed on every failure path.
> + */
> +static int mount_service_connect(struct mount_service *mo)
> +{
> +	struct sockaddr_un name = {
> +		.sun_family = AF_UNIX,
> +	};
> +	int sockfd;
> +	int written;
> +	int ret;
> +
> +	written = snprintf(name.sun_path, sizeof(name.sun_path),
> +			   FUSE_SERVICE_SOCKET_DIR "/%s", mo->subtype);
> +	if (written < 0 || (size_t)written >= sizeof(name.sun_path)) {
> +		fprintf(stderr, "%s: filesystem type name `%s' is too long.\n",
> +			mo->msgtag, mo->subtype);
> +		return -1;
> +	}
> +
> +	sockfd = socket(AF_UNIX, SOCK_SEQPACKET | SOCK_CLOEXEC, 0);
> +	if (sockfd < 0) {
> +		int error = errno;
> +
> +		fprintf(stderr, "%s: opening %s service socket: %s\n",
> +			mo->msgtag, mo->subtype, strerror(error));
> +		return -1;
> +	}
> +
> +	ret = check_sendbuf_size(mo, sockfd);
> +	if (ret) {
> +		/* don't leak the socket when asking the caller to fall back */
> +		close(sockfd);
> +		return ret;
> +	}
> +
> +	ret = connect(sockfd, (const struct sockaddr *)&name, sizeof(name));
> +	if (ret && (errno == ENOENT || errno == ECONNREFUSED)) {
> +		fprintf(stderr, "%s: no safe filesystem driver for %s available.\n",
> +			mo->msgtag, mo->subtype);
> +		close(sockfd);
> +		return MOUNT_SERVICE_FALLBACK_NEEDED;
> +	}
> +	if (ret) {
> +		int error = errno;
> +
> +		fprintf(stderr, "%s: %s: %s\n",
> +			mo->msgtag, name.sun_path, strerror(error));
> +		goto out;
> +	}
> +
> +	ret = try_drop_passrights(mo, sockfd);
> +	if (ret)
> +		goto out;
> +
> +	mo->sockfd = sockfd;
> +	return 0;
> +out:
> +	close(sockfd);
> +	return -1;
> +}
> +
> +/*
> + * Perform the hello handshake with the fuse server.
> + *
> + * Sends the supported protocol version range (plus the ALLOW_OTHER flag when
> + * running as uid 0) and validates the reply: its size, magic, protocol
> + * version, and that the padding field is zero.
> + *
> + * Returns 0 if the handshake succeeds, -1 (after printing a message)
> + * otherwise.
> + */
> +static int mount_service_send_hello(struct mount_service *mo)
> +{
> + struct fuse_service_hello hello = {
> + .p.magic = htonl(FUSE_SERVICE_HELLO_CMD),
> + .min_version = htons(FUSE_SERVICE_MIN_PROTO),
> + .max_version = htons(FUSE_SERVICE_MAX_PROTO),
> + };
> + struct fuse_service_hello_reply reply = { };
> + ssize_t size;
> +
> + /* only tell the server that allow_other is permissible if we are root */
> + if (getuid() == 0)
> + hello.flags |= htonl(FUSE_SERVICE_FLAG_ALLOW_OTHER);
> +
> + size = __send_packet(mo, &hello, sizeof(hello));
> + if (size < 0) {
> + fprintf(stderr, "%s: send hello: %s\n",
> + mo->msgtag, strerror(errno));
> + return -1;
> + }
> +
> + size = __recv_packet(mo, &reply, sizeof(reply));
> + if (size < 0) {
> + fprintf(stderr, "%s: hello reply: %s\n",
> + mo->msgtag, strerror(errno));
> + return -1;
> + }
> + if (size != sizeof(reply)) {
> + fprintf(stderr, "%s: wrong hello reply size %zd, expected %zu\n",
> + mo->msgtag, size, sizeof(reply));
> + return -1;
> + }
> +
> + if (ntohl(reply.p.magic) != FUSE_SERVICE_HELLO_REPLY) {
> + fprintf(stderr, "%s: %s service server did not reply to hello\n",
> + mo->msgtag, mo->subtype);
> + return -1;
> + }
> +
> + /* the server must pick a version inside the range we offered */
> + if (ntohs(reply.version) < FUSE_SERVICE_MIN_PROTO ||
> + ntohs(reply.version) > FUSE_SERVICE_MAX_PROTO) {
> + fprintf(stderr, "%s: unsupported protocol version %u\n",
> + mo->msgtag, ntohs(reply.version));
> + return -1;
> + }
> +
> + if (reply.padding) {
> + fprintf(stderr, "%s: nonzero value in padding field\n",
> + mo->msgtag);
> + return -1;
> + }
> +
> + return 0;
> +}
> +
> +/*
> + * Append one argument to the argv memfd.
> + *
> + * The string (including its NUL terminator) is written at *string_pos, and a
> + * record holding that position and length is written at *array_pos.  On
> + * success args->argc, *string_pos and *array_pos are all advanced past the
> + * new entry.
> + *
> + * Returns 0 on success, -1 (after printing a message) on a failed or short
> + * write.
> + */
> +static int mount_service_capture_arg(struct mount_service *mo,
> + struct fuse_service_memfd_argv *args,
> + const char *string, off_t *array_pos,
> + off_t *string_pos)
> +{
> + const size_t string_len = strlen(string) + 1;
> + /*
> + * pos/len are passed through htonl(), i.e. converted as 32-bit values.
> + * NOTE(review): presumably the memfd never grows past 4GiB -- confirm.
> + */
> + struct fuse_service_memfd_arg arg = {
> + .pos = htonl(*string_pos),
> + .len = htonl(string_len),
> + };
> + ssize_t written;
> +
> + written = pwrite(mo->argvfd, string, string_len, *string_pos);
> + if (written < 0) {
> + fprintf(stderr, "%s: memfd argv write: %s\n",
> + mo->msgtag, strerror(errno));
> + return -1;
> + }
> + if (written < string_len) {
> + fprintf(stderr, "%s: memfd argv[%u] wrote %zd, expected %zu\n",
> + mo->msgtag, args->argc, written, string_len);
> + return -1;
> + }
> +
> + written = pwrite(mo->argvfd, &arg, sizeof(arg), *array_pos);
> + if (written < 0) {
> + fprintf(stderr, "%s: memfd arg write: %s\n",
> + mo->msgtag, strerror(errno));
> + return -1;
> + }
> + if (written < sizeof(arg)) {
> + fprintf(stderr, "%s: memfd arg[%u] wrote %zd, expected %zu\n",
> + mo->msgtag, args->argc, written, sizeof(arg));
> + return -1;
> + }
> +
> + /* argc is kept in host byte order here; the caller converts it later */
> + args->argc++;
> + *string_pos += string_len;
> + *array_pos += sizeof(arg);
> +
> + return 0;
> +}
> +
> +/*
> + * Create the argv memfd (mo->argvfd) and fill it with the command line.
> + *
> + * Layout: a fuse_service_memfd_argv header, then an array of
> + * fuse_service_memfd_arg records, then the argument strings.  The subtype is
> + * stored as if it were argv[0]; argv[1..] follow, except that "-t" and the
> + * argument after it are left out.
> + *
> + * Returns 0 on success and nonzero on failure.  On failure mo->argvfd may
> + * already have been created and is left for the caller to close.
> + */
> +static int mount_service_capture_args(struct mount_service *mo, int argc,
> +				      char *argv[])
> +{
> +	struct fuse_service_memfd_argv args = {
> +		.magic = htonl(FUSE_SERVICE_ARGS_MAGIC),
> +	};
> +	off_t array_pos = sizeof(struct fuse_service_memfd_argv);
> +	off_t string_pos;
> +	size_t nr_slots;
> +	ssize_t written;
> +	int i;
> +	int ret;
> +
> +	if (argc < 0) {
> +		fprintf(stderr, "%s: argc cannot be negative\n",
> +			mo->msgtag);
> +		return -1;
> +	}
> +
> +	/*
> +	 * The subtype always takes the first record and at most argc - 1
> +	 * command line arguments follow it.  Reserve at least one record so
> +	 * that the subtype's record can never land on top of the strings,
> +	 * and only do the arithmetic once argc is known to be non-negative.
> +	 */
> +	nr_slots = argc > 0 ? argc : 1;
> +	string_pos = array_pos +
> +			(nr_slots * sizeof(struct fuse_service_memfd_arg));
> +
> +	/*
> +	 * Create the memfd in which we'll stash arguments, and set the write
> +	 * pointer for the names.
> +	 */
> +	mo->argvfd = memfd_create("fuse service argv", MFD_CLOEXEC);
> +	if (mo->argvfd < 0) {
> +		fprintf(stderr, "%s: argvfd create: %s\n",
> +			mo->msgtag, strerror(errno));
> +		return -1;
> +	}
> +
> +	/*
> +	 * Write the alleged subtype as if it were argv[0], then write the rest
> +	 * of the argv arguments.
> +	 */
> +	ret = mount_service_capture_arg(mo, &args, mo->subtype, &array_pos,
> +					&string_pos);
> +	if (ret)
> +		return ret;
> +
> +	for (i = 1; i < argc; i++) {
> +		/* skip the -t(ype) argument */
> +		if (!strcmp(argv[i], "-t") && i + 1 < argc) {
> +			i++;
> +			continue;
> +		}
> +
> +		ret = mount_service_capture_arg(mo, &args, argv[i],
> +						&array_pos, &string_pos);
> +		if (ret)
> +			return ret;
> +	}
> +
> +	/* Now write the header, with the final count in network byte order */
> +	args.argc = htonl(args.argc);
> +	written = pwrite(mo->argvfd, &args, sizeof(args), 0);
> +	if (written < 0) {
> +		fprintf(stderr, "%s: memfd argv write: %s\n",
> +			mo->msgtag, strerror(errno));
> +		return -1;
> +	}
> +	if (written < sizeof(args)) {
> +		fprintf(stderr, "%s: memfd argv wrote %zd, expected %zu\n",
> +			mo->msgtag, written, sizeof(args));
> +		return -1;
> +	}
> +
> +	return 0;
> +}
> +
> +/*
> + * Send an open reply that carries fd, tagged with path, to the fuse server.
> + *
> + * Returns 0 if the whole reply was sent, -1 (after printing a message) if
> + * the reply could not be allocated or the send failed or came up short.
> + */
> +static int mount_service_send_file(struct mount_service *mo,
> +				   const char *path, int fd)
> +{
> +	const size_t reply_sz =
> +			sizeof_fuse_service_requested_file(strlen(path));
> +	struct fuse_service_requested_file *reply;
> +	ssize_t sent;
> +	int ret = -1;
> +
> +	reply = calloc(1, reply_sz);
> +	if (!reply) {
> +		fprintf(stderr, "%s: alloc send file reply: %s\n",
> +			mo->msgtag, strerror(errno));
> +		return -1;
> +	}
> +	reply->p.magic = htonl(FUSE_SERVICE_OPEN_REPLY);
> +	reply->error = 0;
> +	strcpy(reply->path, path);
> +
> +	sent = __send_fd(mo, reply, reply_sz, fd);
> +	if (sent < 0)
> +		fprintf(stderr, "%s: send file reply: %s\n",
> +			mo->msgtag, strerror(errno));
> +	else if (sent < reply_sz)
> +		fprintf(stderr, "%s: send file reply wrote %zd, expected %zu\n",
> +			mo->msgtag, sent, reply_sz);
> +	else
> +		ret = 0;
> +
> +	free(reply);
> +	return ret;
> +}
> +
> +/*
> + * Send an open reply for path that carries an error code and no fd.
> + *
> + * Returns 0 if the whole reply was sent, -1 (after printing a message) if
> + * the reply could not be allocated or the send failed or came up short.
> + */
> +static int mount_service_send_file_error(struct mount_service *mo, int error,
> +					 const char *path)
> +{
> +	const size_t reply_sz =
> +			sizeof_fuse_service_requested_file(strlen(path));
> +	struct fuse_service_requested_file *reply;
> +	ssize_t sent;
> +	int ret = -1;
> +
> +	reply = calloc(1, reply_sz);
> +	if (!reply) {
> +		fprintf(stderr, "%s: alloc send file error: %s\n",
> +			mo->msgtag, strerror(errno));
> +		return -1;
> +	}
> +	reply->p.magic = htonl(FUSE_SERVICE_OPEN_REPLY);
> +	reply->error = htonl(error);
> +	strcpy(reply->path, path);
> +
> +	sent = __send_packet(mo, reply, reply_sz);
> +	if (sent < 0)
> +		fprintf(stderr, "%s: send file error: %s\n",
> +			mo->msgtag, strerror(errno));
> +	else if (sent < reply_sz)
> +		fprintf(stderr, "%s: send file error wrote %zd, expected %zu\n",
> +			mo->msgtag, sent, reply_sz);
> +	else
> +		ret = 0;
> +
> +	free(reply);
> +	return ret;
> +}
> +
> +/*
> + * Open the fuse device and send the two files every fuse server needs: the
> + * argv memfd, followed by the fuse device itself.
> + *
> + * The argv memfd is closed once it has been sent.  If sending it fails, the
> + * fuse device is closed again.  Returns 0 on success, nonzero on failure.
> + */
> +static int mount_service_send_required_files(struct mount_service *mo,
> +					     const char *fusedev)
> +{
> +	int ret;
> +
> +	mo->fusedevfd = open(fusedev, O_RDWR | O_CLOEXEC);
> +	if (mo->fusedevfd < 0) {
> +		fprintf(stderr, "%s: %s: %s\n",
> +			mo->msgtag, fusedev, strerror(errno));
> +		return -1;
> +	}
> +
> +	ret = mount_service_send_file(mo, FUSE_SERVICE_ARGV, mo->argvfd);
> +	if (ret) {
> +		close(mo->fusedevfd);
> +		mo->fusedevfd = -1;
> +		return ret;
> +	}
> +
> +	/* the server has its own copy of the arguments now */
> +	close(mo->argvfd);
> +	mo->argvfd = -1;
> +
> +	return mount_service_send_file(mo, FUSE_SERVICE_FUSEDEV,
> +				       mo->fusedevfd);
> +}
> +
> +/*
> + * Receive the next command packet from the fuse server.
> + *
> + * The packet length is peeked first so that a buffer of the right size can
> + * be allocated; lengths smaller than a packet header or larger than
> + * FUSE_SERVICE_MAX_CMD_SIZE are rejected.
> + *
> + * On success, returns 0 and stores the calloc()ed packet in *commandp and
> + * its size in *commandsz; ownership of the buffer passes to the caller.
> + * Returns -1 (after printing a message) on failure, leaving both outputs
> + * untouched.
> + */
> +static int mount_service_receive_command(struct mount_service *mo,
> + struct fuse_service_packet **commandp,
> + size_t *commandsz)
> +{
> + struct fuse_service_packet *command;
> + ssize_t alleged_size, size;
> +
> + alleged_size = __recv_packet_size(mo);
> + if (alleged_size < 0) {
> + fprintf(stderr, "%s: peek service command: %s\n",
> + mo->msgtag, strerror(errno));
> + return -1;
> + }
> + if (alleged_size == 0) {
> + /* fuse server probably exited early */
> + fprintf(stderr, "%s: fuse server exited without saying goodbye!\n",
> + mo->msgtag);
> + return -1;
> + }
> + if (alleged_size < sizeof(struct fuse_service_packet)) {
> + fprintf(stderr, "%s: wrong command packet size %zd, expected at least %zu\n",
> + mo->msgtag, alleged_size,
> + sizeof(struct fuse_service_packet));
> + return -1;
> + }
> + if (alleged_size > FUSE_SERVICE_MAX_CMD_SIZE) {
> + fprintf(stderr, "%s: wrong command packet size %zd, expected less than %d\n",
> + mo->msgtag, alleged_size, FUSE_SERVICE_MAX_CMD_SIZE);
> + return -1;
> + }
> +
> + /* one extra zeroed byte follows the packet in the buffer */
> + command = calloc(1, alleged_size + 1);
> + if (!command) {
> + fprintf(stderr, "%s: alloc service command: %s\n",
> + mo->msgtag, strerror(errno));
> + return -1;
> + }
> +
> + size = __recv_packet(mo, command, alleged_size);
> + if (size < 0) {
> + fprintf(stderr, "%s: receive service command: %s\n",
> + mo->msgtag, strerror(errno));
> + free(command);
> + return -1;
> + }
> + /* the packet actually read must match the size that was peeked */
> + if (size != alleged_size) {
> + fprintf(stderr, "%s: wrong service command size %zd, expected %zd\n",
> + mo->msgtag, size, alleged_size);
> + free(command);
> + return -1;
> + }
> +
> + *commandp = command;
> + *commandsz = size;
> + return 0;
> +}
> +
> +/*
> + * Send a simple reply carrying error (converted to network byte order) to
> + * the fuse server.  Returns 0 on success, -1 (after printing a message) if
> + * the send fails.
> + */
> +static int mount_service_send_reply(struct mount_service *mo, int error)
> +{
> +	struct fuse_service_simple_reply reply = {
> +		.p.magic = htonl(FUSE_SERVICE_SIMPLE_REPLY),
> +		.error = htonl(error),
> +	};
> +
> +	if (__send_packet(mo, &reply, sizeof(reply)) >= 0)
> +		return 0;
> +
> +	fprintf(stderr, "%s: send service reply: %s\n",
> +		mo->msgtag, strerror(errno));
> +	return -1;
> +}
> +
> +/*
> + * Verify that fd refers to a block device and, if the open command asks for
> + * one, set its block size.
> + *
> + * Returns 0 on success or a negative errno value (after printing a
> + * message): the fstat()/ioctl() errno, or -ENOTBLK if fd is not a block
> + * device.
> + */
> +static int prepare_bdev(struct mount_service *mo,
> +			struct fuse_service_open_command *oc, int fd)
> +{
> +	struct stat stbuf;
> +	int block_size;
> +
> +	if (fstat(fd, &stbuf)) {
> +		int error = errno;
> +
> +		fprintf(stderr, "%s: %s: %s\n",
> +			mo->msgtag, oc->path, strerror(error));
> +		return -error;
> +	}
> +
> +	if (!S_ISBLK(stbuf.st_mode)) {
> +		fprintf(stderr, "%s: %s: %s\n",
> +			mo->msgtag, oc->path, strerror(ENOTBLK));
> +		return -ENOTBLK;
> +	}
> +
> +	/* a zero block size means that no change was requested */
> +	if (!oc->block_size)
> +		return 0;
> +
> +	block_size = ntohl(oc->block_size);
> +	if (ioctl(fd, BLKBSZSET, &block_size)) {
> +		int error = errno;
> +
> +		fprintf(stderr, "%s: %s: %s\n",
> +			mo->msgtag, oc->path, strerror(error));
> +		return -error;
> +	}
> +
> +	return 0;
> +}
> +
> +/*
> + * Handle a request from the fuse server to open a path on its behalf.
> + *
> + * The command is validated (minimum size, null terminated, no unknown
> + * request flags), the path is opened with the requested flags plus
> + * O_CLOEXEC, and the resulting fd is sent back.  If expected_fmt is a block
> + * device format, the file is additionally checked and prepared by
> + * prepare_bdev().  Any failure is reported to the server as an error reply
> + * instead of an fd.
> + *
> + * Returns the result of sending the reply (file or error) to the server.
> + */
> +static int mount_service_open_path(struct mount_service *mo,
> + mode_t expected_fmt,
> + struct fuse_service_packet *p, size_t psz)
> +{
> + struct fuse_service_open_command *oc =
> + container_of(p, struct fuse_service_open_command, p);
> + uint32_t request_flags;
> + int open_flags;
> + int ret;
> + int fd;
> +
> + /* oc->path cannot be trusted yet, so report the error against "?" */
> + if (psz < sizeof_fuse_service_open_command(1)) {
> + fprintf(stderr, "%s: open command too small\n",
> + mo->msgtag);
> + return mount_service_send_file_error(mo, EINVAL, "?");
> + }
> +
> + if (!check_null_endbyte(p, psz)) {
> + fprintf(stderr, "%s: open command must be null terminated\n",
> + mo->msgtag);
> + return mount_service_send_file_error(mo, EINVAL, "?");
> + }
> +
> + request_flags = ntohl(oc->request_flags);
> + if (request_flags & ~FUSE_SERVICE_OPEN_FLAGS) {
> + fprintf(stderr, "%s: open flags 0x%x not recognized\n",
> + mo->msgtag, request_flags & ~FUSE_SERVICE_OPEN_FLAGS);
> + return mount_service_send_file_error(mo, EINVAL, oc->path);
> + }
> +
> + open_flags = ntohl(oc->open_flags) | O_CLOEXEC;
> + fd = open(oc->path, open_flags, ntohl(oc->create_mode));
> + if (fd < 0) {
> + int error = errno;
> +
> + /*
> + * Don't print a busy device error report because the
> + * filesystem might decide to retry.
> + */
> + if (error != EBUSY && !(request_flags & FUSE_SERVICE_OPEN_QUIET))
> + fprintf(stderr, "%s: %s: %s\n",
> + mo->msgtag, oc->path, strerror(error));
> + return mount_service_send_file_error(mo, error, oc->path);
> + }
> +
> + if (S_ISBLK(expected_fmt)) {
> + ret = prepare_bdev(mo, oc, fd);
> + if (ret < 0) {
> + close(fd);
> + return mount_service_send_file_error(mo, -ret,
> + oc->path);
> + }
> + }
> +
> + /* our copy of the fd is closed whether or not the send succeeded */
> + ret = mount_service_send_file(mo, oc->path, fd);
> + close(fd);
> + return ret;
> +}
> +
> +static int mount_service_handle_open_cmd(struct mount_service *mo,
> + struct fuse_service_packet *p,
> + size_t psz)
> +{
> + return mount_service_open_path(mo, 0, p, psz);
> +}
> +
> +static int mount_service_handle_open_bdev_cmd(struct mount_service *mo,
> + struct fuse_service_packet *p,
> + size_t psz)
> +{
> + return mount_service_open_path(mo, S_IFBLK, p, psz);
> +}
> +
> +static inline const char *fsname(const struct mount_service *mo)
> +{
> + return mo->fuseblk ? "fuseblk" : "fuse";
> +}
> +
> +static int mount_service_handle_fsopen_cmd(struct mount_service *mo,
> + const struct fuse_service_packet *p,
> + size_t psz)
> +{
> + struct fuse_service_fsopen_command *oc =
> + container_of(p, struct fuse_service_fsopen_command, p);
> + uint32_t fsopen_flags;
> +
> + if (psz != sizeof(struct fuse_service_fsopen_command)) {
> + fprintf(stderr, "%s: fsopen command wrong size %zu, expected %zu\n",
> + mo->msgtag, psz, sizeof(*oc));
> + return mount_service_send_reply(mo, EINVAL);
> + }
> +
> + if (mo->fsopened) {
> + fprintf(stderr, "%s: fsopen command respecified\n",
> + mo->msgtag);
> + return mount_service_send_reply(mo, EINVAL);
> + }
> +
> + fsopen_flags = ntohl(oc->fsopen_flags);
> + if (fsopen_flags & ~FUSE_SERVICE_FSOPEN_FLAGS) {
> + fprintf(stderr, "%s: unknown fsopen flags, 0x%x\n",
> + mo->msgtag, fsopen_flags & ~FUSE_SERVICE_FSOPEN_FLAGS);
> + return mount_service_send_reply(mo, EINVAL);
> + }
> +
> + if (fsopen_flags & FUSE_SERVICE_FSOPEN_FUSEBLK) {
> + if (getuid() != 0) {
> + fprintf(stderr, "%s: fuseblk requires root privilege\n",
> + mo->msgtag);
> + return mount_service_send_reply(mo, EPERM);
> + }
> +
> + mo->fuseblk = true;
> + }
> + mo->fsopened = true;
> +
> + return mount_service_send_reply(mo, 0);
> +}
> +
> +static int mount_service_handle_source_cmd(struct mount_service *mo,
> + const struct fuse_service_packet *p,
> + size_t psz)
> +{
> + struct fuse_service_string_command *oc =
> + container_of(p, struct fuse_service_string_command, p);
> +
> + if (psz < sizeof_fuse_service_string_command(1)) {
> + fprintf(stderr, "%s: source command too small\n",
> + mo->msgtag);
> + return mount_service_send_reply(mo, EINVAL);
> + }
> +
> + if (!check_null_endbyte(p, psz)) {
> + fprintf(stderr, "%s: source command must be null terminated\n",
> + mo->msgtag);
> + return mount_service_send_reply(mo, EINVAL);
> + }
> +
> + if (mo->source) {
> + fprintf(stderr, "%s: source respecified!\n",
> + mo->msgtag);
> + return mount_service_send_reply(mo, EINVAL);
> + }
> +
> + mo->source = strdup(oc->value);
> + if (!mo->source) {
> + int error = errno;
> +
> + fprintf(stderr, "%s: alloc source string: %s\n",
> + mo->msgtag, strerror(error));
> + return mount_service_send_reply(mo, error);
> + }
> +
> + return mount_service_send_reply(mo, 0);
> +}
> +
> +static int mount_service_handle_mntopts_cmd(struct mount_service *mo,
> + const struct fuse_service_packet *p,
> + size_t psz)
> +{
> + struct fuse_service_string_command *oc =
> + container_of(p, struct fuse_service_string_command, p);
> +
> + if (psz < sizeof_fuse_service_string_command(1)) {
> + fprintf(stderr, "%s: mount options command too small\n",
> + mo->msgtag);
> + return mount_service_send_reply(mo, EINVAL);
> + }
> +
> + if (!check_null_endbyte(p, psz)) {
> + fprintf(stderr, "%s: mount options command must be null terminated\n",
> + mo->msgtag);
> + return mount_service_send_reply(mo, EINVAL);
> + }
> +
> + if (mo->mntopts) {
> + fprintf(stderr, "%s: mount options respecified!\n",
> + mo->msgtag);
> + return mount_service_send_reply(mo, EINVAL);
> + }
> +
> + mo->mntopts = strdup(oc->value);
> + if (!mo->mntopts) {
> + int error = errno;
> +
> + fprintf(stderr, "%s: alloc mount options string: %s\n",
> + mo->msgtag, strerror(error));
> + return mount_service_send_reply(mo, error);
> + }
> +
> + return mount_service_send_reply(mo, 0);
> +}
> +
> +static int attach_to_mountpoint(struct mount_service *mo, mode_t expected_fmt,
> + char *mntpt)
> +{
> + struct stat stbuf;
> + char *res_mntpt;
> + int mountfd = -1;
> + int error;
> + int ret;
> +
> + /*
> + * Open the alleged mountpoint, make sure it's a dir or a file.
> + */
> + mountfd = open(mntpt, O_RDONLY | O_CLOEXEC);
> + if (mountfd < 0) {
> + error = errno;
> + fprintf(stderr, "%s: %s: %s\n", mo->msgtag, mntpt,
> + strerror(error));
> + goto out_error;
> + }
> +
> + /*
> + * Make sure we can access the mountpoint and that it's either a
> + * directory or a regular file. Linux can handle mounting atop special
> + * files, but we don't care to do such crazy things.
> + */
> + ret = fstat(mountfd, &stbuf);
> + if (ret) {
> + error = errno;
> + fprintf(stderr, "%s: %s: %s\n", mo->msgtag, mntpt,
> + strerror(error));
> + goto out_mountfd;
> + }
> +
> + if (!S_ISDIR(stbuf.st_mode) && !S_ISREG(stbuf.st_mode)) {
> + error = EACCES;
> + fprintf(stderr, "%s: %s: Mount point must be directory or regular file.\n",
> + mo->msgtag, mntpt);
> + goto out_mountfd;
> + }
> +
> + /*
> + * Resolve the (possibly relative) mountpoint path before chdir'ing
> + * onto it.
> + */
> + res_mntpt = fuse_mnt_resolve_path(mo->msgtag, mntpt);
> + if (!res_mntpt) {
> + error = EACCES;
> + fprintf(stderr, "%s: %s: Could not resolve path to mount point.\n",
> + mo->msgtag, mntpt);
> + goto out_mountfd;
> + }
> +
> + /* Make sure the mountpoint type matches what the caller wanted */
> + switch (expected_fmt) {
> + case S_IFDIR:
> + if (!S_ISDIR(stbuf.st_mode)) {
> + error = ENOTDIR;
> + fprintf(stderr, "%s: %s: %s\n",
> + mo->msgtag, mntpt, strerror(error));
> + goto out_res_mntpt;
> + }
> + break;
> + case S_IFREG:
> + if (!S_ISREG(stbuf.st_mode)) {
> + error = EISDIR;
> + fprintf(stderr, "%s: %s: %s\n",
> + mo->msgtag, mntpt, strerror(error));
> + goto out_res_mntpt;
> + }
> + break;
> + }
> +
> + switch (stbuf.st_mode & S_IFMT) {
> + case S_IFREG:
> + /*
> + * This is a regular file, so we point mount() at the open file
> + * descriptor.
> + */
> + asprintf(&mo->real_mountpoint, "/dev/fd/%d", mountfd);
> + break;
> + case S_IFDIR:
> + /*
> + * Pin the mount so it can't go anywhere. This only works for
> + * directories, which is fortunately the common case.
> + */
> + ret = fchdir(mountfd);
> + if (ret) {
> + error = errno;
> + fprintf(stderr, "%s: %s: %s\n", mo->msgtag, mntpt,
> + strerror(error));
> + goto out_res_mntpt;
> + }
> +
> + /*
> + * Now that we're sitting on the mountpoint directory, we can
> + * pass "." to mount() and avoid races with directory tree
> + * mutations.
> + */
> + mo->real_mountpoint = strdup(".");
> + break;
> + default:
> + /* Should never get here */
> + error = EINVAL;
> + goto out_res_mntpt;
> + }
> + if (!mo->real_mountpoint) {
> + error = ENOMEM;
> + fprintf(stderr, "%s: %s: %s\n", mo->msgtag, mntpt,
> + strerror(error));
> + goto out_res_mntpt;
> + }
> +
> + mo->mountpoint = mntpt;
> + mo->mountfd = mountfd;
> + mo->resv_mountpoint = res_mntpt;
> +
> + return mount_service_send_reply(mo, 0);
> +
> +out_res_mntpt:
> + free(res_mntpt);
> +out_mountfd:
> + close(mountfd);
> +out_error:
> + free(mntpt);
> + return mount_service_send_reply(mo, error);
> +}
> +
> +static int mount_service_handle_mountpoint_cmd(struct mount_service *mo,
> + const struct fuse_service_packet *p,
> + size_t psz, int argc, char *argv[])
> +{
> + struct fuse_service_mountpoint_command *oc =
> + container_of(p, struct fuse_service_mountpoint_command, p);
> + char *mntpt;
> + mode_t expected_fmt;
> + bool foundit = false;
> + int i;
> +
> + if (psz < sizeof_fuse_service_mountpoint_command(1)) {
> + fprintf(stderr, "%s: mount point command too small\n",
> + mo->msgtag);
> + return mount_service_send_reply(mo, EINVAL);
> + }
> +
> + if (!check_null_endbyte(p, psz)) {
> + fprintf(stderr, "%s: mount point command must be null terminated\n",
> + mo->msgtag);
> + return mount_service_send_reply(mo, EINVAL);
> + }
> +
> + if (oc->padding) {
> + fprintf(stderr, "%s: nonzero value in padding field\n",
> + mo->msgtag);
> + return mount_service_send_reply(mo, EINVAL);
> + }
> +
> + if (mo->mountpoint) {
> + fprintf(stderr, "%s: mount point respecified!\n",
> + mo->msgtag);
> + return mount_service_send_reply(mo, EINVAL);
> + }
> +
> + /* Make sure the mountpoint file format matches what the caller wanted */
> + expected_fmt = ntohs(oc->expected_fmt);
> + switch (expected_fmt) {
> + case S_IFDIR:
> + case S_IFREG:
> + case 0:
> + break;
> + default:
> + fprintf(stderr, "%s: %s: weird expected format 0%o\n",
> + mo->msgtag, oc->value, expected_fmt);
> + return mount_service_send_reply(mo, EINVAL);
> + }
> +
> + /* Mountpoint must be mentioned in the caller's argument list */
> + for (i = 0; i < argc; i++) {
> + if (!strcmp(argv[i], oc->value)) {
> + foundit = true;
> + break;
> + }
> + }
> + if (!foundit) {
> + fprintf(stderr, "%s: mount point must be in command line arguments\n",
> + mo->msgtag);
> + return mount_service_send_reply(mo, EINVAL);
> + }
> +
> + mntpt = strdup(oc->value);
> + if (!mntpt) {
> + int error = errno;
> +
> + fprintf(stderr, "%s: alloc mount point string: %s\n",
> + mo->msgtag, strerror(error));
> + return mount_service_send_reply(mo, error);
> + }
> +
> + return attach_to_mountpoint(mo, expected_fmt, mntpt);
> +}
> +
> +static inline int format_libfuse_mntopts(char *buf, size_t bufsz,
> + const struct mount_service *mo,
> + const struct stat *stbuf)
> +{
> + if (mo->mntopts)
> + return snprintf(buf, bufsz,
> + "%s,fd=%i,rootmode=%o,user_id=%u,group_id=%u",
> + mo->mntopts, mo->fusedevfd,
> + stbuf->st_mode & S_IFMT,
> + getuid(), getgid());
> +
> + return snprintf(buf, bufsz,
> + "fd=%i,rootmode=%o,user_id=%u,group_id=%u",
> + mo->fusedevfd, stbuf->st_mode & S_IFMT,
> + getuid(), getgid());
> +}
> +
> +static int mount_service_regular_mount(struct mount_service *mo,
> + struct fuse_service_mount_command *oc,
> + struct stat *stbuf)
> +{
> + char *fstype = NULL;
> + char *realmopts;
> + int ret;
> +
> + /* Compute the amount of buffer space needed for the mount options */
> + ret = format_libfuse_mntopts(NULL, 0, mo, stbuf);
> + if (ret < 0) {
> + int error = errno;
> +
> + fprintf(stderr, "%s: mount option preformatting: %s\n",
> + mo->msgtag, strerror(error));
> + return mount_service_send_reply(mo, error);
> + }
> +
> + realmopts = calloc(1, ret + 1);
> + if (!realmopts) {
> + int error = errno;
> +
> + fprintf(stderr, "%s: alloc real mount options string: %s\n",
> + mo->msgtag, strerror(error));
> + return mount_service_send_reply(mo, error);
> + }
> +
> + ret = format_libfuse_mntopts(realmopts, ret + 1, mo, stbuf);
> + if (ret < 0) {
> + int error = errno;
> +
> + fprintf(stderr, "%s: mount options formatting: %s\n",
> + mo->msgtag, strerror(error));
> + ret = mount_service_send_reply(mo, error);
> + goto out_realmopts;
> + }
> +
> + asprintf(&fstype, "%s.%s", fsname(mo), mo->subtype);
> + if (!fstype) {
> + int error = errno;
> +
> + fprintf(stderr, "%s: mount fstype formatting: %s\n",
> + mo->msgtag, strerror(error));
> + ret = mount_service_send_reply(mo, error);
> + goto out_realmopts;
> + }
> +
> + ret = mount(mo->source, mo->real_mountpoint, fstype,
> + ntohl(oc->ms_flags), realmopts);
> + if (ret) {
> + int error = errno;
> +
> + fprintf(stderr, "%s: mount: %s\n",
> + mo->msgtag, strerror(error));
> + ret = mount_service_send_reply(mo, error);
> + goto out_fstype;
> + }
> +
> + mo->mounted = true;
> + ret = mount_service_send_reply(mo, 0);
> +out_fstype:
> + free(fstype);
> +out_realmopts:
> + free(realmopts);
> + return ret;
> +}
> +
> +static int mount_service_handle_mount_cmd(struct mount_service *mo,
> + struct fuse_service_packet *p,
> + size_t psz)
> +{
> + struct stat stbuf;
> + struct fuse_service_mount_command *oc =
> + container_of(p, struct fuse_service_mount_command, p);
> + int ret;
> +
> + if (psz != sizeof(struct fuse_service_mount_command)) {
> + fprintf(stderr, "%s: mount command wrong size %zu, expected %zu\n",
> + mo->msgtag, psz, sizeof(*oc));
> + return mount_service_send_reply(mo, EINVAL);
> + }
> +
> + if (!mo->source) {
> + fprintf(stderr, "%s: missing mount source parameter\n",
> + mo->msgtag);
> + return mount_service_send_reply(mo, EINVAL);
> + }
> +
> + if (!mo->mountpoint) {
> + fprintf(stderr, "%s: missing mount point parameter\n",
> + mo->msgtag);
> + return mount_service_send_reply(mo, EINVAL);
> + }
> +
> + /*
> + * Call fstat again because access modes might have changed since we
> + * validated the file type. This is still racy with mount since we
> + * don't lock the path target.
> + */
> + ret = fstat(mo->mountfd, &stbuf);
> + if (ret < 0) {
> + int error = errno;
> +
> + fprintf(stderr, "%s: %s: %s\n",
> + mo->msgtag, mo->mountpoint, strerror(error));
> + return mount_service_send_reply(mo, error);
> + }
> +
> + return mount_service_regular_mount(mo, oc, &stbuf);
> +}
> +
> +static int mount_service_handle_unmount_cmd(struct mount_service *mo,
> + struct fuse_service_packet *p,
> + size_t psz)
> +{
> + int ret;
> +
> + (void)p;
> +
> + if (psz != sizeof(struct fuse_service_unmount_command)) {
> + fprintf(stderr, "%s: unmount command wrong size %zu, expected %zu\n",
> + mo->msgtag, psz, sizeof(struct fuse_service_unmount_command));
> + return mount_service_send_reply(mo, EINVAL);
> + }
> +
> + if (!mo->mounted) {
> + fprintf(stderr, "%s: will not umount before successful mount!\n",
> + mo->msgtag);
> + return mount_service_send_reply(mo, EINVAL);
> + }
> +
> + ret = chdir("/");
> + if (ret) {
> + int error = errno;
> +
> + fprintf(stderr, "%s: fuse server failed chdir: %s\n",
> + mo->msgtag, strerror(error));
> + return mount_service_send_reply(mo, error);
> + }
> +
> + close(mo->mountfd);
> + mo->mountfd = -1;
> +
> + /*
> + * Try to unmount the resolved mountpoint, and hope that we're not the
> + * victim of a race.
> + */
> + ret = umount2(mo->resv_mountpoint, MNT_DETACH);
> + if (ret) {
> + int error = errno;
> +
> + fprintf(stderr, "%s: fuse server failed unmount: %s\n",
> + mo->msgtag, strerror(error));
> + return mount_service_send_reply(mo, error);
> + }
> +
> + mo->mounted = false;
> + return mount_service_send_reply(mo, 0);
> +}
> +
> +static int mount_service_handle_bye_cmd(struct mount_service *mo,
> + struct fuse_service_packet *p,
> + size_t psz)
> +{
> + struct fuse_service_bye_command *bc =
> + container_of(p, struct fuse_service_bye_command, p);
> + int ret;
> +
> + if (psz != sizeof(struct fuse_service_bye_command)) {
> + fprintf(stderr, "%s: bye command wrong size %zu, expected %zu\n",
> + mo->msgtag, psz, sizeof(*bc));
> + return mount_service_send_reply(mo, EINVAL);
> + }
> +
> + ret = ntohl(bc->exitcode);
> + if (ret)
> + fprintf(stderr, "%s: fuse server failed mount, check dmesg/logs for details.\n",
> + mo->msgtag);
> +
> + return ret;
> +}
> +
> +static void mount_service_destroy(struct mount_service *mo)
> +{
> + close(mo->mountfd);
> + close(mo->fusedevfd);
> + close(mo->argvfd);
> + shutdown(mo->sockfd, SHUT_RDWR);
> + close(mo->sockfd);
> +
> + free(mo->source);
> + free(mo->mountpoint);
> + free(mo->real_mountpoint);
> + free(mo->resv_mountpoint);
> + free(mo->mntopts);
> + free(mo->subtype);
> +
> + memset(mo, 0, sizeof(*mo));
> + mo->sockfd = -1;
> + mo->argvfd = -1;
> + mo->fusedevfd = -1;
> + mo->mountfd = -1;
> +}
> +
> +int mount_service_main(int argc, char *argv[])
> +{
> + const char *fusedev = fuse_mnt_get_devname();
> + struct mount_service mo = { };
> + bool running = true;
> + int ret;
> +
> + if (argc < 3 || !strcmp(argv[1], "--help")) {
> + printf("Usage: %s source mountpoint -t type [-o options]\n",
> + argv[0]);
> + return EXIT_FAILURE;
> + }
> +
> + if (argc > 0 && argv[0])
> + mo.msgtag = argv[0];
> + else
> + mo.msgtag = "mount.service";
> +
> + ret = mount_service_init(&mo, argc, argv);
> + if (ret)
> + return EXIT_FAILURE;
> +
> + ret = mount_service_connect(&mo);
> + if (ret == MOUNT_SERVICE_FALLBACK_NEEDED)
> + goto out;
> + if (ret) {
> + ret = EXIT_FAILURE;
> + goto out;
> + }
> +
> + ret = mount_service_send_hello(&mo);
> + if (ret) {
> + ret = EXIT_FAILURE;
> + goto out;
> + }
> +
> + ret = mount_service_capture_args(&mo, argc, argv);
> + if (ret) {
> + ret = EXIT_FAILURE;
> + goto out;
> + }
> +
> + ret = mount_service_send_required_files(&mo, fusedev);
> + if (ret) {
> + ret = EXIT_FAILURE;
> + goto out;
> + }
> +
> + while (running) {
> + struct fuse_service_packet *p = NULL;
> + size_t sz;
> +
> + ret = mount_service_receive_command(&mo, &p, &sz);
> + if (ret) {
> + ret = EXIT_FAILURE;
> + goto out;
> + }
> +
> + switch (ntohl(p->magic)) {
> + case FUSE_SERVICE_OPEN_CMD:
> + ret = mount_service_handle_open_cmd(&mo, p, sz);
> + break;
> + case FUSE_SERVICE_OPEN_BDEV_CMD:
> + ret = mount_service_handle_open_bdev_cmd(&mo, p, sz);
> + break;
> + case FUSE_SERVICE_FSOPEN_CMD:
> + ret = mount_service_handle_fsopen_cmd(&mo, p, sz);
> + break;
> + case FUSE_SERVICE_SOURCE_CMD:
> + ret = mount_service_handle_source_cmd(&mo, p, sz);
> + break;
> + case FUSE_SERVICE_MNTOPTS_CMD:
> + ret = mount_service_handle_mntopts_cmd(&mo, p, sz);
> + break;
> + case FUSE_SERVICE_MNTPT_CMD:
> + ret = mount_service_handle_mountpoint_cmd(&mo, p, sz,
> + argc, argv);
> + break;
> + case FUSE_SERVICE_MOUNT_CMD:
> + ret = mount_service_handle_mount_cmd(&mo, p, sz);
> + break;
> + case FUSE_SERVICE_UNMOUNT_CMD:
> + ret = mount_service_handle_unmount_cmd(&mo, p, sz);
> + break;
> + case FUSE_SERVICE_BYE_CMD:
> + ret = mount_service_handle_bye_cmd(&mo, p, sz);
> + free(p);
> + goto out;
> + default:
> + fprintf(stderr, "%s: unrecognized packet 0x%x\n",
> + mo.msgtag, ntohl(p->magic));
> + ret = EXIT_FAILURE;
> + break;
> + }
> + free(p);
> +
> + if (ret) {
> + ret = EXIT_FAILURE;
> + goto out;
> + }
> + }
> +
> + ret = EXIT_SUCCESS;
> +out:
> + mount_service_destroy(&mo);
> + return ret;
> +}
>
>
^ permalink raw reply
* [RFC] ext4: possible inconsistency in ext4_append() error path
From: Vineet Agarwal @ 2026-05-01 17:25 UTC (permalink / raw)
To: tytso
Cc: linux-ext4, adilger.kernel, libaokun, jack, ojaswin, ritesh.list,
yi.zhang, linux-kernel, Vineet Agarwal
Hi,
While looking into ext4 directory operations, I noticed a possible
inconsistency in the error handling of ext4_append().
In ext4_append(), the inode size is updated before all failure points
have been ruled out:
bh = ext4_bread(handle, inode, *block, EXT4_GET_BLOCKS_CREATE);
if (IS_ERR(bh))
return bh;
inode->i_size += inode->i_sb->s_blocksize;
EXT4_I(inode)->i_disksize = inode->i_size;
err = ext4_mark_inode_dirty(handle, inode);
if (err)
goto out;
err = ext4_journal_get_write_access(handle, inode->i_sb, bh,
EXT4_JTR_NONE);
if (err)
goto out;
If either ext4_mark_inode_dirty() or
ext4_journal_get_write_access() fails, the function returns an
error but does not restore the original inode size.
Callers of ext4_append() appear to treat it as an all-or-nothing
operation:
bh = ext4_append(handle, dir, &block);
if (IS_ERR(bh))
goto out;
However, in the failure case, inode->i_size may already have been
increased.
One possible consequence is that subsequent checks relying on i_size,
such as:
if (block >= inode->i_size >> inode->i_blkbits)
may allow a block index to pass bounds checks even though the append
operation did not complete successfully.
I understand that journaling may ensure on-disk consistency, but the
in-memory inode state may still temporarily reflect a change that did
not logically succeed.
Is this behavior intentional, or should ext4_append() avoid updating
i_size until after all failure points, or roll it back on error?
Thanks,
Vineet Agarwal
^ permalink raw reply
* Re: [PATCH v3] generic/790: test post-EOF gap zeroing persistence
From: Zorro Lang @ 2026-05-01 16:46 UTC (permalink / raw)
To: Zhang Yi
Cc: fstests, linux-ext4, linux-fsdevel, bfoster, jack, yi.zhang,
yizhang089, yangerkun
In-Reply-To: <20260428085750.1072612-1-yi.zhang@huaweicloud.com>
On Tue, Apr 28, 2026 at 04:57:50PM +0800, Zhang Yi wrote:
> From: Zhang Yi <yi.zhang@huawei.com>
>
> Test that extending a file past a non-block-aligned EOF correctly
> zero-fills the gap [old_EOF, block_boundary), and that this zeroing
> persists through a filesystem shutdown+remount cycle.
>
> Stale data beyond EOF can persist on disk when append write data blocks
> are flushed before the on-disk file size update, or when concurrent
> append writeback and mmap writes persist non-zero data past EOF.
> Subsequent post-EOF operations (append write, fallocate, truncate up)
> must zero-fill and persist the gap to prevent exposing stale data.
>
> The test pollutes the file's last physical block (via FIEMAP + raw
> device write) with a sentinel pattern beyond i_size, then performs each
> extend operation and verifies the gap is zeroed both in memory and on
> disk.
>
> Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
> ---
> v2->v3:
> - Add error check for the raw device pwrite, a failed pwrite would
> silently leave the test continuing with an unpolluted block,
> producing false-positive passes.
> - Add sync_range -a to wait until the extending I/O completes and to
> ensure file size update is persisted before shutdown, preventing
> unexpected file size errors.
> v1->v2:
> - Add _require_no_realtime to prevent testing on XFS realtime devices,
> where file data may reside on $SCRATCH_RTDEV.
> - Add _exclude_fs btrfs since FIEMAP returns logical addresses, not
> physical device offsets, writing to these offsets on $SCRATCH_DEV
> would corrupt the filesystem in multi-device setups. Besides, since
> btrfs doesn't support shutdown right now, we can support it later.
> - Add -v flag to od in _check_gap_zero() to prevent line folding of
> identical consecutive lines.
> - Add expected_new_sz parameter to _test_eof_zeroing(), verify file
> size was not rolled back after shutdown+remount cycle, and also drop
> the unnecessary file size check before the shutdown as well.
> - Clarify the comment regarding when stale data beyond EOF can persist.
>
> tests/generic/790 | 168 ++++++++++++++++++++++++++++++++++++++++++
> tests/generic/790.out | 4 +
> 2 files changed, 172 insertions(+)
> create mode 100755 tests/generic/790
> create mode 100644 tests/generic/790.out
>
> diff --git a/tests/generic/790 b/tests/generic/790
> new file mode 100755
> index 00000000..6daf3793
> --- /dev/null
> +++ b/tests/generic/790
> @@ -0,0 +1,168 @@
> +#! /bin/bash
> +# SPDX-License-Identifier: GPL-2.0
> +# Copyright (c) 2026 Huawei. All Rights Reserved.
> +#
> +# FS QA Test No. 790
> +#
> +# Test that extending a file past a non-block-aligned EOF correctly zero-fills
> +# the gap [old_EOF, block_boundary), and that this zeroing persists through a
> +# filesystem shutdown+remount cycle.
> +#
> +# Stale data beyond EOF can persist on disk when:
> +# 1) append write data blocks are flushed before the on-disk file size update,
> +# and the system crashes in this window.
> +# 2) concurrent append writeback and mmap writes persist non-zero data past EOF.
> +#
> +# Subsequent post-EOF operations (append write, fallocate, truncate up) must
> +# zero-fill and persist the gap to prevent exposing stale data.
> +#
> +# The test pollutes the file's last physical block (via FIEMAP + raw device
> +# write) with a sentinel pattern beyond i_size, then performs each extend
> +# operation and verifies the gap is zeroed both in memory and on disk.
> +#
> +. ./common/preamble
> +_begin_fstest auto quick rw shutdown
> +
> +. ./common/filter
> +
> +_require_scratch
> +_require_block_device $SCRATCH_DEV
> +_require_no_realtime
> +_require_scratch_shutdown
> +_require_metadata_journaling $SCRATCH_DEV
> +
> +# FIEMAP on Btrfs returns logical addresses within the filesystem's address
> +# space, not physical device offsets. Writing to these offsets on $SCRATCH_DEV
> +# would corrupt the filesystem in multi-device setups.
> +_exclude_fs btrfs
> +
> +_require_xfs_io_command "fiemap"
> +_require_xfs_io_command "falloc"
This test should also be in the "fiemap" and "prealloc" groups; I'll add them when I merge it.
> +_require_xfs_io_command "pwrite"
> +_require_xfs_io_command "truncate"
> +_require_xfs_io_command "sync_range"
> +
> +# Check that gap region [offset, offset+nbytes) is entirely zero
> +_check_gap_zero()
> +{
> + local file="$1"
> + local offset="$2"
> + local nbytes="$3"
> + local label="$4"
> + local data
> + local stripped
> +
> + data=$(od -A n -t x1 -v -j $offset -N $nbytes "$file" 2>/dev/null)
> +
> + # Remove whitespace and check if any byte is non-zero
> + stripped=$(printf '%s' "$data" | tr -d ' \n\t')
> + if [ -n "$stripped" ] && ! echo "$stripped" | grep -qE "^0+$"; then
> + echo "FAIL: non-zero data in gap [$offset,$((offset + nbytes))) $label"
> + _hexdump -N $((offset + nbytes)) "$file"
> + return 1
> + fi
> + return 0
> +}
> +
> +# Get the physical block offset (in bytes) of the file's first block on device
> +_get_phys_offset()
> +{
> + local file="$1"
> + local fiemap_output
> + local phys_blk
> +
> + fiemap_output=$($XFS_IO_PROG -r -c "fiemap -v" "$file" 2>/dev/null)
> + phys_blk=$(echo "$fiemap_output" | _filter_xfs_io_fiemap | head -1 | awk '{print $3}')
> + if [ -z "$phys_blk" ]; then
> + echo ""
> + return
> + fi
> + # Convert 512-byte blocks to bytes
> + echo $((phys_blk * 512))
> +}
> +
> +_test_eof_zeroing()
> +{
> + local test_name="$1"
> + local extend_cmd="$2"
> + local expected_new_sz="$3"
> + local file=$SCRATCH_MNT/testfile_${test_name}
> +
> + echo "$test_name" | tee -a $seqres.full
> +
> + # Compute non-block-aligned EOF offset
> + local gap_bytes=16
> + local eof_offset=$((blksz - gap_bytes))
> +
> + # Step 1: Write one full block to ensure the filesystem allocates a
> + # physical block for the file instead of using inline data.
> + $XFS_IO_PROG -f -c "pwrite -S 0x5a 0 $blksz" -c fsync \
> + "$file" >> $seqres.full 2>&1
> +
> + # Step 2: Get physical block offset on device via FIEMAP
> + local phys_offset
> + phys_offset=$(_get_phys_offset "$file")
> + if [ -z "$phys_offset" ]; then
> + _fail "$test_name: failed to get physical block offset via fiemap"
> + fi
> +
> + # Step 3: Truncate file to non-block-aligned size and fsync.
> + # The on-disk region [eof_offset, blksz) may or may not be
> + # zeroed by the filesystem at this point.
> + $XFS_IO_PROG -c "truncate $eof_offset" -c fsync \
> + "$file" >> $seqres.full 2>&1
> +
> + # Step 4: Unmount and restore the physical block to all-0x5a on disk.
> + # This bypasses the kernel's pagecache EOF-zeroing to ensure
> + # the stale pattern is present on disk. Then remount.
> + _scratch_unmount
> + $XFS_IO_PROG -d -c "pwrite -S 0x5a $phys_offset $blksz" \
> + $SCRATCH_DEV >> $seqres.full 2>&1
> + if [ $? -ne 0 ]; then
> + _fail "$test_name: failed to inject stale data on disk"
> + fi
> + _scratch_mount >> $seqres.full 2>&1
> +
> + # Step 5: Execute the extend operation.
> + $XFS_IO_PROG -c "$extend_cmd" "$file" >> $seqres.full 2>&1
> +
> + # Step 6: Verify gap [eof_offset, blksz) is zeroed BEFORE shutdown
> + _check_gap_zero "$file" $eof_offset $gap_bytes "before shutdown" || return 1
> +
> + # Step 7: Sync the extended range and shutdown the filesystem with
> + # journal flush. This persists the file size extending, and
> + # the filesystem should persist the zeroed data in the gap
> + # range as well.
> + if [ "$extend_cmd" != "${extend_cmd#pwrite}" ]; then
> + $XFS_IO_PROG -c "sync_range -w $blksz $blksz" \
> + -c "sync_range -a $blksz $blksz" \
> + "$file" >> $seqres.full 2>&1
> + fi
> + _scratch_shutdown -f
Wow, the usage of sync_range in step #7 is impressive!
Reviewed-by: Zorro Lang <zlang@kernel.org>
> +
> + # Step 8: Remount and verify gap is still zeroed
> + _scratch_cycle_mount
> +
> + # Verify file size was not rolled back after shutdown+remount
> + local sz
> + sz=$(stat -c %s "$file")
> + if [ "$sz" -ne "$expected_new_sz" ]; then
> + _fail "$test_name: file size rolled back after shutdown+remount: $sz != $expected_new_sz"
> + fi
> +
> + _check_gap_zero "$file" $eof_offset $gap_bytes "after shutdown+remount" || return 1
> +}
> +
> +_scratch_mkfs >> $seqres.full 2>&1
> +_scratch_mount
> +
> +blksz=$(_get_block_size $SCRATCH_MNT)
> +
> +# Test three variants of EOF-extending operations
> +_test_eof_zeroing "append_write" "pwrite -S 0x42 $blksz $blksz" $((blksz * 2))
> +_test_eof_zeroing "truncate_up" "truncate $((blksz * 2))" $((blksz * 2))
> +_test_eof_zeroing "fallocate" "falloc $blksz $blksz" $((blksz * 2))
> +
> +# success, all done
> +status=0
> +exit
> diff --git a/tests/generic/790.out b/tests/generic/790.out
> new file mode 100644
> index 00000000..e5e2cc09
> --- /dev/null
> +++ b/tests/generic/790.out
> @@ -0,0 +1,4 @@
> +QA output created by 790
> +append_write
> +truncate_up
> +fallocate
> --
> 2.52.0
>
^ permalink raw reply
* Re: [syzbot] [ext4?] INFO: task jbd2/sda1-NUM:NUM blocked in I/O wait for more than NUM seconds.
From: syzbot @ 2026-05-01 14:48 UTC (permalink / raw)
To: jack, linux-ext4, linux-kernel, syzkaller-bugs, tytso
In-Reply-To: <69f3f165.170a0220.5f1b.0010.GAE@google.com>
syzbot has found a reproducer for the following issue on:
HEAD commit: 26fd6bff2c05 Merge tag 'mtd/fixes-for-7.1-rc2' of git://gi..
git tree: upstream
console output: https://syzkaller.appspot.com/x/log.txt?x=1229bece580000
kernel config: https://syzkaller.appspot.com/x/.config?x=d0f0911eedbc130a
dashboard link: https://syzkaller.appspot.com/bug?extid=c7604c9fdd7580cca4e0
compiler: gcc (Debian 14.2.0-19) 14.2.0, GNU ld (GNU Binutils for Debian) 2.44
syz repro: https://syzkaller.appspot.com/x/repro.syz?x=17f37506580000
C reproducer: https://syzkaller.appspot.com/x/repro.c?x=171441ce580000
Downloadable assets:
disk image: https://storage.googleapis.com/syzbot-assets/7d72741f9879/disk-26fd6bff.raw.xz
vmlinux: https://storage.googleapis.com/syzbot-assets/b02c6a1eba87/vmlinux-26fd6bff.xz
kernel image: https://storage.googleapis.com/syzbot-assets/4f218b09b68f/bzImage-26fd6bff.xz
IMPORTANT: if you fix the issue, please add the following tag to the commit:
Reported-by: syzbot+c7604c9fdd7580cca4e0@syzkaller.appspotmail.com
INFO: task jbd2/sda1-8:4955 blocked in I/O wait for more than 143 seconds.
Not tainted syzkaller #0
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
task:jbd2/sda1-8 state:D stack:26088 pid:4955 tgid:4955 ppid:2 task_flags:0x240040 flags:0x00080000
Call Trace:
<TASK>
context_switch kernel/sched/core.c:5387 [inline]
__schedule+0x10e9/0x6820 kernel/sched/core.c:7188
__schedule_loop kernel/sched/core.c:7267 [inline]
schedule+0xdd/0x390 kernel/sched/core.c:7282
io_schedule+0x8a/0xf0 kernel/sched/core.c:8109
bit_wait_io+0xd/0xe0 kernel/sched/wait_bit.c:250
__wait_on_bit+0x65/0x180 kernel/sched/wait_bit.c:52
out_of_line_wait_on_bit+0xdc/0x110 kernel/sched/wait_bit.c:67
wait_on_bit_io include/linux/wait_bit.h:105 [inline]
__wait_on_buffer+0x64/0x70 fs/buffer.c:123
wait_on_buffer include/linux/buffer_head.h:420 [inline]
jbd2_journal_commit_transaction+0x388a/0x6870 fs/jbd2/commit.c:837
kjournald2+0x200/0x760 fs/jbd2/journal.c:201
kthread+0x370/0x450 kernel/kthread.c:436
ret_from_fork+0x72b/0xd50 arch/x86/kernel/process.c:158
ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:245
</TASK>
INFO: task syz.0.189:6070 blocked in I/O wait for more than 143 seconds.
Not tainted syzkaller #0
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
task:syz.0.189 state:D stack:28240 pid:6070 tgid:6070 ppid:5783 task_flags:0x440040 flags:0x00080002
Call Trace:
<TASK>
context_switch kernel/sched/core.c:5387 [inline]
__schedule+0x10e9/0x6820 kernel/sched/core.c:7188
__schedule_loop kernel/sched/core.c:7267 [inline]
schedule+0xdd/0x390 kernel/sched/core.c:7282
io_schedule+0x8a/0xf0 kernel/sched/core.c:8109
bit_wait_io+0xd/0xe0 kernel/sched/wait_bit.c:250
__wait_on_bit+0x65/0x180 kernel/sched/wait_bit.c:52
out_of_line_wait_on_bit+0xdc/0x110 kernel/sched/wait_bit.c:67
wait_on_bit_io include/linux/wait_bit.h:105 [inline]
do_get_write_access+0x84f/0x1220 fs/jbd2/transaction.c:1113
jbd2_journal_get_write_access+0x1d6/0x280 fs/jbd2/transaction.c:1263
__ext4_journal_get_write_access+0x6a/0x340 fs/ext4/ext4_jbd2.c:241
ext4_reserve_inode_write+0x1b7/0x330 fs/ext4/inode.c:6375
__ext4_mark_inode_dirty+0x18f/0x890 fs/ext4/inode.c:6550
ext4_dirty_inode+0xd9/0x130 fs/ext4/inode.c:6587
__mark_inode_dirty+0x1f3/0x1720 fs/fs-writeback.c:2623
generic_update_time fs/inode.c:2192 [inline]
file_update_time_flags+0x46b/0x500 fs/inode.c:2422
ext4_page_mkwrite+0x324/0x1890 fs/ext4/inode.c:6753
do_page_mkwrite+0x17a/0x440 mm/memory.c:3668
do_shared_fault mm/memory.c:5969 [inline]
do_fault+0x3b5/0x1750 mm/memory.c:6031
do_pte_missing mm/memory.c:4550 [inline]
handle_pte_fault mm/memory.c:6411 [inline]
__handle_mm_fault+0x187d/0x2a00 mm/memory.c:6549
handle_mm_fault+0x36d/0xa20 mm/memory.c:6718
do_user_addr_fault+0x5a3/0x12f0 arch/x86/mm/fault.c:1334
handle_page_fault arch/x86/mm/fault.c:1474 [inline]
exc_page_fault+0x6f/0xd0 arch/x86/mm/fault.c:1527
asm_exc_page_fault+0x26/0x30 arch/x86/include/asm/idtentry.h:618
RIP: 0033:0x7f97a466a883
RSP: 002b:00007ffc3f813c60 EFLAGS: 00010246
RAX: 000000000003fde8 RBX: 0000000000000000 RCX: 0000000000000000
RDX: 0000001b2e124000 RSI: 0000000000040000 RDI: 00007f97a49db710
RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
R13: 0000000000000000 R14: 0000000000000000 R15: 00007ffc3f813ef0
</TASK>
Showing all locks held in the system:
1 lock held by ksoftirqd/1/23:
#0: ffff8880b853b3e0 (&rq->__lock){-.-.}-{2:2}, at: raw_spin_rq_lock_nested+0x2c/0x140 kernel/sched/core.c:652
1 lock held by khungtaskd/30:
#0: ffffffff8e7e52e0 (rcu_read_lock){....}-{1:3}, at: rcu_lock_acquire include/linux/rcupdate.h:300 [inline]
#0: ffffffff8e7e52e0 (rcu_read_lock){....}-{1:3}, at: rcu_read_lock include/linux/rcupdate.h:838 [inline]
#0: ffffffff8e7e52e0 (rcu_read_lock){....}-{1:3}, at: debug_show_all_locks+0x3d/0x184 kernel/locking/lockdep.c:6775
6 locks held by kworker/u8:3/47:
#0: ffff88801c6ca140 ((wq_completion)writeback){+.+.}-{0:0}, at: process_one_work+0x12d6/0x1980 kernel/workqueue.c:3277
#1: ffffc90000b77d08 ((work_completion)(&(&wb->dwork)->work)){+.+.}-{0:0}, at: process_one_work+0x973/0x1980 kernel/workqueue.c:3278
#2: ffff8880389060d8 (&type->s_umount_key#33){++++}-{4:4}, at: super_trylock_shared+0x1e/0xf0 fs/super.c:565
#3: ffff888038904c18 (&sbi->s_writepages_rwsem){++++}-{0:0}, at: do_writepages+0x278/0x600 mm/page-writeback.c:2575
#4: ffff888038902938 (jbd2_handle){++++}-{0:0}, at: start_this_handle+0xfaa/0x13a0 fs/jbd2/transaction.c:444
#5: ffff88807928f290 (&ei->i_data_sem){++++}-{4:4}, at: ext4_map_blocks+0x45a/0xd30 fs/ext4/inode.c:823
2 locks held by getty/5383:
#0: ffff88802dace0a0 (&tty->ldisc_sem){++++}-{0:0}, at: tty_ldisc_ref_wait+0x24/0x80 drivers/tty/tty_ldisc.c:243
#1: ffffc9000322b2e8 (&ldata->atomic_read_lock){+.+.}-{4:4}, at: n_tty_read+0x419/0x14f0 drivers/tty/n_tty.c:2211
3 locks held by syz.0.189/6070:
#0: ffff88807e006bc8 (vm_lock){++++}-{0:0}, at: lock_vma_under_rcu+0x11d/0x590 mm/mmap_lock.c:310
#1: ffff888038906508 (sb_pagefaults){.+.+}-{0:0}, at: do_page_mkwrite+0x17a/0x440 mm/memory.c:3668
#2: ffff888038902938 (jbd2_handle){++++}-{0:0}, at: start_this_handle+0xfaa/0x13a0 fs/jbd2/transaction.c:444
3 locks held by syz-executor/6077:
#0: ffff888038906410 (sb_writers#4){.+.+}-{0:0}, at: filename_create+0x10d/0x400 fs/namei.c:4943
#1: ffff8880792b1f98 (&type->i_mutex_dir_key#3/1){+.+.}-{4:4}, at: inode_lock_nested include/linux/fs.h:1074 [inline]
#1: ffff8880792b1f98 (&type->i_mutex_dir_key#3/1){+.+.}-{4:4}, at: __start_dirop fs/namei.c:2914 [inline]
#1: ffff8880792b1f98 (&type->i_mutex_dir_key#3/1){+.+.}-{4:4}, at: start_dirop fs/namei.c:2938 [inline]
#1: ffff8880792b1f98 (&type->i_mutex_dir_key#3/1){+.+.}-{4:4}, at: filename_create+0x1c0/0x400 fs/namei.c:4950
#2: ffff888038902938 (jbd2_handle){++++}-{0:0}, at: start_this_handle+0xfaa/0x13a0 fs/jbd2/transaction.c:444
=============================================
NMI backtrace for cpu 0
CPU: 0 UID: 0 PID: 30 Comm: khungtaskd Not tainted syzkaller #0 PREEMPT(full)
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 04/18/2026
Call Trace:
<TASK>
__dump_stack lib/dump_stack.c:94 [inline]
dump_stack_lvl+0x100/0x190 lib/dump_stack.c:120
nmi_cpu_backtrace.cold+0x12d/0x151 lib/nmi_backtrace.c:113
nmi_trigger_cpumask_backtrace+0x1d7/0x230 lib/nmi_backtrace.c:62
trigger_all_cpu_backtrace include/linux/nmi.h:162 [inline]
__sys_info lib/sys_info.c:157 [inline]
sys_info+0x141/0x190 lib/sys_info.c:165
check_hung_uninterruptible_tasks kernel/hung_task.c:353 [inline]
watchdog+0xcb1/0x1030 kernel/hung_task.c:561
kthread+0x370/0x450 kernel/kthread.c:436
ret_from_fork+0x72b/0xd50 arch/x86/kernel/process.c:158
ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:245
</TASK>
Sending NMI from CPU 0 to CPUs 1:
NMI backtrace for cpu 1
CPU: 1 UID: 0 PID: 1057 Comm: kworker/u8:7 Not tainted syzkaller #0 PREEMPT(full)
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 04/18/2026
Workqueue: events_unbound toggle_allocation_gate
RIP: 0010:smp_call_function_many_cond+0xdd4/0x1700 kernel/smp.c:871
Code: 00 00 0f 84 1e 05 00 00 e8 c9 6a 0c 00 83 c5 01 bf 07 00 00 00 48 63 dd 48 89 de e8 96 65 0c 00 48 83 fb 07 0f 86 22 fb ff ff <44> 8b 64 24 58 44 8b 7c 24 5c e8 9d 6a 0c 00 8b 5c 24 4c bf 01 00
RSP: 0018:ffffc90003d37870 EFLAGS: 00000246
RAX: 0000000000000000 RBX: 0000000000000001 RCX: ffffffff81fb686f
RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff888029b83d80
RBP: 0000000000000000 R08: 0000000000000007 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000000 R12: ffffc90003d37958
R13: ffff8880b843c6c0 R14: ffff8880b843c601 R15: ffff8880b853c710
FS: 0000000000000000(0000) GS:ffff88812447d000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 000055a400fed660 CR3: 000000000e596000 CR4: 00000000003526f0
Call Trace:
<TASK>
on_each_cpu_cond_mask+0x40/0x90 kernel/smp.c:1057
on_each_cpu include/linux/smp.h:72 [inline]
smp_text_poke_sync_each_cpu arch/x86/kernel/alternative.c:2773 [inline]
smp_text_poke_batch_finish+0x976/0xc60 arch/x86/kernel/alternative.c:3045
arch_jump_label_transform_apply+0x1c/0x30 arch/x86/kernel/jump_label.c:146
jump_label_update+0x37a/0x550 kernel/jump_label.c:910
static_key_enable_cpuslocked+0x1bc/0x270 kernel/jump_label.c:210
static_key_enable+0x1a/0x20 kernel/jump_label.c:223
toggle_allocation_gate mm/kfence/core.c:906 [inline]
toggle_allocation_gate+0xfe/0x2d0 mm/kfence/core.c:898
process_one_work+0xa0e/0x1980 kernel/workqueue.c:3302
process_scheduled_works kernel/workqueue.c:3385 [inline]
worker_thread+0x5ef/0xe50 kernel/workqueue.c:3466
kthread+0x370/0x450 kernel/kthread.c:436
ret_from_fork+0x72b/0xd50 arch/x86/kernel/process.c:158
ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:245
</TASK>
----------------
Code disassembly (best guess):
0: 00 00 add %al,(%rax)
2: 0f 84 1e 05 00 00 je 0x526
8: e8 c9 6a 0c 00 call 0xc6ad6
d: 83 c5 01 add $0x1,%ebp
10: bf 07 00 00 00 mov $0x7,%edi
15: 48 63 dd movslq %ebp,%rbx
18: 48 89 de mov %rbx,%rsi
1b: e8 96 65 0c 00 call 0xc65b6
20: 48 83 fb 07 cmp $0x7,%rbx
24: 0f 86 22 fb ff ff jbe 0xfffffb4c
* 2a: 44 8b 64 24 58 mov 0x58(%rsp),%r12d <-- trapping instruction
2f: 44 8b 7c 24 5c mov 0x5c(%rsp),%r15d
34: e8 9d 6a 0c 00 call 0xc6ad6
39: 8b 5c 24 4c mov 0x4c(%rsp),%ebx
3d: bf .byte 0xbf
3e: 01 00 add %eax,(%rax)
---
If you want syzbot to run the reproducer, reply with:
#syz test: git://repo/address.git branch-or-commit-hash
If you attach or paste a git patch, syzbot will apply it before testing.
^ permalink raw reply
* Re: [f2fs-dev] [GIT PULL] fscrypt updates for 6.7
From: patchwork-bot+f2fs @ 2026-05-01 4:52 UTC (permalink / raw)
To: Eric Biggers
Cc: torvalds, tytso, linux-kernel, linux-f2fs-devel, linux-fscrypt,
linux-fsdevel, jaegeuk, linux-ext4, linux-btrfs
In-Reply-To: <20231030040419.GA43439@sol.localdomain>
Hello:
This pull request was applied to jaegeuk/f2fs.git (dev)
by Linus Torvalds <torvalds@linux-foundation.org>:
On Sun, 29 Oct 2023 21:04:19 -0700 you wrote:
> The following changes since commit 6465e260f48790807eef06b583b38ca9789b6072:
>
> Linux 6.6-rc3 (2023-09-24 14:31:13 -0700)
>
> are available in the Git repository at:
>
> https://git.kernel.org/pub/scm/fs/fscrypt/linux.git tags/fscrypt-for-linus
>
> [...]
Here is the summary with links:
- [f2fs-dev,GIT,PULL] fscrypt updates for 6.7
https://git.kernel.org/jaegeuk/f2fs/c/9932f00bf40d
You are awesome, thank you!
--
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html
^ permalink raw reply
* [syzbot] INFO: task jbd2/sda1-NUM:NUM blocked in I/O wait for more than NUM seconds.
From: syzbot @ 2026-05-01 0:18 UTC (permalink / raw)
To: jack, linux-ext4, linux-kernel, syzkaller-bugs, tytso
Hello,
syzbot found the following issue on:
HEAD commit: dd6c438c3e64 Merge tag 'vfs-7.1-rc1.fixes' of git://git.ke..
git tree: upstream
console output: https://syzkaller.appspot.com/x/log.txt?x=128692d2580000
kernel config: https://syzkaller.appspot.com/x/.config?x=bf32d14ab63e8442
dashboard link: https://syzkaller.appspot.com/bug?extid=c7604c9fdd7580cca4e0
compiler: gcc (Debian 14.2.0-19) 14.2.0, GNU ld (GNU Binutils for Debian) 2.44
Unfortunately, I don't have any reproducer for this issue yet.
Downloadable assets:
disk image: https://storage.googleapis.com/syzbot-assets/e8b40cfa7daf/disk-dd6c438c.raw.xz
vmlinux: https://storage.googleapis.com/syzbot-assets/50e3a472008e/vmlinux-dd6c438c.xz
kernel image: https://storage.googleapis.com/syzbot-assets/dee724704545/bzImage-dd6c438c.xz
IMPORTANT: if you fix the issue, please add the following tag to the commit:
Reported-by: syzbot+c7604c9fdd7580cca4e0@syzkaller.appspotmail.com
INFO: task jbd2/sda1-8:5176 blocked in I/O wait for more than 143 seconds.
Tainted: G L syzkaller #0
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
task:jbd2/sda1-8 state:D stack:26072 pid:5176 tgid:5176 ppid:2 task_flags:0x240040 flags:0x00080000
Call Trace:
<TASK>
context_switch kernel/sched/core.c:5387 [inline]
__schedule+0x10e9/0x6820 kernel/sched/core.c:7188
__schedule_loop kernel/sched/core.c:7267 [inline]
schedule+0xdd/0x390 kernel/sched/core.c:7282
io_schedule+0x8a/0xf0 kernel/sched/core.c:8109
bit_wait_io+0xd/0xe0 kernel/sched/wait_bit.c:250
__wait_on_bit+0x65/0x180 kernel/sched/wait_bit.c:52
out_of_line_wait_on_bit+0xdc/0x110 kernel/sched/wait_bit.c:67
wait_on_bit_io include/linux/wait_bit.h:105 [inline]
__wait_on_buffer+0x64/0x70 fs/buffer.c:123
wait_on_buffer include/linux/buffer_head.h:420 [inline]
jbd2_journal_commit_transaction+0x388a/0x6870 fs/jbd2/commit.c:837
kjournald2+0x200/0x760 fs/jbd2/journal.c:201
kthread+0x370/0x450 kernel/kthread.c:436
ret_from_fork+0x72b/0xd50 arch/x86/kernel/process.c:158
ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:245
</TASK>
INFO: task syz-executor:5832 blocked in I/O wait for more than 143 seconds.
Tainted: G L syzkaller #0
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
task:syz-executor state:D stack:22688 pid:5832 tgid:5832 ppid:5831 task_flags:0x440100 flags:0x00080000
Call Trace:
<TASK>
context_switch kernel/sched/core.c:5387 [inline]
__schedule+0x10e9/0x6820 kernel/sched/core.c:7188
__schedule_loop kernel/sched/core.c:7267 [inline]
schedule+0xdd/0x390 kernel/sched/core.c:7282
io_schedule+0x8a/0xf0 kernel/sched/core.c:8109
bit_wait_io+0xd/0xe0 kernel/sched/wait_bit.c:250
__wait_on_bit+0x65/0x180 kernel/sched/wait_bit.c:52
out_of_line_wait_on_bit+0xdc/0x110 kernel/sched/wait_bit.c:67
wait_on_bit_io include/linux/wait_bit.h:105 [inline]
do_get_write_access+0x84f/0x1220 fs/jbd2/transaction.c:1113
jbd2_journal_get_write_access+0x1d6/0x280 fs/jbd2/transaction.c:1263
__ext4_journal_get_write_access+0x6a/0x340 fs/ext4/ext4_jbd2.c:241
ext4_reserve_inode_write+0x1b7/0x330 fs/ext4/inode.c:6375
__ext4_mark_inode_dirty+0x18f/0x890 fs/ext4/inode.c:6550
ext4_dirty_inode+0xd9/0x130 fs/ext4/inode.c:6587
__mark_inode_dirty+0x1f3/0x1720 fs/fs-writeback.c:2623
generic_update_time fs/inode.c:2192 [inline]
file_update_time_flags+0x46b/0x500 fs/inode.c:2422
ext4_page_mkwrite+0x324/0x1890 fs/ext4/inode.c:6753
do_page_mkwrite+0x17a/0x440 mm/memory.c:3668
wp_page_shared mm/memory.c:4069 [inline]
do_wp_page+0xa77/0x4350 mm/memory.c:4288
handle_pte_fault mm/memory.c:6427 [inline]
__handle_mm_fault+0x1ab6/0x2a00 mm/memory.c:6549
handle_mm_fault+0x36d/0xa20 mm/memory.c:6718
do_user_addr_fault+0x5a3/0x12f0 arch/x86/mm/fault.c:1334
handle_page_fault arch/x86/mm/fault.c:1474 [inline]
exc_page_fault+0x6f/0xd0 arch/x86/mm/fault.c:1527
asm_exc_page_fault+0x26/0x30 arch/x86/include/asm/idtentry.h:618
RIP: 0033:0x7fa035d6eec0
RSP: 002b:00007ffc588679c8 EFLAGS: 00010246
RAX: 00007fa02f7e2ffc RBX: 00007ffc58867b70 RCX: 0000000000000000
RDX: 0000000000000010 RSI: 0000000000000000 RDI: 00007fa02f7e2ffc
RBP: 0000000000000010 R08: 0000000000000000 R09: 0000000000000000
R10: 00007fa02f40021c R11: 000000000000000e R12: 000000000001cff4
R13: 000000000001cfda R14: 0000000000000010 R15: 00007ffc58867b70
</TASK>
INFO: task syz.0.297:7190 blocked in I/O wait for more than 144 seconds.
Tainted: G L syzkaller #0
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
task:syz.0.297 state:D stack:27128 pid:7190 tgid:7190 ppid:5849 task_flags:0x440040 flags:0x00080002
Call Trace:
<TASK>
context_switch kernel/sched/core.c:5387 [inline]
__schedule+0x10e9/0x6820 kernel/sched/core.c:7188
__schedule_loop kernel/sched/core.c:7267 [inline]
schedule+0xdd/0x390 kernel/sched/core.c:7282
io_schedule+0x8a/0xf0 kernel/sched/core.c:8109
bit_wait_io+0xd/0xe0 kernel/sched/wait_bit.c:250
__wait_on_bit+0x65/0x180 kernel/sched/wait_bit.c:52
out_of_line_wait_on_bit+0xdc/0x110 kernel/sched/wait_bit.c:67
wait_on_bit_io include/linux/wait_bit.h:105 [inline]
do_get_write_access+0x84f/0x1220 fs/jbd2/transaction.c:1113
jbd2_journal_get_write_access+0x1d6/0x280 fs/jbd2/transaction.c:1263
__ext4_journal_get_write_access+0x6a/0x340 fs/ext4/ext4_jbd2.c:241
ext4_reserve_inode_write+0x1b7/0x330 fs/ext4/inode.c:6375
__ext4_mark_inode_dirty+0x18f/0x890 fs/ext4/inode.c:6550
ext4_dirty_inode+0xd9/0x130 fs/ext4/inode.c:6587
__mark_inode_dirty+0x1f3/0x1720 fs/fs-writeback.c:2623
generic_update_time fs/inode.c:2192 [inline]
file_update_time_flags+0x46b/0x500 fs/inode.c:2422
ext4_page_mkwrite+0x324/0x1890 fs/ext4/inode.c:6753
do_page_mkwrite+0x17a/0x440 mm/memory.c:3668
do_shared_fault mm/memory.c:5969 [inline]
do_fault+0x3b5/0x1750 mm/memory.c:6031
do_pte_missing mm/memory.c:4550 [inline]
handle_pte_fault mm/memory.c:6411 [inline]
__handle_mm_fault+0x187d/0x2a00 mm/memory.c:6549
handle_mm_fault+0x36d/0xa20 mm/memory.c:6718
do_user_addr_fault+0x5a3/0x12f0 arch/x86/mm/fault.c:1334
handle_page_fault arch/x86/mm/fault.c:1474 [inline]
exc_page_fault+0x6f/0xd0 arch/x86/mm/fault.c:1527
asm_exc_page_fault+0x26/0x30 arch/x86/include/asm/idtentry.h:618
RIP: 0033:0x7f4957e70f40
RSP: 002b:00007fffdf4618d0 EFLAGS: 00010202
RAX: 0000001b31ef5000 RBX: ffffffff82568f11 RCX: 0000001b31ef4ff8
RDX: 0000001b31b24220 RSI: 0000000000000008 RDI: 00007f4958d45720
RBP: 00000000000001a6 R08: 00007f4958200000 R09: 00007f4958202000
R10: 0000000082568f15 R11: 0000000000000010 R12: 00007f4958216038
R13: 00000000000002e8 R14: ffffffff825688e4 R15: 00007f4958d45720
</TASK>
INFO: task syz.1.299:7195 blocked in I/O wait for more than 144 seconds.
Tainted: G L syzkaller #0
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
task:syz.1.299 state:D stack:27384 pid:7195 tgid:7195 ppid:5843 task_flags:0x440040 flags:0x00080002
Call Trace:
<TASK>
context_switch kernel/sched/core.c:5387 [inline]
__schedule+0x10e9/0x6820 kernel/sched/core.c:7188
__schedule_loop kernel/sched/core.c:7267 [inline]
schedule+0xdd/0x390 kernel/sched/core.c:7282
io_schedule+0x8a/0xf0 kernel/sched/core.c:8109
bit_wait_io+0xd/0xe0 kernel/sched/wait_bit.c:250
__wait_on_bit+0x65/0x180 kernel/sched/wait_bit.c:52
out_of_line_wait_on_bit+0xdc/0x110 kernel/sched/wait_bit.c:67
wait_on_bit_io include/linux/wait_bit.h:105 [inline]
do_get_write_access+0x84f/0x1220 fs/jbd2/transaction.c:1113
jbd2_journal_get_write_access+0x1d6/0x280 fs/jbd2/transaction.c:1263
__ext4_journal_get_write_access+0x6a/0x340 fs/ext4/ext4_jbd2.c:241
ext4_reserve_inode_write+0x1b7/0x330 fs/ext4/inode.c:6375
__ext4_mark_inode_dirty+0x18f/0x890 fs/ext4/inode.c:6550
ext4_dirty_inode+0xd9/0x130 fs/ext4/inode.c:6587
__mark_inode_dirty+0x1f3/0x1720 fs/fs-writeback.c:2623
generic_update_time fs/inode.c:2192 [inline]
touch_atime+0x642/0x7a0 fs/inode.c:2267
file_accessed include/linux/fs.h:2264 [inline]
ext4_file_mmap_prepare+0x56d/0x670 fs/ext4/file.c:840
vfs_mmap_prepare include/linux/fs.h:2076 [inline]
call_mmap_prepare mm/vma.c:2672 [inline]
__mmap_region+0xe98/0x2da0 mm/vma.c:2755
mmap_region+0x527/0x620 mm/vma.c:2856
do_mmap+0xc63/0x12f0 mm/mmap.c:560
vm_mmap_pgoff+0x29e/0x470 mm/util.c:581
ksys_mmap_pgoff+0x3cb/0x610 mm/mmap.c:606
__do_sys_mmap arch/x86/kernel/sys_x86_64.c:89 [inline]
__se_sys_mmap arch/x86/kernel/sys_x86_64.c:82 [inline]
__x64_sys_mmap+0x125/0x190 arch/x86/kernel/sys_x86_64.c:82
do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline]
do_syscall_64+0x10b/0xf80 arch/x86/entry/syscall_64.c:94
entry_SYSCALL_64_after_hwframe+0x77/0x7f
RIP: 0033:0x7f8e3d39c582
RSP: 002b:00007fffab0ea828 EFLAGS: 00000206 ORIG_RAX: 0000000000000009
RAX: ffffffffffffffda RBX: 0000001b31c64000 RCX: 00007f8e3d39c582
RDX: 0000000000000003 RSI: 00000000003c0000 RDI: 0000001b31c64000
RBP: 0000000000100001 R08: 0000000000000004 R09: 0000000000040000
R10: 0000000000100001 R11: 0000000000000206 R12: 0000000000000047
R13: 00000000000927c0 R14: 000000000002f9d5 R15: 00007fffab0eab00
</TASK>
INFO: task syz.2.298:7196 blocked in I/O wait for more than 144 seconds.
Tainted: G L syzkaller #0
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
task:syz.2.298 state:D stack:27384 pid:7196 tgid:7196 ppid:5853 task_flags:0x440040 flags:0x00080002
Call Trace:
<TASK>
context_switch kernel/sched/core.c:5387 [inline]
__schedule+0x10e9/0x6820 kernel/sched/core.c:7188
__schedule_loop kernel/sched/core.c:7267 [inline]
schedule+0xdd/0x390 kernel/sched/core.c:7282
io_schedule+0x8a/0xf0 kernel/sched/core.c:8109
bit_wait_io+0xd/0xe0 kernel/sched/wait_bit.c:250
__wait_on_bit+0x65/0x180 kernel/sched/wait_bit.c:52
out_of_line_wait_on_bit+0xdc/0x110 kernel/sched/wait_bit.c:67
wait_on_bit_io include/linux/wait_bit.h:105 [inline]
do_get_write_access+0x84f/0x1220 fs/jbd2/transaction.c:1113
jbd2_journal_get_write_access+0x1d6/0x280 fs/jbd2/transaction.c:1263
__ext4_journal_get_write_access+0x6a/0x340 fs/ext4/ext4_jbd2.c:241
ext4_reserve_inode_write+0x1b7/0x330 fs/ext4/inode.c:6375
__ext4_mark_inode_dirty+0x18f/0x890 fs/ext4/inode.c:6550
ext4_dirty_inode+0xd9/0x130 fs/ext4/inode.c:6587
__mark_inode_dirty+0x1f3/0x1720 fs/fs-writeback.c:2623
generic_update_time fs/inode.c:2192 [inline]
touch_atime+0x642/0x7a0 fs/inode.c:2267
file_accessed include/linux/fs.h:2264 [inline]
ext4_file_mmap_prepare+0x56d/0x670 fs/ext4/file.c:840
vfs_mmap_prepare include/linux/fs.h:2076 [inline]
call_mmap_prepare mm/vma.c:2672 [inline]
__mmap_region+0xe98/0x2da0 mm/vma.c:2755
mmap_region+0x527/0x620 mm/vma.c:2856
do_mmap+0xc63/0x12f0 mm/mmap.c:560
vm_mmap_pgoff+0x29e/0x470 mm/util.c:581
ksys_mmap_pgoff+0x3cb/0x610 mm/mmap.c:606
__do_sys_mmap arch/x86/kernel/sys_x86_64.c:89 [inline]
__se_sys_mmap arch/x86/kernel/sys_x86_64.c:82 [inline]
__x64_sys_mmap+0x125/0x190 arch/x86/kernel/sys_x86_64.c:82
do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline]
do_syscall_64+0x10b/0xf80 arch/x86/entry/syscall_64.c:94
entry_SYSCALL_64_after_hwframe+0x77/0x7f
RIP: 0033:0x7f6eda39c582
RSP: 002b:00007ffc7d052448 EFLAGS: 00000206 ORIG_RAX: 0000000000000009
RAX: ffffffffffffffda RBX: 0000001b31d64000 RCX: 00007f6eda39c582
RDX: 0000000000000003 RSI: 00000000003c0000 RDI: 0000001b31d64000
RBP: 0000000000100001 R08: 0000000000000004 R09: 0000000000040000
R10: 0000000000100001 R11: 0000000000000206 R12: 0000000000000048
R13: 00000000000927c0 R14: 000000000002fd0a R15: 00007ffc7d052720
</TASK>
Showing all locks held in the system:
6 locks held by kworker/u8:1/13:
#0: ffff888020e8a940 ((wq_completion)writeback){+.+.}-{0:0}, at: process_one_work+0x12d6/0x1980 kernel/workqueue.c:3277
#1: ffffc90000127d08 ((work_completion)(&(&wb->dwork)->work)){+.+.}-{0:0}, at: process_one_work+0x973/0x1980 kernel/workqueue.c:3278
#2: ffff88803313a0d8 (&type->s_umount_key#33){++++}-{4:4}, at: super_trylock_shared+0x1e/0xf0 fs/super.c:565
#3: ffff888033138c18 (&sbi->s_writepages_rwsem){++++}-{0:0}, at: do_writepages+0x278/0x600 mm/page-writeback.c:2575
#4: ffff8880330b6938 (jbd2_handle){++++}-{0:0}, at: start_this_handle+0xfaa/0x13a0 fs/jbd2/transaction.c:444
#5: ffff88807a935fd0 (&ei->i_data_sem){++++}-{4:4}, at: ext4_map_blocks+0x45a/0xd30 fs/ext4/inode.c:823
1 lock held by khungtaskd/31:
#0: ffffffff8e7e5260 (rcu_read_lock){....}-{1:3}, at: rcu_lock_acquire include/linux/rcupdate.h:300 [inline]
#0: ffffffff8e7e5260 (rcu_read_lock){....}-{1:3}, at: rcu_read_lock include/linux/rcupdate.h:838 [inline]
#0: ffffffff8e7e5260 (rcu_read_lock){....}-{1:3}, at: debug_show_all_locks+0x3d/0x184 kernel/locking/lockdep.c:6775
2 locks held by getty/5602:
#0: ffff888038d560a0 (&tty->ldisc_sem){++++}-{0:0}, at: tty_ldisc_ref_wait+0x24/0x80 drivers/tty/tty_ldisc.c:243
#1: ffffc9000322b2e8 (&ldata->atomic_read_lock){+.+.}-{4:4}, at: n_tty_read+0x419/0x14f0 drivers/tty/n_tty.c:2211
3 locks held by syz-executor/5832:
#0: ffff888037e99448 (vm_lock){++++}-{0:0}, at: lock_vma_under_rcu+0x11d/0x590 mm/mmap_lock.c:310
#1: ffff88803313a508 (sb_pagefaults){.+.+}-{0:0}, at: do_page_mkwrite+0x17a/0x440 mm/memory.c:3668
#2: ffff8880330b6938 (jbd2_handle){++++}-{0:0}, at: start_this_handle+0xfaa/0x13a0 fs/jbd2/transaction.c:444
3 locks held by syz.0.297/7190:
#0: ffff8880565b7948 (vm_lock){++++}-{0:0}, at: lock_vma_under_rcu+0x11d/0x590 mm/mmap_lock.c:310
#1: ffff88803313a508 (sb_pagefaults){.+.+}-{0:0}, at: do_page_mkwrite+0x17a/0x440 mm/memory.c:3668
#2: ffff8880330b6938 (jbd2_handle){++++}-{0:0}, at: start_this_handle+0xfaa/0x13a0 fs/jbd2/transaction.c:444
3 locks held by syz.1.299/7195:
#0: ffff888028438f78 (&mm->mmap_lock){++++}-{4:4}, at: mmap_write_lock_killable include/linux/mmap_lock.h:554 [inline]
#0: ffff888028438f78 (&mm->mmap_lock){++++}-{4:4}, at: vm_mmap_pgoff+0x1f5/0x470 mm/util.c:579
#1: ffff88803313a410 (sb_writers#4){.+.+}-{0:0}, at: file_accessed include/linux/fs.h:2264 [inline]
#1: ffff88803313a410 (sb_writers#4){.+.+}-{0:0}, at: ext4_file_mmap_prepare+0x56d/0x670 fs/ext4/file.c:840
#2: ffff8880330b6938 (jbd2_handle){++++}-{0:0}, at: start_this_handle+0xfaa/0x13a0 fs/jbd2/transaction.c:444
3 locks held by syz.2.298/7196:
#0: ffff888028438338 (&mm->mmap_lock){++++}-{4:4}, at: mmap_write_lock_killable include/linux/mmap_lock.h:554 [inline]
#0: ffff888028438338 (&mm->mmap_lock){++++}-{4:4}, at: vm_mmap_pgoff+0x1f5/0x470 mm/util.c:579
#1: ffff88803313a410 (sb_writers#4){.+.+}-{0:0}, at: file_accessed include/linux/fs.h:2264 [inline]
#1: ffff88803313a410 (sb_writers#4){.+.+}-{0:0}, at: ext4_file_mmap_prepare+0x56d/0x670 fs/ext4/file.c:840
#2: ffff8880330b6938 (jbd2_handle){++++}-{0:0}, at: start_this_handle+0xfaa/0x13a0 fs/jbd2/transaction.c:444
=============================================
NMI backtrace for cpu 0
CPU: 0 UID: 0 PID: 31 Comm: khungtaskd Tainted: G L syzkaller #0 PREEMPT(full)
Tainted: [L]=SOFTLOCKUP
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 04/18/2026
Call Trace:
<TASK>
__dump_stack lib/dump_stack.c:94 [inline]
dump_stack_lvl+0x100/0x190 lib/dump_stack.c:120
nmi_cpu_backtrace.cold+0x12d/0x151 lib/nmi_backtrace.c:113
nmi_trigger_cpumask_backtrace+0x1d7/0x230 lib/nmi_backtrace.c:62
trigger_all_cpu_backtrace include/linux/nmi.h:162 [inline]
__sys_info lib/sys_info.c:157 [inline]
sys_info+0x141/0x190 lib/sys_info.c:165
check_hung_uninterruptible_tasks kernel/hung_task.c:353 [inline]
watchdog+0xcb1/0x1030 kernel/hung_task.c:561
kthread+0x370/0x450 kernel/kthread.c:436
ret_from_fork+0x72b/0xd50 arch/x86/kernel/process.c:158
ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:245
</TASK>
---
This report is generated by a bot. It may contain errors.
See https://goo.gl/tpsmEJ for more information about syzbot.
syzbot engineers can be reached at syzkaller@googlegroups.com.
syzbot will keep track of this issue. See:
https://goo.gl/tpsmEJ#status for how to communicate with syzbot.
If the report is already addressed, let syzbot know by replying with:
#syz fix: exact-commit-title
If you want to overwrite the report's subsystems, reply with:
#syz set subsystems: new-subsystem
(See the list of subsystem names on the web dashboard)
If the report is a duplicate of another one, reply with:
#syz dup: exact-subject-of-another-report
If you want to undo deduplication, reply with:
#syz undup
^ permalink raw reply
* Re: [GIT PULL v5.1] libfuse: run fuse servers as a contained service
From: Darrick J. Wong @ 2026-04-30 22:49 UTC (permalink / raw)
To: Bernd Schubert
Cc: fuse-devel, joannelkoong, linux-ext4, linux-fsdevel, miklos, neal
In-Reply-To: <9c920b17-81a8-4c1e-921e-4c08cc290cb5@bsbernd.com>
On Thu, Apr 30, 2026 at 11:34:06PM +0200, Bernd Schubert wrote:
> Hi Darrick,
>
> On 4/30/26 23:18, Darrick J. Wong wrote:
> > Hi Bernd,
> >
> > Please pull this branch with changes for libfuse.
> >
> > As usual, I did a test-merge with the main upstream branch as of a few
> > minutes ago, and didn't see any conflicts. Please let me know if you
> > encounter any problems.
>
> pushed to my github branch. BSD build fails with
>
> 2026-04-30T21:25:16.3874802Z FAILED: [code=1] lib/libfuse3.so.3.19.0
> 2026-04-30T21:25:16.3906762Z cc -o lib/libfuse3.so.3.19.0 lib/libfuse3.so.3.19.0.p/fuse.c.o lib/libfuse3.so.3.19.0.p/fuse_loop.c.o lib/libfuse3.so.3.19.0.p/fuse_loop_mt.c.o lib/libfuse3.so.3.19.0.p/fuse_lowlevel.c.o lib/libfuse3.so.3.19.0.p/fuse_opt.c.o lib/libfuse3.so.3.19.0.p/fuse_signals.c.o lib/libfuse3.so.3.19.0.p/buffer.c.o lib/libfuse3.so.3.19.0.p/cuse_lowlevel.c.o lib/libfuse3.so.3.19.0.p/helper.c.o lib/libfuse3.so.3.19.0.p/modules_subdir.c.o lib/libfuse3.so.3.19.0.p/mount_util.c.o lib/libfuse3.so.3.19.0.p/fuse_log.c.o lib/libfuse3.so.3.19.0.p/compat.c.o lib/libfuse3.so.3.19.0.p/util.c.o lib/libfuse3.so.3.19.0.p/mount_bsd.c.o lib/libfuse3.so.3.19.0.p/fuse_service_stub.c.o lib/libfuse3.so.3.19.0.p/modules_iconv.c.o -Wl,--as-needed -Wl,--no-undefined -shared -fPIC -Wl,-soname,libfuse3.so.4 -Wl,--version-script,/home/runner/work/libfuse/libfuse/lib/fuse_versionscript -pthread -Wl,--start-group -ldl -lrt -Wl,--end-group
> 2026-04-30T21:25:16.3939590Z ld: error: version script assignment of 'FUSE_3.19' to symbol 'fuse_service_can_allow_other' failed: symbol not defined
Aha, that function got left out of the stub. :(
Annoyingly, on Linux the build succeeds despite the missing symbol
when I tweak meson so that it doesn't build the service stuff. I would
have thought that --no-undefined would have caught that, but alas.
Sorry about that too. The following patch fixes it.
diff --git i/lib/fuse_service_stub.c w/lib/fuse_service_stub.c
index d34df3891a6e31..231b98423df628 100644
--- i/lib/fuse_service_stub.c
+++ w/lib/fuse_service_stub.c
@@ -49,12 +49,17 @@ int fuse_service_send_goodbye(struct fuse_service *sf, int error)
int fuse_service_accept(struct fuse_service **sfp)
{
*sfp = NULL;
return 0;
}
+bool fuse_service_can_allow_other(struct fuse_service *sf)
+{
+ return false;
+}
+
int fuse_service_append_args(struct fuse_service *sf,
struct fuse_args *existing_args)
{
return -EOPNOTSUPP;
}
> 2026-04-30T21:25:16.3951874Z cc: error: linker command failed with exit code 1 (use -v to see invocation)
> 2026-04-30T21:25:16.4291582Z [44/82] cc -Itest/test_teardown_watchdog.p -Itest -I../test -Iinclude -I../include -Ilib -I../lib -I. -I.. -fdiagnostics-color=always -
>
>
> checkpatch, CodeChecker-cppcheck, CodeChecker-gcc also all fail. This CodeQL
Not sure why checkpatch fails, this is what I got (Debian 13):
$ git diff origin/master.. | ./checkpatch.pl --max-line-length=100 --no-tree --ignore MAINTAINERS,SPDX_LICENSE_TAG,COMMIT_MESSAGE,FILE_PATH_CHANGES,EMAIL_SUBJECT,AVOID_EXTERNS,GIT_COMMIT_ID,ENOSYS_SYSCALL,ENOSYS,FROM_SIGN_OFF_MISMATCH,QUOTED_COMMIT_ID,,PREFER_ATTRIBUTE_ALWAYS_UNUSED,PREFER_DEFINED_ATTRIBUTE_MACRO,STRCPY,STRNCPY -
No typos will be found - file '/storage/home/djwong/cdev/work/libfuse/spelling.txt': No such file or directory
No structs that should be const will be found - file '/storage/home/djwong/cdev/work/libfuse/const_structs.checkpatch': No such file or directory
total: 0 errors, 0 warnings, 3908 lines checked
Your patch has no obvious style problems and is ready for submission.
NOTE: Ignored message types: AVOID_EXTERNS COMMIT_MESSAGE EMAIL_SUBJECT ENOSYS ENOSYS_SYSCALL FILE_PATH_CHANGES FROM_SIGN_OFF_MISMATCH GIT_COMMIT_ID MAINTAINERS PREFER_ATTRIBUTE_ALWAYS_UNUSED PREFER_DEFINED_ATTRIBUTE_MACRO QUOTED_COMMIT_ID SPDX_LICENSE_TAG STRCPY STRNCPY
cppcheck had a few things to say, but none of it was about the changed
lines.
> report is funny
>
> > int mount_service_main(int argc, char *argv[])
> > Warning
> > Poorly documented large function
> > Poorly documented function: fewer than 2% comments for a function of 113 lines.
> > CodeQL
Hrmm. I guess I'll have to figure out how to get those things running.
That said, the stuff in mount_service.c is internal to libfuse (i.e.
it's not public library API) so I didn't comment them as intensely.
Would you like more?
> I think I'm going to merge my sync fuse init series tomorrow.
Yay!
> Are you ok if skip posting another version of the series?
^ is there an "I" here? e.g. "...if I skip posting..."?
/My/ normal practice (from xfs) was to repost the series as it was
merged, followed by an announcement. That way the mailing list is a
complete record of what was merged. However, very very few people
actually do that, even in the kernel.
I'm ok with you not posting another version of the series.
> Or do you prefer to review the recent changes and last piece I'm going
> to do tomorrow?
Nah, just merge it. I'll look over the changes once it's in the branch
and if there's anything weird, you or I or anyone else can send patches.
As long as you're not planning to tag it as a release, nothing's set in
stone.
> Basically my goal is to rebase your series against it immediately and
> to merge your series during the next few days.
<nod> Let me know what you want changed, I'll be around since I am not
travelling anywhere for a couple of weeks. :)
I can reflow changes into the patchset, or if you'd prefer, I can add
them as new patches that would go on the end of the series.
--D
^ permalink raw reply related
* Re: [GIT PULL v5.1] libfuse: run fuse servers as a contained service
From: Bernd Schubert @ 2026-04-30 21:34 UTC (permalink / raw)
To: Darrick J. Wong
Cc: fuse-devel, joannelkoong, linux-ext4, linux-fsdevel, miklos, neal
In-Reply-To: <177758364789.1315233.15610945404741826500.stg-ugh@frogsfrogsfrogs>
Hi Darrick,
On 4/30/26 23:18, Darrick J. Wong wrote:
> Hi Bernd,
>
> Please pull this branch with changes for libfuse.
>
> As usual, I did a test-merge with the main upstream branch as of a few
> minutes ago, and didn't see any conflicts. Please let me know if you
> encounter any problems.
pushed to my github branch. BSD build fails with
2026-04-30T21:25:16.3874802Z FAILED: [code=1] lib/libfuse3.so.3.19.0
2026-04-30T21:25:16.3906762Z cc -o lib/libfuse3.so.3.19.0 lib/libfuse3.so.3.19.0.p/fuse.c.o lib/libfuse3.so.3.19.0.p/fuse_loop.c.o lib/libfuse3.so.3.19.0.p/fuse_loop_mt.c.o lib/libfuse3.so.3.19.0.p/fuse_lowlevel.c.o lib/libfuse3.so.3.19.0.p/fuse_opt.c.o lib/libfuse3.so.3.19.0.p/fuse_signals.c.o lib/libfuse3.so.3.19.0.p/buffer.c.o lib/libfuse3.so.3.19.0.p/cuse_lowlevel.c.o lib/libfuse3.so.3.19.0.p/helper.c.o lib/libfuse3.so.3.19.0.p/modules_subdir.c.o lib/libfuse3.so.3.19.0.p/mount_util.c.o lib/libfuse3.so.3.19.0.p/fuse_log.c.o lib/libfuse3.so.3.19.0.p/compat.c.o lib/libfuse3.so.3.19.0.p/util.c.o lib/libfuse3.so.3.19.0.p/mount_bsd.c.o lib/libfuse3.so.3.19.0.p/fuse_service_stub.c.o lib/libfuse3.so.3.19.0.p/modules_iconv.c.o -Wl,--as-needed -Wl,--no-undefined -shared -fPIC -Wl,-soname,libfuse3.so.4 -Wl,--version-script,/home/runner/work/libfuse/libfuse/lib/fuse_versionscript -pthread -Wl,--start-group -ldl -lrt -Wl,--end-group
2026-04-30T21:25:16.3939590Z ld: error: version script assignment of 'FUSE_3.19' to symbol 'fuse_service_can_allow_other' failed: symbol not defined
2026-04-30T21:25:16.3951874Z cc: error: linker command failed with exit code 1 (use -v to see invocation)
2026-04-30T21:25:16.4291582Z [44/82] cc -Itest/test_teardown_watchdog.p -Itest -I../test -Iinclude -I../include -Ilib -I../lib -I. -I.. -fdiagnostics-color=always -
checkpatch, CodeChecker-cppcheck, CodeChecker-gcc also all fail. This CodeQL
report is funny
> int mount_service_main(int argc, char *argv[])
> Warning
> Poorly documented large function
> Poorly documented function: fewer than 2% comments for a function of 113 lines.
> CodeQL
I think I'm going to merge my sync fuse init series tomorrow. Are you ok if I
skip posting another version of the series? Or do you prefer to review the
recent changes and last piece I'm going to do tomorrow?
Basically my goal is to rebase your series against it immediately and to
merge your series during the next few days.
Cheers,
Bernd
^ permalink raw reply
* [GIT PULL v5.1] libfuse: run fuse servers as a contained service
From: Darrick J. Wong @ 2026-04-30 21:18 UTC (permalink / raw)
To: bernd, djwong
Cc: fuse-devel, joannelkoong, linux-ext4, linux-fsdevel, miklos, neal
Hi Bernd,
Please pull this branch with changes for libfuse.
As usual, I did a test-merge with the main upstream branch as of a few
minutes ago, and didn't see any conflicts. Please let me know if you
encounter any problems.
--D
The following changes since commit f8abf5d1baa9fb689255f7091937081025749158:
Fix a sign bug in prepare_fuse_fd() (2026-04-29 17:50:45 +0200)
are available in the Git repository at:
https://git.kernel.org/pub/scm/linux/kernel/git/djwong/libfuse.git tags/fuse-service-container_2026-04-30
for you to fetch changes up to 055d94404c8aedf3cb434503f4efa7686c35545b:
nullfs: support fuse systemd service mode (2026-04-30 14:13:32 -0700)
----------------------------------------------------------------
libfuse: run fuse servers as a contained service [v5.1 1/9]
This patchset defines the necessary communication protocols and library
code so that users can mount fuse servers that run in unprivileged
systemd service containers. That in turn allows unprivileged untrusted
mounts, because the worst that can happen is that a malicious image
crashes the fuse server and the mount dies, instead of corrupting the
kernel's memory.
v5.1: fix some of the SCM_RIGHTS handling code, fix header inclusion
errors, improve documentation of example code, improve statx
flags handling, improve phony timestamp handling
v5: Refactor socket IO into helpers, tighten the security checks in
mount_service.c, always set nosuid/nodev for unprivileged mounts,
use posix_spawnp in mount.fuse, restructure sample programs and hl
library code to avoid the need for unmounting during startup
v4.1: fix various cppcheck/codecheck complaints
v4: fix a large number of security problems that only matter when the
mount helper is being run as a setuid program; fix protocol
byteswapping problems; add CLOEXEC to all files being traded
back and forth; add an umount command; and strengthen mount socket
protocol checks.
v3: refactor the sample code to reduce duplication; fix all the
checkpatch complaints; examples actually build standalone;
fuservicemount handles utab now; cleaned up meson feature detection;
handle MS_ flags that don't translate to MOUNT_ATTR_*
v2: cleaned up error code handling and logging; add some example fuse
service; fuservicemount3 can now be a setuid program to allow
unprivileged userspace to fire up a contained filesystem driver.
This could be opening Pandora's box...
v1: detach from fuse-iomap series
With a bit of luck, this should all go splendidly.
Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
----------------------------------------------------------------
Bernd Schubert (1):
Refactor mount code / move common functions to mount_util.c
Darrick J. Wong (12):
mount_service: add systemd socket service mounting helper
mount_service: create high level fuse helpers
mount_service: use the new mount api for the mount service
mount_service: update mtab after a successful mount
util: hoist the fuse.conf parsing and setuid mode enforcement code
util: fix checkpatch complaints in fuser_conf.[ch]
mount_service: enable unprivileged users in a similar manner as fusermount
mount.fuse3: integrate systemd service startup
mount_service: allow installation as a setuid program
example/service_ll: create a sample systemd service fuse server
example/service: create a sample systemd service for a high-level fuse server
nullfs: support fuse systemd service mode
example/single_file.h | 195 ++
include/fuse.h | 34 +
include/fuse_service.h | 243 +++
include/fuse_service_priv.h | 161 ++
lib/fuse_i.h | 3 +
lib/mount_common_i.h | 22 +
lib/mount_util.h | 8 +
lib/util.h | 35 +
util/fuser_conf.h | 62 +
util/mount_service.h | 49 +
.github/workflows/install-ubuntu-dependencies.sh | 4 +
README.md | 3 +
doc/fuservicemount3.8 | 32 +
doc/meson.build | 3 +
example/meson.build | 26 +
example/null.c | 51 +-
example/null.socket.in | 15 +
example/null@.service | 102 ++
example/service_hl.c | 240 +++
example/service_hl.socket.in | 15 +
example/service_hl@.service | 102 ++
example/service_ll.c | 329 ++++
example/service_ll.socket.in | 15 +
example/service_ll@.service | 102 ++
example/single_file.c | 992 ++++++++++
include/meson.build | 4 +
lib/fuse_service.c | 1248 +++++++++++++
lib/fuse_service_stub.c | 106 ++
lib/fuse_versionscript | 18 +
lib/helper.c | 160 +-
lib/meson.build | 17 +-
lib/mount.c | 72 +-
lib/mount_util.c | 9 +
meson.build | 53 +-
meson_options.txt | 9 +
test/ci-build.sh | 14 +
util/fuser_conf.c | 398 ++++
util/fusermount.c | 363 +---
util/fuservicemount.c | 65 +
util/install_helper.sh | 6 +
util/meson.build | 24 +-
util/mount.fuse.c | 171 +-
util/mount_service.c | 2111 ++++++++++++++++++++++
43 files changed, 7287 insertions(+), 404 deletions(-)
create mode 100644 example/single_file.h
create mode 100644 include/fuse_service.h
create mode 100644 include/fuse_service_priv.h
create mode 100644 lib/mount_common_i.h
create mode 100644 util/fuser_conf.h
create mode 100644 util/mount_service.h
create mode 100644 doc/fuservicemount3.8
create mode 100644 example/null.socket.in
create mode 100644 example/null@.service
create mode 100644 example/service_hl.c
create mode 100644 example/service_hl.socket.in
create mode 100644 example/service_hl@.service
create mode 100644 example/service_ll.c
create mode 100644 example/service_ll.socket.in
create mode 100644 example/service_ll@.service
create mode 100644 example/single_file.c
create mode 100644 lib/fuse_service.c
create mode 100644 lib/fuse_service_stub.c
create mode 100644 util/fuser_conf.c
create mode 100644 util/fuservicemount.c
create mode 100644 util/mount_service.c
Here's the range diff from v5 to v5.1.
1: 5a6a95b117389d ! 1: af50a468568e3d fs: turn on more warnings for the filesystem code we modify most
@@ meson.build: base_version = version_parts[0]
+# W=e
+if host_machine.cpu_family() == 'x86_64'
+ WARNINGS += [ '-Werror' ]
+endif
+
+# W=1
-+WARNINGS += [ '-Wextra', '-Wunused', '-Wno-unused-parameter' ]
++WARNINGS += [ '-Wextra', '-Wunused', '-Wunused-parameter' ]
+WARNINGS += [ '-Wmissing-declarations' ]
+WARNINGS += [ '-Wrestrict' ]
+WARNINGS += [ '-Wmissing-format-attribute' ]
+WARNINGS += [ '-Wmissing-prototypes' ]
+WARNINGS += [ '-Wold-style-definition' ]
+WARNINGS += [ '-Wmissing-include-dirs' ]
@@ meson.build: base_version = version_parts[0]
+#WARNINGS += [ '-Wsign-compare' ] # percpu
+#WARNINGS += [ '-Wswitch-default' ] # everywhere
+#WARNINGS += $(call cc-option, -Wpacked-bitfield-compat) # not a gcc thing
+
+# Stuff we need to fix in xfsprogs
+WARNINGS += [ '-Wno-suggest-attribute=format' ] # dont care about printf crap
-+WARNINGS += [ '-Wno-shadow' ] # many programs have global variables
-+WARNINGS += [ '-Wno-missing-field-initializers' ] # why is this even a problem?
++WARNINGS += [ '-Wshadow' ] # many programs have global variables
++WARNINGS += [ '-Wmissing-field-initializers' ] # why is this even a problem?
+WARNINGS += [ '-Wno-sign-compare' ] # zomg so much macro
-+WARNINGS += [ '-Wno-dangling-pointer' ] # gcc 12.2 bug
++WARNINGS += [ '-Wdangling-pointer' ] # gcc 12.2 bug
+
+WARNINGS += [ '-Wno-error=type-limits' ] # rtgroups patchset
+WARNINGS += [ '-Wno-error=array-bounds' ] # rtgroups patchset
+
+# OpenSSF recommendations March 2025
+# https://best.openssf.org/Compiler-Hardening-Guides/Compiler-Options-Hardening-Guide-for-C-and-C++.html
+#WARNINGS += [ '-O2' ] # already set elsewhere
+#WARNINGS += [ '-Wall' ]
+WARNINGS += [ '-Wformat' ]
+WARNINGS += [ '-Wformat=2' ]
-+WARNINGS += [ '-Wno-format-nonliteral' ] # xfs_db and friends fail this everywhere
++WARNINGS += [ '-Wno-error=format-nonliteral' ] # xfs_db and friends fail this everywhere
+#WARNINGS += [ '-Wconversion' ]
+WARNINGS += [ '-Wimplicit-fallthrough' ]
+WARNINGS += [ '-Werror=format-security' ]
+WARNINGS += [ '-U_FORTIFY_SOURCE' ]
+WARNINGS += [ '-D_FORTIFY_SOURCE=2' ] # debian defaults to 2
+WARNINGS += [ '-D_GLIBCXX_ASSERTIONS' ]
2: eda4cf0a9c9400 = 2: 4871909466456d Refactor mount code / move common functions to mount_util.c
3: 6f098240905230 ! 3: ff81706aa6d6fd mount_service: add systemd socket service mounting helper
@@ lib/fuse_service.c (new)
+ .iov_base = buf,
+ .iov_len = bufsize,
+ };
+ union {
+ struct cmsghdr cmsghdr;
+ char control[CMSG_SPACE(sizeof(int))];
-+ } cmsgu;
++ } cmsgu = { };
+ struct msghdr msg = {
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ .msg_control = cmsgu.control,
-+ .msg_controllen = sizeof(cmsgu.control),
++
++ /*
++ * Do not include padding at the end of the control buffer,
++ * because we don't want to receive fds that we weren't
++ * expecting.
++ */
++ .msg_controllen = CMSG_LEN(sizeof(int)),
+ };
+ struct cmsghdr *cmsg;
+ ssize_t size;
+
-+ memset(&cmsgu, 0, sizeof(cmsgu));
++ /*
++ * A kernel LSM could decide to deny the fd transfer by writing a
++ * negative number (== invalid fd) into the cmsg buffer instead of
++ * installing the fd. Set the initial fd value to -1 to signal an
++ * invalid fd in case the kernel doesn't even set the cmsg buffer.
++ * It shouldn't do that, but we absolutely don't want a zero here.
++ */
++ memset(cmsgu.control, -1, sizeof(cmsgu.control));
+
+ size = recvmsg(sf->sockfd, &msg, MSG_TRUNC | MSG_CMSG_CLOEXEC);
+ if (size < 0) {
+ int error = errno;
+
+ fuse_log(FUSE_LOG_ERR, "fuse: service file reply: %s\n",
@@ lib/fuse_service.c (new)
+ size < offsetof(struct fuse_service_requested_file, path)) {
+ fuse_log(FUSE_LOG_ERR, "fuse: wrong service file reply size %zd, expected %zd\n",
+ size, bufsize);
+ return -EBADMSG;
+ }
+
++ if (msg.msg_flags & MSG_CTRUNC) {
++ /* SMACK does this */
++ fuse_log(FUSE_LOG_ERR,
++"fuse: service file reply control data truncated; did an LSM deny SCM_RIGHTS?\n");
++ return -EBADMSG;
++ }
++
+ cmsg = CMSG_FIRSTHDR(&msg);
+ if (!cmsg) {
+ /* no control message means mount.service sent us an error */
+ return 0;
+ }
+ if (cmsg->cmsg_len != CMSG_LEN(sizeof(int))) {
@@ lib/fuse_service.c (new)
+ }
+
+ ret = __recv_fd(sf, req, req_sz, &fd);
+ if (ret)
+ goto out_req;
+
++ if (fd < 0) {
++ /* The kernel might have given us an errno instead of an fd */
++ fuse_log(FUSE_LOG_ERR, "fuse: service fd transfer failed: %s\n",
++ strerror(-fd));
++ ret = fd;
++ goto out_req;
++ }
++
+ if (ntohl(req->p.magic) != FUSE_SERVICE_OPEN_REPLY) {
+ fuse_log(FUSE_LOG_ERR, "fuse: service file reply contains wrong magic!\n");
+ ret = -EBADMSG;
+ goto out_close;
+ }
+ if (strcmp(req->path, path)) {
4: 82f5466695848e = 4: 2c4995923f9ae2 mount_service: create high level fuse helpers
5: e684fb005a841e = 5: e53ee192306670 mount_service: use the new mount api for the mount service
6: 13152dd09d7bb0 = 6: 3c58b3ca279cb6 mount_service: update mtab after a successful mount
7: ed78d2368a3c15 ! 7: 4fa9d76f54011e util: hoist the fuse.conf parsing and setuid mode enforcement code
@@ util/fuser_conf.c (new)
+#include <stdlib.h>
+#include <errno.h>
+#include <mntent.h>
+#include <unistd.h>
+#include <sys/fsuid.h>
+
++#include "fuse_mount_compat.h"
++
+#if defined HAVE_LISTMOUNT
+#include <linux/mount.h>
+#include <syscall.h>
+#include <stdint.h>
+#endif
+
8: e2f84fca73efb3 = 8: 75ea3fbf340c75 util: fix checkpatch complaints in fuser_conf.[ch]
9: ee31248f7b7e31 = 9: 068b0b4b775fc6 mount_service: enable unprivileged users in a similar manner as fusermount
10: 125e8990ce56db ! 10: 4b455b65ca7756 mount.fuse3: integrate systemd service startup
@@ util/mount.fuse.c: static void drop_and_lock_capabilities(void)
+ if (ret) {
+ fprintf(stderr, "%s: could not start %s helper: %s\n",
+ argv[0], FUSERVICEMOUNT_PROG, strerror(ret));
+ return MOUNT_SERVICE_FALLBACK_NEEDED;
+ }
+
-+ ret = waitpid(child_pid, &child_status, 0);
++ do {
++ ret = waitpid(child_pid, &child_status, 0);
++ } while (ret < 0 && errno == EINTR);
+ if (ret < 0) {
+ fprintf(stderr, "%s: could not wait for %s helper: %s\n",
+ argv[0], FUSERVICEMOUNT_PROG, strerror(errno));
+ return MOUNT_SERVICE_FALLBACK_NEEDED;
+ }
+
11: bbc7cbd7a3d185 = 11: 5b8ce2254d15e1 mount_service: allow installation as a setuid program
12: 85aac9e1d35d23 ! 12: d095eda280909d example/service_ll: create a sample systemd service fuse server
@@ example/single_file.h (new)
+/*
+ * FUSE: Filesystem in Userspace
+ * Copyright (C) 2026 Oracle.
+ *
+ * This program can be distributed under the terms of the GNU GPLv2.
+ * See the file GPL2.txt.
++ *
++ * This file is shared library code for example fuse servers that want to
++ * expose a single regular file that wraps another file in a manner that goes
++ * beyond simple passthrough. It is not itself a fuse server.
+ */
+#ifndef FUSE_SINGLE_FILE_H_
+#define FUSE_SINGLE_FILE_H_
+
+static inline uint64_t round_up(uint64_t b, unsigned int align)
+{
@@ example/service_ll.c (new)
+ * This program can be distributed under the terms of the GNU GPLv2.
+ * See the file GPL2.txt.
+ */
+
+/** @file
+ *
-+ * minimal example filesystem using low-level API and systemd service api
++ * Minimal example filesystem using low-level API and systemd service API.
++ *
++ * - Shows how to build a low level FUSE filesystem server that can be managed
++ * by systemd
++ * - Enables on-demand filesystem mounting via socket activation
++ * - Demonstrates requesting resources from the mount-caller's environment
++ * - Allows running FUSE servers with minimal privileges; isolated mount,
++ * network, and pid namespaces; and a separate uid/gid (unlike traditional
++ * FUSE which needs mount permissions and runs in the caller's environment)
+ *
+ * Compile with:
+ *
+ * gcc -Wall single_file.c service_ll.c `pkg-config fuse3 --cflags --libs` -o service_ll
+ *
+ * Note: If the pkg-config command fails due to the absence of the fuse3.pc
@@ example/service_ll.c (new)
+ *
+ * ExecStart=/path/to/service_ll
+ *
+ * to point to the actual path of the service_ll binary.
+ *
+ * Finally, install the service_ll@.service and service_ll.socket files to the
-+ * systemd service directory, usually /run/systemd/system.
++ * systemd service directory, usually /run/systemd/system. Run these commands
++ * to activate:
++ *
++ * systemctl daemon-reload
++ * systemctl start service_ll.socket
++ *
++ * Then mount with:
++ *
++ * mount -t fuse.service_ll /dev/sda /mnt
+ *
+ * ## Source code ##
+ * \include service_ll.c
+ * \include service_ll.socket
+ * \include service_ll@.service
+ * \include single_file.c
@@ example/single_file.c (new)
+/*
+ * FUSE: Filesystem in Userspace
+ * Copyright (C) 2026 Oracle.
+ *
+ * This program can be distributed under the terms of the GNU GPLv2.
+ * See the file GPL2.txt.
++ *
++ * This file is shared library code for example fuse servers that want to
++ * expose a single regular file that wraps another file in a manner that goes
++ * beyond simple passthrough. It is not itself a fuse server.
+ */
+#define _GNU_SOURCE
+#include <pthread.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <errno.h>
@@ example/single_file.c (new)
+void single_file_ll_statx(fuse_req_t req, fuse_ino_t ino, int flags, int mask,
+ struct fuse_file_info *fi)
+{
+ struct statx stx = { };
+ bool filled;
+
-+ (void)flags;
+ (void)fi;
+
++ if ((flags & AT_STATX_FORCE_SYNC) && is_single_file_ino(ino) &&
++ single_file.backing_fd >= 0) {
++ int ret = fsync(single_file.backing_fd);
++
++ if (ret) {
++ fuse_reply_err(req, errno);
++ return;
++ }
++ }
++
+ pthread_mutex_lock(&single_file.lock);
+ filled = sf_statx(ino, mask, &stx);
+ pthread_mutex_unlock(&single_file.lock);
+ if (!filled)
+ fuse_reply_err(req, ENOENT);
+ else
@@ example/single_file.c (new)
+ if (to_set & FUSE_SET_ATTR_MTIME_NOW)
+ single_file.mtime = now;
+ else
+ single_file.mtime = attr->st_mtim;
+ }
+ if (to_set & FUSE_SET_ATTR_CTIME)
-+ single_file.ctime = attr->st_mtim;
-+ else
+ single_file.ctime = now;
+ pthread_mutex_unlock(&single_file.lock);
+
+ single_file_ll_getattr(req, ino, fi);
+ return;
+deny:
@@ example/single_file.c (new)
+ return -errno;
+ }
+
+ get_now(&now);
+
+ pthread_mutex_lock(&single_file.lock);
++ single_file.mtime = now;
+ single_file.ctime = now;
+ pthread_mutex_unlock(&single_file.lock);
+
+ return processed;
+ }
+
@@ example/single_file.c (new)
+ single_file_name_set = true;
+ }
+
+ get_now(&startup_time);
+ single_file.atime = startup_time;
+ single_file.mtime = startup_time;
++ single_file.ctime = startup_time;
+
+ if (!single_file.ro)
+ single_file.mode |= 0220;
+
+ return 0;
+}
13: 9702e37fa0895f ! 13: 48f04af4d1cc37 example/service: create a sample systemd service for a high-level fuse server
@@ example/service_hl.c (new)
+ * This program can be distributed under the terms of the GNU GPLv2.
+ * See the file GPL2.txt.
+ */
+
+/** @file
+ *
-+ * minimal example filesystem using high-level API and systemd service api
++ * Minimal example filesystem using high-level API and systemd service API.
++ *
++ * - Shows how to build a high level FUSE filesystem server that can be managed
++ * by systemd
++ * - Enables on-demand filesystem mounting via socket activation
++ * - Demonstrates requesting resources from the mount-caller's environment
++ * - Allows running FUSE servers with minimal privileges; isolated mount,
++ * network, and pid namespaces; and a separate uid/gid (unlike traditional
++ * FUSE which needs mount permissions and runs in the caller's environment)
+ *
+ * Compile with:
+ *
+ * gcc -Wall single_file.c service_hl.c `pkg-config fuse3 --cflags --libs` -o service_hl
+ *
+ * Note: If the pkg-config command fails due to the absence of the fuse3.pc
@@ example/service_hl.c (new)
+ *
+ * ExecStart=/path/to/service_hl
+ *
+ * to point to the actual path of the service_hl binary.
+ *
+ * Finally, install the service_hl@.service and service_hl.socket files to the
-+ * systemd service directory, usually /run/systemd/system.
++ * systemd service directory, usually /run/systemd/system. Run these commands
++ * to activate:
++ *
++ * systemctl daemon-reload
++ * systemctl start service_hl.socket
++ *
++ * Then mount with:
++ *
++ * mount -t fuse.service_hl /dev/sda /mnt
+ *
+ * ## Source code ##
+ * \include service_hl.c
+ * \include service_hl.socket
+ * \include service_hl@.service
+ * \include single_file.c
@@ example/single_file.c: void single_file_ll_statx(fuse_req_t req, fuse_ino_t ino,
+ fuse_ino_t ino = single_open_file_path_to_ino(fi, path);
+ bool filled;
+
+ if (!ino)
+ return -ENOENT;
+
++ if ((statx_flags & AT_STATX_FORCE_SYNC) && is_single_file_ino(ino) &&
++ single_file.backing_fd >= 0) {
++ int ret = fsync(single_file.backing_fd);
++
++ if (ret)
++ return -errno;
++ }
++
+ pthread_mutex_lock(&single_file.lock);
+ filled = sf_statx(ino, statx_mask, stx);
+ pthread_mutex_unlock(&single_file.lock);
+
+ return filled ? 0 : -ENOENT;
+}
@@ example/single_file.c: void single_file_ll_setattr(fuse_req_t req, fuse_ino_t in
+ return -EPERM;
+ if (single_file.ro)
+ return -EPERM;
+
+ pthread_mutex_lock(&single_file.lock);
+ single_file.mode = (single_file.mode & S_IFMT) | (mode & ~S_IFMT);
++ get_now(&single_file.ctime);
+ pthread_mutex_unlock(&single_file.lock);
+
+ return 0;
+}
+
+static void set_time(const struct timespec *ctv, struct timespec *tv)
14: cd1acb1dc7d492 = 14: f5597fbd63f7a6 nullfs: support fuse systemd service mode
^ permalink raw reply
* [PATCH 13/13] nullfs: support fuse systemd service mode
From: Darrick J. Wong @ 2026-04-30 21:18 UTC (permalink / raw)
To: bernd, djwong
Cc: linux-fsdevel, fuse-devel, linux-ext4, miklos, neal, joannelkoong
In-Reply-To: <177758363484.1314717.11777978893472254088.stgit@frogsfrogsfrogs>
From: Darrick J. Wong <djwong@kernel.org>
This is the only example fuse server that exports a regular file instead
of a directory tree. Port it to be usable as a systemd fuse service so
that we can test that capability.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
example/meson.build | 6 +++
example/null.c | 51 +++++++++++++++++++++++-
example/null.socket.in | 15 +++++++
example/null@.service | 102 ++++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 171 insertions(+), 3 deletions(-)
create mode 100644 example/null.socket.in
create mode 100644 example/null@.service
diff --git a/example/meson.build b/example/meson.build
index 19a383f7cd2c74..45dbf26eb355a7 100644
--- a/example/meson.build
+++ b/example/meson.build
@@ -10,6 +10,12 @@ if not platform.endswith('bsd') and platform != 'dragonfly'
# support mounting files, This is enforced in vfs_domount_first()
# with the v_type != VDIR check.
examples += [ 'null' ]
+
+ if platform.endswith('linux')
+ configure_file(input: 'null.socket.in',
+ output: 'null.socket',
+ configuration: private_cfg)
+ endif
endif
single_file_examples = [ ]
diff --git a/example/null.c b/example/null.c
index ec41def40ed5c5..43135cde39713e 100644
--- a/example/null.c
+++ b/example/null.c
@@ -17,15 +17,24 @@
*
* gcc -Wall null.c `pkg-config fuse3 --cflags --libs` -o null
*
+ * Change the ExecStart line in null@.service:
+ *
+ * ExecStart=/path/to/null
+ *
+ * to point to the actual path of the null binary.
+ *
+ * Finally, install the null@.service and null.socket files to the
+ * systemd service directory, usually /run/systemd/system.
+ *
* ## Source code ##
* \include passthrough_fh.c
*/
-
-#define FUSE_USE_VERSION 31
+#define FUSE_USE_VERSION FUSE_MAKE_VERSION(3, 19)
#include <fuse.h>
#include <fuse_lowlevel.h>
+#include <fuse_service.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -33,6 +42,8 @@
#include <time.h>
#include <errno.h>
+static mode_t mode = 0644;
+
static int null_getattr(const char *path, struct stat *stbuf,
struct fuse_file_info *fi)
{
@@ -41,7 +52,7 @@ static int null_getattr(const char *path, struct stat *stbuf,
if(strcmp(path, "/") != 0)
return -ENOENT;
- stbuf->st_mode = S_IFREG | 0644;
+ stbuf->st_mode = S_IFREG | mode;
stbuf->st_nlink = 1;
stbuf->st_uid = getuid();
stbuf->st_gid = getgid();
@@ -112,11 +123,45 @@ static const struct fuse_operations null_oper = {
.write = null_write,
};
+static int null_service(struct fuse_service *service, struct fuse_args *args)
+{
+ int ret = 1;
+
+ if (fuse_service_append_args(service, args))
+ goto err_service;
+
+ if (fuse_service_finish_file_requests(service))
+ goto err_service;
+
+ fuse_service_expect_mount_format(service, S_IFREG);
+
+ /*
+ * In non-service mode, we set up the file to be owned and writable
+ * by the same user that starts the fuse server. When running in a
+ * container as a dynamic user, we just grant world write access.
+ */
+ mode = 0666;
+ ret = fuse_service_main(service, args, &null_oper, NULL);
+
+err_service:
+ fuse_service_send_goodbye(service, ret);
+ fuse_service_destroy(&service);
+ fuse_opt_free_args(args);
+ return fuse_service_exit(ret);
+}
+
int main(int argc, char *argv[])
{
struct fuse_args args = FUSE_ARGS_INIT(argc, argv);
struct fuse_cmdline_opts opts;
struct stat stbuf;
+ struct fuse_service *service = NULL;
+
+ if (fuse_service_accept(&service) != 0)
+ return 1;
+
+ if (fuse_service_accepted(service))
+ return null_service(service, &args);
if (fuse_parse_cmdline(&args, &opts) != 0)
return 1;
diff --git a/example/null.socket.in b/example/null.socket.in
new file mode 100644
index 00000000000000..865e739561a45e
--- /dev/null
+++ b/example/null.socket.in
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+#
+# Copyright (C) 2026 Oracle. All Rights Reserved.
+# Author: Darrick J. Wong <djwong@kernel.org>
+[Unit]
+Description=Socket for null Service
+
+[Socket]
+ListenSequentialPacket=@FUSE_SERVICE_SOCKET_DIR_RAW@/null
+Accept=yes
+SocketMode=@FUSE_SERVICE_SOCKET_PERMS@
+RemoveOnStop=yes
+
+[Install]
+WantedBy=sockets.target
diff --git a/example/null@.service b/example/null@.service
new file mode 100644
index 00000000000000..f77fbe927217cf
--- /dev/null
+++ b/example/null@.service
@@ -0,0 +1,102 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+#
+# Copyright (C) 2026 Oracle. All Rights Reserved.
+# Author: Darrick J. Wong <djwong@kernel.org>
+[Unit]
+Description=null Sample Fuse Service
+
+# Don't leave failed units behind, systemd does not clean them up!
+CollectMode=inactive-or-failed
+
+[Service]
+Type=exec
+ExecStart=/path/to/null
+
+# Try to capture core dumps
+LimitCORE=infinity
+
+SyslogIdentifier=%N
+
+# No realtime CPU scheduling
+RestrictRealtime=true
+
+# Don't let us see anything in the regular system, and don't run as root
+DynamicUser=true
+ProtectSystem=strict
+ProtectHome=true
+PrivateTmp=true
+PrivateDevices=true
+PrivateUsers=true
+
+# No network access
+PrivateNetwork=true
+ProtectHostname=true
+RestrictAddressFamilies=none
+IPAddressDeny=any
+
+# Don't let the program mess with the kernel configuration at all
+ProtectKernelLogs=true
+ProtectKernelModules=true
+ProtectKernelTunables=true
+ProtectControlGroups=true
+ProtectProc=invisible
+RestrictNamespaces=true
+RestrictFileSystems=
+
+# Hide everything in /proc, even /proc/mounts
+ProcSubset=pid
+
+# Only allow the default personality Linux
+LockPersonality=true
+
+# No writable memory pages
+MemoryDenyWriteExecute=true
+
+# Don't let our mounts leak out to the host
+PrivateMounts=true
+
+# Restrict system calls to the native arch and only enough to get things going
+SystemCallArchitectures=native
+SystemCallFilter=@system-service
+SystemCallFilter=~@privileged
+SystemCallFilter=~@resources
+
+SystemCallFilter=~@clock
+SystemCallFilter=~@cpu-emulation
+SystemCallFilter=~@debug
+SystemCallFilter=~@module
+SystemCallFilter=~@reboot
+SystemCallFilter=~@swap
+
+SystemCallFilter=~@mount
+
+# libfuse io_uring wants to pin cores and memory
+SystemCallFilter=mbind
+SystemCallFilter=sched_setaffinity
+
+# Leave a breadcrumb if we get whacked by the system call filter
+SystemCallErrorNumber=EL3RST
+
+# Log to the kernel dmesg, just like an in-kernel filesystem driver
+StandardOutput=append:/dev/ttyprintk
+StandardError=append:/dev/ttyprintk
+
+# Run with no capabilities at all
+CapabilityBoundingSet=
+AmbientCapabilities=
+NoNewPrivileges=true
+
+# We don't create files
+UMask=7777
+
+# No access to hardware /dev files at all
+ProtectClock=true
+DevicePolicy=closed
+
+# Don't mess with set[ug]id anything.
+RestrictSUIDSGID=true
+
+# Don't let OOM kills of processes in this containment group kill the whole
+# service, because we don't want filesystem drivers to go down.
+OOMPolicy=continue
+OOMScoreAdjust=-1000
^ permalink raw reply related
* [PATCH 12/13] example/service: create a sample systemd service for a high-level fuse server
From: Darrick J. Wong @ 2026-04-30 21:18 UTC (permalink / raw)
To: bernd, djwong
Cc: linux-fsdevel, fuse-devel, linux-ext4, miklos, neal, joannelkoong
In-Reply-To: <177758363484.1314717.11777978893472254088.stgit@frogsfrogsfrogs>
From: Darrick J. Wong <djwong@kernel.org>
Create a simple high-level fuse server that can be run as a systemd
service.
Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
---
example/single_file.h | 38 ++++++
example/meson.build | 6 +
example/service_hl.c | 240 ++++++++++++++++++++++++++++++++++++
example/service_hl.socket.in | 15 ++
example/service_hl@.service | 102 +++++++++++++++
example/service_ll.c | 1
example/single_file.c | 280 +++++++++++++++++++++++++++++++++++++++---
7 files changed, 665 insertions(+), 17 deletions(-)
create mode 100644 example/service_hl.c
create mode 100644 example/service_hl.socket.in
create mode 100644 example/service_hl@.service
diff --git a/example/single_file.h b/example/single_file.h
index 6e5d3e7c975385..290dd6051ed6f5 100644
--- a/example/single_file.h
+++ b/example/single_file.h
@@ -128,6 +128,7 @@ ssize_t single_file_pread(char *buf, size_t count, off_t pos);
/* low-level fuse operation handlers */
+#ifdef USE_SINGLE_FILE_LL_API
bool is_single_file_child(fuse_ino_t parent, const char *name);
bool is_single_file_ino(fuse_ino_t ino);
@@ -153,5 +154,42 @@ void single_file_ll_fsync(fuse_req_t req, fuse_ino_t ino, int datasync,
int reply_buf_limited(fuse_req_t req, const char *buf, size_t bufsize,
off_t off, size_t maxsize);
+#endif
+
+/* high-level fuse operation handlers */
+
+#ifdef USE_SINGLE_FILE_HL_API
+bool is_single_open_file_path(const struct fuse_file_info *fi, const char *name);
+
+int single_file_hl_readdir(const char *path, void *buf, fuse_fill_dir_t filler,
+ off_t offset, struct fuse_file_info *fi,
+ enum fuse_readdir_flags flags);
+
+int single_file_hl_statfs(const char *path, struct statvfs *buf);
+
+int single_file_hl_statx(const char *path, int statx_flags, int statx_mask,
+ struct statx *stx, struct fuse_file_info *fi);
+
+int single_file_hl_getattr(const char *path, struct stat *stbuf,
+ struct fuse_file_info *fi);
+int single_file_hl_chmod(const char *path, mode_t mode,
+ struct fuse_file_info *fi);
+int single_file_hl_utimens(const char *path, const struct timespec ctv[2],
+ struct fuse_file_info *fi);
+int single_file_hl_chown(const char *path, uid_t owner, gid_t group,
+ struct fuse_file_info *fi);
+int single_file_hl_truncate(const char *path, off_t len,
+ struct fuse_file_info *fi);
+
+int single_file_hl_opendir(const char *path, struct fuse_file_info *fi);
+int single_file_hl_open(const char *path, struct fuse_file_info *fi);
+
+int single_file_hl_fsync(const char *path, int datasync,
+ struct fuse_file_info *fi);
+#endif
+
+#if !defined(USE_SINGLE_FILE_LL_API) && !defined(USE_SINGLE_FILE_HL_API)
+# warning USE_SINGLE_FILE_[HL]L_API not defined!
+#endif
#endif /* FUSE_SINGLE_FILE_H_ */
diff --git a/example/meson.build b/example/meson.build
index e948f6ba74fdfa..19a383f7cd2c74 100644
--- a/example/meson.build
+++ b/example/meson.build
@@ -19,6 +19,12 @@ if platform.endswith('linux')
configure_file(input: 'service_ll.socket.in',
output: 'service_ll.socket',
configuration: private_cfg)
+
+ single_file_examples += [ 'service_hl' ]
+ configure_file(input: 'service_hl.socket.in',
+ output: 'service_hl.socket',
+ configuration: private_cfg)
+
endif
threaded_examples = [ 'notify_inval_inode',
diff --git a/example/service_hl.c b/example/service_hl.c
new file mode 100644
index 00000000000000..ea041f670f2ec5
--- /dev/null
+++ b/example/service_hl.c
@@ -0,0 +1,240 @@
+/*
+ * FUSE: Filesystem in Userspace
+ * Copyright (C) 2026 Oracle.
+ *
+ * This program can be distributed under the terms of the GNU GPLv2.
+ * See the file GPL2.txt.
+ */
+
+/** @file
+ *
+ * Minimal example filesystem using high-level API and systemd service API.
+ *
+ * - Shows how to build a high level FUSE filesystem server that can be managed
+ * by systemd
+ * - Enables on-demand filesystem mounting via socket activation
+ * - Demonstrates requesting resources from the mount-caller's environment
+ * - Allows running FUSE servers with minimal privileges; isolated mount,
+ * network, and pid namespaces; and a separate uid/gid (unlike traditional
+ * FUSE which needs mount permissions and runs in the caller's environment)
+ *
+ * Compile with:
+ *
+ * gcc -Wall single_file.c service_hl.c `pkg-config fuse3 --cflags --libs` -o service_hl
+ *
+ * Note: If the pkg-config command fails due to the absence of the fuse3.pc
+ * file, you should configure the path to the fuse3.pc file in the
+ * PKG_CONFIG_PATH variable.
+ *
+ * Change the ExecStart line in service_hl@.service:
+ *
+ * ExecStart=/path/to/service_hl
+ *
+ * to point to the actual path of the service_hl binary.
+ *
+ * Finally, install the service_hl@.service and service_hl.socket files to the
+ * systemd service directory, usually /run/systemd/system. Run these commands
+ * to activate:
+ *
+ * systemctl daemon-reload
+ * systemctl start service_hl.socket
+ *
+ * Then mount with:
+ *
+ * mount -t fuse.service_hl /dev/sda /mnt
+ *
+ * ## Source code ##
+ * \include service_hl.c
+ * \include service_hl.socket
+ * \include service_hl@.service
+ * \include single_file.c
+ * \include single_file.h
+ */
+
+#define FUSE_USE_VERSION FUSE_MAKE_VERSION(3, 19)
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
+#include <fuse.h>
+#include <fuse_service.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <assert.h>
+#include <pthread.h>
+#include <stddef.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <linux/fs.h>
+#include <linux/stat.h>
+#define USE_SINGLE_FILE_HL_API
+#include "single_file.h"
+
+struct service_hl {
+ char *device;
+ struct fuse_service *service;
+
+ /* really booleans */
+ int debug;
+};
+
+static struct service_hl hl = { };
+
+static void *service_hl_init(struct fuse_conn_info *conn,
+ struct fuse_config *cfg)
+{
+ (void) conn;
+ cfg->kernel_cache = 1;
+
+ return NULL;
+}
+
+static int service_hl_read(const char *path, char *buf, size_t count,
+ off_t pos, struct fuse_file_info *fi)
+{
+ if (!is_single_open_file_path(fi, path))
+ return -EIO;
+
+ if (hl.debug)
+ fprintf(stderr, "%s: pos 0x%llx count 0x%llx\n",
+ __func__,
+ (unsigned long long)pos,
+ (unsigned long long)count);
+
+ if (!single_file.allow_dio && fi->direct_io)
+ return -ENOSYS;
+
+ single_file_check_read(pos, &count);
+
+ if (!count)
+ return 0;
+
+ return single_file_pread(buf, count, pos);
+}
+
+static int service_hl_write(const char *path, const char *buf, size_t count,
+ off_t pos, struct fuse_file_info *fi)
+{
+ int ret;
+
+ if (!is_single_open_file_path(fi, path))
+ return -EIO;
+
+ if (hl.debug)
+ fprintf(stderr, "%s: pos 0x%llx count 0x%llx\n",
+ __func__,
+ (unsigned long long)pos,
+ (unsigned long long)count);
+
+ if (!single_file.allow_dio && fi->direct_io)
+ return -ENOSYS;
+
+ ret = single_file_check_write(pos, &count);
+ if (ret < 0)
+ return ret;
+
+ if (!count)
+ return 0;
+
+ return single_file_pwrite(buf, count, pos);
+}
+
+static const struct fuse_operations service_hl_oper = {
+ .getattr = single_file_hl_getattr,
+ .readdir = single_file_hl_readdir,
+ .open = single_file_hl_open,
+ .opendir = single_file_hl_opendir,
+ .statfs = single_file_hl_statfs,
+ .chmod = single_file_hl_chmod,
+ .utimens = single_file_hl_utimens,
+ .fsync = single_file_hl_fsync,
+ .chown = single_file_hl_chown,
+ .truncate = single_file_hl_truncate,
+ .statx = single_file_hl_statx,
+
+ .init = service_hl_init,
+ .read = service_hl_read,
+ .write = service_hl_write,
+};
+
+#define SERVICE_HL_OPT(t, p, v) { t, offsetof(struct service_hl, p), v }
+
+static struct fuse_opt service_hl_opts[] = {
+ SERVICE_HL_OPT("debug", debug, 1),
+ SINGLE_FILE_OPT_KEYS,
+ FUSE_OPT_END
+};
+
+static int service_hl_opt_proc(void *data, const char *arg, int key,
+ struct fuse_args *outargs)
+{
+ int ret = single_file_opt_proc(data, arg, key, outargs);
+
+ if (ret < 1)
+ return ret;
+
+ switch (key) {
+ case FUSE_OPT_KEY_NONOPT:
+ if (!hl.device) {
+ hl.device = strdup(arg);
+ return 0;
+ }
+ return 1;
+ default:
+ break;
+ }
+
+ return 1;
+}
+
+int main(int argc, char *argv[])
+{
+ struct fuse_args args = FUSE_ARGS_INIT(argc, argv);
+ int ret = 1;
+
+ if (fuse_service_accept(&hl.service))
+ goto err_args;
+
+ if (!fuse_service_accepted(hl.service))
+ goto err_args;
+
+ if (fuse_service_append_args(hl.service, &args))
+ goto err_service;
+
+ if (fuse_opt_parse(&args, &hl, service_hl_opts, service_hl_opt_proc))
+ goto err_service;
+
+ if (!hl.device) {
+ printf("usage: %s [options] <device> <mountpoint>\n", argv[0]);
+ printf(" %s --help\n", argv[0]);
+ goto err_service;
+ }
+
+ if (single_file_service_open(hl.service, hl.device))
+ goto err_service;
+
+ if (fuse_service_finish_file_requests(hl.service))
+ goto err_singlefile;
+
+ if (single_file_configure(hl.device, NULL))
+ goto err_singlefile;
+
+ fuse_service_expect_mount_format(hl.service, S_IFDIR);
+
+ ret = fuse_service_main(hl.service, &args, &service_hl_oper, NULL);
+
+err_singlefile:
+ single_file_close();
+err_service:
+ free(hl.device);
+ fuse_service_send_goodbye(hl.service, ret);
+ fuse_service_destroy(&hl.service);
+err_args:
+ fuse_opt_free_args(&args);
+ return fuse_service_exit(ret);
+}
diff --git a/example/service_hl.socket.in b/example/service_hl.socket.in
new file mode 100644
index 00000000000000..46035d6c315b8d
--- /dev/null
+++ b/example/service_hl.socket.in
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+#
+# Copyright (C) 2026 Oracle. All Rights Reserved.
+# Author: Darrick J. Wong <djwong@kernel.org>
+[Unit]
+Description=Socket for service_hl Service
+
+[Socket]
+ListenSequentialPacket=@FUSE_SERVICE_SOCKET_DIR_RAW@/service_hl
+Accept=yes
+SocketMode=@FUSE_SERVICE_SOCKET_PERMS@
+RemoveOnStop=yes
+
+[Install]
+WantedBy=sockets.target
diff --git a/example/service_hl@.service b/example/service_hl@.service
new file mode 100644
index 00000000000000..883b9c649cbc90
--- /dev/null
+++ b/example/service_hl@.service
@@ -0,0 +1,102 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+#
+# Copyright (C) 2026 Oracle. All Rights Reserved.
+# Author: Darrick J. Wong <djwong@kernel.org>
+[Unit]
+Description=service_hl Sample Fuse Service
+
+# Don't leave failed units behind, systemd does not clean them up!
+CollectMode=inactive-or-failed
+
+[Service]
+Type=exec
+ExecStart=/path/to/service_hl
+
+# Try to capture core dumps
+LimitCORE=infinity
+
+SyslogIdentifier=%N
+
+# No realtime CPU scheduling
+RestrictRealtime=true
+
+# Don't let us see anything in the regular system, and don't run as root
+DynamicUser=true
+ProtectSystem=strict
+ProtectHome=true
+PrivateTmp=true
+PrivateDevices=true
+PrivateUsers=true
+
+# No network access
+PrivateNetwork=true
+ProtectHostname=true
+RestrictAddressFamilies=none
+IPAddressDeny=any
+
+# Don't let the program mess with the kernel configuration at all
+ProtectKernelLogs=true
+ProtectKernelModules=true
+ProtectKernelTunables=true
+ProtectControlGroups=true
+ProtectProc=invisible
+RestrictNamespaces=true
+RestrictFileSystems=
+
+# Hide everything in /proc, even /proc/mounts
+ProcSubset=pid
+
+# Lock the execution domain to the default Linux personality
+LockPersonality=true
+
+# No memory pages that are both writable and executable
+MemoryDenyWriteExecute=true
+
+# Don't let our mounts leak out to the host
+PrivateMounts=true
+
+# Restrict system calls to the native arch and only enough to get things going
+SystemCallArchitectures=native
+SystemCallFilter=@system-service
+SystemCallFilter=~@privileged
+SystemCallFilter=~@resources
+
+SystemCallFilter=~@clock
+SystemCallFilter=~@cpu-emulation
+SystemCallFilter=~@debug
+SystemCallFilter=~@module
+SystemCallFilter=~@reboot
+SystemCallFilter=~@swap
+
+SystemCallFilter=~@mount
+
+# libfuse io_uring wants to pin cores and memory
+SystemCallFilter=mbind
+SystemCallFilter=sched_setaffinity
+
+# Leave a breadcrumb if we get whacked by the system call filter
+SystemCallErrorNumber=EL3RST
+
+# Log to the kernel dmesg, just like an in-kernel filesystem driver
+StandardOutput=append:/dev/ttyprintk
+StandardError=append:/dev/ttyprintk
+
+# Run with no capabilities at all
+CapabilityBoundingSet=
+AmbientCapabilities=
+NoNewPrivileges=true
+
+# We don't create files
+UMask=7777
+
+# No access to hardware /dev files at all
+ProtectClock=true
+DevicePolicy=closed
+
+# Don't mess with set[ug]id anything.
+RestrictSUIDSGID=true
+
+# Don't let OOM kills of processes in this containment group kill the whole
+# service, because we don't want filesystem drivers to go down.
+OOMPolicy=continue
+OOMScoreAdjust=-1000
diff --git a/example/service_ll.c b/example/service_ll.c
index d045176443d4e3..33a8bd48bc1215 100644
--- a/example/service_ll.c
+++ b/example/service_ll.c
@@ -67,6 +67,7 @@
#include <unistd.h>
#include <assert.h>
#include <pthread.h>
+#define USE_SINGLE_FILE_LL_API
#include "single_file.h"
struct service_ll {
diff --git a/example/single_file.c b/example/single_file.c
index 9b6f76504686b5..a962232d576e17 100644
--- a/example/single_file.c
+++ b/example/single_file.c
@@ -27,7 +27,10 @@
#define FUSE_USE_VERSION (FUSE_MAKE_VERSION(3, 19))
#include "fuse_lowlevel.h"
+#include "fuse.h"
#include "fuse_service.h"
+#define USE_SINGLE_FILE_LL_API
+#define USE_SINGLE_FILE_HL_API
#include "single_file.h"
#define min(x, y) ((x) < (y) ? (x) : (y))
@@ -60,6 +63,23 @@ struct single_file single_file = {
.lock = PTHREAD_MUTEX_INITIALIZER,
};
+/*
+ * Map a high-level API path to an inode number.
+ *
+ * "/" is the root directory and "/<single_file_name>" is the single file.
+ * Anything else, including a path that does not begin with '/', yields 0,
+ * which the callers in this file report as -ENOENT.
+ */
+static fuse_ino_t single_file_path_to_ino(const char *path)
+{
+	if (strcmp(path, "/") == 0)
+		return FUSE_ROOT_ID;
+	/* Test the leading '/' first so path + 1 is never read for "". */
+	if (path[0] == '/' && strcmp(path + 1, single_file_name) == 0)
+		return SINGLE_FILE_INO;
+	return 0;
+}
+
+static fuse_ino_t single_open_file_path_to_ino(const struct fuse_file_info *fi,
+ const char *path)
+{
+ if (fi)
+ return fi->fh;
+ return single_file_path_to_ino(path);
+}
+
static void dirbuf_add(fuse_req_t req, struct dirbuf *b, const char *name,
fuse_ino_t ino)
{
@@ -95,6 +115,13 @@ bool is_single_file_ino(fuse_ino_t ino)
return ino == SINGLE_FILE_INO;
}
+bool is_single_open_file_path(const struct fuse_file_info *fi, const char *name)
+{
+ if (fi)
+ return is_single_file_ino(fi->fh);
+ return name[0] == '/' && strcmp(name + 1, single_file_name) == 0;
+}
+
void single_file_ll_readdir(fuse_req_t req, fuse_ino_t ino, size_t size,
off_t off, struct fuse_file_info *fi)
{
@@ -121,6 +148,37 @@ void single_file_ll_readdir(fuse_req_t req, fuse_ino_t ino, size_t size,
free(b.p);
}
+int single_file_hl_readdir(const char *path, void *buf, fuse_fill_dir_t filler,
+ off_t offset, struct fuse_file_info *fi,
+ enum fuse_readdir_flags flags)
+{
+ struct stat stbuf;
+ fuse_ino_t ino = single_open_file_path_to_ino(fi, path);
+
+ memset(&stbuf, 0, sizeof(stbuf));
+
+ (void) offset;
+ (void) flags;
+
+ switch (ino) {
+ case FUSE_ROOT_ID:
+ break;
+ case SINGLE_FILE_INO:
+ return -ENOTDIR;
+ default:
+ return -ENOENT;
+ }
+
+ stbuf.st_ino = FUSE_ROOT_ID;
+ filler(buf, ".", &stbuf, 0, FUSE_FILL_DIR_DEFAULTS);
+ filler(buf, "..", &stbuf, 0, FUSE_FILL_DIR_DEFAULTS);
+
+ stbuf.st_ino = SINGLE_FILE_INO;
+ filler(buf, single_file_name, &stbuf, 0, FUSE_FILL_DIR_DEFAULTS);
+
+ return 0;
+}
+
static bool sf_stat(fuse_ino_t ino, struct single_file_stat *llstat)
{
struct fuse_entry_param *entry = &llstat->entry;
@@ -248,40 +306,82 @@ void single_file_ll_statx(fuse_req_t req, fuse_ino_t ino, int flags, int mask,
else
fuse_reply_statx(req, 0, &stx, 0.0);
}
+
+int single_file_hl_statx(const char *path, int statx_flags, int statx_mask,
+ struct statx *stx, struct fuse_file_info *fi)
+{
+ fuse_ino_t ino = single_open_file_path_to_ino(fi, path);
+ bool filled;
+
+ if (!ino)
+ return -ENOENT;
+
+ if ((statx_flags & AT_STATX_FORCE_SYNC) && is_single_file_ino(ino) &&
+ single_file.backing_fd >= 0) {
+ int ret = fsync(single_file.backing_fd);
+
+ if (ret)
+ return -errno;
+ }
+
+ pthread_mutex_lock(&single_file.lock);
+ filled = sf_statx(ino, statx_mask, stx);
+ pthread_mutex_unlock(&single_file.lock);
+
+ return filled ? 0 : -ENOENT;
+}
#else
void single_file_ll_statx(fuse_req_t req, fuse_ino_t ino, int flags, int mask,
struct fuse_file_info *fi)
{
fuse_reply_err(req, ENOSYS);
}
+
+int single_file_hl_statx(const char *path, int statx_flags, int statx_mask,
+ struct statx *stx, struct fuse_file_info *fi)
+{
+ return -ENOSYS;
+}
#endif /* STATX_BASIC_STATS */
+static void single_file_statfs(struct statvfs *buf)
+{
+ pthread_mutex_lock(&single_file.lock);
+ buf->f_bsize = single_file.blocksize;
+ buf->f_frsize = 0;
+
+ buf->f_blocks = single_file.blocks;
+ buf->f_bfree = 0;
+ buf->f_bavail = 0;
+ buf->f_files = 1;
+ buf->f_ffree = 0;
+ buf->f_favail = 0;
+ buf->f_fsid = 0x50C00L;
+ buf->f_flag = 0;
+ if (single_file.ro)
+ buf->f_flag |= ST_RDONLY;
+ buf->f_namemax = 255;
+ pthread_mutex_unlock(&single_file.lock);
+}
+
void single_file_ll_statfs(fuse_req_t req, fuse_ino_t ino)
{
struct statvfs buf;
(void)ino;
- pthread_mutex_lock(&single_file.lock);
- buf.f_bsize = single_file.blocksize;
- buf.f_frsize = 0;
-
- buf.f_blocks = single_file.blocks;
- buf.f_bfree = 0;
- buf.f_bavail = 0;
- buf.f_files = 1;
- buf.f_ffree = 0;
- buf.f_favail = 0;
- buf.f_fsid = 0x50C00L;
- buf.f_flag = 0;
- if (single_file.ro)
- buf.f_flag |= ST_RDONLY;
- buf.f_namemax = 255;
- pthread_mutex_unlock(&single_file.lock);
-
+ single_file_statfs(&buf);
fuse_reply_statfs(req, &buf);
}
+int single_file_hl_statfs(const char *path, struct statvfs *buf)
+{
+ (void)path;
+
+ single_file_statfs(buf);
+ return 0;
+}
+
void single_file_ll_getattr(fuse_req_t req, fuse_ino_t ino,
struct fuse_file_info *fi)
{
@@ -301,6 +401,28 @@ void single_file_ll_getattr(fuse_req_t req, fuse_ino_t ino,
llstat.entry.attr_timeout);
}
+int single_file_hl_getattr(const char *path, struct stat *stbuf,
+ struct fuse_file_info *fi)
+{
+ struct single_file_stat llstat;
+ fuse_ino_t ino = single_open_file_path_to_ino(fi, path);
+ bool filled;
+
+ if (!ino)
+ return -ENOENT;
+
+ memset(&llstat, 0, sizeof(llstat));
+ pthread_mutex_lock(&single_file.lock);
+ filled = sf_stat(ino, &llstat);
+ pthread_mutex_unlock(&single_file.lock);
+
+ if (!filled)
+ return -ENOENT;
+
+ memcpy(stbuf, &llstat.entry.attr, sizeof(*stbuf));
+ return 0;
+}
+
static void get_now(struct timespec *now)
{
#ifdef CLOCK_REALTIME
@@ -353,6 +475,82 @@ void single_file_ll_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr,
fuse_reply_err(req, EPERM);
}
+int single_file_hl_chmod(const char *path, mode_t mode,
+ struct fuse_file_info *fi)
+{
+ fuse_ino_t ino = single_open_file_path_to_ino(fi, path);
+
+ if (!ino)
+ return -ENOENT;
+ if (ino != SINGLE_FILE_INO)
+ return -EPERM;
+ if (single_file.ro)
+ return -EPERM;
+
+ pthread_mutex_lock(&single_file.lock);
+ single_file.mode = (single_file.mode & S_IFMT) | (mode & ~S_IFMT);
+ get_now(&single_file.ctime);
+ pthread_mutex_unlock(&single_file.lock);
+
+ return 0;
+}
+
+static void set_time(const struct timespec *ctv, struct timespec *tv)
+{
+ switch (ctv->tv_nsec) {
+ case UTIME_OMIT:
+ return;
+ case UTIME_NOW:
+ get_now(tv);
+ break;
+ default:
+ memcpy(tv, ctv, sizeof(*tv));
+ break;
+ }
+}
+
+int single_file_hl_utimens(const char *path, const struct timespec ctv[2],
+ struct fuse_file_info *fi)
+{
+ fuse_ino_t ino = single_open_file_path_to_ino(fi, path);
+
+ if (!ino)
+ return -ENOENT;
+ if (ino != SINGLE_FILE_INO)
+ return -EPERM;
+ if (single_file.ro)
+ return -EPERM;
+
+ pthread_mutex_lock(&single_file.lock);
+ set_time(&ctv[0], &single_file.atime);
+ set_time(&ctv[1], &single_file.mtime);
+ get_now(&single_file.ctime);
+ pthread_mutex_unlock(&single_file.lock);
+
+ return 0;
+}
+
+int single_file_hl_chown(const char *path, uid_t owner, gid_t group,
+ struct fuse_file_info *fi)
+{
+ (void)path;
+ (void)owner;
+ (void)group;
+ (void)fi;
+
+ return -EPERM;
+}
+
+int single_file_hl_truncate(const char *path, off_t len,
+ struct fuse_file_info *fi)
+{
+ (void)path;
+ (void)len;
+ (void)fi;
+
+ return -EPERM;
+}
+
void single_file_ll_lookup(fuse_req_t req, fuse_ino_t parent, const char *name)
{
struct single_file_stat llstat;
@@ -384,6 +582,34 @@ void single_file_ll_open(fuse_req_t req, fuse_ino_t ino,
fuse_reply_open(req, fi);
}
+int single_file_hl_opendir(const char *path, struct fuse_file_info *fi)
+{
+ fuse_ino_t ino = single_file_path_to_ino(path);
+
+ if (!ino)
+ return -ENOENT;
+ if (ino == SINGLE_FILE_INO)
+ return -ENOTDIR;
+
+ fi->fh = ino;
+ return 0;
+}
+
+int single_file_hl_open(const char *path, struct fuse_file_info *fi)
+{
+ fuse_ino_t ino = single_file_path_to_ino(path);
+
+ if (!ino)
+ return -ENOENT;
+ if (ino != SINGLE_FILE_INO)
+ return -EISDIR;
+ if (single_file.ro && (fi->flags & O_ACCMODE) != O_RDONLY)
+ return -EROFS;
+
+ fi->fh = ino;
+ return 0;
+}
+
void single_file_ll_fsync(fuse_req_t req, fuse_ino_t ino, int datasync,
struct fuse_file_info *fi)
{
@@ -401,6 +627,26 @@ void single_file_ll_fsync(fuse_req_t req, fuse_ino_t ino, int datasync,
fuse_reply_err(req, ret);
}
+int single_file_hl_fsync(const char *path, int datasync,
+ struct fuse_file_info *fi)
+{
+ fuse_ino_t ino = single_open_file_path_to_ino(fi, path);
+
+ (void)datasync;
+
+ if (!ino)
+ return -ENOENT;
+
+ if (ino == SINGLE_FILE_INO) {
+ int ret = fsync(single_file.backing_fd);
+
+ if (ret)
+ return -errno;
+ }
+
+ return 0;
+}
+
unsigned long long parse_num_blocks(const char *arg, int log_block_size)
{
char *p;
^ permalink raw reply related
* [PATCH 11/13] example/service_ll: create a sample systemd service fuse server
From: Darrick J. Wong @ 2026-04-30 21:17 UTC (permalink / raw)
To: bernd, djwong
Cc: linux-fsdevel, fuse-devel, linux-ext4, miklos, neal, joannelkoong
In-Reply-To: <177758363484.1314717.11777978893472254088.stgit@frogsfrogsfrogs>
From: Darrick J. Wong <djwong@kernel.org>
Create a simple fuse server that can be run as a systemd service.
I plan to create some more single-file fuse server examples, so most of
the boilerplate code goes in a separate file.
Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
---
example/single_file.h | 157 +++++++++
lib/util.h | 35 ++
example/meson.build | 14 +
example/service_ll.c | 328 ++++++++++++++++++
example/service_ll.socket.in | 15 +
example/service_ll@.service | 102 ++++++
example/single_file.c | 746 ++++++++++++++++++++++++++++++++++++++++++
meson.build | 1
8 files changed, 1398 insertions(+)
create mode 100644 example/single_file.h
create mode 100644 example/service_ll.c
create mode 100644 example/service_ll.socket.in
create mode 100644 example/service_ll@.service
create mode 100644 example/single_file.c
diff --git a/example/single_file.h b/example/single_file.h
new file mode 100644
index 00000000000000..6e5d3e7c975385
--- /dev/null
+++ b/example/single_file.h
@@ -0,0 +1,157 @@
+/*
+ * FUSE: Filesystem in Userspace
+ * Copyright (C) 2026 Oracle.
+ *
+ * This program can be distributed under the terms of the GNU GPLv2.
+ * See the file GPL2.txt.
+ *
+ * This file is shared library code for example fuse servers that want to
+ * expose a single regular file that wraps another file in a manner that goes
+ * beyond simple passthrough. It is not itself a fuse server.
+ */
+#ifndef FUSE_SINGLE_FILE_H_
+#define FUSE_SINGLE_FILE_H_
+
+static inline uint64_t round_up(uint64_t b, unsigned int align)
+{
+ unsigned int m;
+
+ if (align == 0)
+ return b;
+ m = b % align;
+ if (m)
+ b += align - m;
+ return b;
+}
+
+static inline uint64_t round_down(uint64_t b, unsigned int align)
+{
+ unsigned int m;
+
+ if (align == 0)
+ return b;
+ m = b % align;
+ return b - m;
+}
+
+static inline uint64_t howmany(uint64_t b, unsigned int align)
+{
+ unsigned int m;
+
+ if (align == 0)
+ return b;
+ m = (b % align) ? 1 : 0;
+ return (b / align) + m;
+}
+
+struct single_file {
+ int backing_fd;
+
+ int64_t isize;
+ uint64_t blocks;
+
+ mode_t mode;
+
+ bool ro;
+ bool allow_dio;
+ bool sync;
+ bool require_bdev;
+
+ unsigned int blocksize;
+
+ struct timespec atime;
+ struct timespec mtime;
+ struct timespec ctime;
+
+ pthread_mutex_t lock;
+};
+
+extern struct single_file single_file;
+
+static inline uint64_t b_to_fsbt(uint64_t off)
+{
+ return off / single_file.blocksize;
+}
+
+static inline uint64_t b_to_fsb(uint64_t off)
+{
+ return (off + single_file.blocksize - 1) / single_file.blocksize;
+}
+
+static inline uint64_t fsb_to_b(uint64_t fsb)
+{
+ return fsb * single_file.blocksize;
+}
+
+enum single_file_opt_keys {
+ SINGLE_FILE_RO = 171717, /* how many options could we possibly have? */
+ SINGLE_FILE_RW,
+ SINGLE_FILE_REQUIRE_BDEV,
+ SINGLE_FILE_DIO,
+ SINGLE_FILE_NODIO,
+ SINGLE_FILE_SYNC,
+ SINGLE_FILE_NOSYNC,
+ SINGLE_FILE_SIZE,
+ SINGLE_FILE_BLOCKSIZE,
+
+ SINGLE_FILE_NR_KEYS,
+};
+
+#define SINGLE_FILE_OPT_KEYS \
+ FUSE_OPT_KEY("ro", SINGLE_FILE_RO), \
+ FUSE_OPT_KEY("rw", SINGLE_FILE_RW), \
+ FUSE_OPT_KEY("require_bdev", SINGLE_FILE_REQUIRE_BDEV), \
+ FUSE_OPT_KEY("dio", SINGLE_FILE_DIO), \
+ FUSE_OPT_KEY("nodio", SINGLE_FILE_NODIO), \
+ FUSE_OPT_KEY("sync", SINGLE_FILE_SYNC), \
+ FUSE_OPT_KEY("nosync", SINGLE_FILE_NOSYNC), \
+ FUSE_OPT_KEY("size=%s", SINGLE_FILE_SIZE), \
+ FUSE_OPT_KEY("blocksize=%s", SINGLE_FILE_BLOCKSIZE)
+
+int single_file_opt_proc(void *data, const char *arg, int key,
+ struct fuse_args *outargs);
+
+unsigned long long parse_num_blocks(const char *arg, int log_block_size);
+
+struct fuse_service;
+int single_file_service_open(struct fuse_service *sf, const char *path);
+
+void single_file_check_read(off_t pos, size_t *count);
+int single_file_check_write(off_t pos, size_t *count);
+
+int single_file_configure(const char *device, const char *filename);
+int single_file_configure_simple(const char *filename);
+void single_file_close(void);
+
+ssize_t single_file_pwrite(const char *buf, size_t count, off_t pos);
+ssize_t single_file_pread(char *buf, size_t count, off_t pos);
+
+/* low-level fuse operation handlers */
+
+bool is_single_file_child(fuse_ino_t parent, const char *name);
+bool is_single_file_ino(fuse_ino_t ino);
+
+void single_file_ll_readdir(fuse_req_t req, fuse_ino_t ino, size_t size,
+ off_t off, struct fuse_file_info *fi);
+
+void single_file_ll_statfs(fuse_req_t req, fuse_ino_t ino);
+
+void single_file_ll_statx(fuse_req_t req, fuse_ino_t ino, int flags, int mask,
+ struct fuse_file_info *fi);
+
+void single_file_ll_getattr(fuse_req_t req, fuse_ino_t ino,
+ struct fuse_file_info *fi);
+void single_file_ll_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr,
+ int to_set, struct fuse_file_info *fi);
+
+void single_file_ll_lookup(fuse_req_t req, fuse_ino_t parent, const char *name);
+void single_file_ll_open(fuse_req_t req, fuse_ino_t ino,
+ struct fuse_file_info *fi);
+
+void single_file_ll_fsync(fuse_req_t req, fuse_ino_t ino, int datasync,
+ struct fuse_file_info *fi);
+
+int reply_buf_limited(fuse_req_t req, const char *buf, size_t bufsize,
+ off_t off, size_t maxsize);
+
+#endif /* FUSE_SINGLE_FILE_H_ */
diff --git a/lib/util.h b/lib/util.h
index 107a2bfdd6105b..6ec6604fb74caf 100644
--- a/lib/util.h
+++ b/lib/util.h
@@ -4,6 +4,9 @@
#include <stdint.h>
#include <stdbool.h>
+#define max(x, y) ((x) > (y) ? (x) : (y))
+#define min(x, y) ((x) < (y) ? (x) : (y))
+
#define ROUND_UP(val, round_to) (((val) + (round_to - 1)) & ~(round_to - 1))
#define likely(x) __builtin_expect(!!(x), 1)
@@ -46,4 +49,36 @@ static inline uint64_t fuse_higher_32_bits(uint64_t nr)
#define fallthrough do {} while (0)
#endif
+static inline uint64_t round_up(uint64_t b, unsigned int align)
+{
+ unsigned int m;
+
+ if (align == 0)
+ return b;
+ m = b % align;
+ if (m)
+ b += align - m;
+ return b;
+}
+
+static inline uint64_t round_down(uint64_t b, unsigned int align)
+{
+ unsigned int m;
+
+ if (align == 0)
+ return b;
+ m = b % align;
+ return b - m;
+}
+
+static inline uint64_t howmany(uint64_t b, unsigned int align)
+{
+ unsigned int m;
+
+ if (align == 0)
+ return b;
+ m = (b % align) ? 1 : 0;
+ return (b / align) + m;
+}
+
#endif /* FUSE_UTIL_H_ */
diff --git a/example/meson.build b/example/meson.build
index 76cf2d96db0349..e948f6ba74fdfa 100644
--- a/example/meson.build
+++ b/example/meson.build
@@ -12,6 +12,15 @@ if not platform.endswith('bsd') and platform != 'dragonfly'
examples += [ 'null' ]
endif
+single_file_examples = [ ]
+
+if platform.endswith('linux')
+ single_file_examples += [ 'service_ll' ]
+ configure_file(input: 'service_ll.socket.in',
+ output: 'service_ll.socket',
+ configuration: private_cfg)
+endif
+
threaded_examples = [ 'notify_inval_inode',
'invalidate_path',
'notify_store_retrieve',
@@ -25,6 +34,11 @@ foreach ex : examples
install: false)
endforeach
+foreach ex : single_file_examples
+ executable(ex, [ex + '.c', 'single_file.c'],
+ dependencies: [ libfuse_dep ],
+ install: false)
+endforeach
foreach ex : threaded_examples
executable(ex, ex + '.c',
diff --git a/example/service_ll.c b/example/service_ll.c
new file mode 100644
index 00000000000000..d045176443d4e3
--- /dev/null
+++ b/example/service_ll.c
@@ -0,0 +1,328 @@
+/*
+ * FUSE: Filesystem in Userspace
+ * Copyright (C) 2026 Oracle.
+ *
+ * This program can be distributed under the terms of the GNU GPLv2.
+ * See the file GPL2.txt.
+ */
+
+/** @file
+ *
+ * Minimal example filesystem using low-level API and systemd service API.
+ *
+ * - Shows how to build a low level FUSE filesystem server that can be managed
+ * by systemd
+ * - Enables on-demand filesystem mounting via socket activation
+ * - Demonstrates requesting resources from the mount-caller's environment
+ * - Allows running FUSE servers with minimal privileges; isolated mount,
+ * network, and pid namespaces; and a separate uid/gid (unlike traditional
+ * FUSE which needs mount permissions and runs in the caller's environment)
+ *
+ * Compile with:
+ *
+ * gcc -Wall single_file.c service_ll.c `pkg-config fuse3 --cflags --libs` -o service_ll
+ *
+ * Note: If the pkg-config command fails due to the absence of the fuse3.pc
+ * file, you should configure the path to the fuse3.pc file in the
+ * PKG_CONFIG_PATH variable.
+ *
+ * Change the ExecStart line in service_ll@.service:
+ *
+ * ExecStart=/path/to/service_ll
+ *
+ * to point to the actual path of the service_ll binary.
+ *
+ * Finally, install the service_ll@.service and service_ll.socket files to the
+ * systemd service directory, usually /run/systemd/system. Run these commands
+ * to activate:
+ *
+ * systemctl daemon-reload
+ * systemctl start service_ll.socket
+ *
+ * Then mount with:
+ *
+ * mount -t fuse.service_ll /dev/sda /mnt
+ *
+ * ## Source code ##
+ * \include service_ll.c
+ * \include service_ll.socket
+ * \include service_ll@.service
+ * \include single_file.c
+ * \include single_file.h
+ */
+
+#define FUSE_USE_VERSION FUSE_MAKE_VERSION(3, 19)
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
+#include <fuse_lowlevel.h>
+#include <fuse_service.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <assert.h>
+#include <pthread.h>
+#include "single_file.h"
+
+struct service_ll {
+ struct fuse_session *se;
+ char *device;
+ struct fuse_service *service;
+
+ /* really booleans */
+ int debug;
+};
+
+static struct service_ll ll = { };
+
+static void service_ll_init(void *userdata, struct fuse_conn_info *conn)
+{
+ (void)userdata;
+
+ conn->time_gran = 1;
+}
+
+static void service_ll_read(fuse_req_t req, fuse_ino_t ino, size_t count,
+ off_t pos, struct fuse_file_info *fi)
+{
+ void *buf = NULL;
+ ssize_t got;
+ int ret;
+
+ if (!is_single_file_ino(ino)) {
+ ret = EIO;
+ goto out_reply;
+ }
+
+ if (ll.debug)
+ fprintf(stderr, "%s: pos 0x%llx count 0x%llx\n",
+ __func__,
+ (unsigned long long)pos,
+ (unsigned long long)count);
+
+ if (!single_file.allow_dio && fi->direct_io) {
+ ret = ENOSYS;
+ goto out_reply;
+ }
+
+ single_file_check_read(pos, &count);
+
+ if (!count) {
+ fuse_reply_buf(req, buf, 0);
+ return;
+ }
+
+ buf = malloc(count);
+ if (!buf) {
+ ret = ENOMEM;
+ goto out_reply;
+ }
+
+ got = single_file_pread(buf, count, pos);
+ if (got < 0) {
+ ret = -got;
+ goto out_reply;
+ }
+
+ fuse_reply_buf(req, buf, got);
+ goto out_buf;
+
+out_reply:
+ fuse_reply_err(req, ret);
+out_buf:
+ free(buf);
+}
+
+static void service_ll_write(fuse_req_t req, fuse_ino_t ino, const char *buf,
+ size_t count, off_t pos,
+ struct fuse_file_info *fi)
+{
+ ssize_t got;
+ int ret;
+
+ if (!is_single_file_ino(ino)) {
+ ret = EIO;
+ goto out_reply;
+ }
+
+ if (ll.debug)
+ fprintf(stderr, "%s: pos 0x%llx count 0x%llx\n",
+ __func__,
+ (unsigned long long)pos,
+ (unsigned long long)count);
+
+ if (!single_file.allow_dio && fi->direct_io) {
+ ret = ENOSYS;
+ goto out_reply;
+ }
+
+ ret = -single_file_check_write(pos, &count);
+ if (ret)
+ goto out_reply;
+
+ if (!count) {
+ fuse_reply_write(req, 0);
+ return;
+ }
+
+ got = single_file_pwrite(buf, count, pos);
+ if (got < 0) {
+ ret = -got;
+ goto out_reply;
+ }
+
+ fuse_reply_write(req, got);
+ return;
+
+out_reply:
+ fuse_reply_err(req, ret);
+}
+
+static const struct fuse_lowlevel_ops service_ll_oper = {
+ .lookup = single_file_ll_lookup,
+ .getattr = single_file_ll_getattr,
+ .setattr = single_file_ll_setattr,
+ .readdir = single_file_ll_readdir,
+ .open = single_file_ll_open,
+ .statfs = single_file_ll_statfs,
+ .statx = single_file_ll_statx,
+ .fsync = single_file_ll_fsync,
+
+ .init = service_ll_init,
+ .read = service_ll_read,
+ .write = service_ll_write,
+};
+
+#define SERVICE_LL_OPT(t, p, v) { t, offsetof(struct service_ll, p), v }
+
+static struct fuse_opt service_ll_opts[] = {
+ SERVICE_LL_OPT("debug", debug, 1),
+ SINGLE_FILE_OPT_KEYS,
+ FUSE_OPT_END
+};
+
+/*
+ * fuse_opt_parse() callback for service_ll.
+ *
+ * Every argument is offered to single_file_opt_proc() first; if that returns
+ * less than 1, its result is passed straight back.  Otherwise the first
+ * non-option argument is captured as the backing device path and discarded
+ * from the argument list, and every other argument is kept.
+ *
+ * Returns 0 to discard the argument, 1 to keep it, or -1 on error.
+ */
+static int service_ll_opt_proc(void *data, const char *arg, int key,
+			       struct fuse_args *outargs)
+{
+	int ret = single_file_opt_proc(data, arg, key, outargs);
+
+	if (ret < 1)
+		return ret;
+
+	switch (key) {
+	case FUSE_OPT_KEY_NONOPT:
+		if (!ll.device) {
+			ll.device = strdup(arg);
+			/*
+			 * Fail the parse if the copy could not be allocated;
+			 * otherwise the next positional argument (the
+			 * mountpoint) would be mistaken for the device.
+			 */
+			if (!ll.device)
+				return -1;
+			return 0;
+		}
+		return 1;
+	default:
+		break;
+	}
+
+	return 1;
+}
+
+int main(int argc, char *argv[])
+{
+ struct fuse_args args = FUSE_ARGS_INIT(argc, argv);
+ struct fuse_cmdline_opts opts = { };
+ struct fuse_loop_config *config = NULL;
+ int ret = 1;
+
+ if (fuse_service_accept(&ll.service))
+ goto err_args;
+
+ if (!fuse_service_accepted(ll.service))
+ goto err_args;
+
+ if (fuse_service_append_args(ll.service, &args))
+ goto err_service;
+
+ if (fuse_opt_parse(&args, &ll, service_ll_opts, service_ll_opt_proc))
+ goto err_service;
+
+ if (fuse_service_parse_cmdline_opts(&args, &opts))
+ goto err_service;
+
+ if (opts.show_help) {
+ printf("usage: %s [options] <device> <mountpoint>\n\n", argv[0]);
+ fuse_cmdline_help();
+ fuse_lowlevel_help();
+ ret = 0;
+ goto err_service;
+ } else if (opts.show_version) {
+ printf("FUSE library version %s\n", fuse_pkgversion());
+ fuse_lowlevel_version();
+ ret = 0;
+ goto err_service;
+ }
+
+ if (!opts.mountpoint || !ll.device) {
+ printf("usage: %s [options] <device> <mountpoint>\n", argv[0]);
+ printf(" %s --help\n", argv[0]);
+ goto err_service;
+ }
+
+ if (single_file_service_open(ll.service, ll.device))
+ goto err_service;
+
+ if (fuse_service_finish_file_requests(ll.service))
+ goto err_singlefile;
+
+ if (single_file_configure(ll.device, NULL))
+ goto err_singlefile;
+
+ ll.se = fuse_session_new(&args, &service_ll_oper,
+ sizeof(service_ll_oper), NULL);
+ if (ll.se == NULL)
+ goto err_singlefile;
+
+ if (!opts.singlethread) {
+ config = fuse_loop_cfg_create();
+ if (!config) {
+ ret = 1;
+ goto err_session;
+ }
+ }
+
+ if (fuse_set_signal_handlers(ll.se))
+ goto err_loopcfg;
+
+ if (fuse_service_session_mount(ll.service, ll.se, S_IFDIR, &opts))
+ goto err_signals;
+
+ /* Block until ctrl+c or fusermount -u */
+ if (opts.singlethread) {
+ fuse_service_send_goodbye(ll.service, 0);
+ fuse_service_release(ll.service);
+ ret = fuse_session_loop(ll.se);
+ } else {
+ fuse_loop_cfg_set_clone_fd(config, opts.clone_fd);
+ fuse_loop_cfg_set_max_threads(config, opts.max_threads);
+
+ fuse_service_send_goodbye(ll.service, 0);
+ fuse_service_release(ll.service);
+ ret = fuse_session_loop_mt(ll.se, config);
+ }
+
+err_signals:
+ fuse_remove_signal_handlers(ll.se);
+err_loopcfg:
+ fuse_loop_cfg_destroy(config);
+err_session:
+ fuse_session_destroy(ll.se);
+err_singlefile:
+ single_file_close();
+err_service:
+ free(opts.mountpoint);
+ free(ll.device);
+ fuse_service_send_goodbye(ll.service, ret);
+ fuse_service_destroy(&ll.service);
+err_args:
+ fuse_opt_free_args(&args);
+ return fuse_service_exit(ret);
+}
diff --git a/example/service_ll.socket.in b/example/service_ll.socket.in
new file mode 100644
index 00000000000000..c41c382878a0cd
--- /dev/null
+++ b/example/service_ll.socket.in
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+#
+# Copyright (C) 2026 Oracle. All Rights Reserved.
+# Author: Darrick J. Wong <djwong@kernel.org>
+[Unit]
+Description=Socket for service_ll Service
+
+[Socket]
+ListenSequentialPacket=@FUSE_SERVICE_SOCKET_DIR_RAW@/service_ll
+Accept=yes
+SocketMode=@FUSE_SERVICE_SOCKET_PERMS@
+RemoveOnStop=yes
+
+[Install]
+WantedBy=sockets.target
diff --git a/example/service_ll@.service b/example/service_ll@.service
new file mode 100644
index 00000000000000..016d839babe3cc
--- /dev/null
+++ b/example/service_ll@.service
@@ -0,0 +1,102 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+#
+# Copyright (C) 2026 Oracle. All Rights Reserved.
+# Author: Darrick J. Wong <djwong@kernel.org>
+[Unit]
+Description=service_ll Sample Fuse Service
+
+# Don't leave failed units behind, systemd does not clean them up!
+CollectMode=inactive-or-failed
+
+[Service]
+Type=exec
+ExecStart=/path/to/service_ll
+
+# Try to capture core dumps
+LimitCORE=infinity
+
+SyslogIdentifier=%N
+
+# No realtime CPU scheduling
+RestrictRealtime=true
+
+# Don't let us see anything in the regular system, and don't run as root
+DynamicUser=true
+ProtectSystem=strict
+ProtectHome=true
+PrivateTmp=true
+PrivateDevices=true
+PrivateUsers=true
+
+# No network access
+PrivateNetwork=true
+ProtectHostname=true
+RestrictAddressFamilies=none
+IPAddressDeny=any
+
+# Don't let the program mess with the kernel configuration at all
+ProtectKernelLogs=true
+ProtectKernelModules=true
+ProtectKernelTunables=true
+ProtectControlGroups=true
+ProtectProc=invisible
+RestrictNamespaces=true
+RestrictFileSystems=
+
+# Hide everything in /proc, even /proc/mounts
+ProcSubset=pid
+
+# Lock the execution domain to the default Linux personality
+LockPersonality=true
+
+# No memory pages that are both writable and executable
+MemoryDenyWriteExecute=true
+
+# Don't let our mounts leak out to the host
+PrivateMounts=true
+
+# Restrict system calls to the native arch and only enough to get things going
+SystemCallArchitectures=native
+SystemCallFilter=@system-service
+SystemCallFilter=~@privileged
+SystemCallFilter=~@resources
+
+SystemCallFilter=~@clock
+SystemCallFilter=~@cpu-emulation
+SystemCallFilter=~@debug
+SystemCallFilter=~@module
+SystemCallFilter=~@reboot
+SystemCallFilter=~@swap
+
+SystemCallFilter=~@mount
+
+# libfuse io_uring wants to pin cores and memory
+SystemCallFilter=mbind
+SystemCallFilter=sched_setaffinity
+
+# Leave a breadcrumb if we get whacked by the system call filter
+SystemCallErrorNumber=EL3RST
+
+# Log to the kernel dmesg, just like an in-kernel filesystem driver
+StandardOutput=append:/dev/ttyprintk
+StandardError=append:/dev/ttyprintk
+
+# Run with no capabilities at all
+CapabilityBoundingSet=
+AmbientCapabilities=
+NoNewPrivileges=true
+
+# We don't create files
+UMask=7777
+
+# No access to hardware /dev files at all
+ProtectClock=true
+DevicePolicy=closed
+
+# Don't mess with set[ug]id anything.
+RestrictSUIDSGID=true
+
+# Don't let OOM kills of processes in this containment group kill the whole
+# service, because we don't want filesystem drivers to go down.
+OOMPolicy=continue
+OOMScoreAdjust=-1000
diff --git a/example/single_file.c b/example/single_file.c
new file mode 100644
index 00000000000000..9b6f76504686b5
--- /dev/null
+++ b/example/single_file.c
@@ -0,0 +1,746 @@
+/*
+ * FUSE: Filesystem in Userspace
+ * Copyright (C) 2026 Oracle.
+ *
+ * This program can be distributed under the terms of the GNU GPLv2.
+ * See the file GPL2.txt.
+ *
+ * This file is shared library code for example fuse servers that want to
+ * expose a single regular file that wraps another file in a manner that goes
+ * beyond simple passthrough. It is not itself a fuse server.
+ */
+#define _GNU_SOURCE
+#include <pthread.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#ifdef __linux__
+#include <linux/fs.h>
+#include <linux/stat.h>
+#endif
+
+#define FUSE_USE_VERSION (FUSE_MAKE_VERSION(3, 19))
+
+#include "fuse_lowlevel.h"
+#include "fuse_service.h"
+#include "single_file.h"
+
+/* Smaller of two values.  Note that each argument is evaluated twice. */
+#define min(x, y) ((x) < (y) ? (x) : (y))
+
+/*
+ * Marker for an intentional fall through between switch cases.  Expands to
+ * the compiler attribute when the compiler reports support for it, and to an
+ * empty statement otherwise.
+ */
+#if __has_attribute(__fallthrough__)
+#define fallthrough __attribute__((__fallthrough__))
+#else
+#define fallthrough do {} while (0)
+#endif
+
+/* Growable byte buffer that accumulates directory entries for readdir. */
+struct dirbuf {
+ char *p; /* heap buffer holding the packed entries */
+ size_t size; /* number of bytes of entries stored in p */
+};
+
+/* Attributes of one inode, in the form used for lookup and getattr replies. */
+struct single_file_stat {
+ struct fuse_entry_param entry;
+};
+
+/* Inode number of the one regular file: the first number after the root. */
+#define SINGLE_FILE_INO (FUSE_ROOT_ID + 1)
+
+/*
+ * Name of the file inside the root directory.  single_file_name_set records
+ * whether the name points to heap memory that must be freed.
+ */
+static const char *single_file_name = "single_file";
+static bool single_file_name_set;
+/* Timestamp reported for the root directory. */
+static struct timespec startup_time;
+
+/*
+ * Global state of the exported file.  By default there is no backing fd,
+ * direct I/O is allowed, and the file is a read-only regular file.
+ */
+struct single_file single_file = {
+ .backing_fd = -1,
+ .allow_dio = true,
+ .mode = S_IFREG | 0444,
+ .lock = PTHREAD_MUTEX_INITIALIZER,
+};
+
+/*
+ * Append one directory entry to a dirbuf.
+ *
+ * @param req   request the directory listing is being built for
+ * @param b     buffer to grow; b->p and b->size are updated on success
+ * @param name  name of the entry
+ * @param ino   inode number reported for the entry
+ *
+ * If the buffer cannot be grown the entry is dropped and the buffer is left
+ * exactly as it was, so the entries added so far remain usable.
+ */
+static void dirbuf_add(fuse_req_t req, struct dirbuf *b, const char *name,
+		       fuse_ino_t ino)
+{
+	struct stat stbuf;
+	size_t oldsize = b->size;
+	size_t newsize;
+	char *newp;
+
+	/* The first call only measures how much room the entry needs. */
+	newsize = oldsize + fuse_add_direntry(req, NULL, 0, name, NULL, 0);
+
+	/* Keep the old pointer until we know that the resize worked. */
+	newp = realloc(b->p, newsize);
+	if (!newp)
+		return;
+	b->p = newp;
+	b->size = newsize;
+
+	memset(&stbuf, 0, sizeof(stbuf));
+	stbuf.st_ino = ino;
+	fuse_add_direntry(req, b->p + oldsize, b->size - oldsize, name, &stbuf,
+			  b->size);
+}
+
+/*
+ * Reply to a request with the part of a buffer that starts at a given offset.
+ *
+ * @param req      request to answer
+ * @param buf      complete buffer
+ * @param bufsize  number of valid bytes in buf
+ * @param off      offset into buf at which the reply starts
+ * @param maxsize  largest number of bytes that may be sent
+ * @return the return value of fuse_reply_buf()
+ *
+ * An offset that is negative or at/after the end of the buffer produces an
+ * empty reply.  The offset is range checked before it is used as an index so
+ * that a negative value can never reach the pointer arithmetic, even where
+ * off_t is wider than size_t.
+ */
+int reply_buf_limited(fuse_req_t req, const char *buf, size_t bufsize,
+		      off_t off, size_t maxsize)
+{
+	size_t remaining;
+
+	if (off < 0 || (unsigned long long)off >= bufsize)
+		return fuse_reply_buf(req, NULL, 0);
+
+	remaining = bufsize - (size_t)off;
+	return fuse_reply_buf(req, buf + off, min(remaining, maxsize));
+}
+
+/* Does (parent, name) identify the single file in the root directory? */
+bool is_single_file_child(fuse_ino_t parent, const char *name)
+{
+	if (parent != FUSE_ROOT_ID)
+		return false;
+
+	return !strcmp(name, single_file_name);
+}
+
+/* Is this the inode number of the single file? */
+bool is_single_file_ino(fuse_ino_t ino)
+{
+	const fuse_ino_t file_ino = SINGLE_FILE_INO;
+
+	return ino == file_ino;
+}
+
+/*
+ * List the root directory, which contains ".", ".." and the single file.
+ * Replies ENOTDIR for the file inode and ENOENT for any other inode.
+ */
+void single_file_ll_readdir(fuse_req_t req, fuse_ino_t ino, size_t size,
+			    off_t off, struct fuse_file_info *fi)
+{
+	struct dirbuf listing;
+
+	(void) fi;
+
+	if (ino == SINGLE_FILE_INO) {
+		fuse_reply_err(req, ENOTDIR);
+		return;
+	}
+	if (ino != FUSE_ROOT_ID) {
+		fuse_reply_err(req, ENOENT);
+		return;
+	}
+
+	/* Build the whole listing, then send the window that was asked for. */
+	memset(&listing, 0, sizeof(listing));
+	dirbuf_add(req, &listing, ".", FUSE_ROOT_ID);
+	dirbuf_add(req, &listing, "..", FUSE_ROOT_ID);
+	dirbuf_add(req, &listing, single_file_name, SINGLE_FILE_INO);
+	reply_buf_limited(req, listing.p, listing.size, off, size);
+	free(listing.p);
+}
+
+/*
+ * Fill in the entry parameters and stat data for the root directory or the
+ * single file.  Returns false if the inode number is neither of the two.
+ */
+static bool sf_stat(fuse_ino_t ino, struct single_file_stat *llstat)
+{
+	struct fuse_entry_param *e = &llstat->entry;
+	struct stat *st = &e->attr;
+
+	switch (ino) {
+	case FUSE_ROOT_ID:
+		st->st_mode = S_IFDIR | 0555;
+		st->st_nlink = 2;
+		st->st_atim = startup_time;
+		st->st_mtim = startup_time;
+		st->st_ctim = startup_time;
+		break;
+	case SINGLE_FILE_INO:
+		st->st_mode = single_file.mode;
+		st->st_nlink = 1;
+		st->st_size = single_file.isize;
+		st->st_blksize = single_file.blocksize;
+		st->st_blocks = howmany(single_file.isize, 512);
+		st->st_atim = single_file.atime;
+		st->st_mtim = single_file.mtime;
+		st->st_ctim = single_file.ctime;
+		break;
+	default:
+		return false;
+	}
+
+	st->st_ino = ino;
+	e->ino = ino;
+	e->generation = ino + 1;
+	/* zero validity timeouts for both the attributes and the name */
+	e->attr_timeout = 0.0;
+	e->entry_timeout = 0.0;
+
+	return true;
+}
+
+#if defined(STATX_BASIC_STATS)
+/*
+ * Report one statx attribute bit: the bit is always marked as supported in
+ * the attribute mask, and is turned on in the attributes only if @set is
+ * nonzero.
+ */
+static inline void sf_set_statx_attr(struct statx *stx,
+				     uint64_t statx_flag, int set)
+{
+	stx->stx_attributes_mask |= statx_flag;
+	if (set != 0)
+		stx->stx_attributes |= statx_flag;
+}
+
+/*
+ * Copy the direct I/O alignment of the backing fd into @stx.  Nothing is
+ * changed if the backing fd cannot be queried or does not report alignment.
+ */
+static void sf_statx_directio(struct statx *stx)
+{
+	struct statx backing;
+
+	if (statx(single_file.backing_fd, "", AT_EMPTY_PATH, STATX_DIOALIGN,
+		  &backing) != 0)
+		return;
+	if ((backing.stx_mask & STATX_DIOALIGN) == 0)
+		return;
+
+	stx->stx_dio_mem_align = backing.stx_dio_mem_align;
+	stx->stx_dio_offset_align = backing.stx_dio_offset_align;
+	stx->stx_mask |= STATX_DIOALIGN;
+}
+
+/* Store a struct timespec in a statx timestamp. */
+static inline void sf_statx_time(struct statx_timestamp *dst,
+				 const struct timespec *src)
+{
+	dst->tv_sec = src->tv_sec;
+	dst->tv_nsec = src->tv_nsec;
+}
+
+/*
+ * Fill in statx data for the root directory or the single file.  The
+ * requested mask is ignored; the same set of fields is always reported.
+ * Returns false if the inode number is neither of the two.
+ */
+static bool sf_statx(fuse_ino_t ino, int statx_mask, struct statx *stx)
+{
+	(void)statx_mask;
+
+	switch (ino) {
+	case FUSE_ROOT_ID:
+		stx->stx_mask = STATX_BASIC_STATS | STATX_BTIME;
+		stx->stx_mode = S_IFDIR | 0555;
+		stx->stx_nlink = 2;
+		sf_statx_time(&stx->stx_atime, &startup_time);
+		sf_statx_time(&stx->stx_mtime, &startup_time);
+		sf_statx_time(&stx->stx_ctime, &startup_time);
+		sf_statx_time(&stx->stx_btime, &startup_time);
+		break;
+	case SINGLE_FILE_INO:
+		stx->stx_mask = STATX_BASIC_STATS | STATX_BTIME;
+		stx->stx_mode = single_file.mode;
+		stx->stx_nlink = 1;
+		stx->stx_size = single_file.isize;
+		stx->stx_blksize = single_file.blocksize;
+		stx->stx_blocks = howmany(single_file.isize, 512);
+		sf_statx_time(&stx->stx_atime, &single_file.atime);
+		sf_statx_time(&stx->stx_mtime, &single_file.mtime);
+		sf_statx_time(&stx->stx_ctime, &single_file.ctime);
+		/* the birth time is the startup time */
+		sf_statx_time(&stx->stx_btime, &startup_time);
+		break;
+	default:
+		return false;
+	}
+	stx->stx_ino = ino;
+
+	sf_set_statx_attr(stx, STATX_ATTR_IMMUTABLE, single_file.ro);
+	sf_statx_directio(stx);
+
+	return true;
+}
+
+/*
+ * statx handler.  If the caller forces a sync and the single file has a
+ * backing fd, the backing fd is flushed first and a flush error is reported
+ * instead of the attributes.  Unknown inodes get ENOENT.
+ */
+void single_file_ll_statx(fuse_req_t req, fuse_ino_t ino, int flags, int mask,
+			  struct fuse_file_info *fi)
+{
+	struct statx stx = { };
+	bool want_sync;
+	bool found;
+
+	(void)fi;
+
+	want_sync = is_single_file_ino(ino) &&
+		    single_file.backing_fd >= 0 &&
+		    (flags & AT_STATX_FORCE_SYNC);
+	if (want_sync && fsync(single_file.backing_fd)) {
+		fuse_reply_err(req, errno);
+		return;
+	}
+
+	pthread_mutex_lock(&single_file.lock);
+	found = sf_statx(ino, mask, &stx);
+	pthread_mutex_unlock(&single_file.lock);
+
+	if (found)
+		fuse_reply_statx(req, 0, &stx, 0.0);
+	else
+		fuse_reply_err(req, ENOENT);
+}
+#else
+/* statx handler for builds without statx support: always replies ENOSYS. */
+void single_file_ll_statx(fuse_req_t req, fuse_ino_t ino, int flags, int mask,
+			  struct fuse_file_info *fi)
+{
+	(void)ino;
+	(void)flags;
+	(void)mask;
+	(void)fi;
+
+	fuse_reply_err(req, ENOSYS);
+}
+#endif /* STATX_BASIC_STATS */
+
+/*
+ * statfs handler.  Describes a completely full filesystem that holds one
+ * file and single_file.blocks blocks of single_file.blocksize bytes, and that
+ * is flagged read-only when the file is read-only.
+ */
+void single_file_ll_statfs(fuse_req_t req, fuse_ino_t ino)
+{
+	struct statvfs buf;
+
+	(void)ino;
+
+	/*
+	 * Start from all zeroes so that no member of the structure is passed
+	 * on uninitialized; the free-space counters and f_frsize stay zero.
+	 */
+	memset(&buf, 0, sizeof(buf));
+
+	pthread_mutex_lock(&single_file.lock);
+	buf.f_bsize = single_file.blocksize;
+	buf.f_blocks = single_file.blocks;
+	buf.f_files = 1;
+	buf.f_fsid = 0x50C00L;
+	if (single_file.ro)
+		buf.f_flag |= ST_RDONLY;
+	buf.f_namemax = 255;
+	pthread_mutex_unlock(&single_file.lock);
+
+	fuse_reply_statfs(req, &buf);
+}
+
+/* getattr handler: replies with the inode's attributes, or ENOENT. */
+void single_file_ll_getattr(fuse_req_t req, fuse_ino_t ino,
+			    struct fuse_file_info *fi)
+{
+	struct single_file_stat llstat;
+	bool found;
+
+	(void) fi;
+
+	memset(&llstat, 0, sizeof(llstat));
+
+	pthread_mutex_lock(&single_file.lock);
+	found = sf_stat(ino, &llstat);
+	pthread_mutex_unlock(&single_file.lock);
+
+	if (found) {
+		fuse_reply_attr(req, &llstat.entry.attr,
+				llstat.entry.attr_timeout);
+		return;
+	}
+
+	fuse_reply_err(req, ENOENT);
+}
+
+/*
+ * Read the current wall-clock time.  Falls back to time() with whole-second
+ * resolution if the realtime clock cannot be read.
+ */
+static void get_now(struct timespec *now)
+{
+#ifdef CLOCK_REALTIME
+	if (clock_gettime(CLOCK_REALTIME, now) == 0)
+		return;
+#endif
+
+	now->tv_nsec = 0;
+	now->tv_sec = time(NULL);
+}
+
+/*
+ * setattr handler.  Only the permission bits and the timestamps of the
+ * single file can be changed, and only if the file is not read-only; every
+ * other request is refused with EPERM.  On success the reply is the same as
+ * for getattr.
+ */
+void single_file_ll_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr,
+			    int to_set, struct fuse_file_info *fi)
+{
+	const int forbidden = FUSE_SET_ATTR_UID | FUSE_SET_ATTR_GID |
+			      FUSE_SET_ATTR_SIZE;
+	struct timespec now;
+
+	if (ino != SINGLE_FILE_INO || (to_set & forbidden) || single_file.ro) {
+		fuse_reply_err(req, EPERM);
+		return;
+	}
+
+	get_now(&now);
+
+	pthread_mutex_lock(&single_file.lock);
+	if (to_set & FUSE_SET_ATTR_MODE) {
+		/* keep the file type, take everything else from the caller */
+		single_file.mode = (single_file.mode & S_IFMT) |
+				   (attr->st_mode & ~S_IFMT);
+	}
+	if (to_set & FUSE_SET_ATTR_ATIME)
+		single_file.atime = (to_set & FUSE_SET_ATTR_ATIME_NOW) ?
+					now : attr->st_atim;
+	if (to_set & FUSE_SET_ATTR_MTIME)
+		single_file.mtime = (to_set & FUSE_SET_ATTR_MTIME_NOW) ?
+					now : attr->st_mtim;
+	if (to_set & FUSE_SET_ATTR_CTIME)
+		single_file.ctime = now;
+	pthread_mutex_unlock(&single_file.lock);
+
+	single_file_ll_getattr(req, ino, fi);
+}
+
+/*
+ * lookup handler: only the single file in the root directory can be found;
+ * every other name gets ENOENT.
+ */
+void single_file_ll_lookup(fuse_req_t req, fuse_ino_t parent, const char *name)
+{
+	struct single_file_stat llstat;
+	bool found = false;
+
+	if (is_single_file_child(parent, name)) {
+		memset(&llstat, 0, sizeof(llstat));
+
+		pthread_mutex_lock(&single_file.lock);
+		found = sf_stat(SINGLE_FILE_INO, &llstat);
+		pthread_mutex_unlock(&single_file.lock);
+	}
+
+	if (found)
+		fuse_reply_entry(req, &llstat.entry);
+	else
+		fuse_reply_err(req, ENOENT);
+}
+
+/*
+ * open handler: refuses anything that is not the single file with EISDIR and
+ * refuses write access to a read-only file with EROFS.
+ */
+void single_file_ll_open(fuse_req_t req, fuse_ino_t ino,
+			 struct fuse_file_info *fi)
+{
+	if (ino != SINGLE_FILE_INO) {
+		fuse_reply_err(req, EISDIR);
+		return;
+	}
+
+	if (single_file.ro && (fi->flags & O_ACCMODE) != O_RDONLY) {
+		fuse_reply_err(req, EROFS);
+		return;
+	}
+
+	fuse_reply_open(req, fi);
+}
+
+/*
+ * fsync handler: flushes the backing fd for the single file and reports the
+ * result.  Any other inode is a successful no-op.  @datasync is ignored.
+ */
+void single_file_ll_fsync(fuse_req_t req, fuse_ino_t ino, int datasync,
+			  struct fuse_file_info *fi)
+{
+	int err = 0;
+
+	(void)datasync;
+	(void)fi;
+
+	if (ino == SINGLE_FILE_INO && fsync(single_file.backing_fd))
+		err = errno;
+
+	fuse_reply_err(req, err);
+}
+
+/* Shift *num left by @bits unless that would lose significant bits. */
+static bool parse_num_shl(unsigned long long *num, unsigned int bits)
+{
+	if (*num > (~0ULL >> bits))
+		return false;
+	*num <<= bits;
+	return true;
+}
+
+/*
+ * Parse a number that may be followed by one unit suffix character.
+ *
+ * The number is read with strtoull() base detection.  The suffixes T, G, M
+ * and K (either case) each scale by a further factor of 1024, with the final
+ * K step depending on @log_block_size: if it is negative the value is
+ * multiplied by 1024, otherwise it is shifted right by @log_block_size.  The
+ * suffix s multiplies by 512 if @log_block_size is negative, otherwise it
+ * shifts right by 1 + @log_block_size.  A number without suffix is returned
+ * unchanged.
+ *
+ * @return the scaled value, or 0 if the string is malformed, does not fit in
+ *         an unsigned long long, or overflows while being scaled up
+ */
+unsigned long long parse_num_blocks(const char *arg, int log_block_size)
+{
+	char *p;
+	unsigned long long num;
+
+	errno = 0;
+	num = strtoull(arg, &p, 0);
+	if (errno == ERANGE)
+		return 0;
+
+	/* at most one suffix character may follow the digits */
+	if (p[0] && p[1])
+		return 0;
+
+	switch (*p) {
+	case 'T': case 't':
+		if (!parse_num_shl(&num, 10))
+			return 0;
+		fallthrough;
+	case 'G': case 'g':
+		if (!parse_num_shl(&num, 10))
+			return 0;
+		fallthrough;
+	case 'M': case 'm':
+		if (!parse_num_shl(&num, 10))
+			return 0;
+		fallthrough;
+	case 'K': case 'k':
+		if (log_block_size < 0) {
+			if (!parse_num_shl(&num, 10))
+				return 0;
+		} else {
+			num >>= log_block_size;
+		}
+		break;
+	case 's':
+		if (log_block_size < 0) {
+			if (!parse_num_shl(&num, 9))
+				return 0;
+		} else {
+			num >>= (1+log_block_size);
+		}
+		break;
+	case '\0':
+		break;
+	default:
+		return 0;
+	}
+	return num;
+}
+
+/*
+ * Parse and store the block size option.  The value must be a power of two
+ * that is at least 512 and no larger than INT32_MAX.
+ * Returns 0 on success, -1 after printing a message on failure.
+ */
+static int single_file_set_blocksize(const char *arg)
+{
+	const unsigned long long bs = parse_num_blocks(arg, -1);
+	const bool in_range = bs >= 512 && bs <= INT32_MAX;
+	const bool is_pow2 = (bs & (bs - 1)) == 0;
+
+	if (in_range && is_pow2) {
+		/* do not pass through to libfuse */
+		single_file.blocksize = bs;
+		return 0;
+	}
+
+	fprintf(stderr, "%s: block size must be power of two between 512 and 2G.\n",
+		arg);
+	return -1;
+}
+
+/*
+ * Parse and store the file size option.  The value must be a nonzero
+ * multiple of 512 that does not exceed INT64_MAX.
+ * Returns 0 on success, -1 after printing a message on failure.
+ */
+static int single_file_set_size(const char *arg)
+{
+	const unsigned long long sz = parse_num_blocks(arg, -1);
+	const bool aligned = (sz & 511) == 0;
+
+	if (sz >= 1 && aligned && sz <= INT64_MAX) {
+		/* do not pass through to libfuse */
+		single_file.isize = sz;
+		return 0;
+	}
+
+	fprintf(stderr, "%s: size must be multiple of 512 and larger than zero.\n",
+		arg);
+	return -1;
+}
+
+/*
+ * Option callback.  Records the options that this code understands in the
+ * global single_file state.
+ *
+ * @return 1 to keep the option (ro, rw and anything not handled here), 0 to
+ *         drop it, or -1 if a blocksize or size value is invalid
+ */
+int single_file_opt_proc(void *data, const char *arg, int key,
+			 struct fuse_args *outargs)
+{
+	(void)data;
+	(void)outargs;
+
+	switch (key) {
+	case SINGLE_FILE_RO:
+	case SINGLE_FILE_RW:
+		/* pass through to libfuse */
+		single_file.ro = (key == SINGLE_FILE_RO);
+		return 1;
+	case SINGLE_FILE_REQUIRE_BDEV:
+		single_file.require_bdev = true;
+		return 0;
+	case SINGLE_FILE_DIO:
+	case SINGLE_FILE_NODIO:
+		single_file.allow_dio = (key == SINGLE_FILE_DIO);
+		return 0;
+	case SINGLE_FILE_SYNC:
+	case SINGLE_FILE_NOSYNC:
+		single_file.sync = (key == SINGLE_FILE_SYNC);
+		return 0;
+	case SINGLE_FILE_BLOCKSIZE:
+		/*
+		 * NOTE(review): presumably skips a 10 character option name
+		 * prefix such as "blocksize=" - confirm against the option
+		 * table.
+		 */
+		return single_file_set_blocksize(&arg[10]);
+	case SINGLE_FILE_SIZE:
+		/* NOTE(review): likewise, presumably skips "size=" */
+		return single_file_set_size(&arg[5]);
+	default:
+		return 1;
+	}
+}
+
+/*
+ * Ask the fuse service to open @path and store the resulting fd as the
+ * backing fd.  A read-write open that is refused with EPERM, EACCES or EROFS
+ * is retried read-only, in which case the file is switched to read-only.
+ *
+ * @return 0 on success, the error of the request/receive call if one of them
+ *         fails, or -1 after printing a message if the file cannot be opened
+ */
+int single_file_service_open(struct fuse_service *sf, const char *path)
+{
+	int open_flags = single_file.ro ? O_RDONLY : O_RDWR;
+	int fd;
+	int ret;
+
+	for (;;) {
+		if (single_file.require_bdev)
+			ret = fuse_service_request_blockdev(sf, path,
+						open_flags | O_EXCL, 0, 0,
+						single_file.blocksize);
+		else
+			ret = fuse_service_request_file(sf, path,
+						open_flags | O_EXCL, 0, 0);
+		if (ret)
+			return ret;
+
+		/* only true on the retry of a read-write open */
+		if (open_flags == O_RDONLY && !single_file.ro)
+			single_file.ro = true;
+
+		ret = fuse_service_receive_file(sf, path, &fd);
+		if (ret)
+			return ret;
+
+		/* downgrade from rw to ro if necessary */
+		if (open_flags != O_RDWR)
+			break;
+		if (fd != -EPERM && fd != -EACCES && fd != -EROFS)
+			break;
+		open_flags = O_RDONLY;
+	}
+
+	if (fd < 0) {
+		fprintf(stderr, "%s: opening file: %s.\n",
+			path, strerror(-fd));
+		return -1;
+	}
+
+	single_file.backing_fd = fd;
+	return 0;
+}
+
+/*
+ * Clamp an access of *count bytes at @pos so that it ends no later than
+ * single_file.isize.
+ *
+ * @return -EFBIG if @pos is at or past single_file.isize, otherwise 0 with
+ *         *count possibly reduced
+ */
+int single_file_check_write(off_t pos, size_t *count)
+{
+ if (pos >= single_file.isize)
+ return -EFBIG;
+
+ /* cap the length first so that the subtraction below cannot go below zero */
+ if (*count > single_file.isize)
+ *count = single_file.isize;
+ /* trim a request that reaches the end of the file */
+ if (pos >= single_file.isize - *count)
+ *count = single_file.isize - pos;
+
+ return 0;
+}
+
+/*
+ * Clamp a read the same way as a write, except that a read starting at or
+ * past the end of the file becomes a read of zero bytes instead of an error.
+ */
+void single_file_check_read(off_t pos, size_t *count)
+{
+	if (single_file_check_write(pos, count) != 0)
+		*count = 0;
+}
+
+/*
+ * Write to the backing fd, continuing after short writes until pwrite()
+ * returns zero or fails.
+ *
+ * If anything was written, the backing fd is flushed when the sync option is
+ * set, and the modification and change times are updated.
+ *
+ * @return the number of bytes written if any were written, otherwise 0 or a
+ *         negative errno; also a negative errno if the flush fails
+ */
+ssize_t single_file_pwrite(const char *buf, size_t count, off_t pos)
+{
+	struct timespec now;
+	ssize_t done = 0;
+	ssize_t n;
+
+	for (;;) {
+		n = pwrite(single_file.backing_fd, buf, count, pos);
+		if (n <= 0)
+			break;
+
+		done += n;
+		pos += n;
+		buf += n;
+		count -= n;
+	}
+
+	if (done <= 0)
+		return n < 0 ? -errno : 0;
+
+	if (single_file.sync && fsync(single_file.backing_fd) < 0)
+		return -errno;
+
+	get_now(&now);
+
+	pthread_mutex_lock(&single_file.lock);
+	single_file.mtime = now;
+	single_file.ctime = now;
+	pthread_mutex_unlock(&single_file.lock);
+
+	return done;
+}
+
+/*
+ * Read from the backing fd, continuing after short reads until pread()
+ * returns zero or fails.
+ *
+ * @return the number of bytes read if any were read, otherwise 0 or a
+ *         negative errno
+ */
+ssize_t single_file_pread(char *buf, size_t count, off_t pos)
+{
+	ssize_t done = 0;
+	ssize_t n;
+
+	for (;;) {
+		n = pread(single_file.backing_fd, buf, count, pos);
+		if (n <= 0)
+			break;
+
+		done += n;
+		pos += n;
+		buf += n;
+		count -= n;
+	}
+
+	if (done)
+		return done;
+	return n < 0 ? -errno : 0;
+}
+
+/*
+ * Work out the geometry of the exported file from the backing fd and the
+ * blocksize/size options, then finish with single_file_configure_simple().
+ *
+ * The block size defaults to the page size and the file size defaults to the
+ * size of the backing file or device.  The final file size is rounded down
+ * to a multiple of the block size.
+ *
+ * @param device    name of the backing file, used in error messages
+ * @param filename  passed on to single_file_configure_simple()
+ * @return 0 on success, -1 after printing a message on failure
+ */
+int single_file_configure(const char *device, const char *filename)
+{
+	struct stat stbuf;
+	unsigned long long backing_size;
+	unsigned int proposed_blocksize;
+	int lbasize;
+	int ret;
+
+	ret = fstat(single_file.backing_fd, &stbuf);
+	if (ret) {
+		perror(device);
+		return -1;
+	}
+	lbasize = stbuf.st_blksize;
+	backing_size = stbuf.st_size;
+
+	/* for block devices, ask the device itself where possible */
+	if (S_ISBLK(stbuf.st_mode)) {
+#ifdef BLKSSZGET
+		ret = ioctl(single_file.backing_fd, BLKSSZGET, &lbasize);
+		if (ret) {
+			perror(device);
+			return -1;
+		}
+#endif
+
+#ifdef BLKGETSIZE64
+		ret = ioctl(single_file.backing_fd, BLKGETSIZE64, &backing_size);
+		if (ret) {
+			perror(device);
+			return -1;
+		}
+#endif
+	}
+
+	if (backing_size == 0) {
+		fprintf(stderr, "%s: backing file size zero?\n", device);
+		return -1;
+	}
+
+	if (lbasize == 0) {
+		fprintf(stderr, "%s: blocksize zero?\n", device);
+		return -1;
+	}
+
+	proposed_blocksize = single_file.blocksize ? single_file.blocksize :
+						     sysconf(_SC_PAGESIZE);
+	/* a block must not be smaller than what the backing store reports */
+	if (lbasize > proposed_blocksize) {
+		fprintf(stderr, "%s: lba size %d larger than blocksize %u\n",
+			device, lbasize, proposed_blocksize);
+		return -1;
+	}
+
+	if (single_file.isize % proposed_blocksize > 0) {
+		fprintf(stderr, "%s: size parameter %llu not congruent with blocksize %u\n",
+			device, (unsigned long long)single_file.isize,
+			proposed_blocksize);
+		return -1;
+	}
+
+	if (single_file.isize > backing_size) {
+		fprintf(stderr, "%s: file size %llu smaller than size param %llu\n",
+			device, backing_size,
+			(unsigned long long)single_file.isize);
+		return -1;
+	}
+
+	if (!single_file.blocksize)
+		single_file.blocksize = proposed_blocksize;
+	if (!single_file.isize)
+		single_file.isize = backing_size;
+
+	single_file.isize = round_down(single_file.isize, single_file.blocksize);
+	single_file.blocks = single_file.isize / single_file.blocksize;
+
+	return single_file_configure_simple(filename);
+}
+
+/*
+ * Finish the configuration: default the block size to the page size, take a
+ * private copy of @filename (if given) as the name of the file, stamp all
+ * times with the current time, and add write permission unless the file is
+ * read-only.
+ *
+ * @return 0 on success, -1 if the name cannot be copied
+ */
+int single_file_configure_simple(const char *filename)
+{
+	if (single_file.blocksize == 0)
+		single_file.blocksize = sysconf(_SC_PAGESIZE);
+
+	if (filename != NULL) {
+		char *copy = strdup(filename);
+
+		if (copy == NULL) {
+			perror(filename);
+			return -1;
+		}
+
+		/* release a name installed by an earlier call */
+		if (single_file_name_set)
+			free((void *)single_file_name);
+		single_file_name_set = true;
+		single_file_name = copy;
+	}
+
+	get_now(&startup_time);
+	single_file.ctime = startup_time;
+	single_file.mtime = startup_time;
+	single_file.atime = startup_time;
+
+	if (!single_file.ro)
+		single_file.mode |= 0220;
+
+	return 0;
+}
+
+/*
+ * Release the backing fd and the heap copy of the file name.
+ *
+ * Afterwards the name is the built-in default again, so that code that
+ * still looks at the name never touches freed memory.
+ */
+void single_file_close(void)
+{
+	/* nothing to close if no backing fd was ever opened */
+	if (single_file.backing_fd >= 0)
+		close(single_file.backing_fd);
+	single_file.backing_fd = -1;
+
+	if (single_file_name_set) {
+		free((void *)single_file_name);
+		single_file_name = "single_file";
+	}
+	single_file_name_set = false;
+}
diff --git a/meson.build b/meson.build
index 827ec45ad3ad75..de038df8d92071 100644
--- a/meson.build
+++ b/meson.build
@@ -77,6 +77,7 @@ endif
if service_socket_perms == ''
service_socket_perms = '0220'
endif
+private_cfg.set('FUSE_SERVICE_SOCKET_DIR_RAW', service_socket_dir)
private_cfg.set_quoted('FUSE_SERVICE_SOCKET_DIR', service_socket_dir)
private_cfg.set('FUSE_SERVICE_SOCKET_PERMS', service_socket_perms)
^ permalink raw reply related
* [PATCH 10/13] mount_service: allow installation as a setuid program
From: Darrick J. Wong @ 2026-04-30 21:17 UTC (permalink / raw)
To: bernd, djwong
Cc: linux-fsdevel, fuse-devel, linux-ext4, miklos, neal, joannelkoong
In-Reply-To: <177758363484.1314717.11777978893472254088.stgit@frogsfrogsfrogs>
From: Darrick J. Wong <djwong@kernel.org>
Allow installation of the mount service helper as a setuid program so
that regular users can access containerized filesystem drivers.
Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
---
README.md | 3 +++
test/ci-build.sh | 14 ++++++++++++++
util/install_helper.sh | 6 ++++++
util/meson.build | 3 ++-
4 files changed, 25 insertions(+), 1 deletion(-)
diff --git a/README.md b/README.md
index 6cf23cd870109a..92b85522e7890a 100644
--- a/README.md
+++ b/README.md
@@ -98,6 +98,9 @@ Security implications
The *fusermount3* program is installed setuid root. This is done to
allow normal users to mount their own filesystem implementations.
+If built, the *fuservicemount3* program will also be installed setuid
+root so that normal users can access containerized filesystem
+implementations.
To limit the harm that malicious users can do this way, *fusermount3*
enforces the following limitations:
diff --git a/test/ci-build.sh b/test/ci-build.sh
index 8b019a0b5e52c1..f6e6c3d9de4e4e 100755
--- a/test/ci-build.sh
+++ b/test/ci-build.sh
@@ -60,11 +60,18 @@ non_sanitized_build()
# libfuse will first try the install path and then system defaults
sudo chmod 4755 ${PREFIX_DIR}/bin/fusermount3
+ test -x "${PREFIX_DIR}/sbin/fuservicemount3" && \
+ sudo chmod 4755 ${PREFIX_DIR}/sbin/fuservicemount3
# also needed for some of the tests
sudo chown root:root util/fusermount3
sudo chmod 4755 util/fusermount3
+ if [ -x util/fuservicemount3 ]; then
+ sudo chown root:root util/fuservicemount3
+ sudo chmod 4755 util/fuservicemount3
+ fi
+
${TEST_CMD}
popd
rm -fr build-${CC}
@@ -101,11 +108,18 @@ sanitized_build()
ninja
sudo env PATH=$PATH ninja install
sudo chmod 4755 ${PREFIX_DIR}/bin/fusermount3
+ test -x "${PREFIX_DIR}/sbin/fuservicemount3" && \
+ sudo chmod 4755 ${PREFIX_DIR}/sbin/fuservicemount3
# also needed for some of the tests
sudo chown root:root util/fusermount3
sudo chmod 4755 util/fusermount3
+ if [ -x util/fuservicemount3 ]; then
+ sudo chown root:root util/fuservicemount3
+ sudo chmod 4755 util/fuservicemount3
+ fi
+
# Test as root and regular user
sudo env PATH=$PATH ${TEST_CMD}
# Cleanup temporary files (since they are now owned by root)
diff --git a/util/install_helper.sh b/util/install_helper.sh
index 76f2b47fe6c8f9..4c6f9dc3dc70aa 100755
--- a/util/install_helper.sh
+++ b/util/install_helper.sh
@@ -11,6 +11,7 @@ bindir="$2"
udevrulesdir="$3"
useroot="$4"
initscriptdir="$5"
+sbindir="$6"
# Both sysconfdir and bindir are absolute paths (since they are joined
# with --prefix in meson.build), but need to be interpreted relative
@@ -31,6 +32,11 @@ if $useroot; then
chown root:root "${DESTDIR}${bindir}/fusermount3"
chmod u+s "${DESTDIR}${bindir}/fusermount3"
+ if [ -e "${DESTDIR}${sbindir}/fuservicemount3" ]; then
+ chown root:root "${DESTDIR}${sbindir}/fuservicemount3"
+ chmod u+s "${DESTDIR}${sbindir}/fuservicemount3"
+ fi
+
if test ! -e "${DESTDIR}/dev/fuse"; then
mkdir -p "${DESTDIR}/dev"
mknod "${DESTDIR}/dev/fuse" -m 0666 c 10 229
diff --git a/util/meson.build b/util/meson.build
index 85b54d5d322dcb..e15dd9bbb0c486 100644
--- a/util/meson.build
+++ b/util/meson.build
@@ -46,4 +46,5 @@ meson.add_install_script('install_helper.sh',
join_paths(get_option('prefix'), get_option('bindir')),
udevrulesdir,
'@0@'.format(get_option('useroot')),
- get_option('initscriptdir'))
+ get_option('initscriptdir'),
+ join_paths(get_option('prefix'), get_option('sbindir')))
^ permalink raw reply related
* [PATCH 09/13] mount.fuse3: integrate systemd service startup
From: Darrick J. Wong @ 2026-04-30 21:17 UTC (permalink / raw)
To: bernd, djwong
Cc: linux-fsdevel, fuse-devel, linux-ext4, miklos, neal, joannelkoong
In-Reply-To: <177758363484.1314717.11777978893472254088.stgit@frogsfrogsfrogs>
From: Darrick J. Wong <djwong@kernel.org>
Teach mount.fuse3 how to start fuse via systemd service, if present.
Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
---
util/mount_service.h | 9 +++
doc/fuservicemount3.8 | 10 +++
meson.build | 3 +
util/fuservicemount.c | 47 +++++++++++++
util/meson.build | 14 +++-
util/mount.fuse.c | 171 +++++++++++++++++++++++++++++++++++++++++++------
util/mount_service.c | 43 ++++++++++++
7 files changed, 270 insertions(+), 27 deletions(-)
diff --git a/util/mount_service.h b/util/mount_service.h
index a0b952a15dacf3..ec8008a7f53942 100644
--- a/util/mount_service.h
+++ b/util/mount_service.h
@@ -37,4 +37,13 @@ int mount_service_main(int argc, char *argv[]);
*/
const char *mount_service_subtype(const char *fstype);
+/**
+ * Discover if there is a fuse service socket for the given fuse filesystem type.
+ * The type must not contain a path separator.
+ *
+ * @param fstype the fuse filesystem type (e.g. fuse.Y, fuseblk.Y, or Y)
+ * @return true if available, false if not
+ */
+bool mount_service_present(const char *fstype);
+
#endif /* MOUNT_SERVICE_H_ */
diff --git a/doc/fuservicemount3.8 b/doc/fuservicemount3.8
index e45d6a89c8b81a..aa2167cb4872c6 100644
--- a/doc/fuservicemount3.8
+++ b/doc/fuservicemount3.8
@@ -7,12 +7,20 @@ .SH SYNOPSIS
.B mountpoint
.BI -t " fstype"
[
-.I options
+.BI -o " options"
]
+
+.B fuservicemount3
+.BI -t " fstype"
+.B --check
+
.SH DESCRIPTION
Mount a filesystem using a FUSE server that runs as a socket service.
These servers can be contained using the platform's service management
framework.
+
+The second form checks if there is a FUSE service available for the given
+filesystem type.
.SH "AUTHORS"
.LP
The author of the fuse socket service code is Darrick J. Wong <djwong@kernel.org>.
diff --git a/meson.build b/meson.build
index c8326b79fcee8f..827ec45ad3ad75 100644
--- a/meson.build
+++ b/meson.build
@@ -83,7 +83,8 @@ private_cfg.set('FUSE_SERVICE_SOCKET_PERMS', service_socket_perms)
# Test for presence of some functions
test_funcs = [ 'fork', 'fstatat', 'openat', 'readlinkat', 'pipe2',
'splice', 'vmsplice', 'posix_fallocate', 'fdatasync',
- 'utimensat', 'copy_file_range', 'fallocate', 'fspacectl' ]
+ 'utimensat', 'copy_file_range', 'fallocate', 'fspacectl',
+ 'faccessat' ]
foreach func : test_funcs
private_cfg.set('HAVE_' + func.to_upper(),
cc.has_function(func, prefix: include_default, args: args_default))
diff --git a/util/fuservicemount.c b/util/fuservicemount.c
index 9c694a4290f94e..4d4cad6cb9253c 100644
--- a/util/fuservicemount.c
+++ b/util/fuservicemount.c
@@ -9,10 +9,57 @@
* This program wraps the mounting of FUSE filesystems that run in systemd
*/
#define _GNU_SOURCE
+#include <stdbool.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
#include "fuse_config.h"
#include "mount_service.h"
+/*
+ * Implement --check: exit status that says whether a fuse service exists for
+ * @fstype.  A missing type is reported on stderr and counts as failure.
+ */
+static int check_service(const char *fstype)
+{
+	if (fstype)
+		return mount_service_present(fstype) ? EXIT_SUCCESS :
+						       EXIT_FAILURE;
+
+	fprintf(stderr,
+		"fuservicemount: expected fs type for --check\n");
+	return EXIT_FAILURE;
+}
+
+/*
+ * Entry point: answer a "-t FSTYPE --check" query, or else hand the whole
+ * command line to mount_service_main().
+ */
int main(int argc, char *argv[])
{
+ char *fstype = NULL;
+ bool check = false;
+ int i;
+
+ /*
+ * If the user passes us exactly the args -t FSTYPE --check then
+ * we'll just check if there's a service for the FSTYPE fuse server.
+ * This doesn't tell us if the listening socket is actually connected
+ * to anything.
+ */
+ for (i = 1; i < argc; i++) {
+ if (!strcmp(argv[i], "--check")) {
+ /* a repeated --check means this is not a check request */
+ if (check) {
+ check = false;
+ break;
+ }
+ check = true;
+ } else if (!strcmp(argv[i], "-t") && i + 1 < argc) {
+ /* likewise for a repeated -t */
+ if (fstype) {
+ check = false;
+ break;
+ }
+ fstype = argv[i + 1];
+ i++;
+ } else {
+ /* any other argument means this is a normal mount */
+ check = false;
+ break;
+ }
+ }
+ if (check)
+ return check_service(fstype);
+
return mount_service_main(argc, argv);
}
diff --git a/util/meson.build b/util/meson.build
index aa646ef3c77d16..85b54d5d322dcb 100644
--- a/util/meson.build
+++ b/util/meson.build
@@ -6,21 +6,27 @@ executable('fusermount3', ['fusermount.c', '../lib/mount_util.c', '../lib/util.c
install_dir: get_option('bindir'),
c_args: '-DFUSE_CONF="@0@"'.format(fuseconf_path))
+mount_service_sources = []
+mount_service_cflags = []
if private_cfg.get('HAVE_SERVICEMOUNT', false)
- executable('fuservicemount3', ['mount_service.c', 'fuservicemount.c', '../lib/mount_util.c', 'fuser_conf.c'],
+ mount_service_sources += ['mount_service.c', '../lib/mount_util.c', 'fuser_conf.c']
+ mount_service_cflags += ['-DFUSE_CONF="@0@"'.format(fuseconf_path)]
+ fuservicemount_path = join_paths(get_option('prefix'), get_option('sbindir'))
+ mount_service_cflags += ['-DFUSERVICEMOUNT_DIR="@0@"'.format(fuservicemount_path)]
+ executable('fuservicemount3', ['fuservicemount.c'] + mount_service_sources,
include_directories: include_dirs,
link_with: [ libfuse ],
install: true,
install_dir: get_option('sbindir'),
- c_args: ['-DFUSE_USE_VERSION=319', '-DFUSE_CONF="@0@"'.format(fuseconf_path)])
+ c_args: ['-DFUSE_USE_VERSION=319'] + mount_service_cflags)
endif
-executable('mount.fuse3', ['mount.fuse.c'],
+executable('mount.fuse3', ['mount.fuse.c'] + mount_service_sources,
include_directories: include_dirs,
link_with: [ libfuse ],
install: true,
install_dir: get_option('sbindir'),
- c_args: '-DFUSE_USE_VERSION=317')
+ c_args: ['-DFUSE_USE_VERSION=319'] + mount_service_cflags)
udevrulesdir = get_option('udevrulesdir')
diff --git a/util/mount.fuse.c b/util/mount.fuse.c
index 382f77165a903c..802d7f99fe7815 100644
--- a/util/mount.fuse.c
+++ b/util/mount.fuse.c
@@ -6,6 +6,9 @@
See the file GPL2.txt.
*/
+/* For environ */
+#define _GNU_SOURCE
+
#include "fuse_config.h"
#include <stdio.h>
@@ -17,6 +20,9 @@
#include <fcntl.h>
#include <pwd.h>
#include <sys/wait.h>
+#ifdef HAVE_SERVICEMOUNT
+#include <spawn.h>
+#endif
#ifdef linux
#include <sys/prctl.h>
@@ -49,6 +55,9 @@
#endif
#include "fuse.h"
+#ifdef HAVE_SERVICEMOUNT
+# include "mount_service.h"
+#endif
static char *progname;
@@ -233,6 +242,107 @@ static void drop_and_lock_capabilities(void)
}
#endif
+#ifdef HAVE_SERVICEMOUNT
+#define FUSERVICEMOUNT_PROG "fuservicemount3"
+
+/*
+ * Run the fuservicemount3 helper with the given argument vector and wait for
+ * it to finish.
+ *
+ * @return the exit status of the helper, EXIT_FAILURE if it did not exit
+ *         normally, or MOUNT_SERVICE_FALLBACK_NEEDED if it could not be
+ *         started or waited for
+ */
+static int mount_service_child(char **argv)
+{
+	const char *install_path = FUSERVICEMOUNT_DIR "/" FUSERVICEMOUNT_PROG;
+	pid_t pid;
+	int status;
+	int err;
+
+	/*
+	 * First try the install path, then a system install, just like we do
+	 * for fusermount.  See man 7 environ for the global environ pointer.
+	 */
+	err = posix_spawn(&pid, install_path, NULL, NULL, argv, environ);
+	if (err != 0)
+		err = posix_spawnp(&pid, FUSERVICEMOUNT_PROG, NULL, NULL,
+				   argv, environ);
+	if (err != 0) {
+		fprintf(stderr, "%s: could not start %s helper: %s\n",
+			argv[0], FUSERVICEMOUNT_PROG, strerror(err));
+		return MOUNT_SERVICE_FALLBACK_NEEDED;
+	}
+
+	/* restart the wait whenever a signal interrupts it */
+	while (waitpid(pid, &status, 0) < 0) {
+		if (errno == EINTR)
+			continue;
+
+		fprintf(stderr, "%s: could not wait for %s helper: %s\n",
+			argv[0], FUSERVICEMOUNT_PROG, strerror(errno));
+		return MOUNT_SERVICE_FALLBACK_NEEDED;
+	}
+
+	/* terminated due to signal or coredump */
+	if (!WIFEXITED(status))
+		return EXIT_FAILURE;
+
+	return WEXITSTATUS(status);
+}
+
+/*
+ * Try to mount through a fuse service.
+ *
+ * Builds the command line "argv0 [source] mountpoint -t fstype [-o options]"
+ * and either runs the mount code directly (as root) or runs the
+ * fuservicemount3 helper with it (as any other user).
+ *
+ * @return MOUNT_SERVICE_FALLBACK_NEEDED if there is no service for @fstype,
+ *         -1 if memory runs out, otherwise the result of the mount attempt
+ */
+static int try_service_main(char *argv0, char *fstype, char *source,
+			    const char *mountpoint, char *options)
+{
+	char opt_flag[] = "-o";
+	char type_flag[] = "-t";
+	char **argv;
+	char *mntpt;
+	int argc;
+	int n = 0;
+	int ret;
+
+	if (!mount_service_present(fstype))
+		return MOUNT_SERVICE_FALLBACK_NEEDED;
+
+	/* This can be an empty string if "mount.fuse3 null# /tmp/a" */
+	if (source && *source == '\0')
+		source = NULL;
+
+	mntpt = strdup(mountpoint);
+	if (!mntpt) {
+		perror("mountpoint allocation");
+		return -1;
+	}
+
+	/* argv[0], mountpoint, -t type and the trailing NULL are always there */
+	argc = 5 + (source ? 1 : 0) + (options ? 2 : 0);
+
+	argv = calloc(argc, sizeof(char *));
+	if (!argv) {
+		perror("argv allocation");
+		free(mntpt);
+		return -1;
+	}
+
+	argv[n++] = argv0;
+	if (source)
+		argv[n++] = source;
+	argv[n++] = mntpt;
+	argv[n++] = type_flag;
+	argv[n++] = fstype;
+	if (options) {
+		argv[n++] = opt_flag;
+		argv[n++] = options;
+	}
+	argv[n] = NULL;
+
+	if (getuid() == 0) {
+		/* We're root, just do the mount directly. */
+		ret = mount_service_main(argc - 1, argv);
+	} else {
+		ret = mount_service_child(argv);
+	}
+
+	free(argv);
+	free(mntpt);
+	return ret;
+}
+#endif
+
int main(int argc, char *argv[])
{
char *type = NULL;
@@ -280,9 +390,7 @@ int main(int argc, char *argv[])
mountpoint = argv[2];
for (i = 3; i < argc; i++) {
- if (strcmp(argv[i], "-v") == 0) {
- continue;
- } else if (strcmp(argv[i], "-t") == 0) {
+ if (strcmp(argv[i], "-t") == 0) {
i++;
if (i == argc) {
@@ -303,6 +411,30 @@ int main(int argc, char *argv[])
progname);
exit(1);
}
+ }
+ }
+
+ if (!type) {
+ if (source) {
+ dup_source = xstrdup(source);
+ type = dup_source;
+ source = strchr(type, '#');
+ if (source)
+ *source++ = '\0';
+ if (!type[0]) {
+ fprintf(stderr, "%s: empty filesystem type\n",
+ progname);
+ exit(1);
+ }
+ } else {
+ fprintf(stderr, "%s: empty source\n", progname);
+ exit(1);
+ }
+ }
+
+ for (i = 3; i < argc; i++) {
+ if (strcmp(argv[i], "-v") == 0) {
+ continue;
} else if (strcmp(argv[i], "-o") == 0) {
char *opts;
const char *opt;
@@ -366,24 +498,6 @@ int main(int argc, char *argv[])
if (suid)
options = add_option("suid", options);
- if (!type) {
- if (source) {
- dup_source = xstrdup(source);
- type = dup_source;
- source = strchr(type, '#');
- if (source)
- *source++ = '\0';
- if (!type[0]) {
- fprintf(stderr, "%s: empty filesystem type\n",
- progname);
- exit(1);
- }
- } else {
- fprintf(stderr, "%s: empty source\n", progname);
- exit(1);
- }
- }
-
if (setuid_name && setuid_name[0]) {
#ifdef linux
if (drop_privileges) {
@@ -429,6 +543,21 @@ int main(int argc, char *argv[])
drop_and_lock_capabilities();
}
#endif
+
+#ifdef HAVE_SERVICEMOUNT
+ /*
+ * Now that we know the desired filesystem type, see if we can find
+ * a socket service implementing that, if we haven't selected any weird
+ * options that would prevent that.
+ */
+ if (!pass_fuse_fd && !(setuid_name && setuid_name[0])) {
+ int ret = try_service_main(argv[0], type, source, mountpoint,
+ options);
+ if (ret != MOUNT_SERVICE_FALLBACK_NEEDED)
+ return ret;
+ }
+#endif
+
add_arg(&command, type);
if (source)
add_arg(&command, source);
diff --git a/util/mount_service.c b/util/mount_service.c
index 95de56f2b625fe..bc5940bc900dad 100644
--- a/util/mount_service.c
+++ b/util/mount_service.c
@@ -2066,3 +2066,46 @@ int mount_service_main(int argc, char *argv[])
mount_service_destroy(&mo);
return ret;
}
+
+bool mount_service_present(const char *fstype)
+{
+ struct sockaddr_un name;
+ struct stat stbuf;
+ char path[PATH_MAX];
+ const char *subtype;
+ ssize_t written;
+ int ret;
+
+ subtype = mount_service_subtype(fstype);
+ if (!subtype)
+ return false;
+
+ /*
+ * The full path to the socket must fit within the AF_UNIX socket path
+ * buffer, which is much shorter than PATH_MAX.
+ */
+ written = snprintf(path, sizeof(path), FUSE_SERVICE_SOCKET_DIR "/%s",
+ subtype);
+ if (written >= sizeof(name.sun_path) || written >= sizeof(path))
+ return false;
+
+ ret = stat(path, &stbuf);
+ if (ret)
+ return false;
+
+ if (!S_ISSOCK(stbuf.st_mode))
+ return false;
+
+#ifdef HAVE_FACCESSAT
+ /*
+ * Can we write to the service socket with the real uid? This accounts
+ * for setuid and ACLs on the socket. Note that we connect() to the
+ * socket having dropped setuid privileges.
+ */
+ ret = faccessat(AT_FDCWD, path, W_OK, 0);
+#else
+ /* Can we write to the service socket with the real uid? */
+ ret = access(path, W_OK);
+#endif
+ return ret == 0;
+}
^ permalink raw reply related
* [PATCH 08/13] mount_service: enable unprivileged users in a similar manner as fusermount
From: Darrick J. Wong @ 2026-04-30 21:17 UTC (permalink / raw)
To: bernd, djwong
Cc: linux-fsdevel, fuse-devel, linux-ext4, miklos, neal, joannelkoong
In-Reply-To: <177758363484.1314717.11777978893472254088.stgit@frogsfrogsfrogs>
From: Darrick J. Wong <djwong@kernel.org>
Some Linux distributions allow unprivileged users to mount fuse
filesystems through the use of the setuid fusermount helper program. It
would be useful to provide similar functionality when mounting a
filesystem that runs as a systemd service.
Therefore, read the fuse config file and implement the same checks as
fusermount. The only new requirement is that the unprivileged user must
be able to open the mountpoint for write access if it's a regular file,
or must have write access to it if it's a directory.
Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
---
util/mount_service.c | 232 +++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 227 insertions(+), 5 deletions(-)
diff --git a/util/mount_service.c b/util/mount_service.c
index 915a0c4b610792..95de56f2b625fe 100644
--- a/util/mount_service.c
+++ b/util/mount_service.c
@@ -38,6 +38,7 @@
#include "fuse_i.h"
#include "fuse_service_priv.h"
#include "mount_service.h"
+#include "fuser_conf.h"
struct mount_service {
/* prefix for printing error messages */
@@ -313,8 +314,10 @@ static int mount_service_connect(struct mount_service *mo)
if (ret)
return ret;
+ drop_privs();
ret = connect(sockfd, (const struct sockaddr *)&name, sizeof(name));
if (ret && (errno == ENOENT || errno == ECONNREFUSED)) {
+ restore_privs();
fprintf(stderr, "%s: no safe filesystem driver for %s available.\n",
mo->msgtag, mo->subtype);
close(sockfd);
@@ -323,10 +326,12 @@ static int mount_service_connect(struct mount_service *mo)
if (ret) {
int error = errno;
+ restore_privs();
fprintf(stderr, "%s: %s: %s\n",
mo->msgtag, name.sun_path, strerror(error));
goto out;
}
+ restore_privs();
ret = try_drop_passrights(mo, sockfd);
if (ret)
@@ -349,7 +354,7 @@ static int mount_service_send_hello(struct mount_service *mo)
struct fuse_service_hello_reply reply = { };
ssize_t size;
- if (getuid() == 0)
+ if (getuid() == 0 || user_allow_other)
hello.flags |= htonl(FUSE_SERVICE_FLAG_ALLOW_OTHER);
size = __send_packet(mo, &hello, sizeof(hello));
@@ -586,14 +591,17 @@ static int mount_service_send_required_files(struct mount_service *mo,
{
int ret;
+ drop_privs();
mo->fusedevfd = open(fusedev, O_RDWR | O_CLOEXEC);
if (mo->fusedevfd < 0) {
int error = errno;
+ restore_privs();
fprintf(stderr, "%s: %s: %s\n",
mo->msgtag, fusedev, strerror(error));
return -1;
}
+ restore_privs();
ret = mount_service_send_file(mo, FUSE_SERVICE_ARGV, mo->argvfd);
if (ret)
@@ -710,14 +718,17 @@ static int prepare_bdev(struct mount_service *mo,
if (oc->block_size) {
int block_size = ntohl(oc->block_size);
+ drop_privs();
ret = ioctl(fd, BLKBSZSET, &block_size);
if (ret) {
int error = errno;
+ restore_privs();
fprintf(stderr, "%s: %s: %s\n",
mo->msgtag, oc->path, strerror(error));
return -error;
}
+ restore_privs();
}
return 0;
@@ -754,6 +765,7 @@ static int mount_service_open_path(struct mount_service *mo,
}
open_flags = ntohl(oc->open_flags) | O_CLOEXEC;
+ drop_privs();
fd = open(oc->path, open_flags, ntohl(oc->create_mode));
if (fd < 0) {
int error = errno;
@@ -762,11 +774,13 @@ static int mount_service_open_path(struct mount_service *mo,
* Don't print a busy device error report because the
* filesystem might decide to retry.
*/
+ restore_privs();
if (error != EBUSY && !(request_flags & FUSE_SERVICE_OPEN_QUIET))
fprintf(stderr, "%s: %s: %s\n",
mo->msgtag, oc->path, strerror(error));
return mount_service_send_file_error(mo, error, oc->path);
}
+ restore_privs();
if (S_ISBLK(expected_fmt)) {
ret = prepare_bdev(mo, oc, fd);
@@ -994,6 +1008,15 @@ static int mount_service_handle_mntopts_cmd(struct mount_service *mo,
*equals = 0;
}
+ if (getuid() != 0 && !user_allow_other &&
+ (!strcmp(tok, "allow_other") ||
+ !strcmp(tok, "allow_root"))) {
+ fprintf(stderr,
+"%s: option %s only allowed if 'user_allow_other' is set in %s\n",
+ mo->msgtag, tok, FUSE_CONF);
+ return mount_service_send_reply(mo, EPERM);
+ }
+
#ifdef HAVE_NEW_MOUNT_API
if (mo->fsopenfd >= 0) {
int ret;
@@ -1077,19 +1100,64 @@ static int mount_service_handle_mtabopts_cmd(struct mount_service *mo,
return mount_service_send_reply(mo, 0);
}
+static int open_mountpoint(const char *mntpt, bool *require_dir)
+{
+ int ret;
+
+ *require_dir = false;
+
+ if (getuid() == 0) {
+ /*
+ * Open the alleged mountpoint. We're root, so we only bother
+ * checking for readability.
+ */
+ return open(mntpt, O_RDONLY | O_CLOEXEC);
+ }
+
+ /*
+ * Open the alleged mountpoint. For unprivileged callers, we only
+ * allow mounting on paths that the user can write to.
+ */
+ ret = open(mntpt, O_WRONLY | O_CLOEXEC);
+ if (ret >= 0 || errno != EISDIR)
+ return ret;
+
+ /*
+ * However, we can't open directories with write access. Try again in
+ * readonly mode, but require the caller to verify that we actually got
+ * a directory.
+ */
+ *require_dir = true;
+ ret = open(mntpt, O_RDONLY | O_CLOEXEC);
+ if (ret >= 0 || (errno != EACCES && errno != EPERM))
+ return ret;
+
+#ifdef O_PATH
+ /*
+ * If we can't open at all, let's try opening this directory with
+ * O_PATH.
+ */
+ return open(mntpt, O_PATH | O_CLOEXEC);
+#else
+ /* No idea what to do now */
+ errno = EACCES;
+ return -1;
+#endif
+}
+
static int attach_to_mountpoint(struct mount_service *mo, mode_t expected_fmt,
char *mntpt)
{
struct stat stbuf;
char *res_mntpt;
+ bool require_dir;
int mountfd = -1;
int error;
int ret;
- /*
- * Open the alleged mountpoint, make sure it's a dir or a file.
- */
- mountfd = open(mntpt, O_RDONLY | O_CLOEXEC);
+ drop_privs();
+
+ mountfd = open_mountpoint(mntpt, &require_dir);
if (mountfd < 0) {
error = errno;
fprintf(stderr, "%s: %s: %s\n", mo->msgtag, mntpt,
@@ -1117,6 +1185,13 @@ static int attach_to_mountpoint(struct mount_service *mo, mode_t expected_fmt,
goto out_mountfd;
}
+ if (require_dir && !S_ISDIR(stbuf.st_mode)) {
+ error = EACCES;
+ fprintf(stderr, "%s: %s: Mount point must be directory.\n",
+ mo->msgtag, mntpt);
+ goto out_mountfd;
+ }
+
/*
* Resolve the (possibly relative) mountpoint path before chdir'ing
* onto it.
@@ -1193,6 +1268,7 @@ static int attach_to_mountpoint(struct mount_service *mo, mode_t expected_fmt,
mo->mountfd = mountfd;
mo->resv_mountpoint = res_mntpt;
+ restore_privs();
return mount_service_send_reply(mo, 0);
out_res_mntpt:
@@ -1201,6 +1277,7 @@ static int attach_to_mountpoint(struct mount_service *mo, mode_t expected_fmt,
close(mountfd);
out_error:
free(mntpt);
+ restore_privs();
return mount_service_send_reply(mo, error);
}
@@ -1580,6 +1657,141 @@ static int mount_service_fsopen_mount(struct mount_service *mo,
# define mount_service_fsopen_mount(...) (FUSE_MOUNT_FALLBACK_NEEDED)
#endif
+static int check_nonroot_file_access(struct mount_service *mo)
+{
+ struct stat sb1, sb2;
+ int fd;
+ int ret;
+
+ /*
+ * If we already succeeded in opening the file with write access, then
+ * we're good.
+ */
+ ret = fcntl(mo->mountfd, F_GETFL);
+ if (ret < 0) {
+ int error = errno;
+
+ fprintf(stderr, "%s: %s: %s\n", mo->msgtag, mo->mountpoint,
+ strerror(error));
+ return -1;
+ }
+
+ if ((ret & O_ACCMODE) != O_RDONLY)
+ return 0;
+
+ ret = fstat(mo->mountfd, &sb1);
+ if (ret) {
+ int error = errno;
+
+ fprintf(stderr, "%s: %s: %s\n",
+ mo->msgtag, mo->mountpoint, strerror(error));
+ return -1;
+ }
+
+ /* Try to reopen the file with write access this time. */
+ fd = open(mo->real_mountpoint, O_WRONLY | O_CLOEXEC);
+ if (fd < 0) {
+ int error = errno;
+
+ fprintf(stderr, "%s: %s: %s\n",
+ mo->msgtag, mo->mountpoint, strerror(error));
+ return -1;
+ }
+
+ /* Is this the same file? */
+ ret = fstat(fd, &sb2);
+ if (ret) {
+ int error = errno;
+
+ fprintf(stderr, "%s: %s: %s\n",
+ mo->msgtag, mo->mountpoint, strerror(error));
+ goto out_fd;
+ }
+
+ if (sb1.st_dev != sb2.st_dev || sb1.st_ino != sb2.st_ino) {
+ fprintf(stderr, "%s: %s: Mount point moved during fuse startup.\n",
+ mo->msgtag, mo->mountpoint);
+ ret = -1;
+ goto out_fd;
+ }
+
+ /*
+ * We reopened the same file with write access, everything is ok. Swap
+ * the two file descriptors so that we retain our write access.
+ */
+ ret = mo->mountfd;
+ mo->mountfd = fd;
+ fd = ret;
+ ret = 0;
+out_fd:
+ close(fd);
+ return ret;
+}
+
+static void adjust_nonroot_mount_flags(struct mount_service *mo,
+ struct fuse_service_mount_command *oc)
+{
+ const struct mount_flags *mf;
+ uint32_t ms_flags = ntohl(oc->ms_flags);
+
+ /* only care that the unsafe flags are set to the value of @on */
+ for (mf = mount_flags; mf->opt != NULL; mf++) {
+ if (mf->safe)
+ continue;
+ if (!!(ms_flags & mf->flag) == !!mf->on) {
+ ms_flags = (ms_flags & ~mf->flag) |
+ (mf->on ? 0 : mf->flag);
+
+ fprintf(stderr, "%s: unsafe option %s ignored\n",
+ mo->msgtag, mf->opt);
+ }
+ }
+
+ oc->ms_flags = htonl(ms_flags);
+}
+
+/*
+ * fuse.conf can limit the number of unprivileged fuse mounts. For
+ * unprivileged mounts (via setuid) we also require write access to the
+ * mountpoint, and we'll only accept certain underlying filesystems.
+ */
+static int check_nonroot_access(struct mount_service *mo,
+ struct fuse_service_mount_command *oc,
+ const struct stat *stbuf)
+{
+ struct statfs fs_buf;
+ int ret;
+
+ ret = check_nonroot_mount_count(mo->msgtag);
+ if (ret)
+ return -EUSERS;
+
+ ret = fstatfs(mo->mountfd, &fs_buf);
+ if (ret) {
+ int error = errno;
+
+ fprintf(stderr, "%s: %s: %s\n",
+ mo->msgtag, mo->mountpoint, strerror(error));
+ return -error;
+ }
+
+ adjust_nonroot_mount_flags(mo, oc);
+
+ drop_privs();
+ if (S_ISDIR(stbuf->st_mode))
+ ret = check_nonroot_dir_access(mo->msgtag,
+ mo->mountpoint,
+ mo->real_mountpoint,
+ stbuf);
+ else
+ ret = check_nonroot_file_access(mo);
+ if (!ret)
+ ret = check_nonroot_fstype(mo->msgtag, &fs_buf);
+ restore_privs();
+
+ return ret ? -EPERM : 0;
+}
+
static int mount_service_handle_mount_cmd(struct mount_service *mo,
struct fuse_service_packet *p,
size_t psz)
@@ -1621,6 +1833,12 @@ static int mount_service_handle_mount_cmd(struct mount_service *mo,
return mount_service_send_reply(mo, error);
}
+ if (getuid() != 0) {
+ ret = check_nonroot_access(mo, oc, &stbuf);
+ if (ret)
+ return mount_service_send_reply(mo, -ret);
+ }
+
if (mo->fsopenfd >= 0) {
ret = mount_service_fsopen_mount(mo, oc, &stbuf);
if (ret != FUSE_MOUNT_FALLBACK_NEEDED)
@@ -1752,6 +1970,10 @@ int mount_service_main(int argc, char *argv[])
else
mo.msgtag = "mount.service";
+ drop_privs();
+ read_conf(mo.msgtag);
+ restore_privs();
+
ret = mount_service_init(&mo, argc, argv);
if (ret)
return EXIT_FAILURE;
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox