Linux EXT4 FS development
 help / color / mirror / Atom feed
* [PATCH v11 10/15] nfs: Implement fileattr_get for case sensitivity
From: Chuck Lever @ 2026-04-25  1:53 UTC (permalink / raw)
  To: Al Viro, Christian Brauner, Jan Kara
  Cc: linux-fsdevel, linux-ext4, linux-xfs, linux-cifs, linux-nfs,
	linux-api, linux-f2fs-devel, hirofumi, linkinjeon, sj1557.seo,
	yuezhang.mo, almaz.alexandrovich, slava, glaubitz, frank.li,
	tytso, adilger.kernel, cem, sfrench, pc, ronniesahlberg, sprasad,
	trondmy, anna, jaegeuk, chao, hansg, senozhatsky, Chuck Lever,
	Roland Mainz
In-Reply-To: <20260424-case-sensitivity-v11-0-de5619beddaf@oracle.com>

From: Chuck Lever <chuck.lever@oracle.com>

An NFS server re-exporting an NFS mount point needs to report
the case sensitivity behavior of the underlying filesystem to
its clients. NFSD's attribute encoder obtains that information
by calling vfs_fileattr_get() on the lower filesystem, so the
NFS client must implement fileattr_get to surface what it
learned from its own server.

The NFS client already retrieves case sensitivity information
from servers during mount via PATHCONF (NFSv3) or the
FATTR4_CASE_INSENSITIVE/FATTR4_CASE_PRESERVING attributes
(NFSv4). Expose this information through fileattr_get by
reporting the FS_XFLAG_CASEFOLD and FS_XFLAG_CASENONPRESERVING
flags. NFSv2 lacks PATHCONF support, so mounts using that protocol
version default to standard POSIX behavior: case-sensitive and
case-preserving.

PATHCONF is now invoked unconditionally for NFSv2 and NFSv3 mounts
so the case-sensitivity capabilities are established even when
the user pins server->namelen with the namlen= mount option. That
option is orthogonal to case handling, and skipping PATHCONF
because namelen was already known would leave the caps unset.

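The two capability bits carry opposite polarity relative to the
server-reported fields they are derived from (case_insensitive and
case_preserving) because the POSIX defaults of those fields differ:
false for the former, true for the latter. Most servers are
case-sensitive and case-preserving, matching "neither xflag set."
NFS_CAP_CASE_INSENSITIVE is set only when the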
server affirms case insensitivity, so "server said no" and
"server did not answer" both collapse to the case-sensitive
default. NFS_CAP_CASE_NONPRESERVING follows the same pattern in
the opposite direction: set only when the server affirms that it
does not preserve case, so that silence or a missing attribute
lands on the case-preserving default. The NFSv4 probe checks
res.attr_bitmask[0] to distinguish "server said false" from "server
omitted the attribute" before setting the bit.

Reviewed-by: Roland Mainz <roland.mainz@nrubsig.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfs/client.c           | 22 +++++++++++++++++-----
 fs/nfs/inode.c            | 23 +++++++++++++++++++++++
 fs/nfs/internal.h         |  3 +++
 fs/nfs/nfs3proc.c         |  2 ++
 fs/nfs/nfs3xdr.c          |  7 +++++--
 fs/nfs/nfs4proc.c         |  7 +++++--
 fs/nfs/proc.c             |  3 +++
 fs/nfs/symlink.c          |  3 +++
 include/linux/nfs_fs_sb.h |  2 +-
 include/linux/nfs_xdr.h   |  2 ++
 10 files changed, 64 insertions(+), 10 deletions(-)

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index be02bb227741..2f4d41ecfa71 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -933,15 +933,27 @@ static int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, str
 
 	nfs_server_set_fsinfo(server, &fsinfo);
 
-	/* Get some general file system info */
-	if (server->namelen == 0) {
-		struct nfs_pathconf pathinfo;
+	{
+		struct nfs_pathconf pathinfo = { };
 
 		pathinfo.fattr = fattr;
 		nfs_fattr_init(fattr);
 
-		if (clp->rpc_ops->pathconf(server, mntfh, &pathinfo) >= 0)
-			server->namelen = pathinfo.max_namelen;
+		if (clp->rpc_ops->pathconf(server, mntfh, &pathinfo) >= 0) {
+			if (server->namelen == 0)
+				server->namelen = pathinfo.max_namelen;
+			/*
+			 * NFSv4 PATHCONF does not carry the case-sensitivity
+			 * fields; those caps are set from FATTR4_CASE_*
+			 * attributes during the set_capabilities probe.
+			 */
+			if (clp->rpc_ops->version < 4) {
+				if (pathinfo.case_insensitive)
+					server->caps |= NFS_CAP_CASE_INSENSITIVE;
+				if (!pathinfo.case_preserving)
+					server->caps |= NFS_CAP_CASE_NONPRESERVING;
+			}
+		}
 	}
 
 	if (clp->rpc_ops->discover_trunking != NULL &&
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 98a8f0de1199..bce2466552c4 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -41,6 +41,7 @@
 #include <linux/freezer.h>
 #include <linux/uaccess.h>
 #include <linux/iversion.h>
+#include <linux/fileattr.h>
 
 #include "nfs4_fs.h"
 #include "callback.h"
@@ -1101,6 +1102,28 @@ int nfs_getattr(struct mnt_idmap *idmap, const struct path *path,
 }
 EXPORT_SYMBOL_GPL(nfs_getattr);
 
+int nfs_fileattr_get(struct dentry *dentry, struct file_kattr *fa)
+{
+	struct inode *inode = d_inode(dentry);
+
+	/*
+	 * Case handling is a property of the exported filesystem on the
+	 * NFS server, reported to the client at mount via PATHCONF
+	 * (NFSv3) or FATTR4_CASE_INSENSITIVE / FATTR4_CASE_PRESERVING
+	 * (NFSv4). Unlike filesystems that always preserve case, an NFS
+	 * mount may front a backend that does not, so both flags can
+	 * appear.
+	 */
+	if (nfs_server_capable(inode, NFS_CAP_CASE_INSENSITIVE)) {
+		fa->fsx_xflags |= FS_XFLAG_CASEFOLD;
+		fa->flags |= FS_CASEFOLD_FL;
+	}
+	if (nfs_server_capable(inode, NFS_CAP_CASE_NONPRESERVING))
+		fa->fsx_xflags |= FS_XFLAG_CASENONPRESERVING;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nfs_fileattr_get);
+
 static void nfs_init_lock_context(struct nfs_lock_context *l_ctx)
 {
 	refcount_set(&l_ctx->count, 1);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index fc5456377160..309d3f679bb3 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -449,6 +449,9 @@ extern void nfs_set_cache_invalid(struct inode *inode, unsigned long flags);
 extern bool nfs_check_cache_invalid(struct inode *, unsigned long);
 extern int nfs_wait_bit_killable(struct wait_bit_key *key, int mode);
 
+struct file_kattr;
+int nfs_fileattr_get(struct dentry *dentry, struct file_kattr *fa);
+
 #if IS_ENABLED(CONFIG_NFS_LOCALIO)
 /* localio.c */
 struct nfs_local_dio {
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 95d7cd564b74..b80d0c5efc27 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -1053,6 +1053,7 @@ static const struct inode_operations nfs3_dir_inode_operations = {
 	.permission	= nfs_permission,
 	.getattr	= nfs_getattr,
 	.setattr	= nfs_setattr,
+	.fileattr_get	= nfs_fileattr_get,
 #ifdef CONFIG_NFS_V3_ACL
 	.listxattr	= nfs3_listxattr,
 	.get_inode_acl	= nfs3_get_acl,
@@ -1064,6 +1065,7 @@ static const struct inode_operations nfs3_file_inode_operations = {
 	.permission	= nfs_permission,
 	.getattr	= nfs_getattr,
 	.setattr	= nfs_setattr,
+	.fileattr_get	= nfs_fileattr_get,
 #ifdef CONFIG_NFS_V3_ACL
 	.listxattr	= nfs3_listxattr,
 	.get_inode_acl	= nfs3_get_acl,
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index e17d72908412..e745e78faab0 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -2276,8 +2276,11 @@ static int decode_pathconf3resok(struct xdr_stream *xdr,
 	if (unlikely(!p))
 		return -EIO;
 	result->max_link = be32_to_cpup(p++);
-	result->max_namelen = be32_to_cpup(p);
-	/* ignore remaining fields */
+	result->max_namelen = be32_to_cpup(p++);
+	p++;	/* ignore no_trunc */
+	p++;	/* ignore chown_restricted */
+	result->case_insensitive = be32_to_cpup(p++) != 0;
+	result->case_preserving = be32_to_cpup(p) != 0;
 	return 0;
 }
 
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index d839a97df822..034e3e87e863 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3944,8 +3944,9 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f
 			server->caps |= NFS_CAP_SYMLINKS;
 		if (res.case_insensitive)
 			server->caps |= NFS_CAP_CASE_INSENSITIVE;
-		if (res.case_preserving)
-			server->caps |= NFS_CAP_CASE_PRESERVING;
+		if ((res.attr_bitmask[0] & FATTR4_WORD0_CASE_PRESERVING) &&
+		    !res.case_preserving)
+			server->caps |= NFS_CAP_CASE_NONPRESERVING;
 #ifdef CONFIG_NFS_V4_SECURITY_LABEL
 		if (res.attr_bitmask[2] & FATTR4_WORD2_SECURITY_LABEL)
 			server->caps |= NFS_CAP_SECURITY_LABEL;
@@ -10598,6 +10599,7 @@ static const struct inode_operations nfs4_dir_inode_operations = {
 	.getattr	= nfs_getattr,
 	.setattr	= nfs_setattr,
 	.listxattr	= nfs4_listxattr,
+	.fileattr_get	= nfs_fileattr_get,
 };
 
 static const struct inode_operations nfs4_file_inode_operations = {
@@ -10605,6 +10607,7 @@ static const struct inode_operations nfs4_file_inode_operations = {
 	.getattr	= nfs_getattr,
 	.setattr	= nfs_setattr,
 	.listxattr	= nfs4_listxattr,
+	.fileattr_get	= nfs_fileattr_get,
 };
 
 static struct nfs_server *nfs4_clone_server(struct nfs_server *source,
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 70795684b8e8..03c2c1f31be9 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -598,6 +598,7 @@ nfs_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
 {
 	info->max_link = 0;
 	info->max_namelen = NFS2_MAXNAMLEN;
+	info->case_preserving = true;
 	return 0;
 }
 
@@ -718,12 +719,14 @@ static const struct inode_operations nfs_dir_inode_operations = {
 	.permission	= nfs_permission,
 	.getattr	= nfs_getattr,
 	.setattr	= nfs_setattr,
+	.fileattr_get	= nfs_fileattr_get,
 };
 
 static const struct inode_operations nfs_file_inode_operations = {
 	.permission	= nfs_permission,
 	.getattr	= nfs_getattr,
 	.setattr	= nfs_setattr,
+	.fileattr_get	= nfs_fileattr_get,
 };
 
 const struct nfs_rpc_ops nfs_v2_clientops = {
diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c
index 58146e935402..74a072896f8d 100644
--- a/fs/nfs/symlink.c
+++ b/fs/nfs/symlink.c
@@ -22,6 +22,8 @@
 #include <linux/mm.h>
 #include <linux/string.h>
 
+#include "internal.h"
+
 /* Symlink caching in the page cache is even more simplistic
  * and straight-forward than readdir caching.
  */
@@ -74,4 +76,5 @@ const struct inode_operations nfs_symlink_inode_operations = {
 	.get_link	= nfs_get_link,
 	.getattr	= nfs_getattr,
 	.setattr	= nfs_setattr,
+	.fileattr_get	= nfs_fileattr_get,
 };
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 4daee27fa5eb..34d294774f8c 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -306,7 +306,7 @@ struct nfs_server {
 #define NFS_CAP_ATOMIC_OPEN	(1U << 4)
 #define NFS_CAP_LGOPEN		(1U << 5)
 #define NFS_CAP_CASE_INSENSITIVE	(1U << 6)
-#define NFS_CAP_CASE_PRESERVING	(1U << 7)
+#define NFS_CAP_CASE_NONPRESERVING	(1U << 7)
 #define NFS_CAP_REBOOT_LAYOUTRETURN	(1U << 8)
 #define NFS_CAP_OFFLOAD_STATUS	(1U << 9)
 #define NFS_CAP_ZERO_RANGE	(1U << 10)
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index ff1f12aa73d2..7c2057e40f99 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -182,6 +182,8 @@ struct nfs_pathconf {
 	struct nfs_fattr	*fattr; /* Post-op attributes */
 	__u32			max_link; /* max # of hard links */
 	__u32			max_namelen; /* max name length */
+	bool			case_insensitive;
+	bool			case_preserving;
 };
 
 struct nfs4_change_info {

-- 
2.53.0


^ permalink raw reply related

* [PATCH v11 11/15] vboxsf: Implement fileattr_get for case sensitivity
From: Chuck Lever @ 2026-04-25  1:53 UTC (permalink / raw)
  To: Al Viro, Christian Brauner, Jan Kara
  Cc: linux-fsdevel, linux-ext4, linux-xfs, linux-cifs, linux-nfs,
	linux-api, linux-f2fs-devel, hirofumi, linkinjeon, sj1557.seo,
	yuezhang.mo, almaz.alexandrovich, slava, glaubitz, frank.li,
	tytso, adilger.kernel, cem, sfrench, pc, ronniesahlberg, sprasad,
	trondmy, anna, jaegeuk, chao, hansg, senozhatsky, Chuck Lever,
	Roland Mainz
In-Reply-To: <20260424-case-sensitivity-v11-0-de5619beddaf@oracle.com>

From: Chuck Lever <chuck.lever@oracle.com>

Upper layers such as NFSD need a way to query whether a
filesystem handles filenames in a case-sensitive manner. Report
VirtualBox shared folder case handling behavior via the
FS_XFLAG_CASEFOLD flag.

The case sensitivity property is queried from the VirtualBox host
service at mount time and cached in struct vboxsf_sbi. The host
determines case sensitivity based on the underlying host filesystem
(for example, Windows NTFS is case-insensitive while Linux ext4 is
case-sensitive).

VirtualBox shared folders always preserve filename case exactly
as provided by the guest. The host interface does not expose a
separate case-preserving property; leaving
FS_XFLAG_CASENONPRESERVING unset reports the POSIX-default
case-preserving behavior, which matches vboxsf semantics.

The callback is registered in all three inode_operations
structures (directory, file, and symlink) to ensure consistent
reporting across all inode types.

Reviewed-by: Roland Mainz <roland.mainz@nrubsig.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/vboxsf/dir.c    |  1 +
 fs/vboxsf/file.c   |  6 ++++--
 fs/vboxsf/super.c  |  7 +++++++
 fs/vboxsf/utils.c  | 30 ++++++++++++++++++++++++++++++
 fs/vboxsf/vfsmod.h |  6 ++++++
 5 files changed, 48 insertions(+), 2 deletions(-)

diff --git a/fs/vboxsf/dir.c b/fs/vboxsf/dir.c
index 42bedc4ec7af..c5bd3271aa96 100644
--- a/fs/vboxsf/dir.c
+++ b/fs/vboxsf/dir.c
@@ -477,4 +477,5 @@ const struct inode_operations vboxsf_dir_iops = {
 	.symlink = vboxsf_dir_symlink,
 	.getattr = vboxsf_getattr,
 	.setattr = vboxsf_setattr,
+	.fileattr_get = vboxsf_fileattr_get,
 };
diff --git a/fs/vboxsf/file.c b/fs/vboxsf/file.c
index 7a7a3fbb2651..943953867e18 100644
--- a/fs/vboxsf/file.c
+++ b/fs/vboxsf/file.c
@@ -222,7 +222,8 @@ const struct file_operations vboxsf_reg_fops = {
 
 const struct inode_operations vboxsf_reg_iops = {
 	.getattr = vboxsf_getattr,
-	.setattr = vboxsf_setattr
+	.setattr = vboxsf_setattr,
+	.fileattr_get = vboxsf_fileattr_get,
 };
 
 static int vboxsf_read_folio(struct file *file, struct folio *folio)
@@ -389,5 +390,6 @@ static const char *vboxsf_get_link(struct dentry *dentry, struct inode *inode,
 }
 
 const struct inode_operations vboxsf_lnk_iops = {
-	.get_link = vboxsf_get_link
+	.get_link = vboxsf_get_link,
+	.fileattr_get = vboxsf_fileattr_get,
 };
diff --git a/fs/vboxsf/super.c b/fs/vboxsf/super.c
index a618cb093e00..a61fbab51d37 100644
--- a/fs/vboxsf/super.c
+++ b/fs/vboxsf/super.c
@@ -185,6 +185,13 @@ static int vboxsf_fill_super(struct super_block *sb, struct fs_context *fc)
 	if (err)
 		goto fail_unmap;
 
+	/*
+	 * A failed query leaves sbi->case_insensitive false, so the
+	 * mount defaults to reporting case-sensitive behavior. Do not
+	 * fail the mount over an advisory attribute.
+	 */
+	vboxsf_query_case_sensitive(sbi);
+
 	sb->s_magic = VBOXSF_SUPER_MAGIC;
 	sb->s_blocksize = 1024;
 	sb->s_maxbytes = MAX_LFS_FILESIZE;
diff --git a/fs/vboxsf/utils.c b/fs/vboxsf/utils.c
index 440e8c50629d..298bfc93255c 100644
--- a/fs/vboxsf/utils.c
+++ b/fs/vboxsf/utils.c
@@ -11,6 +11,7 @@
 #include <linux/sizes.h>
 #include <linux/pagemap.h>
 #include <linux/vfs.h>
+#include <linux/fileattr.h>
 #include "vfsmod.h"
 
 struct inode *vboxsf_new_inode(struct super_block *sb)
@@ -567,3 +568,32 @@ int vboxsf_dir_read_all(struct vboxsf_sbi *sbi, struct vboxsf_dir_info *sf_d,
 
 	return err;
 }
+
+int vboxsf_query_case_sensitive(struct vboxsf_sbi *sbi)
+{
+	struct shfl_volinfo volinfo = {};
+	u32 buf_len;
+	int err;
+
+	buf_len = sizeof(volinfo);
+	err = vboxsf_fsinfo(sbi->root, 0, SHFL_INFO_GET | SHFL_INFO_VOLUME,
+			    &buf_len, &volinfo);
+	if (err)
+		return err;
+	if (buf_len < sizeof(volinfo))
+		return 0;
+
+	sbi->case_insensitive = !volinfo.properties.case_sensitive;
+	return 0;
+}
+
+int vboxsf_fileattr_get(struct dentry *dentry, struct file_kattr *fa)
+{
+	struct vboxsf_sbi *sbi = VBOXSF_SBI(dentry->d_sb);
+
+	if (sbi->case_insensitive) {
+		fa->fsx_xflags |= FS_XFLAG_CASEFOLD;
+		fa->flags |= FS_CASEFOLD_FL;
+	}
+	return 0;
+}
diff --git a/fs/vboxsf/vfsmod.h b/fs/vboxsf/vfsmod.h
index 05973eb89d52..b61afd0ce842 100644
--- a/fs/vboxsf/vfsmod.h
+++ b/fs/vboxsf/vfsmod.h
@@ -47,6 +47,7 @@ struct vboxsf_sbi {
 	u32 next_generation;
 	u32 root;
 	int bdi_id;
+	bool case_insensitive;
 };
 
 /* per-inode information */
@@ -111,6 +112,11 @@ void vboxsf_dir_info_free(struct vboxsf_dir_info *p);
 int vboxsf_dir_read_all(struct vboxsf_sbi *sbi, struct vboxsf_dir_info *sf_d,
 			u64 handle);
 
+int vboxsf_query_case_sensitive(struct vboxsf_sbi *sbi);
+
+struct file_kattr;
+int vboxsf_fileattr_get(struct dentry *dentry, struct file_kattr *fa);
+
 /* from vboxsf_wrappers.c */
 int vboxsf_connect(void);
 void vboxsf_disconnect(void);

-- 
2.53.0


^ permalink raw reply related

* [PATCH v11 12/15] isofs: Implement fileattr_get for case sensitivity
From: Chuck Lever @ 2026-04-25  1:53 UTC (permalink / raw)
  To: Al Viro, Christian Brauner, Jan Kara
  Cc: linux-fsdevel, linux-ext4, linux-xfs, linux-cifs, linux-nfs,
	linux-api, linux-f2fs-devel, hirofumi, linkinjeon, sj1557.seo,
	yuezhang.mo, almaz.alexandrovich, slava, glaubitz, frank.li,
	tytso, adilger.kernel, cem, sfrench, pc, ronniesahlberg, sprasad,
	trondmy, anna, jaegeuk, chao, hansg, senozhatsky, Chuck Lever,
	Roland Mainz
In-Reply-To: <20260424-case-sensitivity-v11-0-de5619beddaf@oracle.com>

From: Chuck Lever <chuck.lever@oracle.com>

Upper layers such as NFSD need a way to query whether a
filesystem handles filenames in a case-sensitive manner so
they can provide correct semantics to remote clients. Without
this information, NFS exports of ISO 9660 filesystems cannot
advertise their filename case behavior.

Implement isofs_fileattr_get() to report ISO 9660 case handling
behavior via the FS_XFLAG_CASEFOLD flag. The 'check=r' (relaxed)
mount option enables case-insensitive lookups, and this setting
determines the value reported. By default, Joliet extensions
operate in relaxed mode while plain ISO 9660 uses strict
(case-sensitive) mode. All ISO 9660 variants are case-preserving,
meaning filenames are stored exactly as they appear on the disc.

Case handling is a superblock-wide property, so the callback
must report the same value for every inode type. Regular files
previously had no inode_operations; introduce
isofs_file_inode_operations to carry the callback. Symlinks
previously shared page_symlink_inode_operations; introduce
isofs_symlink_inode_operations, which wires page_get_link
alongside the callback, so that fileattr queries on a symlink
reach the isofs implementation instead of returning
-ENOIOCTLCMD. The flag is set in both fa->fsx_xflags and
fa->flags so FS_IOC_FSGETXATTR and FS_IOC_GETFLAGS agree.

Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Roland Mainz <roland.mainz@nrubsig.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/isofs/dir.c   | 24 ++++++++++++++++++++++++
 fs/isofs/inode.c |  3 ++-
 fs/isofs/isofs.h |  5 +++++
 3 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/fs/isofs/dir.c b/fs/isofs/dir.c
index 2fd9948d606e..1db6b0db3808 100644
--- a/fs/isofs/dir.c
+++ b/fs/isofs/dir.c
@@ -14,6 +14,7 @@
 #include <linux/gfp.h>
 #include <linux/filelock.h>
 #include "isofs.h"
+#include <linux/fileattr.h>
 
 int isofs_name_translate(struct iso_directory_record *de, char *new, struct inode *inode)
 {
@@ -267,6 +268,17 @@ static int isofs_readdir(struct file *file, struct dir_context *ctx)
 	return result;
 }
 
+int isofs_fileattr_get(struct dentry *dentry, struct file_kattr *fa)
+{
+	struct isofs_sb_info *sbi = ISOFS_SB(dentry->d_sb);
+
+	if (sbi->s_check == 'r') {
+		fa->fsx_xflags |= FS_XFLAG_CASEFOLD;
+		fa->flags |= FS_CASEFOLD_FL;
+	}
+	return 0;
+}
+
 const struct file_operations isofs_dir_operations =
 {
 	.llseek = generic_file_llseek,
@@ -281,6 +293,18 @@ const struct file_operations isofs_dir_operations =
 const struct inode_operations isofs_dir_inode_operations =
 {
 	.lookup = isofs_lookup,
+	.fileattr_get = isofs_fileattr_get,
+};
+
+const struct inode_operations isofs_file_inode_operations =
+{
+	.fileattr_get = isofs_fileattr_get,
+};
+
+const struct inode_operations isofs_symlink_inode_operations =
+{
+	.get_link = page_get_link,
+	.fileattr_get = isofs_fileattr_get,
 };
 
 
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index efee53717f1c..68c286b7cc35 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -1427,6 +1427,7 @@ static int isofs_read_inode(struct inode *inode, int relocated)
 
 	/* Install the inode operations vector */
 	if (S_ISREG(inode->i_mode)) {
+		inode->i_op = &isofs_file_inode_operations;
 		inode->i_fop = &generic_ro_fops;
 		switch (ei->i_file_format) {
 #ifdef CONFIG_ZISOFS
@@ -1442,7 +1443,7 @@ static int isofs_read_inode(struct inode *inode, int relocated)
 		inode->i_op = &isofs_dir_inode_operations;
 		inode->i_fop = &isofs_dir_operations;
 	} else if (S_ISLNK(inode->i_mode)) {
-		inode->i_op = &page_symlink_inode_operations;
+		inode->i_op = &isofs_symlink_inode_operations;
 		inode_nohighmem(inode);
 		inode->i_data.a_ops = &isofs_symlink_aops;
 	} else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
diff --git a/fs/isofs/isofs.h b/fs/isofs/isofs.h
index 506555837533..a3cda3430020 100644
--- a/fs/isofs/isofs.h
+++ b/fs/isofs/isofs.h
@@ -197,7 +197,12 @@ isofs_normalize_block_and_offset(struct iso_directory_record* de,
 	}
 }
 
+struct file_kattr;
+int isofs_fileattr_get(struct dentry *dentry, struct file_kattr *fa);
+
 extern const struct inode_operations isofs_dir_inode_operations;
+extern const struct inode_operations isofs_file_inode_operations;
+extern const struct inode_operations isofs_symlink_inode_operations;
 extern const struct file_operations isofs_dir_operations;
 extern const struct address_space_operations isofs_symlink_aops;
 extern const struct export_operations isofs_export_ops;

-- 
2.53.0


^ permalink raw reply related

* [PATCH v11 13/15] nfsd: Report export case-folding via NFSv3 PATHCONF
From: Chuck Lever @ 2026-04-25  1:53 UTC (permalink / raw)
  To: Al Viro, Christian Brauner, Jan Kara
  Cc: linux-fsdevel, linux-ext4, linux-xfs, linux-cifs, linux-nfs,
	linux-api, linux-f2fs-devel, hirofumi, linkinjeon, sj1557.seo,
	yuezhang.mo, almaz.alexandrovich, slava, glaubitz, frank.li,
	tytso, adilger.kernel, cem, sfrench, pc, ronniesahlberg, sprasad,
	trondmy, anna, jaegeuk, chao, hansg, senozhatsky, Chuck Lever,
	Roland Mainz
In-Reply-To: <20260424-case-sensitivity-v11-0-de5619beddaf@oracle.com>

From: Chuck Lever <chuck.lever@oracle.com>

The hard-coded MSDOS_SUPER_MAGIC check in nfsd3_proc_pathconf()
only recognizes FAT filesystems as case-insensitive. Modern
filesystems like F2FS, exFAT, and CIFS support case-insensitive
directories, but NFSv3 clients cannot discover this capability.

Query the export's actual case behavior through ->fileattr_get
instead. This allows NFSv3 clients to correctly handle case
sensitivity for any filesystem that implements the fileattr
interface. Filesystems without ->fileattr_get continue to report
the default POSIX behavior (case-sensitive, case-preserving).

This change depends on commit ("fat: Implement fileattr_get for
case sensitivity"), which ensures FAT filesystems report their
case behavior correctly via the fileattr interface.

Reviewed-by: Roland Mainz <roland.mainz@nrubsig.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/nfs3proc.c | 18 ++++++++++--------
 fs/nfsd/vfs.c      | 43 +++++++++++++++++++++++++++++++++++++++++++
 fs/nfsd/vfs.h      |  3 +++
 3 files changed, 56 insertions(+), 8 deletions(-)

diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 42adc5461db0..7b094c5908f1 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -717,17 +717,19 @@ nfsd3_proc_pathconf(struct svc_rqst *rqstp)
 
 	if (resp->status == nfs_ok) {
 		struct super_block *sb = argp->fh.fh_dentry->d_sb;
+		bool case_insensitive, case_preserving;
 
-		/* Note that we don't care for remote fs's here */
-		switch (sb->s_magic) {
-		case EXT2_SUPER_MAGIC:
+		if (sb->s_magic == EXT2_SUPER_MAGIC) {
 			resp->p_link_max = EXT2_LINK_MAX;
 			resp->p_name_max = EXT2_NAME_LEN;
-			break;
-		case MSDOS_SUPER_MAGIC:
-			resp->p_case_insensitive = 1;
-			resp->p_case_preserving  = 0;
-			break;
+		}
+
+		resp->status = nfsd_get_case_info(argp->fh.fh_dentry,
+						  &case_insensitive,
+						  &case_preserving);
+		if (resp->status == nfs_ok) {
+			resp->p_case_insensitive = case_insensitive;
+			resp->p_case_preserving = case_preserving;
 		}
 	}
 
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index eafdf7b7890f..9214f1f1f83d 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -32,6 +32,7 @@
 #include <linux/writeback.h>
 #include <linux/security.h>
 #include <linux/sunrpc/xdr.h>
+#include <linux/fileattr.h>
 
 #include "xdr3.h"
 
@@ -2891,3 +2892,45 @@ nfsd_permission(struct svc_cred *cred, struct svc_export *exp,
 
 	return err? nfserrno(err) : 0;
 }
+
+/**
+ * nfsd_get_case_info - get case sensitivity info for a dentry
+ * @dentry: dentry to query
+ * @case_insensitive: output, true if the filesystem is case-insensitive
+ * @case_preserving: output, true if the filesystem preserves case
+ *
+ * Filesystems without ->fileattr_get report POSIX defaults
+ * (case-sensitive, case-preserving). Outputs are unmodified on
+ * failure.
+ *
+ * Return: nfs_ok on success, or an nfserr on failure.
+ */
+__be32
+nfsd_get_case_info(struct dentry *dentry, bool *case_insensitive,
+		   bool *case_preserving)
+{
+	struct file_kattr fa = {};
+	int err;
+
+	err = vfs_fileattr_get(dentry, &fa);
+	switch (err) {
+	case 0:
+		/* Success. */
+		break;
+	case -EINVAL:
+	case -ENOTTY:
+	case -ENOIOCTLCMD:
+		/* Query not supported: Report POSIX defaults. */
+		break;
+	default:
+		/*
+		 * Query failed: Propagate that error since
+		 * support for case-folding is unknown.
+		 */
+		return nfserrno(err);
+	}
+
+	*case_insensitive = fa.fsx_xflags & FS_XFLAG_CASEFOLD;
+	*case_preserving = !(fa.fsx_xflags & FS_XFLAG_CASENONPRESERVING);
+	return nfs_ok;
+}
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index 702a844f2106..abf33389ee81 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -156,6 +156,9 @@ __be32		nfsd_readdir(struct svc_rqst *, struct svc_fh *,
 			     loff_t *, struct readdir_cd *, nfsd_filldir_t);
 __be32		nfsd_statfs(struct svc_rqst *, struct svc_fh *,
 				struct kstatfs *, int access);
+__be32		nfsd_get_case_info(struct dentry *dentry,
+				   bool *case_insensitive,
+				   bool *case_preserving);
 
 __be32		nfsd_permission(struct svc_cred *cred, struct svc_export *exp,
 				struct dentry *dentry, int acc);

-- 
2.53.0


^ permalink raw reply related

* [PATCH v11 14/15] nfsd: Implement NFSv4 FATTR4_CASE_INSENSITIVE and FATTR4_CASE_PRESERVING
From: Chuck Lever @ 2026-04-25  1:53 UTC (permalink / raw)
  To: Al Viro, Christian Brauner, Jan Kara
  Cc: linux-fsdevel, linux-ext4, linux-xfs, linux-cifs, linux-nfs,
	linux-api, linux-f2fs-devel, hirofumi, linkinjeon, sj1557.seo,
	yuezhang.mo, almaz.alexandrovich, slava, glaubitz, frank.li,
	tytso, adilger.kernel, cem, sfrench, pc, ronniesahlberg, sprasad,
	trondmy, anna, jaegeuk, chao, hansg, senozhatsky, Chuck Lever,
	Roland Mainz
In-Reply-To: <20260424-case-sensitivity-v11-0-de5619beddaf@oracle.com>

From: Chuck Lever <chuck.lever@oracle.com>

NFSD currently provides NFSv4 clients with hard-coded responses
indicating all exported filesystems are case-sensitive and
case-preserving. This is incorrect for case-insensitive filesystems
and ext4 directories with casefold enabled.

Query the underlying filesystem's actual case sensitivity via
nfsd_get_case_info() and return accurate values to clients. This
supports per-directory settings for filesystems that allow mixing
case-sensitive and case-insensitive directories within an export.

Reviewed-by: Roland Mainz <roland.mainz@nrubsig.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/nfs4xdr.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 52 insertions(+), 3 deletions(-)

diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 2a0946c630e1..68b23863dab1 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -3158,6 +3158,8 @@ struct nfsd4_fattr_args {
 	u32			rdattr_err;
 	bool			contextsupport;
 	bool			ignore_crossmnt;
+	bool			case_insensitive;
+	bool			case_preserving;
 };
 
 typedef __be32(*nfsd4_enc_attr)(struct xdr_stream *xdr,
@@ -3356,6 +3358,33 @@ static __be32 nfsd4_encode_fattr4_acl(struct xdr_stream *xdr,
 	return nfs_ok;
 }
 
+static __be32 nfsd4_encode_fattr4_case_insensitive(struct xdr_stream *xdr,
+					const struct nfsd4_fattr_args *args)
+{
+	return nfsd4_encode_bool(xdr, args->case_insensitive);
+}
+
+static __be32 nfsd4_encode_fattr4_case_preserving(struct xdr_stream *xdr,
+					const struct nfsd4_fattr_args *args)
+{
+	return nfsd4_encode_bool(xdr, args->case_preserving);
+}
+
+static __be32 nfsd4_encode_fattr4_homogeneous(struct xdr_stream *xdr,
+					const struct nfsd4_fattr_args *args)
+{
+	/*
+	 * Filesystems with a Unicode encoding loaded (e.g. ext4, f2fs
+	 * with the casefold feature) expose case folding as a
+	 * per-directory attribute, so the per-file-system
+	 * case_insensitive and case_preserving values can legitimately
+	 * differ across objects that share the same fsid.  Report
+	 * FATTR4_HOMOGENEOUS = FALSE on such filesystems to keep that
+	 * variation consistent with RFC 8881 Section 5.8.2.16.
+	 */
+	return nfsd4_encode_bool(xdr, !sb_has_encoding(args->dentry->d_sb));
+}
+
 static __be32 nfsd4_encode_fattr4_filehandle(struct xdr_stream *xdr,
 					     const struct nfsd4_fattr_args *args)
 {
@@ -3748,8 +3777,8 @@ static const nfsd4_enc_attr nfsd4_enc_fattr4_encode_ops[] = {
 	[FATTR4_ACLSUPPORT]		= nfsd4_encode_fattr4_aclsupport,
 	[FATTR4_ARCHIVE]		= nfsd4_encode_fattr4__noop,
 	[FATTR4_CANSETTIME]		= nfsd4_encode_fattr4__true,
-	[FATTR4_CASE_INSENSITIVE]	= nfsd4_encode_fattr4__false,
-	[FATTR4_CASE_PRESERVING]	= nfsd4_encode_fattr4__true,
+	[FATTR4_CASE_INSENSITIVE]	= nfsd4_encode_fattr4_case_insensitive,
+	[FATTR4_CASE_PRESERVING]	= nfsd4_encode_fattr4_case_preserving,
 	[FATTR4_CHOWN_RESTRICTED]	= nfsd4_encode_fattr4__true,
 	[FATTR4_FILEHANDLE]		= nfsd4_encode_fattr4_filehandle,
 	[FATTR4_FILEID]			= nfsd4_encode_fattr4_fileid,
@@ -3758,7 +3787,7 @@ static const nfsd4_enc_attr nfsd4_enc_fattr4_encode_ops[] = {
 	[FATTR4_FILES_TOTAL]		= nfsd4_encode_fattr4_files_total,
 	[FATTR4_FS_LOCATIONS]		= nfsd4_encode_fattr4_fs_locations,
 	[FATTR4_HIDDEN]			= nfsd4_encode_fattr4__noop,
-	[FATTR4_HOMOGENEOUS]		= nfsd4_encode_fattr4__true,
+	[FATTR4_HOMOGENEOUS]		= nfsd4_encode_fattr4_homogeneous,
 	[FATTR4_MAXFILESIZE]		= nfsd4_encode_fattr4_maxfilesize,
 	[FATTR4_MAXLINK]		= nfsd4_encode_fattr4_maxlink,
 	[FATTR4_MAXNAME]		= nfsd4_encode_fattr4_maxname,
@@ -3968,6 +3997,26 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
 		args.fhp = tempfh;
 	} else
 		args.fhp = fhp;
+	if (attrmask[0] & (FATTR4_WORD0_CASE_INSENSITIVE |
+			   FATTR4_WORD0_CASE_PRESERVING)) {
+		struct dentry *cd = dentry;
+
+		/*
+		 * On casefold-capable file systems the flag lives
+		 * on the directory, not on its entries. For a
+		 * non-directory object, name-comparison semantics
+		 * come from its parent. A directory (including the
+		 * export root, whose parent is outside the export)
+		 * is queried as-is so its own contents' lookup
+		 * behaviour is reported.
+		 */
+		if (!d_is_dir(dentry))
+			cd = dentry->d_parent;
+		status = nfsd_get_case_info(cd, &args.case_insensitive,
+					    &args.case_preserving);
+		if (status != nfs_ok)
+			goto out;
+	}
 
 	if (attrmask[0] & FATTR4_WORD0_ACL) {
 		err = nfsd4_get_nfs4_acl(rqstp, dentry, &args.acl);

-- 
2.53.0


^ permalink raw reply related

* [PATCH v11 15/15] ksmbd: Report filesystem case sensitivity via FS_ATTRIBUTE_INFORMATION
From: Chuck Lever @ 2026-04-25  1:53 UTC (permalink / raw)
  To: Al Viro, Christian Brauner, Jan Kara
  Cc: linux-fsdevel, linux-ext4, linux-xfs, linux-cifs, linux-nfs,
	linux-api, linux-f2fs-devel, hirofumi, linkinjeon, sj1557.seo,
	yuezhang.mo, almaz.alexandrovich, slava, glaubitz, frank.li,
	tytso, adilger.kernel, cem, sfrench, pc, ronniesahlberg, sprasad,
	trondmy, anna, jaegeuk, chao, hansg, senozhatsky, Chuck Lever,
	Roland Mainz
In-Reply-To: <20260424-case-sensitivity-v11-0-de5619beddaf@oracle.com>

From: Chuck Lever <chuck.lever@oracle.com>

FS_ATTRIBUTE_INFORMATION responses have always reported
FILE_CASE_SENSITIVE_SEARCH and FILE_CASE_PRESERVED_NAMES
unconditionally. Case-insensitive filesystems like exFAT, and
casefolded directories on ext4 or f2fs, have no way to signal
their actual semantics to SMB clients.

Now that filesystems expose case behavior through ->fileattr_get,
query it via vfs_fileattr_get() and translate the FS_XFLAG_CASEFOLD
and FS_XFLAG_CASENONPRESERVING flags into the corresponding SMB
attributes. Filesystems without ->fileattr_get continue reporting
default POSIX behavior (case-sensitive, case-preserving).

SMB's FS_ATTRIBUTE_INFORMATION reports per-share attributes from
the share root, not per-file. Shares mixing casefold and
non-casefold directories report the root directory's behavior.

Acked-by: Namjae Jeon <linkinjeon@kernel.org>
Reviewed-by: Roland Mainz <roland.mainz@nrubsig.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/smb/server/smb2pdu.c | 30 ++++++++++++++++++++++++------
 1 file changed, 24 insertions(+), 6 deletions(-)

diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c
index ee32e61b6d3c..face5390c614 100644
--- a/fs/smb/server/smb2pdu.c
+++ b/fs/smb/server/smb2pdu.c
@@ -14,6 +14,7 @@
 #include <linux/falloc.h>
 #include <linux/mount.h>
 #include <linux/filelock.h>
+#include <linux/fileattr.h>
 
 #include "glob.h"
 #include "smbfsctl.h"
@@ -5541,16 +5542,33 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work,
 	case FS_ATTRIBUTE_INFORMATION:
 	{
 		FILE_SYSTEM_ATTRIBUTE_INFO *info;
+		struct file_kattr fa = {};
 		size_t sz;
+		u32 attrs;
+		int err;
 
 		info = (FILE_SYSTEM_ATTRIBUTE_INFO *)rsp->Buffer;
-		info->Attributes = cpu_to_le32(FILE_SUPPORTS_OBJECT_IDS |
-					       FILE_PERSISTENT_ACLS |
-					       FILE_UNICODE_ON_DISK |
-					       FILE_CASE_PRESERVED_NAMES |
-					       FILE_CASE_SENSITIVE_SEARCH |
-					       FILE_SUPPORTS_BLOCK_REFCOUNTING);
+		attrs = FILE_SUPPORTS_OBJECT_IDS |
+			FILE_PERSISTENT_ACLS |
+			FILE_UNICODE_ON_DISK |
+			FILE_SUPPORTS_BLOCK_REFCOUNTING;
 
+		err = vfs_fileattr_get(path.dentry, &fa);
+		/*
+		 * -EINVAL: ntfs-3g and other FUSE filesystems that lack
+		 * FS_IOC_FSGETXATTR support.
+		 */
+		if (err && err != -ENOIOCTLCMD && err != -ENOTTY &&
+		    err != -EINVAL) {
+			path_put(&path);
+			return err;
+		}
+		if (!(fa.fsx_xflags & FS_XFLAG_CASEFOLD))
+			attrs |= FILE_CASE_SENSITIVE_SEARCH;
+		if (!(fa.fsx_xflags & FS_XFLAG_CASENONPRESERVING))
+			attrs |= FILE_CASE_PRESERVED_NAMES;
+
+		info->Attributes = cpu_to_le32(attrs);
 		info->Attributes |= cpu_to_le32(server_conf.share_fake_fscaps);
 
 		if (test_share_config_flag(work->tcon->share_conf,

-- 
2.53.0


^ permalink raw reply related

* Re: [PATCH v2] generic/790: test post-EOF gap zeroing persistence
From: Zhang Yi @ 2026-04-25  3:06 UTC (permalink / raw)
  To: Brian Foster
  Cc: fstests, zlang, linux-ext4, linux-fsdevel, jack, yi.zhang,
	yizhang089, yangerkun
In-Reply-To: <aetreLr6tt1Vb-GJ@bfoster>

On 4/24/2026 9:09 PM, Brian Foster wrote:
> On Fri, Apr 24, 2026 at 05:22:28PM +0800, Zhang Yi wrote:
>> From: Zhang Yi <yi.zhang@huawei.com>
>>
>> Test that extending a file past a non-block-aligned EOF correctly
>> zero-fills the gap [old_EOF, block_boundary), and that this zeroing
>> persists through a filesystem shutdown+remount cycle.
>>
>> Stale data beyond EOF can persist on disk when append write data blocks
>> are flushed before the on-disk file size update, or when concurrent
>> append writeback and mmap writes persist non-zero data past EOF.
>> Subsequent post-EOF operations (append write, fallocate, truncate up)
>> must zero-fill and persist the gap to prevent exposing stale data.
>>
>> The test pollutes the file's last physical block (via FIEMAP + raw
>> device write) with a sentinel pattern beyond i_size, then performs each
>> extend operation and verifies the gap is zeroed both in memory and on
>> disk.
>>
>> Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
>> ---
>> v1->v2:
>>  - Add _require_no_realtime to prevent testing on XFS realtime devices,
>>    where file data may reside on $SCRATCH_RTDEV.
>>  - Add _exclude_fs btrfs since FIEMAP returns logical addresses, not
>>    physical device offsets, writing to these offsets on $SCRATCH_DEV
>>    would corrupt the filesystem in multi-device setups. Besides, since
>>    btrfs doesn't support shutdown right now, we can support it later.
>>  - Add -v flag to od in _check_gap_zero() to prevent line folding of
>>    identical consecutive lines.
>>  - Add expected_new_sz parameter to _test_eof_zeroing(), verify file
>>    size was not rolled back after shutdown+remount cycle, and also drop
>>    the unnecessary file size check before the shutdown as well.
>>  - Clarify the comment regarding when stale data beyond EOF can persist.
>>
> 
> Thanks for the tweaks. This all LGTM from a review standpoint. I gave it
> a quick test on latest master and I see a few failures in a couple runs:
> 
> - On XFS (mkfs defaults) I saw one unexpected i_size failure and one
>   zeroing failure, both on write extension fwiw.

Previously, I only discovered the zeroing failure of append write. This
is because xfs_file_write_checks() -> xfs_file_write_zero_eof() only
zeroes the gap range in the page cache, without providing any
synchronous or asynchronous persistence (by contrast, truncate up does
synchronous writeback in xfs_vn_setattr_size(), and ext4 achieves
persistence via asynchronous writeback in data=ordered mode). So I think
this is an XFS problem.

Regarding the i_size failure, I did not directly reproduce this issue.
After analysis, I believe it is because the test case did not include
the -a option in sync_range, meaning it did not wait for IO writeback
completion and file size update persistence. I reproduced this issue by
adding a delay in the XFS end IO path. This is a problem with the test
case, and I will fix it in v3. Thank you for pointing this out.

> - On ext4 I saw a few unexpected i_size failures (both with mkfs
>   defaults and 1k block size).
> 

This is an ext4 issue on the shutdown path. Since ext4 set the shutdown
flag too early, it was unable to write back ordered zero data when
flushing the journal, which led to a journal abort and prevented the file
size update from being persisted. I have submitted a patch to fix this
issue. Please see the link below for details.

 https://lore.kernel.org/linux-ext4/20260424104201.1930823-1-yi.zhang@huaweicloud.com/

Thanks
Yi.

> I haven't dug into anything beyond that. Does this match what you're
> seeing on current kernels or are these unexpected failures?
> 
> Brian
> 
>>  tests/generic/790     | 164 ++++++++++++++++++++++++++++++++++++++++++
>>  tests/generic/790.out |   4 ++
>>  2 files changed, 168 insertions(+)
>>  create mode 100755 tests/generic/790
>>  create mode 100644 tests/generic/790.out
>>
>> diff --git a/tests/generic/790 b/tests/generic/790
>> new file mode 100755
>> index 00000000..2adc06f8
>> --- /dev/null
>> +++ b/tests/generic/790
>> @@ -0,0 +1,164 @@
>> +#! /bin/bash
>> +# SPDX-License-Identifier: GPL-2.0
>> +# Copyright (c) 2026 Huawei.  All Rights Reserved.
>> +#
>> +# FS QA Test No. 790
>> +#
>> +# Test that extending a file past a non-block-aligned EOF correctly zero-fills
>> +# the gap [old_EOF, block_boundary), and that this zeroing persists through a
>> +# filesystem shutdown+remount cycle.
>> +#
>> +# Stale data beyond EOF can persist on disk when:
>> +# 1) append write data blocks are flushed before the on-disk file size update,
>> +#    and the system crashes in this window.
>> +# 2) concurrent append writeback and mmap writes persist non-zero data past EOF.
>> +#
>> +# Subsequent post-EOF operations (append write, fallocate, truncate up) must
>> +# zero-fill and persist the gap to prevent exposing stale data.
>> +#
>> +# The test pollutes the file's last physical block (via FIEMAP + raw device
>> +# write) with a sentinel pattern beyond i_size, then performs each extend
>> +# operation and verifies the gap is zeroed both in memory and on disk.
>> +#
>> +. ./common/preamble
>> +_begin_fstest auto quick rw shutdown
>> +
>> +. ./common/filter
>> +
>> +_require_scratch
>> +_require_block_device $SCRATCH_DEV
>> +_require_no_realtime
>> +_require_scratch_shutdown
>> +_require_metadata_journaling $SCRATCH_DEV
>> +
>> +# FIEMAP on Btrfs returns logical addresses within the filesystem's address
>> +# space, not physical device offsets. Writing to these offsets on $SCRATCH_DEV
>> +# would corrupt the filesystem in multi-device setups.
>> +_exclude_fs btrfs
>> +
>> +_require_xfs_io_command "fiemap"
>> +_require_xfs_io_command "falloc"
>> +_require_xfs_io_command "pwrite"
>> +_require_xfs_io_command "truncate"
>> +_require_xfs_io_command "sync_range"
>> +
>> +# Check that gap region [offset, offset+nbytes) is entirely zero
>> +_check_gap_zero()
>> +{
>> +	local file="$1"
>> +	local offset="$2"
>> +	local nbytes="$3"
>> +	local label="$4"
>> +	local data
>> +	local stripped
>> +
>> +	data=$(od -A n -t x1 -v -j $offset -N $nbytes "$file" 2>/dev/null)
>> +
>> +	# Remove whitespace and check if any byte is non-zero
>> +	stripped=$(printf '%s' "$data" | tr -d ' \n\t')
>> +	if [ -n "$stripped" ] && ! echo "$stripped" | grep -qE "^0+$"; then
>> +		echo "FAIL: non-zero data in gap [$offset,$((offset + nbytes))) $label"
>> +		_hexdump -N $((offset + nbytes)) "$file"
>> +		return 1
>> +	fi
>> +	return 0
>> +}
>> +
>> +# Get the physical block offset (in bytes) of the file's first block on device
>> +_get_phys_offset()
>> +{
>> +	local file="$1"
>> +	local fiemap_output
>> +	local phys_blk
>> +
>> +	fiemap_output=$($XFS_IO_PROG -r -c "fiemap -v" "$file" 2>/dev/null)
>> +	phys_blk=$(echo "$fiemap_output" | _filter_xfs_io_fiemap | head -1 | awk '{print $3}')
>> +	if [ -z "$phys_blk" ]; then
>> +		echo ""
>> +		return
>> +	fi
>> +	# Convert 512-byte blocks to bytes
>> +	echo $((phys_blk * 512))
>> +}
>> +
>> +_test_eof_zeroing()
>> +{
>> +	local test_name="$1"
>> +	local extend_cmd="$2"
>> +	local expected_new_sz="$3"
>> +	local file=$SCRATCH_MNT/testfile_${test_name}
>> +
>> +	echo "$test_name" | tee -a $seqres.full
>> +
>> +	# Compute non-block-aligned EOF offset
>> +	local gap_bytes=16
>> +	local eof_offset=$((blksz - gap_bytes))
>> +
>> +	# Step 1: Write one full block to ensure the filesystem allocates a
>> +	#         physical block for the file instead of using inline data.
>> +	$XFS_IO_PROG -f -c "pwrite -S 0x5a 0 $blksz" -c fsync \
>> +		"$file" >> $seqres.full 2>&1
>> +
>> +	# Step 2: Get physical block offset on device via FIEMAP
>> +	local phys_offset
>> +	phys_offset=$(_get_phys_offset "$file")
>> +	if [ -z "$phys_offset" ]; then
>> +		_fail "$test_name: failed to get physical block offset via fiemap"
>> +	fi
>> +
>> +	# Step 3: Truncate file to non-block-aligned size and fsync.
>> +	#         The on-disk region [eof_offset, blksz) may or may not be
>> +	#         zeroed by the filesystem at this point.
>> +	$XFS_IO_PROG -c "truncate $eof_offset" -c fsync \
>> +		"$file" >> $seqres.full 2>&1
>> +
>> +	# Step 4: Unmount and restore the physical block to all-0x5a on disk.
>> +	#         This bypasses the kernel's pagecache EOF-zeroing to ensure
>> +	#         the stale pattern is present on disk. Then remount.
>> +	_scratch_unmount
>> +	$XFS_IO_PROG -d -c "pwrite -S 0x5a $phys_offset $blksz" \
>> +		$SCRATCH_DEV >> $seqres.full 2>&1
>> +	_scratch_mount >> $seqres.full 2>&1
>> +
>> +	# Step 5: Execute the extend operation.
>> +	$XFS_IO_PROG -c "$extend_cmd" "$file" >> $seqres.full 2>&1
>> +
>> +	# Step 6: Verify gap [eof_offset, blksz) is zeroed BEFORE shutdown
>> +	_check_gap_zero "$file" $eof_offset $gap_bytes "before shutdown" || return 1
>> +
>> +	# Step 7: Sync the extended range and shutdown the filesystem with
>> +	#         journal flush. This persists the file size extending, and
>> +	#         the filesystem should persist the zeroed data in the gap
>> +	#         range as well.
>> +	if [ "$extend_cmd" != "${extend_cmd#pwrite}" ]; then
>> +		$XFS_IO_PROG -c "sync_range -w $blksz $blksz" \
>> +			"$file" >> $seqres.full 2>&1
>> +	fi
>> +	_scratch_shutdown -f
>> +
>> +	# Step 8: Remount and verify gap is still zeroed
>> +	_scratch_cycle_mount
>> +
>> +	# Verify file size was not rolled back after shutdown+remount
>> +	local sz
>> +	sz=$(stat -c %s "$file")
>> +	if [ "$sz" -ne "$expected_new_sz" ]; then
>> +		_fail "$test_name: file size rolled back after shutdown+remount: $sz != $expected_new_sz"
>> +	fi
>> +
>> +	_check_gap_zero "$file" $eof_offset $gap_bytes "after shutdown+remount" || return 1
>> +}
>> +
>> +_scratch_mkfs >> $seqres.full 2>&1
>> +_scratch_mount
>> +
>> +blksz=$(_get_block_size $SCRATCH_MNT)
>> +
>> +# Test three variants of EOF-extending operations
>> +_test_eof_zeroing "append_write" "pwrite -S 0x42 $blksz $blksz" $((blksz * 2))
>> +_test_eof_zeroing "truncate_up" "truncate $((blksz * 2))" $((blksz * 2))
>> +_test_eof_zeroing "fallocate" "falloc $blksz $blksz" $((blksz * 2))
>> +
>> +# success, all done
>> +status=0
>> +exit
>> diff --git a/tests/generic/790.out b/tests/generic/790.out
>> new file mode 100644
>> index 00000000..e5e2cc09
>> --- /dev/null
>> +++ b/tests/generic/790.out
>> @@ -0,0 +1,4 @@
>> +QA output created by 790
>> +append_write
>> +truncate_up
>> +fallocate
>> -- 
>> 2.52.0
>>


^ permalink raw reply

* Re: [BUG] ext4: BUG_ON in ext4_write_inline_data (fs/ext4/inline.c:240)
From: Demi Marie Obenour @ 2026-04-25 18:00 UTC (permalink / raw)
  To: Theodore Tso, Zw Tang
  Cc: Andreas Dilger, libaokun, jack, ojaswin, linux-ext4, linux-kernel,
	yi.zhang, syzkaller-bugs
In-Reply-To: <20260421122059.GA86221@macsyma.local>


[-- Attachment #1.1.1: Type: text/plain, Size: 2616 bytes --]

On 4/21/26 08:20, Theodore Tso wrote:
> On Tue, Apr 21, 2026 at 07:32:43PM +0800, Zw Tang wrote:
>> This looks like an ext4 inline-data boundary/state inconsistency triggered
>> while writing to an ext4 image crafted by syzkaller. The later
>> KASAN: slab-use-after-free in rwsem_down_write_slowpath() appears to be a
>> secondary effect after the primary ext4 BUG, likely during teardown/unlink
>> after the filesystem failure.
> 
> Writing to a mounted image is not something that we consider a valid
> threat model.  If you can write to a mounted image, there are a
> zillion different ways that you can crash the kernel, or you can
> create a setuid shell, etc.
> 
> The upstream syzkaller bot makes sure that CONFIG_BLK_DEV_WRITE_MOUNTED
> is not defined to avoid syzkaller noise.

CONFIG_BLK_DEV_WRITE_MOUNTED only blocks writing via the specific block
device that is mounted.  It doesn't block writing via other methods.
If I recall correctly, its purpose was to prevent writing to the
buffer cache used by the filesystem driver.

Changing block devices that are mounted is also reachable via USB.
Yes, some distros may disable automount, but users who have stuff to
get done will mount USB devices anyway.  Telling users "don't do this"
very rarely works in practice.

I asked a distro maintainer about using libguestfs by default and
they refused, citing poor performance.  Unfortunately, there is no
way at the OS level to distinguish "trusted device used for backups"
and "untrusted USB stick".

So for now, neither distros nor kernel maintainers are willing to
budge, and in the meantime, users are left vulnerable.

The only ways out of this deadlock that I can see are either:

1. Make a tightly sandboxed FUSE daemon the default *and* fast.
   Ideally, it would:

   a. Run as an ephemeral user.
   b. Have the vast majority of syscalls blocked via seccomp.
   c. Have all access to /dev/fuse mediated by a validating proxy.
   d. Run in namespaces that block accessing any paths, even though
      the seccomp filter would already block any path-related syscalls.
   e. Support all the filesystems the kernel does, most likely via LKL.

2. Harden the kernel filesystem drivers against malicious devices,
   including TOCTOU.

Of course, it is also necessary to set usbcore.authorized_default=0
and use some form of port-based access control, so that one can use USB
keyboards without allowing a USB drive plugged in to act as a keyboard.

Maybe Linux should have been a microkernel after all...
-- 
Sincerely,
Demi Marie Obenour (she/her/hers)

[-- Attachment #1.1.2: OpenPGP public key --]
[-- Type: application/pgp-keys, Size: 7253 bytes --]

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply

* Re: [BUG] ext4: BUG_ON in ext4_write_inline_data (fs/ext4/inline.c:240)
From: Theodore Tso @ 2026-04-26  3:22 UTC (permalink / raw)
  To: Demi Marie Obenour
  Cc: Zw Tang, Andreas Dilger, libaokun, jack, ojaswin, linux-ext4,
	linux-kernel, yi.zhang, syzkaller-bugs
In-Reply-To: <4e76eb68-862d-4b9f-8242-e6aced2704ee@gmail.com>

On Sat, Apr 25, 2026 at 02:00:23PM -0400, Demi Marie Obenour wrote:
> 
> Changing block devices that are mounted is also reachable via USB.
> Yes, some distros may disable automount, but users who have stuff to
> get done will mount USB devices anyway.  Telling users "don't do this"
> very rarely works in practice.

How can an unprivileged user change the contents of a USB device while
it is mounted?

Are you positing evil USB devices that can return block contents A at
time t, and block contents B at time t+1?

The threat model that we are using is that the USB device is set to
a particular state *before* the file system is mounted, and then the
KGB scatters the USB device in the parking lot, and then someone picks
up the USB device in the Raytheon parking lot, and says, "hey, free
hardware", takes it into the classified machine room, inserts it into
the server, and mounts it.  This might be considered likely or not
likely, but speaking as someone who has been in a top secret machine
room at a defense contractor, they were *way* less protected than what
I've seen at a financial services company, or at a data center at a
hyperscaler.

But be that as it may, even *then* you're not modifying the block
device while it is mounted.

> 2. Harden the kernel filesystem drivers against malicious devices,
>    including TOCTOU.

Malicious devices that have their own microcomputer and can change the
block contents under the control of the attacker is *just* not
something I care about.  I also don't think it's a particularly
realistic threat model.

Cheers,

						- Ted

^ permalink raw reply

* why does ext4_sync_parent() bother with d_find_any_alias() at all?
From: Al Viro @ 2026-04-26  6:10 UTC (permalink / raw)
  To: Theodore Ts'o; +Cc: linux-fsdevel, linux-ext4

	The caller has a dentry for that inode, after all...
Am I missing something there?  IOW, is there anything wrong
with something like (untested) patch below?

diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 924726dcc85f..c635cd732c2e 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -43,16 +43,14 @@
  * the parent directory's parent as well, and so on recursively, if
  * they are also freshly created.
  */
-static int ext4_sync_parent(struct inode *inode)
+static int ext4_sync_parent(struct inode *inode, struct dentry *dentry)
 {
-	struct dentry *dentry, *next;
+	struct dentry *next;
 	int ret = 0;
 
 	if (!ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY))
 		return 0;
-	dentry = d_find_any_alias(inode);
-	if (!dentry)
-		return 0;
+	dget(dentry);
 	while (ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY)) {
 		ext4_clear_inode_state(inode, EXT4_STATE_NEWENTRY);
 
@@ -99,7 +97,7 @@ static int ext4_fsync_nojournal(struct file *file, loff_t start, loff_t end,
 	if (ret)
 		return ret;
 
-	ret = ext4_sync_parent(inode);
+	ret = ext4_sync_parent(inode, file->f_path.dentry);
 
 	if (test_opt(inode->i_sb, BARRIER))
 		*needs_barrier = true;

^ permalink raw reply related

* [syzbot] [ext4?] BUG: sleeping function called from invalid context in jbd2_journal_commit_transaction
From: syzbot @ 2026-04-26 21:18 UTC (permalink / raw)
  To: jack, linux-ext4, linux-kernel, syzkaller-bugs, tytso

Hello,

syzbot found the following issue on:

HEAD commit:    6596a02b2078 Merge tag 'drm-next-2026-04-22' of https://gi..
git tree:       upstream
console output: https://syzkaller.appspot.com/x/log.txt?x=120d3702580000
kernel config:  https://syzkaller.appspot.com/x/.config?x=80b28e8d6ef9384a
dashboard link: https://syzkaller.appspot.com/bug?extid=06294fd562f060379c91
compiler:       Debian clang version 21.1.8 (++20251221033036+2078da43e25a-1~exp1~20251221153213.50), Debian LLD 21.1.8

Unfortunately, I don't have any reproducer for this issue yet.

Downloadable assets:
disk image: https://storage.googleapis.com/syzbot-assets/5b2b3122db47/disk-6596a02b.raw.xz
vmlinux: https://storage.googleapis.com/syzbot-assets/7ee12b37f6e5/vmlinux-6596a02b.xz
kernel image: https://storage.googleapis.com/syzbot-assets/aca25073993c/bzImage-6596a02b.xz

IMPORTANT: if you fix the issue, please add the following tag to the commit:
Reported-by: syzbot+06294fd562f060379c91@syzkaller.appspotmail.com

[   92.150581][ T5155] BUG: sleeping function called from invalid context at fs/jbd2/commit.c:1071
in_atomic(): 0, irqs_disabled(): 0, non_block: 0, pid: 5155, name: jbd2/sda1-8
preempt_count: 0, expected: 0
RCU nest depth: 2, expected: 1
3 locks held by jbd2/sda1-8/5155:
 #0: ffffffff8dfc80c0 (rcu_read_lock){....}-{1:3}, at: rcu_lock_acquire include/linux/rcupdate.h:300 [inline]
 #0: ffffffff8dfc80c0 (rcu_read_lock){....}-{1:3}, at: rcu_read_lock include/linux/rcupdate.h:838 [inline]
 #0: ffffffff8dfc80c0 (rcu_read_lock){....}-{1:3}, at: __rt_spin_lock kernel/locking/spinlock_rt.c:50 [inline]
 #0: ffffffff8dfc80c0 (rcu_read_lock){....}-{1:3}, at: rt_spin_lock+0x1e0/0x400 kernel/locking/spinlock_rt.c:57
 #1: ffff888036ea4920 (&journal->j_list_lock){+.+.}-{3:3}, at: spin_lock include/linux/spinlock_rt.h:45 [inline]
 #1: ffff888036ea4920 (&journal->j_list_lock){+.+.}-{3:3}, at: jbd2_journal_commit_transaction+0x3ad1/0x5b20 fs/jbd2/commit.c:993
 #2: ffffffff8dfc80c0 (rcu_read_lock){....}-{1:3}, at: rcu_lock_acquire include/linux/rcupdate.h:300 [inline]
 #2: ffffffff8dfc80c0 (rcu_read_lock){....}-{1:3}, at: rcu_read_lock include/linux/rcupdate.h:838 [inline]
 #2: ffffffff8dfc80c0 (rcu_read_lock){....}-{1:3}, at: __rt_spin_lock kernel/locking/spinlock_rt.c:50 [inline]
 #2: ffffffff8dfc80c0 (rcu_read_lock){....}-{1:3}, at: rt_spin_lock+0x1e0/0x400 kernel/locking/spinlock_rt.c:57
CPU: 0 UID: 0 PID: 5155 Comm: jbd2/sda1-8 Not tainted syzkaller #0 PREEMPT_{RT,(full)} 
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 03/18/2026
Call Trace:
 <TASK>
 dump_stack_lvl+0xe8/0x150 lib/dump_stack.c:120
 __might_resched+0x329/0x480 kernel/sched/core.c:9162
 jbd2_journal_commit_transaction+0x3f59/0x5b20 fs/jbd2/commit.c:1071
 kjournald2+0x3e0/0x760 fs/jbd2/journal.c:201
 kthread+0x388/0x470 kernel/kthread.c:436
 ret_from_fork+0x514/0xb70 arch/x86/kernel/process.c:158
 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:245
 </TASK>
------------[ cut here ]------------
Voluntary context switch within RCU read-side critical section!
WARNING: kernel/rcu/tree_plugin.h:332 at rcu_note_context_switch+0xcac/0xf40 kernel/rcu/tree_plugin.h:332, CPU#0: jbd2/sda1-8/5155
Modules linked in:
CPU: 0 UID: 0 PID: 5155 Comm: jbd2/sda1-8 Tainted: G        W           syzkaller #0 PREEMPT_{RT,(full)} 
Tainted: [W]=WARN
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 03/18/2026
RIP: 0010:rcu_note_context_switch+0xcac/0xf40 kernel/rcu/tree_plugin.h:332
Code: 00 41 c6 45 00 00 48 8b 3d a1 20 e1 0d 48 81 c4 b8 00 00 00 5b 41 5c 41 5d 41 5e 41 5f 5d e9 bb 5f ff ff 48 8d 3d a4 e2 e4 0d <67> 48 0f b9 3a e9 1b f4 ff ff 90 0f 0b 90 45 84 e4 0f 84 ea f3 ff
RSP: 0018:ffffc90010177930 EFLAGS: 00010002
RAX: 0000000000000000 RBX: ffff888036ec5c40 RCX: 0000000080000002
RDX: 0000000000000000 RSI: ffffffff8ba74040 RDI: ffffffff8f8f3dd0
RBP: dffffc0000000000 R08: ffffffff8f8bcef7 R09: 1ffffffff1f179de
R10: dffffc0000000000 R11: fffffbfff1f179df R12: 0000000000000000
R13: dffffc0000000000 R14: ffff8880b883c800 R15: ffff888036ec6104
FS:  0000000000000000(0000) GS:ffff8881260fb000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 000055557df50340 CR3: 0000000011b24000 CR4: 00000000003526f0
Call Trace:
 <TASK>
 __schedule+0x291/0x54c0 kernel/sched/core.c:7043
 __schedule_loop kernel/sched/core.c:7267 [inline]
 schedule+0x164/0x360 kernel/sched/core.c:7282
 kjournald2+0x410/0x760 fs/jbd2/journal.c:230
 kthread+0x388/0x470 kernel/kthread.c:436
 ret_from_fork+0x514/0xb70 arch/x86/kernel/process.c:158
 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:245
 </TASK>
----------------
Code disassembly (best guess):
   0:	00 41 c6             	add    %al,-0x3a(%rcx)
   3:	45 00 00             	add    %r8b,(%r8)
   6:	48 8b 3d a1 20 e1 0d 	mov    0xde120a1(%rip),%rdi        # 0xde120ae
   d:	48 81 c4 b8 00 00 00 	add    $0xb8,%rsp
  14:	5b                   	pop    %rbx
  15:	41 5c                	pop    %r12
  17:	41 5d                	pop    %r13
  19:	41 5e                	pop    %r14
  1b:	41 5f                	pop    %r15
  1d:	5d                   	pop    %rbp
  1e:	e9 bb 5f ff ff       	jmp    0xffff5fde
  23:	48 8d 3d a4 e2 e4 0d 	lea    0xde4e2a4(%rip),%rdi        # 0xde4e2ce
* 2a:	67 48 0f b9 3a       	ud1    (%edx),%rdi <-- trapping instruction
  2f:	e9 1b f4 ff ff       	jmp    0xfffff44f
  34:	90                   	nop
  35:	0f 0b                	ud2
  37:	90                   	nop
  38:	45 84 e4             	test   %r12b,%r12b
  3b:	0f                   	.byte 0xf
  3c:	84 ea                	test   %ch,%dl
  3e:	f3                   	repz
  3f:	ff                   	.byte 0xff


---
This report is generated by a bot. It may contain errors.
See https://goo.gl/tpsmEJ for more information about syzbot.
syzbot engineers can be reached at syzkaller@googlegroups.com.

syzbot will keep track of this issue. See:
https://goo.gl/tpsmEJ#status for how to communicate with syzbot.

If the report is already addressed, let syzbot know by replying with:
#syz fix: exact-commit-title

If you want to overwrite report's subsystems, reply with:
#syz set subsystems: new-subsystem
(See the list of subsystem names on the web dashboard)

If the report is a duplicate of another one, reply with:
#syz dup: exact-subject-of-another-report

If you want to undo deduplication, reply with:
#syz undup

^ permalink raw reply

* Re: [PATCH v3 v3 2/2] ext4: allow clearing mballoc stats through mb_stats
From: liubaolin @ 2026-04-27  1:24 UTC (permalink / raw)
  To: Theodore Tso
  Cc: libaokun, adilger.kernel, ojaswin, ritesh.list, yi.zhang,
	linux-ext4, linux-kernel, wangguanyu, Baolin Liu, Andreas Dilger
In-Reply-To: <20260424120702.GD11127@macsyma-wired.lan>

Dear Ted,
    Thank you and Baokun for your review and suggestions.
    I will incorporate your suggestions and submit the v4 patch as soon 
as possible.

Thanks,
Baolin

在 2026/4/24 20:07, Theodore Tso 写道:
> On Fri, Apr 24, 2026 at 04:09:31PM +0800, liubaolin wrote:
>>
>>     2. Do not delete the `/sys/fs/.../mb_stats` node for now; implement the
>> same write control logic.
>> 	* Write 0 to `/sys/fs/.../mb_stats` to disable statistics collection.
>> 	* Write 1 to `/sys/fs/.../mb_stats` to enable statistics collection.
>> 	* Write 2 to `/sys/fs/.../mb_stats` to clear statistics counters.
> 
> We could do that, but note that currently writing to
> /sys/fs/.../mb_stats just sets an unsigned integer in
> EXT4(sb)->s_mb_stats.  There is no ext4-specific function that runs
> when /sys/fs/.../mb_stats is updated.
> 
> So either you have to add some check in fs/ext4/mballoc.c which gets
> called every single time a block allocation happens --- and consider
> the race condition where two CPUs are checking s_mb_stats at the same
> time, and the desirability of adding a spinlock that would need to be
> taken every single time a block allocation happens --- or you have
> to add an ext4-specific function in fs/ext4/sysfs.c.
> 
>>     Compared to your suggestion, I recommend using the value 2 for the clear
>> operation because s_mb_stats is an unsigned int variable, and using -1
>> requires changing the variable type.
> 
> Well, since you have introduced an ext4-specific function which gets
> called when writing to the procfs file, that function can clear the
> statistics counter when -1 is written to the file --- and then set
> s_mb_stats to 1.
> 
> Cheers,
> 
> 						- Ted


^ permalink raw reply

* Re: [PATCH v3 v3 2/2] ext4: allow clearing mballoc stats through mb_stats
From: liubaolin @ 2026-04-27  1:29 UTC (permalink / raw)
  To: Baokun Li, Theodore Tso
  Cc: adilger.kernel, ojaswin, ritesh.list, yi.zhang, linux-ext4,
	linux-kernel, wangguanyu, Baolin Liu, Andreas Dilger
In-Reply-To: <5eaa521b-28b0-4c2a-a33d-57d1449f125e@linux.alibaba.com>

Dear Baokun,
    Thank you and Ted for your review and suggestions.
    I will incorporate your suggestions and submit the v4 patch as soon 
as possible.

Thanks,
Baolin

在 2026/4/24 17:34, Baokun Li 写道:
> 
> On 2026/4/24 16:09, liubaolin wrote:
>>
>>
>> 在 2026/4/24 0:19, Theodore Tso 写道:
>>> On Wed, Apr 22, 2026 at 09:50:25AM +0800, Baolin Liu wrote:
>>>> From: Baolin Liu <liubaolin@kylinos.cn>
>>>>
>>>> Make /proc/fs/ext4/<dev>/mb_stats writable and clear the runtime
>>>> mballoc statistics when 0 is written.
>>>
>>> At the moment to enable mb_stats the system administrator needs to
>>> write "1" to /sys/fs/ext4/<dev>/mb_stats, and writing "0" to the sysfs
>>> file will pause the statistics collection (but not clear the
>>> statistics).  Adding a way to clear the statistics by writing to the
>>> procfs file might be confusing to users.
>>>
>>> So.... as a suggestion, if you're adding the ability to write to
>>> /proc/fs/.../mb_stats, what if we make things work by
>>>
>>>      * Write 1 to /proc/fs/.../mb_stats to  enable statistics collection
>>>      * Write 0 to /proc/fs/.../mb_stats to  disable statistics collection
>>>      * Write -1 to /proc/fs/.../mb_stats to clear statistics counters
>>>
>>> And then deprecate the /sys/fs/.../mb_stats variable (but we probably
>>> won't be able to remove it for at least a year or two).
>>>
>>>                                          - Ted
>> Dear Ted, Baokun,
>>     Thank you for your review and suggestions.
>>     Since you mentioned that /sys/fs/.../mb_stats cannot be deleted in
>> the short term,
>>     I plan to modify and submit a v4 patch according to the following
>> strategy.
>>
>>     1. Change `/proc/fs/.../mb_stats` to read-write mode.
>>      * Read `/proc/fs/.../mb_stats` to show statistics counters.
>>      * Write 0 to `/proc/fs/.../mb_stats` to disable statistics
>> collection.
>>      * Write 1 to `/proc/fs/.../mb_stats` to enable statistics collection.
>>      * Write 2 to `/proc/fs/.../mb_stats` to clear statistics counters.
>>
>>     2. Do not delete the `/sys/fs/.../mb_stats` node for now; implement
>> the same write control logic.
>>      * Write 0 to `/sys/fs/.../mb_stats` to disable statistics collection.
>>      * Write 1 to `/sys/fs/.../mb_stats` to enable statistics collection.
>>      * Write 2 to `/sys/fs/.../mb_stats` to clear statistics counters.
>>
>>      Delete `/sys/fs/.../mb_stats` later when it is possible to delete it.
>>
>>     3. Modify the relevant documentation for `mb_stats`.
>>      Documentation/ABI/testing/sysfs-fs-ext4
>>      Documentation/admin-guide/ext4.rst
>>      Documentation/filesystems/proc.rst
>>
>>     Compared to your suggestion, I recommend using the value 2 for the
>> clear operation because s_mb_stats is an unsigned int variable, and
>> using -1 requires changing the variable type.
>>     I suggest avoiding changing the s_mb_stats variable type unless
>> absolutely necessary.
>>
>>     Do you think this modification is appropriate?
>>     If there are no problems, I will start modifying the code and
>> submit the v4 patch as soon as possible.
> 
> For the clear command, we only handle it without storing it, so s_mb_stats
> remains unchanged and still stores only 0 and non-zero values to represent
> disabled and enabled, respectively. Otherwise, you will have to deal with
> a large number of s_mb_stats checks.
> 
> That means the /sys/fs/.../mb_stats interface does not need to support
> clearing, but it might make sense to add a deprecation warning there.
> 
> Then in `/proc/fs/.../mb_stats`, writing 0 or a positive number passes
> it to s_mb_stats, writing -1 performs a reset, and other negative values
> return -EINVAL.
> 
> 
> Cheers,
> Baokun
> 


^ permalink raw reply

* Re: [PATCH] ext4: fix LOGFLUSH shutdown ordering to allow ordered-mode data writeback
From: Jan Kara @ 2026-04-27  9:53 UTC (permalink / raw)
  To: Zhang Yi
  Cc: linux-ext4, linux-fsdevel, linux-kernel, tytso, adilger.kernel,
	libaokun, jack, ojaswin, ritesh.list, yi.zhang, yizhang089,
	yangerkun, yukuai
In-Reply-To: <20260424104201.1930823-1-yi.zhang@huaweicloud.com>

On Fri 24-04-26 18:42:01, Zhang Yi wrote:
> From: Zhang Yi <yi.zhang@huawei.com>
> 
> In EXT4_GOING_FLAGS_LOGFLUSH mode, the EXT4_FLAGS_SHUTDOWN flag was set
> before calling ext4_force_commit().  This caused ordered-mode data
> writeback (triggered by journal commit) to fail with -EIO, since
> ext4_do_writepages() checks for the shutdown flag.  The journal would
> then be aborted prematurely before the commit could succeed.
> 
> Fix this by calling ext4_force_commit() first, then setting the
> shutdown flag, so that pending data can be written back correctly.
> 
> Note that moving ext4_force_commit() before setting the shutdown flag
> creates a small window in which new writes may occur and generate new
> journal transactions.  When the journal is subsequently aborted, the
> new transactions will not be able to write to disk.  This is intentional
> because LOGFLUSH's semantics are to flush pre-existing journal entries
> before shutdown, not to guarantee atomicity for writes that race with
> the ioctl.
> 
> Fixes: 783d94854499 ("ext4: add EXT4_IOC_GOINGDOWN ioctl")
> Signed-off-by: Zhang Yi <yi.zhang@huawei.com>

Looks good. Feel free to add:

Reviewed-by: Jan Kara <jack@suse.cz>

								Honza

> ---
> This fix addresses my new generic/970 test, which fails during the file
> size verification after shutdown and remount.
> 
>  https://lore.kernel.org/fstests/20260424092228.1396658-1-yi.zhang@huaweicloud.com/
> 
>  fs/ext4/ioctl.c | 12 +++++++++---
>  1 file changed, 9 insertions(+), 3 deletions(-)
> 
> diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
> index 1d0c3d4bdf47..110e3fb194ec 100644
> --- a/fs/ext4/ioctl.c
> +++ b/fs/ext4/ioctl.c
> @@ -830,11 +830,17 @@ int ext4_force_shutdown(struct super_block *sb, u32 flags)
>  		bdev_thaw(sb->s_bdev);
>  		break;
>  	case EXT4_GOING_FLAGS_LOGFLUSH:
> +		/*
> +		 * Call ext4_force_commit() before setting EXT4_FLAGS_SHUTDOWN.
> +		 * This is because in data=ordered mode, journal commit
> +		 * triggers data writeback which fails if shutdown is already
> +		 * set, causing the journal to be aborted prematurely before
> +		 * the commit succeeds.
> +		 */
> +		(void) ext4_force_commit(sb);
>  		set_bit(EXT4_FLAGS_SHUTDOWN, &sbi->s_ext4_flags);
> -		if (sbi->s_journal && !is_journal_aborted(sbi->s_journal)) {
> -			(void) ext4_force_commit(sb);
> +		if (sbi->s_journal && !is_journal_aborted(sbi->s_journal))
>  			jbd2_journal_abort(sbi->s_journal, -ESHUTDOWN);
> -		}
>  		break;
>  	case EXT4_GOING_FLAGS_NOLOGFLUSH:
>  		set_bit(EXT4_FLAGS_SHUTDOWN, &sbi->s_ext4_flags);
> -- 
> 2.52.0
> 
-- 
Jan Kara <jack@suse.com>
SUSE Labs, CR

^ permalink raw reply

* Re: [PATCH v2 3/3] ext4: derive f_fsid from block device to avoid collisions
From: Anand Jain @ 2026-04-27 10:16 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Theodore Tso, Darrick J. Wong, linux-ext4, linux-btrfs, linux-xfs,
	Anand Jain, dsterba
In-Reply-To: <aempTb5TyKG0vPL0@infradead.org>



On 23/4/26 13:08, Christoph Hellwig wrote:
> On Wed, Apr 22, 2026 at 07:39:57PM +0800, Anand Jain wrote:
>>> No, T10 does not actually mandate unique identifiers, NVMe does, but the
>>> implementations are often totally broken.
>>
>> Right. Newer SPC-3 (and above) compliant devices must support
>> the Inquiry CDB EVPD flag and provide page 0x83 for identification,
>> which is what we typically use for multipathing.
> 
> But there is no requirement for it to contain something useful.
> 
>> These are globally unique. And, we can overlook legacy
>> drives, as they've probably been past their EOSL for a while now.
> 
> We have absolutely no useful identifiers for most USB devices.
> NVMe devices have broken identifiers all the time as well.
> 
> So no, you can't.
 Agreed. I have dropped the plan to rely on hardware identifiers;
 the lack of consistency in USB/NVMe firmware makes them a
 non-starter for a stable identifier.

 The updated patch set, excluding the ext4 changes, has been
 submitted to the mailing list. Fstests test case updated as well.

Thanks, Anand

^ permalink raw reply

* [PATCH v3 0/9] fstests: add test coverage for cloned filesystem ids
From: Anand Jain @ 2026-04-27 10:19 UTC (permalink / raw)
  To: fstests
  Cc: linux-btrfs, linux-ext4, linux-xfs, linux-f2fs, amir73il, zlang,
	hch

v3:
 Dropped the -o nouuid mount option.
 Added a Btrfs test case to verify clones of filesystems using the
   metadata_uuid superblock feature.
 Updated _loop_image_create_clone() to handle necessary pre-clone
   filesystem tuning.

v2:
  https://lore.kernel.org/fstests/cover.1774090817.git.asj@kernel.org

Anand Jain (9):
  fstests: add _loop_image_create_clone() helper
  fstests: add _clone_mount_option() helper
  fstests: add test for inotify isolation on cloned devices
  fstests: verify fanotify isolation on cloned filesystems
  fstests: verify f_fsid for cloned filesystems
  fstests: verify libblkid resolution of duplicate UUIDs
  fstests: verify IMA isolation on cloned filesystems
  fstests: verify exportfs file handles on cloned filesystems
  fstests: btrfs: test UUID consistency for clones with metadata_uuid

 common/config         |   2 +
 common/rc             |  58 +++++++++++++++++++++
 tests/btrfs/348       |  91 +++++++++++++++++++++++++++++++++
 tests/btrfs/348.out   |  19 +++++++
 tests/generic/800     |  88 ++++++++++++++++++++++++++++++++
 tests/generic/800.out |   7 +++
 tests/generic/801     | 115 ++++++++++++++++++++++++++++++++++++++++++
 tests/generic/801.out |   7 +++
 tests/generic/802     |  61 ++++++++++++++++++++++
 tests/generic/802.out |   7 +++
 tests/generic/803     |  75 +++++++++++++++++++++++++++
 tests/generic/803.out |  19 +++++++
 tests/generic/804     | 102 +++++++++++++++++++++++++++++++++++++
 tests/generic/804.out |  10 ++++
 tests/generic/805     |  72 ++++++++++++++++++++++++++
 tests/generic/805.out |   2 +
 16 files changed, 735 insertions(+)
 create mode 100644 tests/btrfs/348
 create mode 100644 tests/btrfs/348.out
 create mode 100644 tests/generic/800
 create mode 100644 tests/generic/800.out
 create mode 100644 tests/generic/801
 create mode 100644 tests/generic/801.out
 create mode 100644 tests/generic/802
 create mode 100644 tests/generic/802.out
 create mode 100644 tests/generic/803
 create mode 100644 tests/generic/803.out
 create mode 100644 tests/generic/804
 create mode 100644 tests/generic/804.out
 create mode 100644 tests/generic/805
 create mode 100644 tests/generic/805.out

-- 
2.43.0


^ permalink raw reply

* [PATCH v3 1/9] fstests: add _loop_image_create_clone() helper
From: Anand Jain @ 2026-04-27 10:19 UTC (permalink / raw)
  To: fstests
  Cc: linux-btrfs, linux-ext4, linux-xfs, linux-f2fs, amir73il, zlang,
	hch
In-Reply-To: <cover.1777281778.git.asj@kernel.org>

Introduce _loop_image_create_clone(), which runs mkfs on an image file,
clones it to a second image file, and attaches a loop device to each, and
_loop_image_destroy(), which detaches the loop devices and removes the images.

Signed-off-by: Anand Jain <asj@kernel.org>
---
 common/rc | 44 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)

diff --git a/common/rc b/common/rc
index 9632b211b58f..0e7b7eb1d98f 100644
--- a/common/rc
+++ b/common/rc
@@ -1503,6 +1503,50 @@ _scratch_resvblks()
 	esac
 }
 
+_loop_image_create_clone()
+{
+	local -n _ret=$1
+	local pre_clone_tune_func=$2
+	local img_file=$TEST_DIR/${seq}.img
+	local img_file_clone=$TEST_DIR/${seq}_clone.img
+	local size=$(_small_fs_size_mb 128) # Smallest possible
+	local loop_devs
+
+	size=$((size * 1024 * 1024))
+	$XFS_IO_PROG -f -c "truncate $size" $img_file
+
+	loop_devs=$(_create_loop_device $img_file)
+	_ret=($loop_devs)
+
+	case $FSTYP in
+	xfs)
+		_mkfs_dev "-s size=4096" $img_file
+		;;
+	btrfs)
+		_mkfs_dev $img_file
+		;;
+	*)
+		_mkfs_dev $img_file
+		;;
+	esac
+
+	($pre_clone_tune_func)
+
+	cp $img_file $img_file_clone
+
+	loop_devs="$loop_devs $(_create_loop_device $img_file_clone)"
+
+	_ret=($loop_devs)
+}
+
+_loop_image_destroy()
+{
+	for d in "$@"; do
+		local f=$(losetup --noheadings --output BACK-FILE $d)
+		_destroy_loop_device "$d"
+		[ -n "$f" ] && rm -f "$f"
+	done
+}
 
 # Repair scratch filesystem.  Returns 0 if the FS is good to go (either no
 # errors found or errors were fixed) and nonzero otherwise; also spits out
-- 
2.43.0


^ permalink raw reply related

* [PATCH v3 2/9] fstests: add _clone_mount_option() helper
From: Anand Jain @ 2026-04-27 10:19 UTC (permalink / raw)
  To: fstests
  Cc: linux-btrfs, linux-ext4, linux-xfs, linux-f2fs, amir73il, zlang,
	hch
In-Reply-To: <cover.1777281778.git.asj@kernel.org>

Add a _clone_mount_option() helper function to handle filesystem-specific
requirements for mounting cloned devices. It abstracts the need for
-o nouuid on XFS.

Signed-off-by: Anand Jain <asj@kernel.org>
---
 common/rc | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/common/rc b/common/rc
index 0e7b7eb1d98f..2d28e174f605 100644
--- a/common/rc
+++ b/common/rc
@@ -397,6 +397,20 @@ _scratch_mount_options()
 					$SCRATCH_DEV $SCRATCH_MNT
 }
 
+_clone_mount_option()
+{
+	local mount_opts=""
+
+	case "$FSTYP" in
+	xfs)
+		mount_opts="-o nouuid"
+		;;
+	*)
+	esac
+
+	echo $mount_opts
+}
+
 _supports_filetype()
 {
 	local dir=$1
-- 
2.43.0


^ permalink raw reply related

* [PATCH v3 3/9] fstests: add test for inotify isolation on cloned devices
From: Anand Jain @ 2026-04-27 10:19 UTC (permalink / raw)
  To: fstests
  Cc: linux-btrfs, linux-ext4, linux-xfs, linux-f2fs, amir73il, zlang,
	hch
In-Reply-To: <cover.1777281778.git.asj@kernel.org>

Add a new test, to verify that the kernel correctly differentiates between
two block devices sharing the same FSID/UUID.

Signed-off-by: Anand Jain <asj@kernel.org>
---
 common/config         |  1 +
 tests/generic/800     | 88 +++++++++++++++++++++++++++++++++++++++++++
 tests/generic/800.out |  7 ++++
 3 files changed, 96 insertions(+)
 create mode 100644 tests/generic/800
 create mode 100644 tests/generic/800.out

diff --git a/common/config b/common/config
index 4fd4c2c8af11..605a57947a40 100644
--- a/common/config
+++ b/common/config
@@ -242,6 +242,7 @@ export BTRFS_MAP_LOGICAL_PROG=$(type -P btrfs-map-logical)
 export PARTED_PROG="$(type -P parted)"
 export XFS_PROPERTY_PROG="$(type -P xfs_property)"
 export FSCRYPTCTL_PROG="$(type -P fscryptctl)"
+export INOTIFYWAIT_PROG="$(type -P inotifywait)"
 
 # udev wait functions.
 #
diff --git a/tests/generic/800 b/tests/generic/800
new file mode 100644
index 000000000000..16bc1159a2e1
--- /dev/null
+++ b/tests/generic/800
@@ -0,0 +1,88 @@
+#! /bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2026 Anand Jain <asj@kernel.org>.  All Rights Reserved.
+#
+# FS QA Test 800
+#
+# Verify if the kernel or userspace becomes confused when two block devices
+# share the same fid/fsid/uuid. Set up inotify watchers on both the original
+# and the cloned filesystem, then check the events in the respective logs.
+
+. ./common/preamble
+
+_begin_fstest auto quick mount clone
+
+_require_test
+_require_loop
+_require_command "$INOTIFYWAIT_PROG" inotifywait
+
+_cleanup()
+{
+	cd /
+	[[ -n $pid1 ]] && { kill -TERM "$pid1" 2> /dev/null; wait $pid1; }
+	[[ -n $pid2 ]] && { kill -TERM "$pid2" 2> /dev/null; wait $pid2; }
+	rm -r -f $tmp.*
+	_unmount $mnt1 2>/dev/null
+	_unmount $mnt2 2>/dev/null
+	_loop_image_destroy "${devs[@]}" 2> /dev/null
+}
+
+devs=()
+_loop_image_create_clone devs
+mkdir -p $TEST_DIR/$seq
+mnt1=$TEST_DIR/$seq/mnt1
+mnt2=$TEST_DIR/$seq/mnt2
+mkdir -p $mnt1
+mkdir -p $mnt2
+
+_mount $(_common_dev_mount_options) $(_clone_mount_option) ${devs[0]} $mnt1 || \
+						_fail "Failed to mount dev1"
+_mount $(_common_dev_mount_options) $(_clone_mount_option) ${devs[1]} $mnt2 || \
+						_fail "Failed to mount dev2"
+
+log1=$tmp.inotify1
+log2=$tmp.inotify2
+
+pid1=""
+pid2=""
+echo "Setup inotify watchers on both mnt1 and mnt2"
+$INOTIFYWAIT_PROG -m -e create --format '%f' $mnt1 > $log1 2>&1 &
+pid1=$!
+$INOTIFYWAIT_PROG -m -e create --format '%f' $mnt2 > $log2 2>&1 &
+pid2=$!
+sleep 2
+
+echo "Trigger file creation on mnt1"
+touch $mnt1/file_on_mnt1
+sync
+sleep 1
+
+echo "Trigger file creation on mnt2"
+touch $mnt2/file_on_mnt2
+sync
+sleep 1
+
+echo "Verify inotify isolation"
+kill $pid1 $pid2
+wait $pid1 $pid2 2>/dev/null
+pid1=""
+pid2=""
+
+if grep -q "file_on_mnt1" $log1 && ! grep -q "file_on_mnt2" $log1; then
+	echo "SUCCESS: mnt1 events isolated."
+else
+	echo "FAIL: mnt1 inotify confusion!"
+	[ ! -s $log1 ] && echo "  - mnt1 received no events."
+	grep -q "file_on_mnt2" $log1 && echo "  - mnt1 received event from mnt2."
+fi
+
+if grep -q "file_on_mnt2" $log2 && ! grep -q "file_on_mnt1" $log2; then
+	echo "SUCCESS: mnt2 events isolated."
+else
+	echo "FAIL: mnt2 inotify confusion!"
+	[ ! -s $log2 ] && echo "  - mnt2 received no events."
+	grep -q "file_on_mnt1" $log2 && echo "  - mnt2 received event from mnt1."
+fi
+
+status=0
+exit
diff --git a/tests/generic/800.out b/tests/generic/800.out
new file mode 100644
index 000000000000..b10842a31210
--- /dev/null
+++ b/tests/generic/800.out
@@ -0,0 +1,7 @@
+QA output created by 800
+Setup inotify watchers on both mnt1 and mnt2
+Trigger file creation on mnt1
+Trigger file creation on mnt2
+Verify inotify isolation
+SUCCESS: mnt1 events isolated.
+SUCCESS: mnt2 events isolated.
-- 
2.43.0


^ permalink raw reply related

* [PATCH v3 4/9] fstests: verify fanotify isolation on cloned filesystems
From: Anand Jain @ 2026-04-27 10:19 UTC (permalink / raw)
  To: fstests
  Cc: linux-btrfs, linux-ext4, linux-xfs, linux-f2fs, amir73il, zlang,
	hch
In-Reply-To: <cover.1777281778.git.asj@kernel.org>

Verify that fanotify events are correctly routed to the appropriate
watcher when cloned filesystems are mounted.
This helps verify that the kernel's event notification distinguishes between
devices sharing the same FSID/UUID.

Signed-off-by: Anand Jain <asj@kernel.org>
---
 common/config         |   1 +
 tests/generic/801     | 115 ++++++++++++++++++++++++++++++++++++++++++
 tests/generic/801.out |   7 +++
 3 files changed, 123 insertions(+)
 create mode 100644 tests/generic/801
 create mode 100644 tests/generic/801.out

diff --git a/common/config b/common/config
index 605a57947a40..1588bdcb1aa1 100644
--- a/common/config
+++ b/common/config
@@ -243,6 +243,7 @@ export PARTED_PROG="$(type -P parted)"
 export XFS_PROPERTY_PROG="$(type -P xfs_property)"
 export FSCRYPTCTL_PROG="$(type -P fscryptctl)"
 export INOTIFYWAIT_PROG="$(type -P inotifywait)"
+export FSNOTIFYWAIT_PROG="$(type -P fsnotifywait)"
 
 # udev wait functions.
 #
diff --git a/tests/generic/801 b/tests/generic/801
new file mode 100644
index 000000000000..3f64e4de2206
--- /dev/null
+++ b/tests/generic/801
@@ -0,0 +1,115 @@
+#! /bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2026 Anand Jain <asj@kernel.org>.  All Rights Reserved.
+#
+# FS QA Test 801
+# Verify fanotify FID functionality on cloned filesystems by setting up
+# watchers and making sure notifications are in the correct log files.
+
+. ./common/preamble
+
+_begin_fstest auto quick mount clone
+
+_require_test
+_require_loop
+_require_command "$FSNOTIFYWAIT_PROG" fsnotifywait
+
+[ "$FSTYP" = "ext4" ] && _fixed_by_kernel_commit xxxxxxxxxxxx \
+	"ext4: derive f_fsid from block device to avoid collisions"
+
+_cleanup()
+{
+	cd /
+	[[ -n $pid1 ]] && { kill -TERM "$pid1" 2> /dev/null; wait $pid1; }
+	[[ -n $pid2 ]] && { kill -TERM "$pid2" 2> /dev/null; wait $pid2; }
+	umount $mnt1 $mnt2 2>/dev/null
+	_loop_image_destroy "${devs[@]}" 2> /dev/null
+	rm -r -f $tmp.*
+}
+
+monitor_fanotify()
+{
+	local mmnt=$1
+	exec stdbuf -oL $FSNOTIFYWAIT_PROG -m -F -S -e create "$mmnt" 2>&1
+}
+
+fsid_to_fid_parts()
+{
+	local fsid=$1
+	# Pad to 16 hex chars (64-bit), then split into two 32-bit halves
+	local padded=$(printf '%016x' "0x${fsid}")
+	local hi=$(printf '%x' "0x${padded:0:8}")   # strips leading zeros
+	local lo=$(printf '%x' "0x${padded:8:8}")   # strips leading zeros
+	echo "${hi}.${lo}"
+}
+
+devs=()
+_loop_image_create_clone devs
+mkdir -p $TEST_DIR/$seq
+mnt1=$TEST_DIR/$seq/mnt1
+mnt2=$TEST_DIR/$seq/mnt2
+mkdir -p $mnt1
+mkdir -p $mnt2
+
+_mount $(_common_dev_mount_options) $(_clone_mount_option) ${devs[0]} $mnt1 || \
+						_fail "Failed to mount dev1"
+_mount $(_common_dev_mount_options) $(_clone_mount_option) ${devs[1]} $mnt2 || \
+						_fail "Failed to mount dev2"
+
+fsid1=$(stat -f -c "%i" $mnt1)
+fsid2=$(stat -f -c "%i" $mnt2)
+
+[[ "$fsid1" == "$fsid2" ]] && \
+	_notrun "Require clone filesystem with unique f_fsid"
+
+log1=$tmp.fanotify1
+log2=$tmp.fanotify2
+
+pid1=""
+pid2=""
+echo "Setup FID fanotify watchers on both mnt1 and mnt2"
+( monitor_fanotify "$mnt1" > "$log1" ) &
+pid1=$!
+( monitor_fanotify "$mnt2" > "$log2" ) &
+pid2=$!
+sleep 2
+
+echo "Trigger file creation on mnt1"
+touch $mnt1/file_on_mnt1
+sync
+sleep 1
+
+echo "Trigger file creation on mnt2"
+touch $mnt2/file_on_mnt2
+sync
+sleep 1
+
+echo "Verify fsid in the fanotify"
+kill $pid1 $pid2
+wait $pid1 $pid2 2>/dev/null
+pid1=""
+pid2=""
+
+e_fsid1=$(fsid_to_fid_parts "$fsid1")
+e_fsid2=$(fsid_to_fid_parts "$fsid2")
+
+echo $fsid1 $e_fsid1 $fsid2 $e_fsid2 >> $seqres.full
+cat $log1 >> $seqres.full
+cat $log2 >> $seqres.full
+
+if grep -qF "$e_fsid1" "$log1" && ! grep -qF "$e_fsid2" "$log1"; then
+	echo "SUCCESS: mnt1 events found"
+else
+	[ ! -s "$log1" ] && echo "  - mnt1 received no events."
+	grep -qF "$e_fsid2" "$log1" && echo "  - mnt1 received event from mnt2."
+fi
+
+if grep -qF "$e_fsid2" "$log2" && ! grep -qF "$e_fsid1" "$log2"; then
+	echo "SUCCESS: mnt2 events found"
+else
+	[ ! -s "$log2" ] && echo "  - mnt2 received no events."
+	grep -qF "$e_fsid1" "$log2" && echo "  - mnt2 received event from mnt1."
+fi
+
+status=0
+exit
diff --git a/tests/generic/801.out b/tests/generic/801.out
new file mode 100644
index 000000000000..d7b318d9f27c
--- /dev/null
+++ b/tests/generic/801.out
@@ -0,0 +1,7 @@
+QA output created by 801
+Setup FID fanotify watchers on both mnt1 and mnt2
+Trigger file creation on mnt1
+Trigger file creation on mnt2
+Verify fsid in the fanotify
+SUCCESS: mnt1 events found
+SUCCESS: mnt2 events found
-- 
2.43.0


^ permalink raw reply related

* [PATCH v3 5/9] fstests: verify f_fsid for cloned filesystems
From: Anand Jain @ 2026-04-27 10:19 UTC (permalink / raw)
  To: fstests
  Cc: linux-btrfs, linux-ext4, linux-xfs, linux-f2fs, amir73il, zlang,
	hch
In-Reply-To: <cover.1777281778.git.asj@kernel.org>

Verify that the cloned filesystem provides an f_fsid that is persistent
across mount cycles, yet unique from the original filesystem's f_fsid.

Signed-off-by: Anand Jain <asj@kernel.org>
---
 tests/generic/802     | 61 +++++++++++++++++++++++++++++++++++++++++++
 tests/generic/802.out |  7 +++++
 2 files changed, 68 insertions(+)
 create mode 100644 tests/generic/802
 create mode 100644 tests/generic/802.out

diff --git a/tests/generic/802 b/tests/generic/802
new file mode 100644
index 000000000000..83e75dcd0d78
--- /dev/null
+++ b/tests/generic/802
@@ -0,0 +1,61 @@
+#! /bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2026 Anand Jain <asj@kernel.org>.  All Rights Reserved.
+#
+# FS QA Test 802
+# Verify f_fsid and s_uuid of cloned filesystems across mount cycle.
+
+. ./common/preamble
+
+_begin_fstest auto quick mount clone
+
+_require_test
+_require_loop
+
+[ "$FSTYP" = "btrfs" ] && _fixed_by_kernel_commit xxxxxxxxxxxx \
+	"btrfs: use on-disk uuid for s_uuid in temp_fsid mounts"
+[ "$FSTYP" = "btrfs" ] && _fixed_by_kernel_commit xxxxxxxxxxxx \
+	"btrfs: derive f_fsid from on-disk fsuuid and dev_t"
+
+_cleanup()
+{
+	cd /
+	rm -r -f $tmp.*
+	umount $mnt1 $mnt2 2>/dev/null
+	_loop_image_destroy "${devs[@]}" 2> /dev/null
+}
+
+devs=()
+_loop_image_create_clone devs
+mkdir -p $TEST_DIR/$seq
+mnt1=$TEST_DIR/$seq/mnt1
+mnt2=$TEST_DIR/$seq/mnt2
+mkdir -p $mnt1
+mkdir -p $mnt2
+
+_mount $(_common_dev_mount_options) $(_clone_mount_option) ${devs[0]} $mnt1 || \
+						_fail "Failed to mount dev1"
+_mount $(_common_dev_mount_options) $(_clone_mount_option) ${devs[1]} $mnt2 || \
+						_fail "Failed to mount dev2"
+
+fsid_scratch=$(stat -f -c "%i" $mnt1)
+fsid_clone=$(stat -f -c "%i" $mnt2)
+
+echo "**** fsid initially ****"
+echo $fsid_scratch | sed -e "s/$fsid_scratch/FSID_SCRATCH/g"
+echo $fsid_clone | sed -e "s/$fsid_clone/FSID_CLONE/g"
+
+# Make sure the fsids still match across a mount cycle with the order reversed.
+echo "**** fsid after mount cycle ****"
+_unmount $mnt1
+_unmount $mnt2
+_mount $(_common_dev_mount_options) $(_clone_mount_option) ${devs[1]} $mnt2 || \
+						_fail "Failed to mount dev2"
+_mount $(_common_dev_mount_options) $(_clone_mount_option) ${devs[0]} $mnt1 || \
+						_fail "Failed to mount dev1"
+
+stat -f -c "%i" $mnt1 | sed -e "s/$fsid_scratch/FSID_SCRATCH/g"
+stat -f -c "%i" $mnt2 | sed -e "s/$fsid_clone/FSID_CLONE/g"
+
+status=0
+exit
diff --git a/tests/generic/802.out b/tests/generic/802.out
new file mode 100644
index 000000000000..d1e008f122bb
--- /dev/null
+++ b/tests/generic/802.out
@@ -0,0 +1,7 @@
+QA output created by 802
+**** fsid initially ****
+FSID_SCRATCH
+FSID_CLONE
+**** fsid after mount cycle ****
+FSID_SCRATCH
+FSID_CLONE
-- 
2.43.0


^ permalink raw reply related

* [PATCH v3 6/9] fstests: verify libblkid resolution of duplicate UUIDs
From: Anand Jain @ 2026-04-27 10:19 UTC (permalink / raw)
  To: fstests
  Cc: linux-btrfs, linux-ext4, linux-xfs, linux-f2fs, amir73il, zlang,
	hch
In-Reply-To: <cover.1777281778.git.asj@kernel.org>

Verify how findmnt, df (libblkid) resolve device paths when multiple
block devices share the same FSUUID.

Signed-off-by: Anand Jain <asj@kernel.org>
---
 tests/generic/803     | 75 +++++++++++++++++++++++++++++++++++++++++++
 tests/generic/803.out | 19 +++++++++++
 2 files changed, 94 insertions(+)
 create mode 100644 tests/generic/803
 create mode 100644 tests/generic/803.out

diff --git a/tests/generic/803 b/tests/generic/803
new file mode 100644
index 000000000000..1e0fbc768050
--- /dev/null
+++ b/tests/generic/803
@@ -0,0 +1,75 @@
+#! /bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2026 Anand Jain <asj@kernel.org>.  All Rights Reserved.
+#
+# FS QA Test 803
+# Verify how libblkid resolves devices when multiple devices share the
+# same FSUUID.
+
+. ./common/preamble
+. ./common/filter
+
+_begin_fstest auto quick mount clone
+
+_require_test
+_require_loop
+
+_cleanup()
+{
+	cd /
+	rm -r -f $tmp.*
+	umount $mnt1 $mnt2 2>/dev/null
+	_loop_image_destroy "${devs[@]}" 2> /dev/null
+}
+
+filter_pool()
+{
+	sed -e "s|${devs[0]}|DEV1|g" -e "s|${mnt1}|MNT1|g" \
+	    -e "s|${devs[1]}|DEV2|g" -e "s|${mnt2}|MNT2|g" | _filter_spaces
+}
+
+print_info()
+{
+	local mntpt=$1
+	local tgt=$(findmnt -no SOURCE $mntpt)
+	local fsuuid=$(blkid -s UUID -o value $tgt)
+
+	echo "mntpt=$mntpt tgt=$tgt fsuuid=$fsuuid" >> $seqres.full
+	echo
+	findmnt -o SOURCE,TARGET,UUID "$tgt" | tail -n +2 | \
+				sed -e "s/${fsuuid}/FSUUID/g" | filter_pool
+	awk -v dev="$tgt" '$1 == dev { print $1, $2 }' /proc/self/mounts | \
+								filter_pool
+	df --all --output=source,target "$tgt" | tail -n +2 | filter_pool
+}
+
+devs=()
+_loop_image_create_clone devs
+mkdir -p $TEST_DIR/$seq
+mnt1=$TEST_DIR/$seq/mnt1
+mnt2=$TEST_DIR/$seq/mnt2
+mkdir -p $mnt1
+mkdir -p $mnt2
+
+_mount $(_common_dev_mount_options) $(_clone_mount_option) ${devs[0]} $mnt1 || \
+						_fail "Failed to mount dev1"
+_mount $(_common_dev_mount_options) $(_clone_mount_option) ${devs[1]} $mnt2 || \
+						_fail "Failed to mount dev2"
+
+print_info $mnt1
+print_info $mnt2
+
+echo
+echo "**** mount cycle ****"
+_unmount $mnt1
+_unmount $mnt2
+_mount $(_common_dev_mount_options) $(_clone_mount_option) ${devs[1]} $mnt2 || \
+						_fail "Failed to mount dev2"
+_mount $(_common_dev_mount_options) $(_clone_mount_option) ${devs[0]} $mnt1 || \
+						_fail "Failed to mount dev1"
+
+print_info $mnt1
+print_info $mnt2
+
+status=0
+exit
diff --git a/tests/generic/803.out b/tests/generic/803.out
new file mode 100644
index 000000000000..20a1cb36a213
--- /dev/null
+++ b/tests/generic/803.out
@@ -0,0 +1,19 @@
+QA output created by 803
+
+DEV1 MNT1 FSUUID
+DEV1 MNT1
+DEV1 MNT1
+
+DEV2 MNT2 FSUUID
+DEV2 MNT2
+DEV2 MNT2
+
+**** mount cycle ****
+
+DEV1 MNT1 FSUUID
+DEV1 MNT1
+DEV1 MNT1
+
+DEV2 MNT2 FSUUID
+DEV2 MNT2
+DEV2 MNT2
-- 
2.43.0


^ permalink raw reply related

* [PATCH v3 7/9] fstests: verify IMA isolation on cloned filesystems
From: Anand Jain @ 2026-04-27 10:19 UTC (permalink / raw)
  To: fstests
  Cc: linux-btrfs, linux-ext4, linux-xfs, linux-f2fs, amir73il, zlang,
	hch
In-Reply-To: <cover.1777281778.git.asj@kernel.org>

Add testcase to verify IMA measurement isolation when multiple devices
share the same FSUUID.

Signed-off-by: Anand Jain <asj@kernel.org>
---
 tests/generic/804     | 102 ++++++++++++++++++++++++++++++++++++++++++
 tests/generic/804.out |  10 +++++
 2 files changed, 112 insertions(+)
 create mode 100644 tests/generic/804
 create mode 100644 tests/generic/804.out

diff --git a/tests/generic/804 b/tests/generic/804
new file mode 100644
index 000000000000..5f5c04f97579
--- /dev/null
+++ b/tests/generic/804
@@ -0,0 +1,102 @@
+#! /bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2026 Anand Jain <asj@kernel.org>.  All Rights Reserved.
+#
+# FS QA Test 804
+# Verify IMA isolation on cloned filesystems:
+# . Mount two devices sharing the same FSUUID (cloned).
+# . Apply an IMA policy to measure files based on that FSUUID.
+# . Create unique files on each mount point to trigger measurements.
+# . Confirm the IMA log correctly attributes events to the respective mounts.
+
+. ./common/preamble
+. ./common/filter
+
+_begin_fstest auto quick clone
+
+_require_test
+_require_loop
+
+[ "$FSTYP" = "btrfs" ] && _fixed_by_kernel_commit xxxxxxxxxxxx \
+	"btrfs: use on-disk uuid for s_uuid in temp_fsid mounts"
+[ "$FSTYP" = "btrfs" ] && _fixed_by_kernel_commit xxxxxxxxxxxx \
+	"btrfs: derive f_fsid from on-disk fsuuid and dev_t"
+
+_cleanup()
+{
+	cd /
+	rm -r -f $tmp.*
+	_unmount $mnt1 2>/dev/null
+	_unmount $mnt2 2>/dev/null
+	_loop_image_destroy "${devs[@]}" 2> /dev/null
+}
+
+filter_pool()
+{
+	sed -e "s|${devs[0]}|DEV1|g" -e "s|$mnt1|MNT1|g" \
+	    -e "s|${devs[1]}|DEV2|g" -e "s|$mnt2|MNT2|g" | _filter_spaces
+}
+
+do_ima()
+{
+	local ima_policy="/sys/kernel/security/ima/policy"
+	local ima_log="/sys/kernel/security/ima/ascii_runtime_measurements"
+	local fsuuid
+	local mnt=$1
+	local enable=$2
+
+	# Since the in-memory IMA audit log is only cleared upon reboot,
+	# use unique random filenames to avoid log collisions.
+	local foofile=$(mktemp --dry-run foobar_XXXXX)
+
+	echo $mnt $enable | filter_pool
+
+	[ -w "$ima_policy" ] || _notrun "IMA policy not writable"
+
+	fsuuid=$(blkid -s UUID -o value ${devs[0]})
+
+	# Load IMA policy to measure file access specifically for this
+	# filesystem UUID.
+	if [[ $enable -eq 1 ]]; then
+		echo "measure func=FILE_CHECK fsuuid=$fsuuid" > "$ima_policy" || \
+			_notrun "Policy rejected"
+	fi
+
+	# Create a file to trigger measurement and verify its entry in
+	# the IMA log.
+	echo "test_data" > $mnt/$foofile
+
+	# Field 5 of each matching $ima_log entry is the measured file's path.
+	grep $foofile "$ima_log" | awk '{ print $5 }' | filter_pool | \
+						sed "s/$foofile/FOOBAR_FILE/"
+
+	echo "dbg: $mnt $fsuuid $foofile" >> $seqres.full
+	cat $ima_log | tail -1 >> $seqres.full
+	echo >> $seqres.full
+}
+
+devs=()
+_loop_image_create_clone devs
+mnt1=$TEST_DIR/$seq/mnt1
+mnt2=$TEST_DIR/$seq/mnt2
+mkdir -p $mnt1
+mkdir -p $mnt2
+
+_mount $(_common_dev_mount_options) $(_clone_mount_option) ${devs[0]} $mnt1 || \
+						_fail "Failed to mount dev1"
+_mount $(_common_dev_mount_options) $(_clone_mount_option) ${devs[1]} $mnt2 || \
+						_fail "Failed to mount dev2"
+
+do_ima $mnt1 1
+do_ima $mnt2 0
+
+# Btrfs uses an in-memory dynamic temp_fsid: remount dev2 with the same options.
+echo mount cycle
+_unmount $mnt2
+_mount $(_common_dev_mount_options) $(_clone_mount_option) ${devs[1]} $mnt2 || _fail "Failed to mount dev2"
+
+do_ima $mnt1 0
+do_ima $mnt2 0
+
+status=0
+exit
diff --git a/tests/generic/804.out b/tests/generic/804.out
new file mode 100644
index 000000000000..9804181d6c17
--- /dev/null
+++ b/tests/generic/804.out
@@ -0,0 +1,10 @@
+QA output created by 804
+MNT1 1
+MNT1/FOOBAR_FILE
+MNT2 0
+MNT2/FOOBAR_FILE
+mount cycle
+MNT1 0
+MNT1/FOOBAR_FILE
+MNT2 0
+MNT2/FOOBAR_FILE
-- 
2.43.0


^ permalink raw reply related

* [PATCH v3 8/9] fstests: verify exportfs file handles on cloned filesystems
From: Anand Jain @ 2026-04-27 10:19 UTC (permalink / raw)
  To: fstests
  Cc: linux-btrfs, linux-ext4, linux-xfs, linux-f2fs, amir73il, zlang,
	hch
In-Reply-To: <cover.1777281778.git.asj@kernel.org>

Ensure that exportfs can correctly decode file handles on a cloned
filesystem across a mount cycle, i.e. that file handles generated on a
cloned device remain valid after the mount cycle.

Signed-off-by: Anand Jain <asj@kernel.org>
---
 tests/generic/805     | 72 +++++++++++++++++++++++++++++++++++++++++++
 tests/generic/805.out |  2 ++
 2 files changed, 74 insertions(+)
 create mode 100644 tests/generic/805
 create mode 100644 tests/generic/805.out

diff --git a/tests/generic/805 b/tests/generic/805
new file mode 100644
index 000000000000..18c92205304c
--- /dev/null
+++ b/tests/generic/805
@@ -0,0 +1,72 @@
+#! /bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2026 Anand Jain <asj@kernel.org>.  All Rights Reserved.
+#
+# FS QA Test No. 805
+
+. ./common/preamble
+
+_begin_fstest auto quick exportfs clone
+
+_require_test
+_require_exportfs
+_require_loop
+_require_test_program "open_by_handle"
+
+_cleanup()
+{
+	cd /
+	rm -r -f $tmp.*
+	_unmount $mnt1 2>/dev/null
+	_unmount $mnt2 2>/dev/null
+	_loop_image_destroy "${devs[@]}" 2> /dev/null
+}
+
+# Create test dir and test files, encode file handles and store to tmp file
+create_test_files()
+{
+	rm -rf $testdir
+	mkdir -p $testdir
+	$here/src/open_by_handle -cwp -o $tmp.handles_file $testdir $NUMFILES
+}
+
+# Decode file handles loaded from tmp file
+test_file_handles()
+{
+	local opt=$1
+	local when=$2
+
+	echo test_file_handles after $when
+	$here/src/open_by_handle $opt -i $tmp.handles_file $mnt2 $NUMFILES
+}
+
+devs=()
+_loop_image_create_clone devs
+mkdir -p $TEST_DIR/$seq
+mnt1=$TEST_DIR/$seq/mnt1
+mnt2=$TEST_DIR/$seq/mnt2
+mkdir -p $mnt1
+mkdir -p $mnt2
+
+_mount $(_common_dev_mount_options) $(_clone_mount_option) ${devs[0]} $mnt1 || \
+						_fail "Failed to mount dev1"
+_mount $(_common_dev_mount_options) $(_clone_mount_option) ${devs[1]} $mnt2 || \
+						_fail "Failed to mount dev2"
+
+NUMFILES=1
+testdir=$mnt2/testdir
+
+# Encode file handles under $mnt2 now; they are decoded after the cycle mount
+create_test_files
+
+_unmount $mnt1
+_unmount $mnt2
+_mount $(_common_dev_mount_options) $(_clone_mount_option) ${devs[1]} $mnt2 || \
+						_fail "Failed to mount dev2"
+_mount $(_common_dev_mount_options) $(_clone_mount_option) ${devs[0]} $mnt1 || \
+						_fail "Failed to mount dev1"
+
+test_file_handles -rp "cycle mount"
+
+status=0
+exit
diff --git a/tests/generic/805.out b/tests/generic/805.out
new file mode 100644
index 000000000000..29b11ec77ffb
--- /dev/null
+++ b/tests/generic/805.out
@@ -0,0 +1,2 @@
+QA output created by 805
+test_file_handles after cycle mount
-- 
2.43.0


^ permalink raw reply related

* [PATCH v3 9/9] fstests: btrfs: test UUID consistency for clones with metadata_uuid
From: Anand Jain @ 2026-04-27 10:19 UTC (permalink / raw)
  To: fstests
  Cc: linux-btrfs, linux-ext4, linux-xfs, linux-f2fs, amir73il, zlang,
	hch
In-Reply-To: <cover.1777281778.git.asj@kernel.org>

Btrfs uses the metadata_uuid superblock feature to change the on-disk UUID
without rewriting every block header. This patch adds a sanity check to
ensure UUID consistency when a filesystem with metadata_uuid enabled is
cloned.

Signed-off-by: Anand Jain <asj@kernel.org>
---
 tests/btrfs/348     | 91 +++++++++++++++++++++++++++++++++++++++++++++
 tests/btrfs/348.out | 19 ++++++++++
 2 files changed, 110 insertions(+)
 create mode 100644 tests/btrfs/348
 create mode 100644 tests/btrfs/348.out

diff --git a/tests/btrfs/348 b/tests/btrfs/348
new file mode 100644
index 000000000000..cfa56fdb4a4a
--- /dev/null
+++ b/tests/btrfs/348
@@ -0,0 +1,91 @@
+#! /bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2026 Anand Jain <asj@kernel.org>.  All Rights Reserved.
+#
+# FS QA Test 348
+#
+# Verify that the cloned filesystem UUID remains consistent, even when the
+# `metadata_uuid` feature is enabled.
+#
+
+. ./common/preamble
+. ./common/filter
+
+_begin_fstest auto quick mount clone
+
+_require_test
+_require_loop
+
+_cleanup()
+{
+	cd /
+	rm -r -f $tmp.*
+	umount $mnt1 $mnt2 2>/dev/null
+	_loop_image_destroy "${devs[@]}" 2> /dev/null
+}
+
+filter_pool()
+{
+	sed -e "s|${devs[0]}|DEV1|g" -e "s|${mnt1}|MNT1|g" \
+	    -e "s|${devs[1]}|DEV2|g" -e "s|${mnt2}|MNT2|g" | _filter_spaces
+}
+
+pre_clone_tune()
+{
+	local temp_mnt=$TEST_DIR/${seq}_mnt
+
+	_require_command "$BTRFS_TUNE_PROG" btrfstune
+	mkdir -p $temp_mnt
+	_mount ${devs[0]} $temp_mnt
+	$BTRFS_UTIL_PROG subvolume create $temp_mnt/sv1 &> /dev/null
+	_unmount $temp_mnt
+	rm -r -f $temp_mnt
+
+	$BTRFS_TUNE_PROG -m ${devs[0]}
+}
+
+print_info()
+{
+	local mntpt=$1
+	local tgt=$(findmnt -no SOURCE $mntpt)
+	local fsuuid=$(blkid -s UUID -o value $tgt)
+
+	echo "mntpt=$mntpt tgt=$tgt fsuuid=$fsuuid" >> $seqres.full
+	echo
+	findmnt -o SOURCE,TARGET,UUID "$tgt" | tail -n +2 | \
+				sed -e "s/${fsuuid}/FSUUID/g" | filter_pool
+	awk -v dev="$tgt" '$1 == dev { print $1, $2 }' /proc/self/mounts | \
+								filter_pool
+	df --all --output=source,target "$tgt" | tail -n +2 | filter_pool
+}
+
+devs=()
+_loop_image_create_clone devs pre_clone_tune
+mkdir -p $TEST_DIR/$seq
+mnt1=$TEST_DIR/$seq/mnt1
+mnt2=$TEST_DIR/$seq/mnt2
+mkdir -p $mnt1
+mkdir -p $mnt2
+
+_mount $(_common_dev_mount_options) $(_clone_mount_option) ${devs[0]} $mnt1 || \
+						_fail "Failed to mount dev1"
+_mount $(_common_dev_mount_options) $(_clone_mount_option) ${devs[1]} $mnt2 || \
+						_fail "Failed to mount dev2"
+
+print_info $mnt1
+print_info $mnt2
+
+echo
+echo "**** mount cycle ****"
+_unmount $mnt1
+_unmount $mnt2
+_mount $(_common_dev_mount_options) $(_clone_mount_option) ${devs[1]} $mnt2 || \
+						_fail "Failed to mount dev2"
+_mount $(_common_dev_mount_options) $(_clone_mount_option) ${devs[0]} $mnt1 || \
+						_fail "Failed to mount dev1"
+
+print_info $mnt1
+print_info $mnt2
+
+status=0
+exit
diff --git a/tests/btrfs/348.out b/tests/btrfs/348.out
new file mode 100644
index 000000000000..4b102e986246
--- /dev/null
+++ b/tests/btrfs/348.out
@@ -0,0 +1,19 @@
+QA output created by 348
+
+DEV1 MNT1 FSUUID
+DEV1 MNT1
+DEV1 MNT1
+
+DEV2 MNT2 FSUUID
+DEV2 MNT2
+DEV2 MNT2
+
+**** mount cycle ****
+
+DEV1 MNT1 FSUUID
+DEV1 MNT1
+DEV1 MNT1
+
+DEV2 MNT2 FSUUID
+DEV2 MNT2
+DEV2 MNT2
-- 
2.43.0


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox