Linux userland API discussions
 help / color / mirror / Atom feed
* [PATCH v14 05/15] ntfs3: Implement fileattr_get for case sensitivity
From: Chuck Lever @ 2026-05-07  8:52 UTC (permalink / raw)
  To: Al Viro, Christian Brauner, Jan Kara
  Cc: linux-fsdevel, linux-ext4, linux-xfs, linux-cifs, linux-nfs,
	linux-api, linux-f2fs-devel, hirofumi, linkinjeon, sj1557.seo,
	yuezhang.mo, almaz.alexandrovich, slava, glaubitz, frank.li,
	tytso, adilger.kernel, cem, sfrench, pc, ronniesahlberg, sprasad,
	trondmy, anna, jaegeuk, chao, hansg, senozhatsky, Chuck Lever,
	Roland Mainz
In-Reply-To: <20260507-case-sensitivity-v14-0-e62cc8200435@oracle.com>

From: Chuck Lever <chuck.lever@oracle.com>

Report NTFS case sensitivity behavior via the FS_XFLAG_CASEFOLD
flag. NTFS always preserves case at rest.

Reviewed-by: Roland Mainz <roland.mainz@nrubsig.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/ntfs3/file.c    | 29 +++++++++++++++++++++++++++++
 fs/ntfs3/namei.c   |  1 +
 fs/ntfs3/ntfs_fs.h |  1 +
 3 files changed, 31 insertions(+)

diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c
index b041639ab406..ad9350d7fc3f 100644
--- a/fs/ntfs3/file.c
+++ b/fs/ntfs3/file.c
@@ -180,6 +180,34 @@ long ntfs_compat_ioctl(struct file *filp, u32 cmd, unsigned long arg)
 }
 #endif
 
+/*
+ * ntfs_fileattr_get - inode_operations::fileattr_get
+ */
+int ntfs_fileattr_get(struct dentry *dentry, struct file_kattr *fa)
+{
+	struct inode *inode = d_inode(dentry);
+	struct ntfs_sb_info *sbi = inode->i_sb->s_fs_info;
+
+	/* Avoid any operation if inode is bad. */
+	if (unlikely(is_bad_ni(ntfs_i(inode))))
+		return -EINVAL;
+
+	/*
+	 * NTFS preserves case (the default). Case sensitivity depends on
+	 * mount options: with "nocase", NTFS is case-insensitive;
+	 * otherwise it is case-sensitive.
+	 */
+	if (sbi->options->nocase) {
+		fa->fsx_xflags |= FS_XFLAG_CASEFOLD;
+		fa->flags |= FS_CASEFOLD_FL;
+	}
+	if (inode->i_flags & S_IMMUTABLE) {
+		fa->fsx_xflags |= FS_XFLAG_IMMUTABLE;
+		fa->flags |= FS_IMMUTABLE_FL;
+	}
+	return 0;
+}
+
 /*
  * ntfs_getattr - inode_operations::getattr
  */
@@ -1547,6 +1575,7 @@ const struct inode_operations ntfs_file_inode_operations = {
 	.get_acl	= ntfs_get_acl,
 	.set_acl	= ntfs_set_acl,
 	.fiemap		= ntfs_fiemap,
+	.fileattr_get	= ntfs_fileattr_get,
 };
 
 const struct file_operations ntfs_file_operations = {
diff --git a/fs/ntfs3/namei.c b/fs/ntfs3/namei.c
index b2af8f695e60..e159ba66a34a 100644
--- a/fs/ntfs3/namei.c
+++ b/fs/ntfs3/namei.c
@@ -518,6 +518,7 @@ const struct inode_operations ntfs_dir_inode_operations = {
 	.getattr	= ntfs_getattr,
 	.listxattr	= ntfs_listxattr,
 	.fiemap		= ntfs_fiemap,
+	.fileattr_get	= ntfs_fileattr_get,
 };
 
 const struct inode_operations ntfs_special_inode_operations = {
diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h
index bbf3b6a1dcbe..41db22d652c4 100644
--- a/fs/ntfs3/ntfs_fs.h
+++ b/fs/ntfs3/ntfs_fs.h
@@ -529,6 +529,7 @@ bool dir_is_empty(struct inode *dir);
 extern const struct file_operations ntfs_dir_operations;
 
 /* Globals from file.c */
+int ntfs_fileattr_get(struct dentry *dentry, struct file_kattr *fa);
 int ntfs_getattr(struct mnt_idmap *idmap, const struct path *path,
 		 struct kstat *stat, u32 request_mask, u32 flags);
 int ntfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,

-- 
2.53.0


^ permalink raw reply related

* [PATCH v14 06/15] hfs: Implement fileattr_get for case sensitivity
From: Chuck Lever @ 2026-05-07  8:52 UTC (permalink / raw)
  To: Al Viro, Christian Brauner, Jan Kara
  Cc: linux-fsdevel, linux-ext4, linux-xfs, linux-cifs, linux-nfs,
	linux-api, linux-f2fs-devel, hirofumi, linkinjeon, sj1557.seo,
	yuezhang.mo, almaz.alexandrovich, slava, glaubitz, frank.li,
	tytso, adilger.kernel, cem, sfrench, pc, ronniesahlberg, sprasad,
	trondmy, anna, jaegeuk, chao, hansg, senozhatsky, Chuck Lever,
	Roland Mainz
In-Reply-To: <20260507-case-sensitivity-v14-0-e62cc8200435@oracle.com>

From: Chuck Lever <chuck.lever@oracle.com>

Report HFS case sensitivity behavior via the FS_XFLAG_CASEFOLD
flag. HFS is always case-insensitive (using Mac OS Roman case
folding) and always preserves case at rest.

Reviewed-by: Viacheslav Dubeyko <slava@dubeyko.com>
Reviewed-by: Roland Mainz <roland.mainz@nrubsig.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/hfs/dir.c    |  1 +
 fs/hfs/hfs_fs.h |  2 ++
 fs/hfs/inode.c  | 14 ++++++++++++++
 3 files changed, 17 insertions(+)

diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c
index f5e7efe924e7..c4c6e1623f55 100644
--- a/fs/hfs/dir.c
+++ b/fs/hfs/dir.c
@@ -328,4 +328,5 @@ const struct inode_operations hfs_dir_inode_operations = {
 	.rmdir		= hfs_remove,
 	.rename		= hfs_rename,
 	.setattr	= hfs_inode_setattr,
+	.fileattr_get	= hfs_fileattr_get,
 };
diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h
index ac0e83f77a0f..1b23448c9a48 100644
--- a/fs/hfs/hfs_fs.h
+++ b/fs/hfs/hfs_fs.h
@@ -177,6 +177,8 @@ extern int hfs_get_block(struct inode *inode, sector_t block,
 extern const struct address_space_operations hfs_aops;
 extern const struct address_space_operations hfs_btree_aops;
 
+struct file_kattr;
+int hfs_fileattr_get(struct dentry *dentry, struct file_kattr *fa);
 int hfs_write_begin(const struct kiocb *iocb, struct address_space *mapping,
 		    loff_t pos, unsigned int len, struct folio **foliop,
 		    void **fsdata);
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 89b33a9d46d5..f41cc261684d 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -18,6 +18,7 @@
 #include <linux/uio.h>
 #include <linux/xattr.h>
 #include <linux/blkdev.h>
+#include <linux/fileattr.h>
 
 #include "hfs_fs.h"
 #include "btree.h"
@@ -699,6 +700,18 @@ static int hfs_file_fsync(struct file *filp, loff_t start, loff_t end,
 	return ret;
 }
 
+int hfs_fileattr_get(struct dentry *dentry, struct file_kattr *fa)
+{
+	/*
+	 * HFS compares filenames using Mac OS Roman case folding, so
+	 * lookup is always case-insensitive. Names are stored on disk
+	 * with case intact; CASENONPRESERVING stays clear.
+	 */
+	fa->fsx_xflags |= FS_XFLAG_CASEFOLD;
+	fa->flags |= FS_CASEFOLD_FL;
+	return 0;
+}
+
 static const struct file_operations hfs_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read_iter	= generic_file_read_iter,
@@ -715,4 +728,5 @@ static const struct inode_operations hfs_file_inode_operations = {
 	.lookup		= hfs_file_lookup,
 	.setattr	= hfs_inode_setattr,
 	.listxattr	= generic_listxattr,
+	.fileattr_get	= hfs_fileattr_get,
 };

-- 
2.53.0


^ permalink raw reply related

* [PATCH v14 07/15] hfsplus: Report case sensitivity in fileattr_get
From: Chuck Lever @ 2026-05-07  8:53 UTC (permalink / raw)
  To: Al Viro, Christian Brauner, Jan Kara
  Cc: linux-fsdevel, linux-ext4, linux-xfs, linux-cifs, linux-nfs,
	linux-api, linux-f2fs-devel, hirofumi, linkinjeon, sj1557.seo,
	yuezhang.mo, almaz.alexandrovich, slava, glaubitz, frank.li,
	tytso, adilger.kernel, cem, sfrench, pc, ronniesahlberg, sprasad,
	trondmy, anna, jaegeuk, chao, hansg, senozhatsky, Chuck Lever,
	Roland Mainz
In-Reply-To: <20260507-case-sensitivity-v14-0-e62cc8200435@oracle.com>

From: Chuck Lever <chuck.lever@oracle.com>

Add case sensitivity reporting to the existing hfsplus_fileattr_get()
function via the FS_XFLAG_CASEFOLD flag. HFS+ always preserves case
at rest.

Case sensitivity depends on how the volume was formatted: HFSX
volumes may be either case-sensitive or case-insensitive, indicated
by the HFSPLUS_SB_CASEFOLD superblock flag.

FS_XFLAG_CASEFOLD is read-only: FS_XFLAG_RDONLY_MASK ensures
FS_IOC_FSSETXATTR strips it. The legacy FS_IOC_SETFLAGS path in
hfsplus_fileattr_set() also allows FS_CASEFOLD_FL through its
allowlist on case-insensitive volumes so that a chattr
read-modify-write cycle does not fail with EOPNOTSUPP.

Reviewed-by: Viacheslav Dubeyko <slava@dubeyko.com>
Reviewed-by: Roland Mainz <roland.mainz@nrubsig.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/hfsplus/inode.c | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index d05891ec492e..5565c14b4bf6 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -740,6 +740,7 @@ int hfsplus_fileattr_get(struct dentry *dentry, struct file_kattr *fa)
 {
 	struct inode *inode = d_inode(dentry);
 	struct hfsplus_inode_info *hip = HFSPLUS_I(inode);
+	struct hfsplus_sb_info *sbi = HFSPLUS_SB(inode->i_sb);
 	unsigned int flags = 0;
 
 	if (inode->i_flags & S_IMMUTABLE)
@@ -748,6 +749,8 @@ int hfsplus_fileattr_get(struct dentry *dentry, struct file_kattr *fa)
 		flags |= FS_APPEND_FL;
 	if (hip->userflags & HFSPLUS_FLG_NODUMP)
 		flags |= FS_NODUMP_FL;
+	if (test_bit(HFSPLUS_SB_CASEFOLD, &sbi->flags))
+		flags |= FS_CASEFOLD_FL;
 
 	fileattr_fill_flags(fa, flags);
 
@@ -759,13 +762,24 @@ int hfsplus_fileattr_set(struct mnt_idmap *idmap,
 {
 	struct inode *inode = d_inode(dentry);
 	struct hfsplus_inode_info *hip = HFSPLUS_I(inode);
+	struct hfsplus_sb_info *sbi = HFSPLUS_SB(inode->i_sb);
+	unsigned int allowed = FS_IMMUTABLE_FL | FS_APPEND_FL | FS_NODUMP_FL;
 	unsigned int new_fl = 0;
 
 	if (fileattr_has_fsx(fa))
 		return -EOPNOTSUPP;
 
+	/*
+	 * FS_CASEFOLD_FL reflects HFSPLUS_SB_CASEFOLD, a mount-time
+	 * property. Accept it as a no-op so chattr's RMW round-trip
+	 * succeeds; reject any attempt to enable it on a volume that
+	 * was not formatted case-insensitive.
+	 */
+	if (test_bit(HFSPLUS_SB_CASEFOLD, &sbi->flags))
+		allowed |= FS_CASEFOLD_FL;
+
 	/* don't silently ignore unsupported ext2 flags */
-	if (fa->flags & ~(FS_IMMUTABLE_FL|FS_APPEND_FL|FS_NODUMP_FL))
+	if (fa->flags & ~allowed)
 		return -EOPNOTSUPP;
 
 	if (fa->flags & FS_IMMUTABLE_FL)

-- 
2.53.0


^ permalink raw reply related

* [PATCH v14 08/15] xfs: Report case sensitivity in fileattr_get
From: Chuck Lever @ 2026-05-07  8:53 UTC (permalink / raw)
  To: Al Viro, Christian Brauner, Jan Kara
  Cc: linux-fsdevel, linux-ext4, linux-xfs, linux-cifs, linux-nfs,
	linux-api, linux-f2fs-devel, hirofumi, linkinjeon, sj1557.seo,
	yuezhang.mo, almaz.alexandrovich, slava, glaubitz, frank.li,
	tytso, adilger.kernel, cem, sfrench, pc, ronniesahlberg, sprasad,
	trondmy, anna, jaegeuk, chao, hansg, senozhatsky, Chuck Lever,
	Roland Mainz
In-Reply-To: <20260507-case-sensitivity-v14-0-e62cc8200435@oracle.com>

From: Chuck Lever <chuck.lever@oracle.com>

Upper layers such as NFSD need to query whether a filesystem
is case-sensitive. Add FS_XFLAG_CASEFOLD to xfs_ip2xflags()
when the filesystem is formatted with the ASCIICI feature
flag. This serves both FS_IOC_FSGETXATTR (via xfs_fill_fsxattr()
in xfs_fileattr_get()) and XFS_IOC_BULKSTAT (which populates
bs_xflags directly from xfs_ip2xflags()), so bulkstat consumers
and per-inode queries see a consistent view of the filesystem's
case-folding behavior.

FS_XFLAG_CASEFOLD is read-only: FS_XFLAG_RDONLY_MASK ensures
FS_IOC_FSSETXATTR strips it, and xfs_flags2diflags() has no
clause for CASEFOLD so the on-disk diflags are unaffected.
The legacy FS_IOC_SETFLAGS path in xfs_fileattr_set() also
allows FS_CASEFOLD_FL through its allowlist on ASCIICI
filesystems so that a chattr read-modify-write cycle does
not fail with EOPNOTSUPP.

XFS always preserves case. XFS is case-sensitive by default,
but supports ASCII case-insensitive lookups when formatted
with the ASCIICI feature flag.

Reviewed-by: Roland Mainz <roland.mainz@nrubsig.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/xfs/libxfs/xfs_inode_util.c |  2 ++
 fs/xfs/xfs_ioctl.c             | 20 +++++++++++++++++---
 2 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_inode_util.c b/fs/xfs/libxfs/xfs_inode_util.c
index 551fa51befb6..82be54b6f8d3 100644
--- a/fs/xfs/libxfs/xfs_inode_util.c
+++ b/fs/xfs/libxfs/xfs_inode_util.c
@@ -130,6 +130,8 @@ xfs_ip2xflags(
 
 	if (xfs_inode_has_attr_fork(ip))
 		flags |= FS_XFLAG_HASATTR;
+	if (xfs_has_asciici(ip->i_mount))
+		flags |= FS_XFLAG_CASEFOLD;
 	return flags;
 }
 
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index ed9b4846c05f..f8216f74679f 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -755,9 +755,23 @@ xfs_fileattr_set(
 	trace_xfs_ioctl_setattr(ip);
 
 	if (!fa->fsx_valid) {
-		if (fa->flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL |
-				  FS_NOATIME_FL | FS_NODUMP_FL |
-				  FS_SYNC_FL | FS_DAX_FL | FS_PROJINHERIT_FL))
+		unsigned int allowed = FS_IMMUTABLE_FL | FS_APPEND_FL |
+				       FS_NOATIME_FL | FS_NODUMP_FL |
+				       FS_SYNC_FL | FS_DAX_FL |
+				       FS_PROJINHERIT_FL;
+
+		/*
+		 * FS_CASEFOLD_FL reflects the ASCIICI superblock feature,
+		 * a read-only property. Accept it as a no-op so chattr's
+		 * RMW round-trip succeeds; reject any attempt to enable
+		 * it on a non-ASCIICI filesystem. xfs_flags2diflags()
+		 * has no clause for CASEFOLD, so the bit is dropped from
+		 * the on-disk diflags regardless.
+		 */
+		if (xfs_has_asciici(mp))
+			allowed |= FS_CASEFOLD_FL;
+
+		if (fa->flags & ~allowed)
 			return -EOPNOTSUPP;
 	}
 

-- 
2.53.0


^ permalink raw reply related

* [PATCH v14 09/15] cifs: Implement fileattr_get for case sensitivity
From: Chuck Lever @ 2026-05-07  8:53 UTC (permalink / raw)
  To: Al Viro, Christian Brauner, Jan Kara
  Cc: linux-fsdevel, linux-ext4, linux-xfs, linux-cifs, linux-nfs,
	linux-api, linux-f2fs-devel, hirofumi, linkinjeon, sj1557.seo,
	yuezhang.mo, almaz.alexandrovich, slava, glaubitz, frank.li,
	tytso, adilger.kernel, cem, sfrench, pc, ronniesahlberg, sprasad,
	trondmy, anna, jaegeuk, chao, hansg, senozhatsky, Chuck Lever,
	Steve French, Roland Mainz
In-Reply-To: <20260507-case-sensitivity-v14-0-e62cc8200435@oracle.com>

From: Chuck Lever <chuck.lever@oracle.com>

Upper layers such as NFSD need a way to query whether a filesystem
handles filenames in a case-sensitive manner. Report CIFS/SMB case
handling behavior via FS_XFLAG_CASEFOLD and
FS_XFLAG_CASENONPRESERVING.

The authoritative source is the server itself: at mount time CIFS
issues QueryFSInfo(FS_ATTRIBUTE_INFORMATION) and caches the reply
on the tcon. That reply carries FILE_CASE_SENSITIVE_SEARCH and
FILE_CASE_PRESERVED_NAMES, which reflect whatever case handling
the share actually implements after SMB3.1.1 POSIX extensions
negotiation. Translating those two bits into the VFS flags lets
cifs_fileattr_get report what the server advertises rather than
what the client was asked to pretend.

QueryFSInfo is best-effort; the mount completes even if the server
does not answer. MaxPathNameComponentLength is zero in that case
and is used as the "no reply received" sentinel. When no reply is
available, fall back to the nocase mount option so that the reported
behavior agrees with the dentry comparison operations installed on
the superblock.

The callback is registered on cifs_dir_inode_ops so that NFSD,
ksmbd, and other consumers querying case handling against a
directory get a definitive answer, and on cifs_file_inode_ops to
preserve FS_COMPR_FL reporting on regular files. cifs_set_ops()
also installs cifs_namespace_inode_operations on DFS referral
directories that carry IS_AUTOMOUNT; register the same callback
there so the answer does not depend on whether the directory is
a referral point.

Registering fileattr_get routes FS_IOC_GETFLAGS through
vfs_fileattr_get() and short-circuits the syscall's fallback to
cifs_ioctl(). That fallback invoked CIFSGetExtAttr() under
CONFIG_CIFS_POSIX and CONFIG_CIFS_ALLOW_INSECURE_LEGACY on servers
advertising CIFS_UNIX_EXTATTR_CAP, surfacing the SMB1 Unix-extension
immutable, append, and nodump bits. cifs_fileattr_get carries over
only FS_COMPR_FL from cached cifsAttrs; the SMB1 extattr fetch is
not reproduced. SMB1 is deprecated, and acquiring a netfid from
within a dentry-only callback is not worth preserving a path tied
to an insecure legacy dialect.

Acked-by: Steve French <stfrench@microsoft.com>
Reviewed-by: Roland Mainz <roland.mainz@nrubsig.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/smb/client/cifsfs.c    | 53 +++++++++++++++++++++++++++++++++++++++++++++++
 fs/smb/client/cifsfs.h    |  3 +++
 fs/smb/client/namespace.c |  1 +
 3 files changed, 57 insertions(+)

diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c
index 2025739f070a..6c113ae7fdd3 100644
--- a/fs/smb/client/cifsfs.c
+++ b/fs/smb/client/cifsfs.c
@@ -30,6 +30,7 @@
 #include <linux/xattr.h>
 #include <linux/mm.h>
 #include <linux/key-type.h>
+#include <linux/fileattr.h>
 #include <uapi/linux/magic.h>
 #include <net/ipv6.h>
 #include "cifsfs.h"
@@ -1199,6 +1200,56 @@ struct file_system_type smb3_fs_type = {
 MODULE_ALIAS_FS("smb3");
 MODULE_ALIAS("smb3");
 
+int cifs_fileattr_get(struct dentry *dentry, struct file_kattr *fa)
+{
+	struct cifs_sb_info *cifs_sb = CIFS_SB(dentry->d_sb);
+	struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
+	struct inode *inode = d_inode(dentry);
+	u32 attrs;
+
+	/* Preserve FS_COMPR_FL previously reported by cifs_ioctl(). */
+	if (CIFS_I(inode)->cifsAttrs & ATTR_COMPRESSED)
+		fa->flags |= FS_COMPR_FL;
+
+	/*
+	 * FS_CASEFOLD_FL is defined by UAPI as a folder attribute,
+	 * and userspace tools (e.g., lsattr) display it only on
+	 * directories. Confine the case-handling bits to directories
+	 * to match that convention; for non-directories the share's
+	 * case semantics are still discoverable through the parent.
+	 */
+	if (!S_ISDIR(inode->i_mode))
+		return 0;
+
+	/*
+	 * The server's FS_ATTRIBUTE_INFORMATION response, cached on
+	 * the tcon at mount, reflects the share's case-handling
+	 * semantics after any POSIX extensions negotiation. Prefer
+	 * it over the client-local nocase mount option, which only
+	 * governs dentry comparison on this superblock.
+	 *
+	 * QueryFSInfo is best-effort at mount; when it did not
+	 * populate fsAttrInfo, MaxPathNameComponentLength remains
+	 * zero. In that case fall back to nocase so the reporting
+	 * matches the comparison behavior installed on the sb.
+	 */
+	if (le32_to_cpu(tcon->fsAttrInfo.MaxPathNameComponentLength) == 0) {
+		if (tcon->nocase) {
+			fa->fsx_xflags |= FS_XFLAG_CASEFOLD;
+			fa->flags |= FS_CASEFOLD_FL;
+		}
+		return 0;
+	}
+	attrs = le32_to_cpu(tcon->fsAttrInfo.Attributes);
+	if (!(attrs & FILE_CASE_SENSITIVE_SEARCH)) {
+		fa->fsx_xflags |= FS_XFLAG_CASEFOLD;
+		fa->flags |= FS_CASEFOLD_FL;
+	}
+	if (!(attrs & FILE_CASE_PRESERVED_NAMES))
+		fa->fsx_xflags |= FS_XFLAG_CASENONPRESERVING;
+	return 0;
+}
+
 const struct inode_operations cifs_dir_inode_ops = {
 	.create = cifs_create,
 	.atomic_open = cifs_atomic_open,
@@ -1217,6 +1268,7 @@ const struct inode_operations cifs_dir_inode_ops = {
 	.listxattr = cifs_listxattr,
 	.get_acl = cifs_get_acl,
 	.set_acl = cifs_set_acl,
+	.fileattr_get = cifs_fileattr_get,
 };
 
 const struct inode_operations cifs_file_inode_ops = {
@@ -1227,6 +1279,7 @@ const struct inode_operations cifs_file_inode_ops = {
 	.fiemap = cifs_fiemap,
 	.get_acl = cifs_get_acl,
 	.set_acl = cifs_set_acl,
+	.fileattr_get = cifs_fileattr_get,
 };
 
 const char *cifs_get_link(struct dentry *dentry, struct inode *inode,
diff --git a/fs/smb/client/cifsfs.h b/fs/smb/client/cifsfs.h
index 7370b38da938..5f0d459d1a89 100644
--- a/fs/smb/client/cifsfs.h
+++ b/fs/smb/client/cifsfs.h
@@ -89,6 +89,9 @@ extern const struct inode_operations cifs_file_inode_ops;
 extern const struct inode_operations cifs_symlink_inode_ops;
 extern const struct inode_operations cifs_namespace_inode_operations;
 
+struct file_kattr;
+int cifs_fileattr_get(struct dentry *dentry, struct file_kattr *fa);
+
 
 /* Functions related to files and directories */
 extern const struct netfs_request_ops cifs_req_ops;
diff --git a/fs/smb/client/namespace.c b/fs/smb/client/namespace.c
index 52a520349cb7..52a51b032fae 100644
--- a/fs/smb/client/namespace.c
+++ b/fs/smb/client/namespace.c
@@ -294,4 +294,5 @@ struct vfsmount *cifs_d_automount(struct path *path)
 }
 
 const struct inode_operations cifs_namespace_inode_operations = {
+	.fileattr_get	= cifs_fileattr_get,
 };

-- 
2.53.0


^ permalink raw reply related

* [PATCH v14 10/15] nfs: Implement fileattr_get for case sensitivity
From: Chuck Lever @ 2026-05-07  8:53 UTC (permalink / raw)
  To: Al Viro, Christian Brauner, Jan Kara
  Cc: linux-fsdevel, linux-ext4, linux-xfs, linux-cifs, linux-nfs,
	linux-api, linux-f2fs-devel, hirofumi, linkinjeon, sj1557.seo,
	yuezhang.mo, almaz.alexandrovich, slava, glaubitz, frank.li,
	tytso, adilger.kernel, cem, sfrench, pc, ronniesahlberg, sprasad,
	trondmy, anna, jaegeuk, chao, hansg, senozhatsky, Chuck Lever,
	Roland Mainz
In-Reply-To: <20260507-case-sensitivity-v14-0-e62cc8200435@oracle.com>

From: Chuck Lever <chuck.lever@oracle.com>

An NFS server re-exporting an NFS mount point needs to report
the case sensitivity behavior of the underlying filesystem to
its clients. NFSD's attribute encoder obtains that information
by calling vfs_fileattr_get() on the lower filesystem, so the
NFS client must implement fileattr_get to surface what it
learned from its own server.

The NFS client already retrieves case sensitivity information
from servers during mount via PATHCONF (NFSv3) or the
FATTR4_CASE_INSENSITIVE/FATTR4_CASE_PRESERVING attributes
(NFSv4). Expose this information through fileattr_get by
reporting the FS_XFLAG_CASEFOLD and FS_XFLAG_CASENONPRESERVING
flags. NFSv2 lacks PATHCONF support, so mounts using that protocol
version default to standard POSIX behavior: case-sensitive and
case-preserving.

PATHCONF is now invoked unconditionally for NFSv2 and NFSv3 mounts
so the case-sensitivity capabilities are established even when the
user pins server->namelen with the namlen= mount option. That option
is orthogonal to case handling, and skipping PATHCONF because
namelen was already known would leave the caps unset.

The two capability bits carry opposite polarity because their POSIX
defaults differ. Most servers are case-sensitive and case-
preserving, matching "neither xflag set." NFS_CAP_CASE_INSENSITIVE
is set only when the server affirms case insensitivity, so "server
said no" and "server did not answer" both collapse to the case-
sensitive default. NFS_CAP_CASE_NONPRESERVING follows the same
pattern in the opposite direction: set only when the server affirms
that it does not preserve case, so that silence or a missing
attribute lands on the case-preserving default. The NFSv4 probe
checks res.attr_bitmask[0] to distinguish "server said false" from
"server omitted the attribute" before setting the bit.

Both capability bits are cleared before each probe so a remount,
an NFSv4 transparent state migration to a server with different
case semantics, or a probe whose reply does not arrive does not
retain stale capabilities from the prior probe.

Reviewed-by: Roland Mainz <roland.mainz@nrubsig.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfs/client.c           | 21 +++++++++++++++------
 fs/nfs/inode.c            | 15 +++++++++++++++
 fs/nfs/internal.h         |  3 +++
 fs/nfs/namespace.c        |  2 ++
 fs/nfs/nfs3proc.c         |  2 ++
 fs/nfs/nfs3xdr.c          |  7 +++++--
 fs/nfs/nfs4proc.c         | 10 +++++++---
 fs/nfs/proc.c             |  3 +++
 fs/nfs/symlink.c          |  3 +++
 include/linux/nfs_fs_sb.h |  2 +-
 include/linux/nfs_xdr.h   |  2 ++
 11 files changed, 58 insertions(+), 12 deletions(-)

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index be02bb227741..3db2f18315b8 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -914,6 +914,7 @@ static void nfs_server_set_fsinfo(struct nfs_server *server,
  */
 static int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs_fattr *fattr)
 {
+	struct nfs_pathconf pathinfo = { };
 	struct nfs_fsinfo fsinfo;
 	struct nfs_client *clp = server->nfs_client;
 	int error;
@@ -933,15 +934,23 @@ static int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, str
 
 	nfs_server_set_fsinfo(server, &fsinfo);
 
-	/* Get some general file system info */
-	if (server->namelen == 0) {
-		struct nfs_pathconf pathinfo;
+	pathinfo.fattr = fattr;
+	nfs_fattr_init(fattr);
 
-		pathinfo.fattr = fattr;
-		nfs_fattr_init(fattr);
+	/* Clear before probing so a failed RPC does not retain stale bits. */
+	if (clp->rpc_ops->version < 4)
+		server->caps &= ~(NFS_CAP_CASE_INSENSITIVE |
+				  NFS_CAP_CASE_NONPRESERVING);
 
-		if (clp->rpc_ops->pathconf(server, mntfh, &pathinfo) >= 0)
+	if (clp->rpc_ops->pathconf(server, mntfh, &pathinfo) >= 0) {
+		if (server->namelen == 0)
 			server->namelen = pathinfo.max_namelen;
+		if (clp->rpc_ops->version < 4) {
+			if (pathinfo.case_insensitive)
+				server->caps |= NFS_CAP_CASE_INSENSITIVE;
+			if (!pathinfo.case_preserving)
+				server->caps |= NFS_CAP_CASE_NONPRESERVING;
+		}
 	}
 
 	if (clp->rpc_ops->discover_trunking != NULL &&
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 98a8f0de1199..fdcbe6f2052c 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -41,6 +41,7 @@
 #include <linux/freezer.h>
 #include <linux/uaccess.h>
 #include <linux/iversion.h>
+#include <linux/fileattr.h>
 
 #include "nfs4_fs.h"
 #include "callback.h"
@@ -1101,6 +1102,20 @@ int nfs_getattr(struct mnt_idmap *idmap, const struct path *path,
 }
 EXPORT_SYMBOL_GPL(nfs_getattr);
 
+int nfs_fileattr_get(struct dentry *dentry, struct file_kattr *fa)
+{
+	struct inode *inode = d_inode(dentry);
+
+	if (nfs_server_capable(inode, NFS_CAP_CASE_INSENSITIVE)) {
+		fa->fsx_xflags |= FS_XFLAG_CASEFOLD;
+		fa->flags |= FS_CASEFOLD_FL;
+	}
+	if (nfs_server_capable(inode, NFS_CAP_CASE_NONPRESERVING))
+		fa->fsx_xflags |= FS_XFLAG_CASENONPRESERVING;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nfs_fileattr_get);
+
 static void nfs_init_lock_context(struct nfs_lock_context *l_ctx)
 {
 	refcount_set(&l_ctx->count, 1);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index fc5456377160..309d3f679bb3 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -449,6 +449,9 @@ extern void nfs_set_cache_invalid(struct inode *inode, unsigned long flags);
 extern bool nfs_check_cache_invalid(struct inode *, unsigned long);
 extern int nfs_wait_bit_killable(struct wait_bit_key *key, int mode);
 
+struct file_kattr;
+int nfs_fileattr_get(struct dentry *dentry, struct file_kattr *fa);
+
 #if IS_ENABLED(CONFIG_NFS_LOCALIO)
 /* localio.c */
 struct nfs_local_dio {
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index af9be0c5f516..6d0073c24771 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -246,11 +246,13 @@ nfs_namespace_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
 const struct inode_operations nfs_mountpoint_inode_operations = {
 	.getattr	= nfs_getattr,
 	.setattr	= nfs_setattr,
+	.fileattr_get	= nfs_fileattr_get,
 };
 
 const struct inode_operations nfs_referral_inode_operations = {
 	.getattr	= nfs_namespace_getattr,
 	.setattr	= nfs_namespace_setattr,
+	.fileattr_get	= nfs_fileattr_get,
 };
 
 static void nfs_expire_automounts(struct work_struct *work)
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 95d7cd564b74..b80d0c5efc27 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -1053,6 +1053,7 @@ static const struct inode_operations nfs3_dir_inode_operations = {
 	.permission	= nfs_permission,
 	.getattr	= nfs_getattr,
 	.setattr	= nfs_setattr,
+	.fileattr_get	= nfs_fileattr_get,
 #ifdef CONFIG_NFS_V3_ACL
 	.listxattr	= nfs3_listxattr,
 	.get_inode_acl	= nfs3_get_acl,
@@ -1064,6 +1065,7 @@ static const struct inode_operations nfs3_file_inode_operations = {
 	.permission	= nfs_permission,
 	.getattr	= nfs_getattr,
 	.setattr	= nfs_setattr,
+	.fileattr_get	= nfs_fileattr_get,
 #ifdef CONFIG_NFS_V3_ACL
 	.listxattr	= nfs3_listxattr,
 	.get_inode_acl	= nfs3_get_acl,
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index e17d72908412..e745e78faab0 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -2276,8 +2276,11 @@ static int decode_pathconf3resok(struct xdr_stream *xdr,
 	if (unlikely(!p))
 		return -EIO;
 	result->max_link = be32_to_cpup(p++);
-	result->max_namelen = be32_to_cpup(p);
-	/* ignore remaining fields */
+	result->max_namelen = be32_to_cpup(p++);
+	p++;	/* ignore no_trunc */
+	p++;	/* ignore chown_restricted */
+	result->case_insensitive = be32_to_cpup(p++) != 0;
+	result->case_preserving = be32_to_cpup(p) != 0;
 	return 0;
 }
 
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index d839a97df822..62f66684fbc8 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3933,7 +3933,8 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f
 		server->caps &=
 			~(NFS_CAP_ACLS | NFS_CAP_HARDLINKS | NFS_CAP_SYMLINKS |
 			  NFS_CAP_SECURITY_LABEL | NFS_CAP_FS_LOCATIONS |
-			  NFS_CAP_OPEN_XOR | NFS_CAP_DELEGTIME);
+			  NFS_CAP_OPEN_XOR | NFS_CAP_DELEGTIME |
+			  NFS_CAP_CASE_INSENSITIVE | NFS_CAP_CASE_NONPRESERVING);
 		server->fattr_valid = NFS_ATTR_FATTR_V4;
 		if (res.attr_bitmask[0] & FATTR4_WORD0_ACL &&
 				res.acl_bitmask & ACL4_SUPPORT_ALLOW_ACL)
@@ -3944,8 +3945,9 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f
 			server->caps |= NFS_CAP_SYMLINKS;
 		if (res.case_insensitive)
 			server->caps |= NFS_CAP_CASE_INSENSITIVE;
-		if (res.case_preserving)
-			server->caps |= NFS_CAP_CASE_PRESERVING;
+		if ((res.attr_bitmask[0] & FATTR4_WORD0_CASE_PRESERVING) &&
+		    !res.case_preserving)
+			server->caps |= NFS_CAP_CASE_NONPRESERVING;
 #ifdef CONFIG_NFS_V4_SECURITY_LABEL
 		if (res.attr_bitmask[2] & FATTR4_WORD2_SECURITY_LABEL)
 			server->caps |= NFS_CAP_SECURITY_LABEL;
@@ -10598,6 +10600,7 @@ static const struct inode_operations nfs4_dir_inode_operations = {
 	.getattr	= nfs_getattr,
 	.setattr	= nfs_setattr,
 	.listxattr	= nfs4_listxattr,
+	.fileattr_get	= nfs_fileattr_get,
 };
 
 static const struct inode_operations nfs4_file_inode_operations = {
@@ -10605,6 +10608,7 @@ static const struct inode_operations nfs4_file_inode_operations = {
 	.getattr	= nfs_getattr,
 	.setattr	= nfs_setattr,
 	.listxattr	= nfs4_listxattr,
+	.fileattr_get	= nfs_fileattr_get,
 };
 
 static struct nfs_server *nfs4_clone_server(struct nfs_server *source,
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 70795684b8e8..03c2c1f31be9 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -598,6 +598,7 @@ nfs_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
 {
 	info->max_link = 0;
 	info->max_namelen = NFS2_MAXNAMLEN;
+	info->case_preserving = true;
 	return 0;
 }
 
@@ -718,12 +719,14 @@ static const struct inode_operations nfs_dir_inode_operations = {
 	.permission	= nfs_permission,
 	.getattr	= nfs_getattr,
 	.setattr	= nfs_setattr,
+	.fileattr_get	= nfs_fileattr_get,
 };
 
 static const struct inode_operations nfs_file_inode_operations = {
 	.permission	= nfs_permission,
 	.getattr	= nfs_getattr,
 	.setattr	= nfs_setattr,
+	.fileattr_get	= nfs_fileattr_get,
 };
 
 const struct nfs_rpc_ops nfs_v2_clientops = {
diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c
index 58146e935402..74a072896f8d 100644
--- a/fs/nfs/symlink.c
+++ b/fs/nfs/symlink.c
@@ -22,6 +22,8 @@
 #include <linux/mm.h>
 #include <linux/string.h>
 
+#include "internal.h"
+
 /* Symlink caching in the page cache is even more simplistic
  * and straight-forward than readdir caching.
  */
@@ -74,4 +76,5 @@ const struct inode_operations nfs_symlink_inode_operations = {
 	.get_link	= nfs_get_link,
 	.getattr	= nfs_getattr,
 	.setattr	= nfs_setattr,
+	.fileattr_get	= nfs_fileattr_get,
 };
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 4daee27fa5eb..34d294774f8c 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -306,7 +306,7 @@ struct nfs_server {
 #define NFS_CAP_ATOMIC_OPEN	(1U << 4)
 #define NFS_CAP_LGOPEN		(1U << 5)
 #define NFS_CAP_CASE_INSENSITIVE	(1U << 6)
-#define NFS_CAP_CASE_PRESERVING	(1U << 7)
+#define NFS_CAP_CASE_NONPRESERVING	(1U << 7)
 #define NFS_CAP_REBOOT_LAYOUTRETURN	(1U << 8)
 #define NFS_CAP_OFFLOAD_STATUS	(1U << 9)
 #define NFS_CAP_ZERO_RANGE	(1U << 10)
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index ff1f12aa73d2..7c2057e40f99 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -182,6 +182,8 @@ struct nfs_pathconf {
 	struct nfs_fattr	*fattr; /* Post-op attributes */
 	__u32			max_link; /* max # of hard links */
 	__u32			max_namelen; /* max name length */
+	bool			case_insensitive;
+	bool			case_preserving;
 };
 
 struct nfs4_change_info {

-- 
2.53.0


^ permalink raw reply related

* [PATCH v14 11/15] vboxsf: Implement fileattr_get for case sensitivity
From: Chuck Lever @ 2026-05-07  8:53 UTC (permalink / raw)
  To: Al Viro, Christian Brauner, Jan Kara
  Cc: linux-fsdevel, linux-ext4, linux-xfs, linux-cifs, linux-nfs,
	linux-api, linux-f2fs-devel, hirofumi, linkinjeon, sj1557.seo,
	yuezhang.mo, almaz.alexandrovich, slava, glaubitz, frank.li,
	tytso, adilger.kernel, cem, sfrench, pc, ronniesahlberg, sprasad,
	trondmy, anna, jaegeuk, chao, hansg, senozhatsky, Chuck Lever,
	Roland Mainz
In-Reply-To: <20260507-case-sensitivity-v14-0-e62cc8200435@oracle.com>

From: Chuck Lever <chuck.lever@oracle.com>

Upper layers such as NFSD need a way to query whether a
filesystem handles filenames in a case-sensitive manner. Report
VirtualBox shared folder case handling behavior via the
FS_XFLAG_CASEFOLD flag.

The case sensitivity property is queried from the VirtualBox host
service at mount time and cached in struct vboxsf_sbi. The host
determines case sensitivity based on the underlying host filesystem
(for example, Windows NTFS is case-insensitive while Linux ext4 is
case-sensitive).

VirtualBox shared folders always preserve filename case exactly
as provided by the guest. The host interface does not expose a
separate case-preserving property; leaving
FS_XFLAG_CASENONPRESERVING unset reports the POSIX-default
case-preserving behavior, which matches vboxsf semantics.

The callback is registered in all three inode_operations
structures (directory, file, and symlink) to ensure consistent
reporting across all inode types.

Reviewed-by: Roland Mainz <roland.mainz@nrubsig.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/vboxsf/dir.c    |  1 +
 fs/vboxsf/file.c   |  6 ++++--
 fs/vboxsf/super.c  |  7 +++++++
 fs/vboxsf/utils.c  | 30 ++++++++++++++++++++++++++++++
 fs/vboxsf/vfsmod.h |  6 ++++++
 5 files changed, 48 insertions(+), 2 deletions(-)

diff --git a/fs/vboxsf/dir.c b/fs/vboxsf/dir.c
index 42bedc4ec7af..c5bd3271aa96 100644
--- a/fs/vboxsf/dir.c
+++ b/fs/vboxsf/dir.c
@@ -477,4 +477,5 @@ const struct inode_operations vboxsf_dir_iops = {
 	.symlink = vboxsf_dir_symlink,
 	.getattr = vboxsf_getattr,
 	.setattr = vboxsf_setattr,
+	.fileattr_get = vboxsf_fileattr_get,
 };
diff --git a/fs/vboxsf/file.c b/fs/vboxsf/file.c
index 7a7a3fbb2651..943953867e18 100644
--- a/fs/vboxsf/file.c
+++ b/fs/vboxsf/file.c
@@ -222,7 +222,8 @@ const struct file_operations vboxsf_reg_fops = {
 
 const struct inode_operations vboxsf_reg_iops = {
 	.getattr = vboxsf_getattr,
-	.setattr = vboxsf_setattr
+	.setattr = vboxsf_setattr,
+	.fileattr_get = vboxsf_fileattr_get,
 };
 
 static int vboxsf_read_folio(struct file *file, struct folio *folio)
@@ -389,5 +390,6 @@ static const char *vboxsf_get_link(struct dentry *dentry, struct inode *inode,
 }
 
 const struct inode_operations vboxsf_lnk_iops = {
-	.get_link = vboxsf_get_link
+	.get_link = vboxsf_get_link,
+	.fileattr_get = vboxsf_fileattr_get,
 };
diff --git a/fs/vboxsf/super.c b/fs/vboxsf/super.c
index a618cb093e00..a61fbab51d37 100644
--- a/fs/vboxsf/super.c
+++ b/fs/vboxsf/super.c
@@ -185,6 +185,13 @@ static int vboxsf_fill_super(struct super_block *sb, struct fs_context *fc)
 	if (err)
 		goto fail_unmap;
 
+	/*
+	 * A failed query leaves sbi->case_insensitive false, so the
+	 * mount defaults to reporting case-sensitive behavior. Do not
+	 * fail the mount over an advisory attribute.
+	 */
+	vboxsf_query_case_sensitive(sbi);
+
 	sb->s_magic = VBOXSF_SUPER_MAGIC;
 	sb->s_blocksize = 1024;
 	sb->s_maxbytes = MAX_LFS_FILESIZE;
diff --git a/fs/vboxsf/utils.c b/fs/vboxsf/utils.c
index 440e8c50629d..298bfc93255c 100644
--- a/fs/vboxsf/utils.c
+++ b/fs/vboxsf/utils.c
@@ -11,6 +11,7 @@
 #include <linux/sizes.h>
 #include <linux/pagemap.h>
 #include <linux/vfs.h>
+#include <linux/fileattr.h>
 #include "vfsmod.h"
 
 struct inode *vboxsf_new_inode(struct super_block *sb)
@@ -567,3 +568,32 @@ int vboxsf_dir_read_all(struct vboxsf_sbi *sbi, struct vboxsf_dir_info *sf_d,
 
 	return err;
 }
+
+int vboxsf_query_case_sensitive(struct vboxsf_sbi *sbi)
+{
+	struct shfl_volinfo volinfo = {};
+	u32 buf_len;
+	int err;
+
+	buf_len = sizeof(volinfo);
+	err = vboxsf_fsinfo(sbi->root, 0, SHFL_INFO_GET | SHFL_INFO_VOLUME,
+			    &buf_len, &volinfo);
+	if (err)
+		return err;
+	if (buf_len < sizeof(volinfo))
+		return 0;
+
+	sbi->case_insensitive = !volinfo.properties.case_sensitive;
+	return 0;
+}
+
+int vboxsf_fileattr_get(struct dentry *dentry, struct file_kattr *fa)
+{
+	struct vboxsf_sbi *sbi = VBOXSF_SBI(dentry->d_sb);
+
+	if (sbi->case_insensitive) {
+		fa->fsx_xflags |= FS_XFLAG_CASEFOLD;
+		fa->flags |= FS_CASEFOLD_FL;
+	}
+	return 0;
+}
diff --git a/fs/vboxsf/vfsmod.h b/fs/vboxsf/vfsmod.h
index 05973eb89d52..b61afd0ce842 100644
--- a/fs/vboxsf/vfsmod.h
+++ b/fs/vboxsf/vfsmod.h
@@ -47,6 +47,7 @@ struct vboxsf_sbi {
 	u32 next_generation;
 	u32 root;
 	int bdi_id;
+	bool case_insensitive;
 };
 
 /* per-inode information */
@@ -111,6 +112,11 @@ void vboxsf_dir_info_free(struct vboxsf_dir_info *p);
 int vboxsf_dir_read_all(struct vboxsf_sbi *sbi, struct vboxsf_dir_info *sf_d,
 			u64 handle);
 
+int vboxsf_query_case_sensitive(struct vboxsf_sbi *sbi);
+
+struct file_kattr;
+int vboxsf_fileattr_get(struct dentry *dentry, struct file_kattr *fa);
+
 /* from vboxsf_wrappers.c */
 int vboxsf_connect(void);
 void vboxsf_disconnect(void);

-- 
2.53.0


^ permalink raw reply related

* [PATCH v14 12/15] isofs: Implement fileattr_get for case sensitivity
From: Chuck Lever @ 2026-05-07  8:53 UTC (permalink / raw)
  To: Al Viro, Christian Brauner, Jan Kara
  Cc: linux-fsdevel, linux-ext4, linux-xfs, linux-cifs, linux-nfs,
	linux-api, linux-f2fs-devel, hirofumi, linkinjeon, sj1557.seo,
	yuezhang.mo, almaz.alexandrovich, slava, glaubitz, frank.li,
	tytso, adilger.kernel, cem, sfrench, pc, ronniesahlberg, sprasad,
	trondmy, anna, jaegeuk, chao, hansg, senozhatsky, Chuck Lever,
	Roland Mainz
In-Reply-To: <20260507-case-sensitivity-v14-0-e62cc8200435@oracle.com>

From: Chuck Lever <chuck.lever@oracle.com>

Upper layers such as NFSD need a way to query whether a
filesystem handles filenames in a case-sensitive manner so
they can provide correct semantics to remote clients. Without
this information, NFS exports of ISO 9660 filesystems cannot
advertise their filename case behavior.

Implement isofs_fileattr_get() to report ISO 9660 case handling
behavior. The 'check=r' (relaxed) mount option enables
case-insensitive lookups and is reported via FS_XFLAG_CASEFOLD.
By default, Joliet extensions operate in relaxed mode while
plain ISO 9660 uses strict (case-sensitive) mode.

Plain ISO 9660 names on the medium are uppercase. When neither
Rock Ridge nor Joliet is in effect, the default 'map=n' option
(and 'map=a') routes lookup and readdir through
isofs_name_translate(), which forces A-Z to a-z. The names
visible to userspace then differ in case from the on-disc form,
so report FS_XFLAG_CASENONPRESERVING in that configuration. Rock
Ridge and Joliet both deliver names as authored, and 'map=o'
emits the raw on-disc name unchanged, so those configurations
remain case-preserving.

Casefolding is a directory property, and the in-tree consumers
(NFSD, ksmbd) issue the query against a directory: NFSD walks
to the parent for non-directory dentries before calling
vfs_fileattr_get(), and ksmbd reports per-share attributes from
the share root. Wire .fileattr_get only on
isofs_dir_inode_operations. The CASEFOLD flag is set in both
fa->fsx_xflags and fa->flags so FS_IOC_FSGETXATTR and
FS_IOC_GETFLAGS agree.

Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Roland Mainz <roland.mainz@nrubsig.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/isofs/dir.c   | 16 ++++++++++++++++
 fs/isofs/isofs.h |  3 +++
 2 files changed, 19 insertions(+)

diff --git a/fs/isofs/dir.c b/fs/isofs/dir.c
index 2fd9948d606e..55385a72a4ce 100644
--- a/fs/isofs/dir.c
+++ b/fs/isofs/dir.c
@@ -14,6 +14,7 @@
 #include <linux/gfp.h>
 #include <linux/filelock.h>
 #include "isofs.h"
+#include <linux/fileattr.h>
 
 int isofs_name_translate(struct iso_directory_record *de, char *new, struct inode *inode)
 {
@@ -267,6 +268,20 @@ static int isofs_readdir(struct file *file, struct dir_context *ctx)
 	return result;
 }
 
+int isofs_fileattr_get(struct dentry *dentry, struct file_kattr *fa)
+{
+	struct isofs_sb_info *sbi = ISOFS_SB(dentry->d_sb);
+
+	if (sbi->s_check == 'r') {
+		fa->fsx_xflags |= FS_XFLAG_CASEFOLD;
+		fa->flags |= FS_CASEFOLD_FL;
+	}
+	if (!sbi->s_joliet_level && !sbi->s_rock &&
+	    (sbi->s_mapping == 'n' || sbi->s_mapping == 'a'))
+		fa->fsx_xflags |= FS_XFLAG_CASENONPRESERVING;
+	return 0;
+}
+
 const struct file_operations isofs_dir_operations =
 {
 	.llseek = generic_file_llseek,
@@ -281,6 +296,7 @@ const struct file_operations isofs_dir_operations =
 const struct inode_operations isofs_dir_inode_operations =
 {
 	.lookup = isofs_lookup,
+	.fileattr_get = isofs_fileattr_get,
 };
 
 
diff --git a/fs/isofs/isofs.h b/fs/isofs/isofs.h
index 506555837533..0ec8b24a42ed 100644
--- a/fs/isofs/isofs.h
+++ b/fs/isofs/isofs.h
@@ -197,6 +197,9 @@ isofs_normalize_block_and_offset(struct iso_directory_record* de,
 	}
 }
 
+struct file_kattr;
+int isofs_fileattr_get(struct dentry *dentry, struct file_kattr *fa);
+
 extern const struct inode_operations isofs_dir_inode_operations;
 extern const struct file_operations isofs_dir_operations;
 extern const struct address_space_operations isofs_symlink_aops;

-- 
2.53.0


^ permalink raw reply related

* [PATCH v14 13/15] nfsd: Report export case-folding via NFSv3 PATHCONF
From: Chuck Lever @ 2026-05-07  8:53 UTC (permalink / raw)
  To: Al Viro, Christian Brauner, Jan Kara
  Cc: linux-fsdevel, linux-ext4, linux-xfs, linux-cifs, linux-nfs,
	linux-api, linux-f2fs-devel, hirofumi, linkinjeon, sj1557.seo,
	yuezhang.mo, almaz.alexandrovich, slava, glaubitz, frank.li,
	tytso, adilger.kernel, cem, sfrench, pc, ronniesahlberg, sprasad,
	trondmy, anna, jaegeuk, chao, hansg, senozhatsky, Chuck Lever,
	Roland Mainz
In-Reply-To: <20260507-case-sensitivity-v14-0-e62cc8200435@oracle.com>

From: Chuck Lever <chuck.lever@oracle.com>

The hard-coded MSDOS_SUPER_MAGIC check in nfsd3_proc_pathconf()
only recognizes FAT filesystems as case-insensitive. Modern
filesystems like F2FS, exFAT, and CIFS support case-insensitive
directories, but NFSv3 clients cannot discover this capability.

Query the export's actual case behavior through ->fileattr_get
instead. This allows NFSv3 clients to correctly handle case
sensitivity for any filesystem that implements the fileattr
interface. Filesystems without ->fileattr_get continue to report
the default POSIX behavior (case-sensitive, case-preserving).

This change depends on the earlier "fat: Implement fileattr_get
for case sensitivity" patch in this series, which ensures FAT
filesystems report their case behavior correctly via the
fileattr interface.

Case-folding is a per-directory property, so
nfsd_get_case_info() queries the parent dentry for
non-directory filehandles. Three inherent corner cases follow:
a single-file export's parent lies outside the exported
subtree, so the LSM hook evaluates against an unexported
directory; a disconnected dentry from fh_verify() has
d_parent == itself, so the file's own attributes are reported
until the dentry connects; and a hardlinked file resolves
through the alias the dcache currently holds, so when the
inode is linked into both case-folded and case-sensitive
directories the reported value tracks whichever parent is
active. These limitations are not addressable without
redefining the protocol attribute as per-parent rather than
per-object.

RFC 1813 restricts PATHCONF errors to NFS3ERR_STALE,
NFS3ERR_BADHANDLE, and NFS3ERR_SERVERFAULT. When an LSM hook
denies the case-folding query on the parent, NFS3ERR_STALE is
the only correct mapping: NFS3ERR_SERVERFAULT misrepresents a
working server as broken, and NFS3ERR_BADHANDLE implies a
decoding failure that did not occur. A client purging the
filehandle on receipt is the desired outcome, since the server
has refused to read attributes through it. Substituting POSIX
defaults instead would let the same handle report
casefold=false now and casefold=true once policy permits,
opening a silent name-collision window on case-insensitive
exports.

Reviewed-by: Roland Mainz <roland.mainz@nrubsig.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/nfs3proc.c | 36 +++++++++++++++++-----
 fs/nfsd/vfs.c      | 88 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/nfsd/vfs.h      |  3 ++
 fs/nfsd/xdr3.h     |  4 +--
 4 files changed, 121 insertions(+), 10 deletions(-)

diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 42adc5461db0..12b9172c6be1 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -710,23 +710,43 @@ nfsd3_proc_pathconf(struct svc_rqst *rqstp)
 	resp->p_name_max = 255;		/* at least */
 	resp->p_no_trunc = 0;
 	resp->p_chown_restricted = 1;
-	resp->p_case_insensitive = 0;
-	resp->p_case_preserving = 1;
+	resp->p_case_insensitive = false;
+	resp->p_case_preserving = true;
 
 	resp->status = fh_verify(rqstp, &argp->fh, 0, NFSD_MAY_NOP);
 
 	if (resp->status == nfs_ok) {
 		struct super_block *sb = argp->fh.fh_dentry->d_sb;
+		int err;
 
-		/* Note that we don't care for remote fs's here */
-		switch (sb->s_magic) {
-		case EXT2_SUPER_MAGIC:
+		if (sb->s_magic == EXT2_SUPER_MAGIC) {
 			resp->p_link_max = EXT2_LINK_MAX;
 			resp->p_name_max = EXT2_NAME_LEN;
+		}
+
+		err = nfsd_get_case_info(argp->fh.fh_dentry,
+					 &resp->p_case_insensitive,
+					 &resp->p_case_preserving);
+		/*
+		 * RFC 1813 lists NFS3ERR_STALE, NFS3ERR_BADHANDLE, and
+		 * NFS3ERR_SERVERFAULT as the only PATHCONF errors.
+		 */
+		switch (err) {
+		case 0:
+		case -EOPNOTSUPP:
+			/* Both arms leave the output booleans valid. */
 			break;
-		case MSDOS_SUPER_MAGIC:
-			resp->p_case_insensitive = 1;
-			resp->p_case_preserving  = 0;
+		case -EACCES:
+		case -EPERM:
+			/*
+			 * Policy denied the query. Report STALE so the
+			 * handle is unusable without implying a server
+			 * malfunction.
+			 */
+			resp->status = nfserr_stale;
+			break;
+		default:
+			resp->status = nfserr_serverfault;
 			break;
 		}
 	}
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index eafdf7b7890f..85ff418127c7 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -32,6 +32,7 @@
 #include <linux/writeback.h>
 #include <linux/security.h>
 #include <linux/sunrpc/xdr.h>
+#include <linux/fileattr.h>
 
 #include "xdr3.h"
 
@@ -2891,3 +2892,90 @@ nfsd_permission(struct svc_cred *cred, struct svc_export *exp,
 
 	return err? nfserrno(err) : 0;
 }
+
+/**
+ * nfsd_get_case_info - get case sensitivity info for a dentry
+ * @dentry: dentry to query
+ * @case_insensitive: set to true if name comparison ignores case
+ * @case_preserving: set to true if case is preserved on disk
+ *
+ * On casefold-capable filesystems the flag lives on the directory,
+ * not on its entries, so for a non-directory @dentry the parent is
+ * queried instead. A directory (including an export root, whose
+ * parent lies outside the export) is queried as-is so its own
+ * contents' lookup behavior is reported. NFSD advertises
+ * fattr4_homogeneous as FALSE, so per-directory answers may differ
+ * within an export.
+ *
+ * The probe runs with kernel credentials. case_insensitive and
+ * case_preserving describe the directory's structural lookup
+ * behavior, not the caller's identity; running under the calling
+ * client's mapped credentials would let per-client MAC policy on
+ * the parent directory turn this query into NFS4ERR_ACCESS even
+ * though the underlying property is the same for every client.
+ *
+ * When the filesystem does not expose case-folding state (no
+ * ->fileattr_get, or the callback returns -EOPNOTSUPP /
+ * -ENOIOCTLCMD / -ENOTTY / -EINVAL), the outputs are filled with
+ * POSIX defaults (case-sensitive, case-preserving) on the premise
+ * that a filesystem with case-folding support wires up
+ * fileattr_get.
+ *
+ * Return: 0 with outputs filled, -EOPNOTSUPP with outputs filled
+ *         to POSIX defaults, or a negative errno (e.g., -EIO,
+ *         -ESTALE, -ENOMEM) with outputs unmodified.
+ */
+int
+nfsd_get_case_info(struct dentry *dentry, bool *case_insensitive,
+		   bool *case_preserving)
+{
+	struct file_kattr fa = {};
+	const struct cred *saved;
+	struct cred *probe;
+	struct dentry *cd;
+	bool put = false;
+	int err;
+
+	if (d_is_dir(dentry)) {
+		cd = dentry;
+	} else {
+		cd = dget_parent(dentry);
+		put = true;
+	}
+
+	probe = prepare_creds();
+	if (!probe) {
+		err = -ENOMEM;
+		goto out;
+	}
+	probe->fsuid = GLOBAL_ROOT_UID;
+	probe->fsgid = GLOBAL_ROOT_GID;
+	saved = override_creds(probe);
+
+	err = vfs_fileattr_get(cd, &fa);
+
+	put_cred(revert_creds(saved));
+out:
+	if (put)
+		dput(cd);
+	switch (err) {
+	case 0:
+		*case_insensitive = fa.fsx_xflags & FS_XFLAG_CASEFOLD;
+		*case_preserving =
+			!(fa.fsx_xflags & FS_XFLAG_CASENONPRESERVING);
+		return 0;
+	case -EINVAL:
+	case -ENOTTY:
+	case -ENOIOCTLCMD:
+	case -EOPNOTSUPP:
+		/*
+		 * Filesystem does not expose case state.
+		 * Report POSIX defaults.
+		 */
+		*case_insensitive = false;
+		*case_preserving = true;
+		return -EOPNOTSUPP;
+	default:
+		return err;
+	}
+}
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index 702a844f2106..e09ea04a51b9 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -156,6 +156,9 @@ __be32		nfsd_readdir(struct svc_rqst *, struct svc_fh *,
 			     loff_t *, struct readdir_cd *, nfsd_filldir_t);
 __be32		nfsd_statfs(struct svc_rqst *, struct svc_fh *,
 				struct kstatfs *, int access);
+int		nfsd_get_case_info(struct dentry *dentry,
+				   bool *case_insensitive,
+				   bool *case_preserving);
 
 __be32		nfsd_permission(struct svc_cred *cred, struct svc_export *exp,
 				struct dentry *dentry, int acc);
diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h
index 522067b7fd75..a7c9714b0b0e 100644
--- a/fs/nfsd/xdr3.h
+++ b/fs/nfsd/xdr3.h
@@ -209,8 +209,8 @@ struct nfsd3_pathconfres {
 	__u32			p_name_max;
 	__u32			p_no_trunc;
 	__u32			p_chown_restricted;
-	__u32			p_case_insensitive;
-	__u32			p_case_preserving;
+	bool			p_case_insensitive;
+	bool			p_case_preserving;
 };
 
 struct nfsd3_commitres {

-- 
2.53.0


^ permalink raw reply related

* [PATCH v14 14/15] nfsd: Implement NFSv4 FATTR4_CASE_INSENSITIVE and FATTR4_CASE_PRESERVING
From: Chuck Lever @ 2026-05-07  8:53 UTC (permalink / raw)
  To: Al Viro, Christian Brauner, Jan Kara
  Cc: linux-fsdevel, linux-ext4, linux-xfs, linux-cifs, linux-nfs,
	linux-api, linux-f2fs-devel, hirofumi, linkinjeon, sj1557.seo,
	yuezhang.mo, almaz.alexandrovich, slava, glaubitz, frank.li,
	tytso, adilger.kernel, cem, sfrench, pc, ronniesahlberg, sprasad,
	trondmy, anna, jaegeuk, chao, hansg, senozhatsky, Chuck Lever,
	Roland Mainz
In-Reply-To: <20260507-case-sensitivity-v14-0-e62cc8200435@oracle.com>

From: Chuck Lever <chuck.lever@oracle.com>

NFSD currently provides NFSv4 clients with hard-coded responses
indicating all exported filesystems are case-sensitive and
case-preserving. This is incorrect for case-insensitive filesystems
and ext4 directories with casefold enabled.

Query the underlying filesystem's actual case sensitivity via
nfsd_get_case_info() and return accurate values to clients. This
supports per-directory settings for filesystems that allow mixing
case-sensitive and case-insensitive directories within an export.

The helper queries the parent dentry for non-directory filehandles
because case-folding is a per-directory property. That resolution
has the same corner cases here as for NFSv3 PATHCONF: single-file
exports query an unexported parent, disconnected dentries report
defaults until reconnected, and hardlinked files track whichever
alias the dcache currently holds.

Reviewed-by: Roland Mainz <roland.mainz@nrubsig.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/nfs4xdr.c | 52 +++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 49 insertions(+), 3 deletions(-)

diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 2a0946c630e1..319007b79d49 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -3158,6 +3158,8 @@ struct nfsd4_fattr_args {
 	u32			rdattr_err;
 	bool			contextsupport;
 	bool			ignore_crossmnt;
+	bool			case_insensitive;
+	bool			case_preserving;
 };
 
 typedef __be32(*nfsd4_enc_attr)(struct xdr_stream *xdr,
@@ -3356,6 +3358,33 @@ static __be32 nfsd4_encode_fattr4_acl(struct xdr_stream *xdr,
 	return nfs_ok;
 }
 
+static __be32 nfsd4_encode_fattr4_case_insensitive(struct xdr_stream *xdr,
+					const struct nfsd4_fattr_args *args)
+{
+	return nfsd4_encode_bool(xdr, args->case_insensitive);
+}
+
+static __be32 nfsd4_encode_fattr4_case_preserving(struct xdr_stream *xdr,
+					const struct nfsd4_fattr_args *args)
+{
+	return nfsd4_encode_bool(xdr, args->case_preserving);
+}
+
+static __be32 nfsd4_encode_fattr4_homogeneous(struct xdr_stream *xdr,
+					const struct nfsd4_fattr_args *args)
+{
+	/*
+	 * Casefold-capable filesystems (e.g. ext4 or f2fs with the
+	 * casefold feature) attach a Unicode encoding at mount time
+	 * but apply case folding per directory.  The per-file-system
+	 * case_insensitive and case_preserving values can therefore
+	 * legitimately differ across objects that share the same fsid.
+	 * Report FATTR4_HOMOGENEOUS = FALSE on such filesystems to
+	 * keep that variation consistent with RFC 8881 Section 5.8.2.16.
+	 */
+	return nfsd4_encode_bool(xdr, !sb_has_encoding(args->dentry->d_sb));
+}
+
 static __be32 nfsd4_encode_fattr4_filehandle(struct xdr_stream *xdr,
 					     const struct nfsd4_fattr_args *args)
 {
@@ -3748,8 +3777,8 @@ static const nfsd4_enc_attr nfsd4_enc_fattr4_encode_ops[] = {
 	[FATTR4_ACLSUPPORT]		= nfsd4_encode_fattr4_aclsupport,
 	[FATTR4_ARCHIVE]		= nfsd4_encode_fattr4__noop,
 	[FATTR4_CANSETTIME]		= nfsd4_encode_fattr4__true,
-	[FATTR4_CASE_INSENSITIVE]	= nfsd4_encode_fattr4__false,
-	[FATTR4_CASE_PRESERVING]	= nfsd4_encode_fattr4__true,
+	[FATTR4_CASE_INSENSITIVE]	= nfsd4_encode_fattr4_case_insensitive,
+	[FATTR4_CASE_PRESERVING]	= nfsd4_encode_fattr4_case_preserving,
 	[FATTR4_CHOWN_RESTRICTED]	= nfsd4_encode_fattr4__true,
 	[FATTR4_FILEHANDLE]		= nfsd4_encode_fattr4_filehandle,
 	[FATTR4_FILEID]			= nfsd4_encode_fattr4_fileid,
@@ -3758,7 +3787,7 @@ static const nfsd4_enc_attr nfsd4_enc_fattr4_encode_ops[] = {
 	[FATTR4_FILES_TOTAL]		= nfsd4_encode_fattr4_files_total,
 	[FATTR4_FS_LOCATIONS]		= nfsd4_encode_fattr4_fs_locations,
 	[FATTR4_HIDDEN]			= nfsd4_encode_fattr4__noop,
-	[FATTR4_HOMOGENEOUS]		= nfsd4_encode_fattr4__true,
+	[FATTR4_HOMOGENEOUS]		= nfsd4_encode_fattr4_homogeneous,
 	[FATTR4_MAXFILESIZE]		= nfsd4_encode_fattr4_maxfilesize,
 	[FATTR4_MAXLINK]		= nfsd4_encode_fattr4_maxlink,
 	[FATTR4_MAXNAME]		= nfsd4_encode_fattr4_maxname,
@@ -3968,6 +3997,23 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
 		args.fhp = tempfh;
 	} else
 		args.fhp = fhp;
+	if (attrmask[0] & (FATTR4_WORD0_CASE_INSENSITIVE |
+			   FATTR4_WORD0_CASE_PRESERVING)) {
+		err = nfsd_get_case_info(dentry, &args.case_insensitive,
+					 &args.case_preserving);
+		/*
+		 * Per RFC 8881 Section 18.7.3, an attribute advertised
+		 * in SUPPORTED_ATTRS must come back with a value or the
+		 * GETATTR must fail. nfsd_get_case_info() fills POSIX
+		 * defaults and returns -EOPNOTSUPP when the underlying
+		 * filesystem does not expose case state; encode those
+		 * defaults so the reply agrees with what SUPPORTED_ATTRS
+		 * advertises. Other errors fail the operation as the
+		 * spec requires.
+		 */
+		if (err && err != -EOPNOTSUPP)
+			goto out_nfserr;
+	}
 
 	if (attrmask[0] & FATTR4_WORD0_ACL) {
 		err = nfsd4_get_nfs4_acl(rqstp, dentry, &args.acl);

-- 
2.53.0


^ permalink raw reply related

* [PATCH v14 15/15] ksmbd: Report filesystem case sensitivity via FS_ATTRIBUTE_INFORMATION
From: Chuck Lever @ 2026-05-07  8:53 UTC (permalink / raw)
  To: Al Viro, Christian Brauner, Jan Kara
  Cc: linux-fsdevel, linux-ext4, linux-xfs, linux-cifs, linux-nfs,
	linux-api, linux-f2fs-devel, hirofumi, linkinjeon, sj1557.seo,
	yuezhang.mo, almaz.alexandrovich, slava, glaubitz, frank.li,
	tytso, adilger.kernel, cem, sfrench, pc, ronniesahlberg, sprasad,
	trondmy, anna, jaegeuk, chao, hansg, senozhatsky, Chuck Lever,
	Roland Mainz
In-Reply-To: <20260507-case-sensitivity-v14-0-e62cc8200435@oracle.com>

From: Chuck Lever <chuck.lever@oracle.com>

FS_ATTRIBUTE_INFORMATION responses have always reported
FILE_CASE_SENSITIVE_SEARCH and FILE_CASE_PRESERVED_NAMES
unconditionally. Case-insensitive filesystems like exFAT, and
casefolded directories on ext4 or f2fs, have no way to signal
their actual semantics to SMB clients.

Now that filesystems expose case behavior through ->fileattr_get,
query it via vfs_fileattr_get() and translate the FS_XFLAG_CASEFOLD
and FS_XFLAG_CASENONPRESERVING flags into the corresponding SMB
attributes. Filesystems without ->fileattr_get continue reporting
default POSIX behavior (case-sensitive, case-preserving).

SMB's FS_ATTRIBUTE_INFORMATION reports per-share attributes from
the share root, not per-file. Shares mixing casefold and
non-casefold directories report the root directory's behavior.

Acked-by: Namjae Jeon <linkinjeon@kernel.org>
Reviewed-by: Roland Mainz <roland.mainz@nrubsig.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/smb/server/smb2pdu.c | 30 ++++++++++++++++++++++++------
 1 file changed, 24 insertions(+), 6 deletions(-)

diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c
index ee32e61b6d3c..cf0bc453a036 100644
--- a/fs/smb/server/smb2pdu.c
+++ b/fs/smb/server/smb2pdu.c
@@ -14,6 +14,7 @@
 #include <linux/falloc.h>
 #include <linux/mount.h>
 #include <linux/filelock.h>
+#include <linux/fileattr.h>
 
 #include "glob.h"
 #include "smbfsctl.h"
@@ -5541,16 +5542,33 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work,
 	case FS_ATTRIBUTE_INFORMATION:
 	{
 		FILE_SYSTEM_ATTRIBUTE_INFO *info;
+		struct file_kattr fa = {};
 		size_t sz;
+		u32 attrs;
+		int err;
 
 		info = (FILE_SYSTEM_ATTRIBUTE_INFO *)rsp->Buffer;
-		info->Attributes = cpu_to_le32(FILE_SUPPORTS_OBJECT_IDS |
-					       FILE_PERSISTENT_ACLS |
-					       FILE_UNICODE_ON_DISK |
-					       FILE_CASE_PRESERVED_NAMES |
-					       FILE_CASE_SENSITIVE_SEARCH |
-					       FILE_SUPPORTS_BLOCK_REFCOUNTING);
+		attrs = FILE_SUPPORTS_OBJECT_IDS |
+			FILE_PERSISTENT_ACLS |
+			FILE_UNICODE_ON_DISK |
+			FILE_SUPPORTS_BLOCK_REFCOUNTING;
 
+		err = vfs_fileattr_get(path.dentry, &fa);
+		/*
+		 * -EINVAL, -EOPNOTSUPP: ntfs-3g and other FUSE
+		 * filesystems that lack FS_IOC_FSGETXATTR support.
+		 */
+		if (err && err != -ENOIOCTLCMD && err != -ENOTTY &&
+		    err != -EINVAL && err != -EOPNOTSUPP) {
+			path_put(&path);
+			return err;
+		}
+		if (!(fa.fsx_xflags & FS_XFLAG_CASEFOLD))
+			attrs |= FILE_CASE_SENSITIVE_SEARCH;
+		if (!(fa.fsx_xflags & FS_XFLAG_CASENONPRESERVING))
+			attrs |= FILE_CASE_PRESERVED_NAMES;
+
+		info->Attributes = cpu_to_le32(attrs);
 		info->Attributes |= cpu_to_le32(server_conf.share_fake_fscaps);
 
 		if (test_share_config_flag(work->tcon->share_conf,

-- 
2.53.0


^ permalink raw reply related

* Re: [PATCH] crypto: af_alg - Document the deprecation of AF_ALG
From: Kamran Khan @ 2026-05-10 15:54 UTC (permalink / raw)
  To: Jeff Barnes, Andy Lutomirski
  Cc: Eric Biggers, linux-crypto@vger.kernel.org, Herbert Xu,
	linux-doc@vger.kernel.org, linux-api@vger.kernel.org,
	linux-kernel@vger.kernel.org, netdev@vger.kernel.org,
	Linus Torvalds
In-Reply-To: <14A441D8-5370-44BE-8732-99BF8107C3FD@getmailspring.com>

Hi,

AF_ALG is useful not just for hardware-offloading, but also for memory 
isolation so that applications only get oracle access to the crypto keys 
and a memory-safety vulnerability in user applications would not 
immediately put the secret key material at risk.

I understand and appreciate the concern with complex attack surface and 
the increased frequency of attacks in this area. But I fear that 
completely removing AF_ALG increases the risk for userspace applications 
relying on it for memory isolation.

What alternatives do userspace applications have on Linux for ensuring 
crypto keys are not exposed in user memory? That is, FreeBSD and NetBSD 
natively provide /dev/crypto; removing AF_ALG would kill the only 
equivalent option on the Linux side for kernel-delegated cryptography.

Thanks,
Kamran.

On 5/6/26 7:42 AM, Jeff Barnes wrote:
> Hi,
> 
> On May 5 2026, at 7:17 pm, Andy Lutomirski <luto@amacapital.net> wrote:
> 
>>> On Apr 29, 2026, at 6:19 PM, Eric Biggers <ebiggers@kernel.org> wrote:
>>>   
>>> AF_ALG is almost completely unnecessary, and it exposes a massive attack
>>> surface that hasn't been standing up to modern vulnerability discovery
>>> tools.  The latest one even has its own website, providing a small
>>> Python script that reliably roots most Linux distros: https://copy.fail/
>>   
>> How about adding a configuration option, defaulted on, that requires
>> capable(CAP_SYS_ADMIN) to create the socket (and maybe also to bind /
>> connect it).  And a sysctl to allow the administrator to override this
>> in the unlikely event that it’s needed.
>>   
>> IIRC cryptsetup used to and maybe even still does require these
>> sockets sometimes and this would let it keep working.  And there's all
>> the FIPS stuff downthread.
> 
> Apologize in advance for the long-winded answer.
> 
> The "FIPS stuff" centers on using sha512hmac -> libkcapi -> AF_ALG for
> verifying integrity. The early‑boot sha512hmac check that some
> distributions use (typically from initramfs) sits at an awkward
> intersection of multiple standards, and it may help to clarify where it
> actually fits and where it doesn't.
> 
>  From a standards perspective, FIPS 140‑3 requires a cryptographic module
> to perform self‑integrity verification using an approved algorithm and
> to prevent the module from entering an operational state on failure. In
> the Linux kernel, the cryptographic module is the kernel crypto
> subsystem, and these requirements are met by the kernel’s internal
> power‑up self‑tests (KATs, etc.) on the crypto code and critical data as
> loaded into memory.
> 
> FIPS 199 / SP 800‑53 (e.g., SI‑7) impose system‑level integrity
> requirements (for Moderate impact systems), i.e., that unauthorized
> modification of critical components is prevented or detected and that
> failures result in a protective action. These controls are explicitly
> technology‑agnostic and are not limited to cryptographic‑module self‑tests.
> 
> The sha512hmac check is not the FIPS 140‑3 cryptographic‑module
> self‑integrity test. Instead, it has historically been used as a system
> integrity control that provides auditors with assurance that the kernel
> image containing the cryptographic module has not been modified prior to
> execution, and that a failure will halt the boot.
> 
> Although FIPS 140‑3 does not mandate an HMAC over the kernel image, the
> early‑boot HMAC became an accepted evidence pattern for satisfying
> system‑integrity expectations (FIPS 199 / SI‑7) alongside a kernel
> crypto validation. This is why it is often perceived as “required” for
> FIPS submissions, even though it is not normatively required by
> FIPS 140‑3 itself.
> 
> With the deprecation/removal of AF_ALG for this use case, there is no
> longer a supported way to perform an early‑boot, userspace‑driven HMAC
> using validated kernel crypto without introducing circular dependencies
> (e.g., relying on userspace crypto before crypto self‑tests complete).
> As a result, there is no drop‑in replacement for sha512hmac that
> preserves all of its historical properties.
> 
> This is a new development that challenges a long‑standing assumption:
> that system‑integrity evidence and cryptographic‑module self‑integrity
> can be cleanly separated while still being demonstrated by a single
> early‑boot mechanism. That assumption no longer holds given proposed
> kernel interfaces.
> 
> A more accurate decomposition (and one that aligns with the intent of
> the standards) is to separate integrity enforcement by system phase.
> 
> 1. Secure Boot (or equivalent platform verification) ensures that a
> modified kernel image is not executed at all. This satisfies the
> requirement that critical components are not loaded in a modified state
> and that integrity failure results in a protective action (boot prevention).
> 
> 2. IMA (with appraisal and enforcement) ensures that modified
> executables, modules, or firmware cannot be loaded or executed once the
> kernel is running.
> 
> 3. Kernel crypto self‑tests continue to satisfy FIPS 140‑3
> self‑integrity requirements independently of the above.
> 
> Taken together, Secure Boot + IMA provide continuous system‑integrity
> enforcement without re‑introducing early‑boot HMACs or AF_ALG
> dependencies, while keeping cryptographic‑module self‑integrity
> correctly scoped to the kernel crypto subsystem.
> 
> The transition away from sha512hmac is therefore not a removal of
> integrity enforcement, but a shift from a single, early‑boot mechanism
> to a phased integrity model that better reflects the separation of
> concerns already present in the standards — even though this separation
> was previously masked by the hacky HMAC approach.
> 
> This change will require updated documentation and auditor education,
> but it reflects the current technical reality and avoids perpetuating an
> interface that no longer has a sustainable implementation path.
> 
>>   
>>   
>>>   
>>> This isn't sustainable, especially as LLMs have accelerated the rate the
>>> vulnerabilities are coming in.  The effort that is being put into this
>>> thing is vastly disproportional to the few programs that actually use
>>> it, and those programs would be better served by userspace code anyway.
>>>   
>>> These issues have been noted in many mailing list discussions already.
>>> But until now they haven't been reflected in the documentation or
>>> kconfig menu itself, and the vulnerabilities are still coming in.
>>>   
>>> Let's go ahead and document the deprecation.
>>>   
>>> This isn't intended to change anything overnight.  After all, most Linux
>>> distros won't be able to disable the kconfig options quite yet, mainly
>>> because of iwd.  But this should create a bit more impetus for these
>>> userspace programs to be fixed, and the documentation update should also
>>> help prevent more users from appearing.
>>>   
>>> Signed-off-by: Eric Biggers <ebiggers@kernel.org>
>>> ---
>>>   
>>> This patch is targeting crypto/master
>>>   
>>> Documentation/crypto/userspace-if.rst | 82 ++++++++++++++++++++-------
>>> crypto/Kconfig                        | 69 ++++++++++++++++------
>>> 2 files changed, 113 insertions(+), 38 deletions(-)
>>>   
>>> diff --git a/Documentation/crypto/userspace-if.rst b/Documentation/crypto/userspace-if.rst
>>> index 021759198fe7..c39f5c79a5b7 100644
>>> --- a/Documentation/crypto/userspace-if.rst
>>> +++ b/Documentation/crypto/userspace-if.rst
>>> @@ -2,30 +2,72 @@ User Space Interface
>>> ====================
>>>   
>>> Introduction
>>> ------------
>>>   
>>> -The concepts of the kernel crypto API visible to kernel space is fully
>>> -applicable to the user space interface as well. Therefore, the kernel
>>> -crypto API high level discussion for the in-kernel use cases applies
>>> -here as well.
>>> -
>>> -The major difference, however, is that user space can only act as a
>>> -consumer and never as a provider of a transformation or cipher
>>> -algorithm.
>>> -
>>> -The following covers the user space interface exported by the kernel
>>> -crypto API. A working example of this description is libkcapi that can
>>> -be obtained from [1]. That library can be used by user space
>>> -applications that require cryptographic services from the kernel.
>>> -
>>> -Some details of the in-kernel kernel crypto API aspects do not apply to
>>> -user space, however. This includes the difference between synchronous
>>> -and asynchronous invocations. The user space API call is fully
>>> -synchronous.
>>> -
>>> -[1] https://www.chronox.de/libkcapi/index.html
>>> +AF_ALG provides unprivileged userspace programs access to arbitrary hash,
>>> +symmetric cipher, AEAD, and RNG algorithms that are implemented in kernel-mode
>>> +code.
>>> +
>>> +AF_ALG is insecure and is deprecated. Originally added to the kernel
>>> in 2010,
>>> +most kernel developers now consider it to be a mistake.
>>> +
>>> +AF_ALG continues to be supported only for backwards compatibility.
>>> On systems
>>> +where no programs using AF_ALG remain, the support for it should be
>>> disabled by
>>> +disabling ``CONFIG_CRYPTO_USER_API_*``.
>>> +
>>> +Deprecation
>>> +-----------
>>> +
>>> +AF_ALG was originally intended to provide userspace programs access
>>> to crypto
>>> +accelerators that they wouldn't otherwise have access to.
>>> +
>>> +However, that capability turned out to not be useful on very many
>>> systems. More
>>> +significantly, the actual implementation exposes a vastly greater
>>> amount of
>>> +functionality than that. It actually provides access to all software algorithms.
>>> +
>>> +This includes arbitrary compositions of different algorithms created
>>> via a
>>> +complex template system, as well as algorithms that only make sense
>>> as internal
>>> +implementation details of other algorithms. It also includes full zero-copy
>>> +support, which is difficult for the kernel to implement securely.
>>> +
>>> +Ultimately, these algorithms are just math computations. They use
>>> the same
>>> +instructions that userspace programs already have access to, just
>>> accessed in a
>>> +much more convoluted and less efficient way.
>>> +
>>> +Indeed, userspace code is nearly always what is being used anyway.
>>> These same
>>> +algorithms are widely implemented in userspace crypto libraries.
>>> +
>>> +Meanwhile, AF_ALG hasn't been withstanding modern vulnerability
>>> discovery tools
>>> +such as syzbot and large language models. It receives a steady
>>> stream of CVEs.
>>> +Some of the examples include:
>>> +
>>> +- CVE-2026-31677
>>> +- CVE-2026-31431 (https://copy.fail)
>>> +- CVE-2025-38079
>>> +- CVE-2025-37808
>>> +- CVE-2024-26824
>>> +- CVE-2022-48781
>>> +- CVE-2019-8912
>>> +- CVE-2018-14619
>>> +- CVE-2017-18075
>>> +- CVE-2017-17806
>>> +- CVE-2017-17805
>>> +- CVE-2016-10147
>>> +- CVE-2015-8970
>>> +- CVE-2015-3331
>>> +- CVE-2014-9644
>>> +- CVE-2013-7421
>>> +- CVE-2011-4081
>>> +
>>> +It is recommended that, whenever possible, userspace programs be
>>> migrated to
>>> +userspace crypto code (which again, is what is normally used anyway) and
>>> +``CONFIG_CRYPTO_USER_API_*`` be disabled.  On systems that use
>>> SELinux, SELinux
>>> +can also be used to restrict the use of AF_ALG to trusted programs.
>>> +
>>> +The remainder of this documentation provides the historical
>>> documentation for
>>> +the deprecated AF_ALG interface.
>>>   
>>> User Space API General Remarks
>>> ------------------------------
>>>   
>>> The kernel crypto API is accessible from user space. Currently, the
>>> diff --git a/crypto/Kconfig b/crypto/Kconfig
>>> index 103d1f58cb7c..6cd1c478d4be 100644
>>> --- a/crypto/Kconfig
>>> +++ b/crypto/Kconfig
>>> @@ -1278,48 +1278,72 @@ config CRYPTO_DF80090A
>>>     tristate
>>>     select CRYPTO_AES
>>>     select CRYPTO_CTR
>>>   
>>> endmenu
>>> -menu "Userspace interface"
>>> +menu "Userspace interface (deprecated)"
>>>   
>>> config CRYPTO_USER_API
>>>     tristate
>>>   
>>> config CRYPTO_USER_API_HASH
>>> -    tristate "Hash algorithms"
>>> +    tristate "Hash algorithms (deprecated)"
>>>     depends on NET
>>>     select CRYPTO_HASH
>>>     select CRYPTO_USER_API
>>>     help
>>> -      Enable the userspace interface for hash algorithms.
>>> +      Enable the AF_ALG userspace interface for hash algorithms.  This
>>> +      provides unprivileged userspace programs access to arbitrary hash
>>> +      algorithms implemented in the kernel's privileged execution context.
>>>   
>>> -      See Documentation/crypto/userspace-if.rst and
>>> -      https://www.chronox.de/libkcapi/html/index.html
>>> +      This interface is deprecated and is supported only for backwards
>>> +      compatibility.  It regularly has vulnerabilities, and the capabilities
>>> +      it provides are redundant with userspace crypto libraries.
>>> +
>>> +      Enable this only if needed for support for a program that
>>> hasn't yet
>>> +      been converted to userspace crypto, for example iwd.
>>> +
>>> +      See also Documentation/crypto/userspace-if.rst
>>>   
>>> config CRYPTO_USER_API_SKCIPHER
>>> -    tristate "Symmetric key cipher algorithms"
>>> +    tristate "Symmetric key cipher algorithms (deprecated)"
>>>     depends on NET
>>>     select CRYPTO_SKCIPHER
>>>     select CRYPTO_USER_API
>>>     help
>>> -      Enable the userspace interface for symmetric key cipher algorithms.
>>> +      Enable the AF_ALG userspace interface for symmetric key algorithms.
>>> +      This provides unprivileged userspace programs access to arbitrary
>>> +      symmetric key algorithms implemented in the kernel's privileged
>>> +      execution context.
>>> +
>>> +      This interface is deprecated and is supported only for backwards
>>> +      compatibility.  It regularly has vulnerabilities, and the capabilities
>>> +      it provides are redundant with userspace crypto libraries.
>>> +
>>> +      Enable this only if needed for support for a program that
>>> hasn't yet
>>> +      been converted to userspace crypto, for example iwd, or cryptsetup
>>> +      with certain algorithms.
>>>   
>>> -      See Documentation/crypto/userspace-if.rst and
>>> -      https://www.chronox.de/libkcapi/html/index.html
>>> +      See also Documentation/crypto/userspace-if.rst
>>>   
>>> config CRYPTO_USER_API_RNG
>>> -    tristate "RNG (random number generator) algorithms"
>>> +    tristate "Random number generation algorithms (deprecated)"
>>>     depends on NET
>>>     select CRYPTO_RNG
>>>     select CRYPTO_USER_API
>>>     help
>>> -      Enable the userspace interface for RNG (random number generator)
>>> -      algorithms.
>>> +      Enable the AF_ALG userspace interface for random number generation
>>> +      (RNG) algorithms.  This provides unprivileged userspace programs
>>> +      access to arbitrary RNG algorithms implemented in the kernel's
>>> +      privileged execution context.
>>>   
>>> -      See Documentation/crypto/userspace-if.rst and
>>> -      https://www.chronox.de/libkcapi/html/index.html
>>> +      This interface is deprecated and is supported only for backwards
>>> +      compatibility.  It regularly has vulnerabilities, and the capabilities
>>> +      it provides are redundant with userspace crypto libraries as
>>> well as
>>> +      the normal kernel RNG (e.g., /dev/urandom and getrandom(2)).
>>> +
>>> +      See also Documentation/crypto/userspace-if.rst
>>>   
>>> config CRYPTO_USER_API_RNG_CAVP
>>>     bool "Enable CAVP testing of DRBG"
>>>     depends on CRYPTO_USER_API_RNG && CRYPTO_DRBG
>>>     help
>>> @@ -1330,20 +1354,29 @@ config CRYPTO_USER_API_RNG_CAVP
>>>   
>>>       This should only be enabled for CAVP testing. You should say
>>>       no unless you know what this is.
>>>   
>>> config CRYPTO_USER_API_AEAD
>>> -    tristate "AEAD cipher algorithms"
>>> +    tristate "AEAD cipher algorithms (deprecated)"
>>>     depends on NET
>>>     select CRYPTO_AEAD
>>>     select CRYPTO_SKCIPHER
>>>     select CRYPTO_USER_API
>>>     help
>>> -      Enable the userspace interface for AEAD cipher algorithms.
>>> +      Enable the AF_ALG userspace interface for authenticated encryption
>>> +      with associated data (AEAD) algorithms.  This provides unprivileged
>>> +      userspace programs access to arbitrary AEAD algorithms
>>> implemented in
>>> +      the kernel's privileged execution context.
>>> +
>>> +      This interface is deprecated and is supported only for backwards
>>> +      compatibility.  It regularly has vulnerabilities, and the capabilities
>>> +      it provides are redundant with userspace crypto libraries.
>>> +
>>> +      Enable this only if needed for support for a program that
>>> hasn't yet
>>> +      been converted to userspace crypto, for example iwd.
>>>   
>>> -      See Documentation/crypto/userspace-if.rst and
>>> -      https://www.chronox.de/libkcapi/html/index.html
>>> +      See also Documentation/crypto/userspace-if.rst
>>>   
>>> config CRYPTO_USER_API_ENABLE_OBSOLETE
>>>     bool "Obsolete cryptographic algorithms"
>>>     depends on CRYPTO_USER_API
>>>     default y
>>>   
>>> base-commit: 57b8e2d666a31fa201432d58f5fe3469a0dd83ba
>>> --
>>> 2.54.0
>>>   
>>>   
>>
> 


^ permalink raw reply

* Re: [PATCH] crypto: af_alg - Document the deprecation of AF_ALG
From: Eric Biggers @ 2026-05-10 16:32 UTC (permalink / raw)
  To: Kamran Khan
  Cc: Jeff Barnes, Andy Lutomirski, linux-crypto@vger.kernel.org,
	Herbert Xu, linux-doc@vger.kernel.org, linux-api@vger.kernel.org,
	linux-kernel@vger.kernel.org, netdev@vger.kernel.org,
	Linus Torvalds
In-Reply-To: <0b8bba44-f6bb-4d69-b9d4-5787c276d41a@inspirated.com>

On Sun, May 10, 2026 at 08:54:07AM -0700, Kamran Khan wrote:
> Hi,
> 
> AF_ALG is useful not just for hardware-offloading, but also for memory
> isolation so that applications only get oracle access to the crypto keys and
> a memory-safety vulnerability in user applications would not immediately put
> the secret key material at risk.

Note that if that memory-safety vulnerability leads to code execution in
the application, then it doesn't matter that it "only" has oracle
access.  It can still decrypt any data encrypted by that key.

The relevant threat model would be arbitrary reads, not any
"memory-safety vulnerability".

> I understand and appreciate the concern with complex attack surface and the
> increased frequency of attacks in this area. But I fear that completely
> removing AF_ALG increases the risk for userspace applications relying on it
> for memory isolation.
> 
> What alternatives do userspace applications have on Linux for ensuring
> crypto keys are not exposed in user memory? That is, FreeBSD and NetBSD
> natively provide /dev/crypto; removing AF_ALG would kill the only equivalent
> option on the Linux side for kernel-delegated cryptography.

The standard solution is simply to use an isolated userspace process
like ssh-agent.  Yes, the keys will be in "user memory".  But "not
exposed in user memory" is *not* a correct statement of the problem.

(Also note that protecting not-actively-in-use data from arbitrary read
primitives doesn't require cryptography at all.  That can be done simply
by using mprotect() to remove read permission from the memory, then
temporarily adding it back when it needs to be accessed.)

In any case, any hypothetical security benefit provided by AF_ALG would
have to be *very high* to outweigh the continuous stream of
vulnerabilities in it.  I understand that people using AF_ALG might not
be familiar with that continuous stream of vulnerabilities, but it would
be worth spending some time researching what has been going on.

- Eric

^ permalink raw reply

* Re: [PATCH] crypto: af_alg - Document the deprecation of AF_ALG
From: Andy Lutomirski @ 2026-05-10 18:06 UTC (permalink / raw)
  To: Eric Biggers
  Cc: Kamran Khan, Jeff Barnes, linux-crypto@vger.kernel.org,
	Herbert Xu, linux-doc@vger.kernel.org, linux-api@vger.kernel.org,
	linux-kernel@vger.kernel.org, netdev@vger.kernel.org,
	Linus Torvalds
In-Reply-To: <20260510163204.GA2279@sol>

On Sun, May 10, 2026 at 9:33 AM Eric Biggers <ebiggers@kernel.org> wrote:

> In any case, any hypothetical security benefit provided by AF_ALG would
> have to be *very high* to outweigh the continuous stream of
> vulnerabilities in it.  I understand that people using AF_ALG might not
> be familiar with that continuous stream of vulnerabilities, but it would
> be worth spending some time researching what has been going on.


It would not be completely crazy to have a simple, straightforward
interface by which user code could ask the kernel to do a
cryptographic operation.  Think:

int compute_keyed_hash(int key_fd, const void *data, size_t len);

where key_fd encodes both the key and the hash type (HMAC-SHA256 or
whatever), and there is a very, very small menu of hashes to choose
from.

But this is not really obviously worth the hassle.  And AF_ALG is
definitely not the right interface.

^ permalink raw reply

* Re: [RFC PATCH v2 1/2] vfs: syscalls: add mkdirat2() that returns an O_DIRECTORY fd
From: Christian Brauner @ 2026-05-11 12:00 UTC (permalink / raw)
  To: Jori Koolstra
  Cc: Aleksa Sarai, Andy Lutomirski, Thomas Gleixner, Ingo Molnar,
	Borislav Petkov, Dave Hansen, Alexander Viro, Arnd Bergmann,
	H . Peter Anvin, Jan Kara, Peter Zijlstra, Andrey Albershteyn,
	Masami Hiramatsu, Jiri Olsa, Thomas Weißschuh,
	Mathieu Desnoyers, cmirabil, Greg Kroah-Hartman, Jeff Layton,
	linux-kernel, linux-fsdevel, linux-api, linux-arch
In-Reply-To: <1600596489.77018.1777916475931@kpc.webmail.kpnmail.nl>

On Mon, May 04, 2026 at 07:41:15PM +0200, Jori Koolstra wrote:
> 
> > Op 27-04-2026 17:48 CEST schreef Christian Brauner <brauner@kernel.org>:
> > 
> > So definitely a patchset worthing doing but this will be hairy. And
> > Mateusz is right. As written this doesn't work. The canonical pattern
> > how e.g., dentry_open() does it is to preallocate the file.
> > 
> 
> Is this because of Mateusz point that we should fail as soon as possible
> to prevent any fs changes from taking effect?
> 
> But like Mateusz points out, this is not really happening for open() with
> O_CREAT either. So is there any policy for what we do and do not tolerate?
> (although I agree we should definitely preallocate the file; thanks for
> pointing that pattern out).

Your version can fail in a lot more cases than O_CREAT because the file
is allocated last which is just not acceptable.

And a misbehaving LSM that ends up preventing opening a created file is
really not our concern. The system can behave in all non-standard ways
with mandatory access control.

The other concern that was brought up in some version is truncate but I
really don't understand what that is supposed to be about. O_TRUNC and
O_CREAT raally don't get in the way of each other in the way people
think they would.

Just look at the FMODE_CREATED case. If that's raised on do_open() then
O_TRUNC is ignored for very obvious reasons.

The only reason where O_TRUNC with O_CREAT matters is if the file did
already exist which also implies O_EXCL isn't raised. In that case this
ends up as a regular truncate request and then it is possible to hit the
handle_truncate() codepath. And there it really doesn't matter. A
concurrent exec or truncate that prevents you from O_CREAT | O_TRUNC
seems perfectly benign if you didn't actually create the file in the
first place. The O_TRUNC would only be honored if we did end up creating
the file. If someone else raised us in doing their own truncate or is
attempting an exe then we should most certainly not get to truncate over
them. Failing is the right thing to do here.

> 
> > I do wonder though whether we shouldn't just make O_CREAT | O_DIRECTORY
> > work. I remember that I had a vague comment about this in [1] a few
> > years ago (cf. [1]). It might even be less hairy to get that one right
> > as all the thinking for O_CREAT is already there.
> > 
> > What was the rationale for mkdirat2() instead of threading this through
> > openat()/openat2() with O_CREAT?
> > 
> 
> Because of Mateusz' objection, but I agree with Aleksa (and you in 2023)
> that this is intuitive and you mentioned POSIX allows for it.
> 
> But a more general issue, that also applies to this mkdirat2 patch,
> is Linus' objection in that same thread.[1] However, the use-case of

mkdirat2() is objectively the worse api. It forces userspace to use a
separate system call without any reason whatsoever. If you can to
O_CREAT you should also be able to to O_DIRECTORY in the same system
call. If we support O_DIRECTORY | O_CREAT we get all the lookup
restriction niceties RESOLVE_* for free. Plus, it is supportable both in
openat() and openat2() because I made that combo return an errno.

UAPI design often is a nasty mix of performance (context switches),
separation of concerns and privileges, tastefulness, and compromises you
never thought or wanted to make.

I think here it is pretty clear that O_DIRECTORY | O_CREAT is the right
thing to do. Instead of restructuring a bunch of codepaths so it can be
plumbed through to the filesystems we just reuse the existing codepaths
that give us the right context for free.

And during LSFMM the VFS maintains all agreed to proceed with
O_DIRECTORY | O_CREAT.

^ permalink raw reply

* Re: [PATCH v14 00/15] Exposing case folding behavior
From: Christian Brauner @ 2026-05-11 14:02 UTC (permalink / raw)
  To: Chuck Lever
  Cc: Christian Brauner, linux-fsdevel, linux-ext4, linux-xfs,
	linux-cifs, linux-nfs, linux-api, linux-f2fs-devel, hirofumi,
	linkinjeon, sj1557.seo, yuezhang.mo, almaz.alexandrovich, slava,
	glaubitz, frank.li, tytso, adilger.kernel, cem, sfrench, pc,
	ronniesahlberg, sprasad, trondmy, anna, jaegeuk, chao, hansg,
	senozhatsky, Darrick J. Wong, Roland Mainz, Steve French
In-Reply-To: <20260507-case-sensitivity-v14-0-e62cc8200435@oracle.com>

On Thu, 07 May 2026 04:52:53 -0400, Chuck Lever wrote:
> Christian, let's lock this one in. I will post subsequent changes
> as delta patches.
> 
> Following on from:
> 
> https://lore.kernel.org/linux-nfs/20251021-zypressen-bazillus-545a44af57fd@brauner/T/#m0ba197d75b7921d994cf284f3cef3a62abb11aaa
> 
> [...]

Applied to the vfs-7.2.exportfs branch of the vfs/vfs.git tree.
Patches in the vfs-7.2.exportfs branch should appear in linux-next soon.

Please report any outstanding bugs that were missed during review in a
new review to the original patch series allowing us to drop it.

It's encouraged to provide Acked-bys and Reviewed-bys even though the
patch has now been applied. If possible patch trailers will be updated.

Note that commit hashes shown below are subject to change due to rebase,
trailer updates or similar. If in doubt, please check the listed branch.

tree:   https://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs.git
branch: vfs-7.2.exportfs

[01/15] fs: Move file_kattr initialization to callers
        https://git.kernel.org/vfs/vfs/c/9d3942fa6a55
[02/15] fs: Add case sensitivity flags to file_kattr
        https://git.kernel.org/vfs/vfs/c/72504a889e52
[03/15] fat: Implement fileattr_get for case sensitivity
        https://git.kernel.org/vfs/vfs/c/d0d06cfce960
[04/15] exfat: Implement fileattr_get for case sensitivity
        https://git.kernel.org/vfs/vfs/c/64a4f2090cb2
[05/15] ntfs3: Implement fileattr_get for case sensitivity
        https://git.kernel.org/vfs/vfs/c/5fff53318cbf
[06/15] hfs: Implement fileattr_get for case sensitivity
        https://git.kernel.org/vfs/vfs/c/1b25c01375e0
[07/15] hfsplus: Report case sensitivity in fileattr_get
        https://git.kernel.org/vfs/vfs/c/b9e976dd58ff
[08/15] xfs: Report case sensitivity in fileattr_get
        https://git.kernel.org/vfs/vfs/c/30617d630d2f
[09/15] cifs: Implement fileattr_get for case sensitivity
        https://git.kernel.org/vfs/vfs/c/0f372b05c80c
[10/15] nfs: Implement fileattr_get for case sensitivity
        https://git.kernel.org/vfs/vfs/c/3ca9954cdc04
[11/15] vboxsf: Implement fileattr_get for case sensitivity
        https://git.kernel.org/vfs/vfs/c/0f5f23d411ac
[12/15] isofs: Implement fileattr_get for case sensitivity
        https://git.kernel.org/vfs/vfs/c/d56f6094035c
[13/15] nfsd: Report export case-folding via NFSv3 PATHCONF
        https://git.kernel.org/vfs/vfs/c/5ca2c8f14428
[14/15] nfsd: Implement NFSv4 FATTR4_CASE_INSENSITIVE and FATTR4_CASE_PRESERVING
        https://git.kernel.org/vfs/vfs/c/62c9555937ca
[15/15] ksmbd: Report filesystem case sensitivity via FS_ATTRIBUTE_INFORMATION
        https://git.kernel.org/vfs/vfs/c/35188379f010

^ permalink raw reply

* Re: [PATCH v14 00/15] Exposing case folding behavior
From: Christian Brauner @ 2026-05-11 14:02 UTC (permalink / raw)
  To: Chuck Lever
  Cc: Al Viro, Jan Kara, linux-fsdevel, linux-ext4, linux-xfs,
	linux-cifs, linux-nfs, linux-api, linux-f2fs-devel, hirofumi,
	linkinjeon, sj1557.seo, yuezhang.mo, almaz.alexandrovich, slava,
	glaubitz, frank.li, tytso, adilger.kernel, cem, sfrench, pc,
	ronniesahlberg, sprasad, trondmy, anna, jaegeuk, chao, hansg,
	senozhatsky, Chuck Lever, Darrick J. Wong, Roland Mainz,
	Steve French
In-Reply-To: <20260507-case-sensitivity-v14-0-e62cc8200435@oracle.com>

On Thu, May 07, 2026 at 04:52:53AM -0400, Chuck Lever wrote:
> Christian, let's lock this one in. I will post subsequent changes
> as delta patches.

Perfect!

^ permalink raw reply

* Re: [PATCH v14 00/15] Exposing case folding behavior
From: Chuck Lever @ 2026-05-11 14:07 UTC (permalink / raw)
  To: Christian Brauner
  Cc: linux-fsdevel, linux-ext4, linux-xfs, linux-cifs, linux-nfs,
	linux-api, linux-f2fs-devel, hirofumi, linkinjeon, sj1557.seo,
	yuezhang.mo, almaz.alexandrovich, slava, glaubitz, frank.li,
	tytso, adilger.kernel, cem, sfrench, pc, ronniesahlberg, sprasad,
	trondmy, anna, jaegeuk, chao, hansg, senozhatsky, Darrick J. Wong,
	Roland Mainz, Steve French
In-Reply-To: <20260511-wertverlust-vorbringen-070f016f3bd4@brauner>

On 5/11/26 10:02 AM, Christian Brauner wrote:
> On Thu, 07 May 2026 04:52:53 -0400, Chuck Lever wrote:
>> Christian, let's lock this one in. I will post subsequent changes
>> as delta patches.
>>
>> Following on from:
>>
>> https://lore.kernel.org/linux-nfs/20251021-zypressen-bazillus-545a44af57fd@brauner/T/#m0ba197d75b7921d994cf284f3cef3a62abb11aaa
>>
>> [...]
> 
> Applied to the vfs-7.2.exportfs branch of the vfs/vfs.git tree.
> Patches in the vfs-7.2.exportfs branch should appear in linux-next soon.

Not vfs-7.2-casefold ?

Fwiw, I was intending to rebase nfsd-next on the vfs integration branch,
which should have both vfs-7.2-casefold and vfs-7.2.exportfs merged in,
along with Jeff's series that implements the infrastructure to support
directory delegation properly. LMK if that's crazy talk.


-- 
Chuck Lever

^ permalink raw reply

* Re: [PATCH v14 00/15] Exposing case folding behavior
From: Christian Brauner @ 2026-05-11 14:55 UTC (permalink / raw)
  To: Chuck Lever
  Cc: linux-fsdevel, linux-ext4, linux-xfs, linux-cifs, linux-nfs,
	linux-api, linux-f2fs-devel, hirofumi, linkinjeon, sj1557.seo,
	yuezhang.mo, almaz.alexandrovich, slava, glaubitz, frank.li,
	tytso, adilger.kernel, cem, sfrench, pc, ronniesahlberg, sprasad,
	trondmy, anna, jaegeuk, chao, hansg, senozhatsky, Darrick J. Wong,
	Roland Mainz, Steve French
In-Reply-To: <705e1769-6a5e-440d-bf50-5e5feec2b88d@oracle.com>

On Mon, May 11, 2026 at 10:07:44AM -0400, Chuck Lever wrote:
> On 5/11/26 10:02 AM, Christian Brauner wrote:
> > On Thu, 07 May 2026 04:52:53 -0400, Chuck Lever wrote:
> >> Christian, let's lock this one in. I will post subsequent changes
> >> as delta patches.
> >>
> >> Following on from:
> >>
> >> https://lore.kernel.org/linux-nfs/20251021-zypressen-bazillus-545a44af57fd@brauner/T/#m0ba197d75b7921d994cf284f3cef3a62abb11aaa
> >>
> >> [...]
> > 
> > Applied to the vfs-7.2.exportfs branch of the vfs/vfs.git tree.
> > Patches in the vfs-7.2.exportfs branch should appear in linux-next soon.
> 
> Not vfs-7.2-casefold ?
> 
> Fwiw, I was intending to rebase nfsd-next on the vfs integration branch,
> which should have both vfs-7.2-casefold and vfs-7.2.exportfs merged in,
> along with Jeff's series that implements the infrastructure to support
> directory delegation properly. LMK if that's crazy talk.

I think you should just merge:

vfs-7.2.exportfs
vfs-7.2.casefold

as the order doesn't matter and they don't depend on each other. I have
marked both branches as shared so they won't get touched again. If more
ends on either of these branches it doesn't matter to you. IOW, I think
you should just pick the minimum you need and then you don't need to
hear from again and by the time you send your pull request all the
prerequisites will already be in Linus' tree.

If you merge in vfs.all you're subject to problems from rebases. So I'd
not recommend that.

^ permalink raw reply

* Re: [PATCH v14 00/15] Exposing case folding behavior
From: Christian Brauner @ 2026-05-11 14:55 UTC (permalink / raw)
  To: Chuck Lever
  Cc: Christian Brauner, Al Viro, Jan Kara, linux-fsdevel, linux-ext4,
	linux-xfs, linux-cifs, linux-nfs, linux-api, linux-f2fs-devel,
	hirofumi, linkinjeon, sj1557.seo, yuezhang.mo,
	almaz.alexandrovich, slava, glaubitz, frank.li, tytso,
	adilger.kernel, cem, sfrench, pc, ronniesahlberg, sprasad,
	trondmy, anna, jaegeuk, chao, hansg, senozhatsky, Darrick J. Wong,
	Roland Mainz, Steve French
In-Reply-To: <20260507-case-sensitivity-v14-0-e62cc8200435@oracle.com>

On Thu, 07 May 2026 04:52:53 -0400, Chuck Lever wrote:
> Christian, let's lock this one in. I will post subsequent changes
> as delta patches.
> 
> Following on from:
> 
> https://lore.kernel.org/linux-nfs/20251021-zypressen-bazillus-545a44af57fd@brauner/T/#m0ba197d75b7921d994cf284f3cef3a62abb11aaa
> 
> [...]

Now on the correct branch and pushed out.

---

Applied to the vfs-7.2.casefold branch of the vfs/vfs.git tree.
Patches in the vfs-7.2.casefold branch should appear in linux-next soon.

Please report any outstanding bugs that were missed during review in a
new review to the original patch series allowing us to drop it.

It's encouraged to provide Acked-bys and Reviewed-bys even though the
patch has now been applied. If possible patch trailers will be updated.

Note that commit hashes shown below are subject to change due to rebase,
trailer updates or similar. If in doubt, please check the listed branch.

tree:   https://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs.git
branch: master

[01/15] fs: Move file_kattr initialization to callers
        https://git.kernel.org/vfs/vfs/c/14c3197ecf07
[02/15] fs: Add case sensitivity flags to file_kattr
        https://git.kernel.org/vfs/vfs/c/3035e4454142
[03/15] fat: Implement fileattr_get for case sensitivity
        https://git.kernel.org/vfs/vfs/c/c92db2ca726f
[04/15] exfat: Implement fileattr_get for case sensitivity
        https://git.kernel.org/vfs/vfs/c/27e0b573dd4a
[05/15] ntfs3: Implement fileattr_get for case sensitivity
        https://git.kernel.org/vfs/vfs/c/eeb7b37b9700
[06/15] hfs: Implement fileattr_get for case sensitivity
        https://git.kernel.org/vfs/vfs/c/b6fe046c3023
[07/15] hfsplus: Report case sensitivity in fileattr_get
        https://git.kernel.org/vfs/vfs/c/a6469a15eefe
[08/15] xfs: Report case sensitivity in fileattr_get
        https://git.kernel.org/vfs/vfs/c/c9da43e4e5c3
[09/15] cifs: Implement fileattr_get for case sensitivity
        https://git.kernel.org/vfs/vfs/c/e50bc12f5a36
[10/15] nfs: Implement fileattr_get for case sensitivity
        https://git.kernel.org/vfs/vfs/c/92d67628a1a9
[11/15] vboxsf: Implement fileattr_get for case sensitivity
        https://git.kernel.org/vfs/vfs/c/ef14aa143f1d
[12/15] isofs: Implement fileattr_get for case sensitivity
        https://git.kernel.org/vfs/vfs/c/7bbd51b1d748
[13/15] nfsd: Report export case-folding via NFSv3 PATHCONF
        https://git.kernel.org/vfs/vfs/c/211cb2ba4877
[14/15] nfsd: Implement NFSv4 FATTR4_CASE_INSENSITIVE and FATTR4_CASE_PRESERVING
        https://git.kernel.org/vfs/vfs/c/01ee7c3d2e23
[15/15] ksmbd: Report filesystem case sensitivity via FS_ATTRIBUTE_INFORMATION
        https://git.kernel.org/vfs/vfs/c/0164df1d1de7

^ permalink raw reply

* [PATCH bpf-next v13 0/8] bpf: Extend BPF syscall with common attributes support
From: Leon Hwang @ 2026-05-11 15:28 UTC (permalink / raw)
  To: bpf
  Cc: Alexei Starovoitov, Daniel Borkmann, John Fastabend,
	Andrii Nakryiko, Martin KaFai Lau, Eduard Zingerman, Song Liu,
	Yonghong Song, KP Singh, Stanislav Fomichev, Hao Luo, Jiri Olsa,
	Shuah Khan, Christian Brauner, Seth Forshee, Yuichiro Tsuji,
	Andrey Albershteyn, Leon Hwang, Willem de Bruijn, Jason Xing,
	Tao Chen, Mykyta Yatsenko, Kumar Kartikeya Dwivedi,
	Anton Protopopov, Amery Hung, Rong Tao, linux-kernel, linux-api,
	linux-kselftest, kernel-patches-bot

This patch series builds upon the discussion in
"[PATCH bpf-next v4 0/4] bpf: Improve error reporting for freplace attachment failure" [1].

This patch series introduces support for *common attributes* in the BPF
syscall, providing a unified mechanism for passing shared metadata across
all BPF commands, initially used by BPF_PROG_LOAD, BPF_BTF_LOAD, and
BPF_MAP_CREATE.

The initial set of common attributes includes:

1. 'log_buf': User-provided buffer for storing log output.
2. 'log_size': Size of the provided log buffer.
3. 'log_level': Verbosity level for logging.
4. 'log_true_size': Actual log size reported by kernel.

With this extension, the BPF syscall will be able to return meaningful
error messages (e.g., map creation failures), improving debuggability
and user experience.

Links:
[1] https://lore.kernel.org/bpf/20250224153352.64689-1-leon.hwang@linux.dev/

Changes:
v12 -> v13:
* Rebase on bpf-next tree to resolve code conflict.
* Report log_true_size on success path in patch #6.
* Check size instead of common->log_buf in patch #6 (per bot+bpf-ci).
* v12: https://lore.kernel.org/bpf/20260420141804.27179-1-leon.hwang@linux.dev/

v11 -> v12:
* Drop "log_" prefix in struct bpf_log_attr in patch #3.
* Drop "log_" prefix in struct bpf_log_opts in patch #7.
* Copy log_true_size using copy_to_bpfptr_offset() in patch #3 (per Alexei).
* v11: https://lore.kernel.org/bpf/20260216150445.68278-1-leon.hwang@linux.dev/

v10 -> v11:
* Collect Acked-by from Andrii, thanks.
* Validate whether log_buf, log_size, and log_level are valid by reusing
  bpf_verifier_log_attr_valid() in patch #4 (per Andrii).
* v10: https://lore.kernel.org/bpf/20260211151115.78013-1-leon.hwang@linux.dev/

v9 -> v10:
* Collect Acked-by from Andrii, thanks.
* Address comments from Andrii:
  * Drop log NULL check in bpf_log_attr_finalize().
  * Return -EFAULT early in bpf_log_attr_finalize().
  * Validate whether log_buf, log_size, and log_level are set.
  * Keep log_buf, log_size, log_level, and user-pointer log_true_size in struct
    bpf_log_attr.
  * Make prog_load and btf_load work with the new struct bpf_log_attr.
  * Add comment to log_true_size of struct bpf_log_opts in libbpf.
* Address comment from Alexei:
  * Avoid using BPF_LOG_FIXED as log_level in tests.
* v9: https://lore.kernel.org/bpf/20260202144046.30651-1-leon.hwang@linux.dev/

v8 -> v9:
* Rework reporting 'log_true_size' for prog_load, btf_load, and map_create to
  simplify struct bpf_log_attr (per Alexei).
* v8: https://lore.kernel.org/bpf/20260126151409.52072-1-leon.hwang@linux.dev/

v7 -> v8:
* Return 0 when fd < 0 and errno != EFAULT in probe_sys_bpf_ext(), then simplify
  probe_bpf_syscall_common_attrs() (per Alexei and Andrii).
* v7: https://lore.kernel.org/bpf/20260123032445.125259-1-leon.hwang@linux.dev/

v6 -> v7:
* Return -errno when fd < 0 and errno != EFAULT in probe_sys_bpf_ext().
* Convert return value of probe_sys_bpf_ext() to bool in
  probe_bpf_syscall_common_attrs().
* Address comments from Andrii:
  * Drop the comment, and handle fd >= 0 case explicitly in
    probe_sys_bpf_ext().
  * Return an error when fd >= 0 in probe_sys_bpf_ext().
* v6: https://lore.kernel.org/bpf/20260120152424.40766-1-leon.hwang@linux.dev/

v5 -> v6:
* Address comments from Andrii:
  * Update some variables' name.
  * Drop unnecessary 'close(fd)' in libbpf.
  * Rename FEAT_EXTENDED_SYSCALL to FEAT_BPF_SYSCALL_COMMON_ATTRS with
    updated description in libbpf.
  * Use EINVAL instead of EUSERS, as EUSERS is not used in bpf yet.
  * Rename struct bpf_syscall_common_attr_opts to bpf_log_opts in libbpf.
  * Add 'OPTS_SET(log_opts, log_true_size, 0);' in libbpf's 'bpf_map_create()'.
* v5: https://lore.kernel.org/bpf/20260112145616.44195-1-leon.hwang@linux.dev/

v4 -> v5:
* Rework reporting 'log_true_size' for prog_load, btf_load, and map_create
  (per Alexei).
* v4: https://lore.kernel.org/bpf/20260106172018.57757-1-leon.hwang@linux.dev/

RFC v3 -> v4:
* Drop RFC.
* Address comments from Andrii:
  * Add parentheses in 'sys_bpf_ext()'.
  * Avoid creating new fd in 'probe_sys_bpf_ext()'.
  * Add a new struct to wrap log fields in libbpf.
* Address comments from Alexei:
  * Do not skip writing to user space when log_true_size is zero.
  * Do not use 'bool' arguments.
  * Drop the adding WARN_ON_ONCE()'s.
* v3: https://lore.kernel.org/bpf/20251002154841.99348-1-leon.hwang@linux.dev/

RFC v2 -> RFC v3:
* Rename probe_sys_bpf_extended to probe_sys_bpf_ext.
* Refactor reporting 'log_true_size' for prog_load.
* Refactor reporting 'btf_log_true_size' for btf_load.
* Add warnings for internal bugs in map_create.
* Check log_true_size in test cases.
* Address comment from Alexei:
  * Change kvzalloc/kvfree to kzalloc/kfree.
* Address comments from Andrii:
  * Move BPF_COMMON_ATTRS to 'enum bpf_cmd' alongside brief comment.
  * Add bpf_check_uarg_tail_zero() for extra checks.
  * Rename sys_bpf_extended to sys_bpf_ext.
  * Rename sys_bpf_fd_extended to sys_bpf_ext_fd.
  * Probe the new feature using NULL and -EFAULT.
  * Move probe_sys_bpf_ext to libbpf_internal.h and drop LIBBPF_API.
  * Return -EUSERS when log attrs are conflict between bpf_attr and
    bpf_common_attr.
  * Avoid touching bpf_vlog_init().
  * Update the reason messages in map_create.
  * Finalize the log using __cleanup().
  * Report log size to users.
  * Change type of log_buf from '__u64' to 'const char *' and cast type
    using ptr_to_u64() in bpf_map_create().
  * Do not return -EOPNOTSUPP when kernel doesn't support this feature
    in bpf_map_create().
  * Add log_level support for map creation for consistency.
* Address comment from Eduard:
  * Use common_attrs->log_level instead of BPF_LOG_FIXED.
* v2: https://lore.kernel.org/bpf/20250911163328.93490-1-leon.hwang@linux.dev/

RFC v1 -> RFC v2:
* Fix build error reported by test bot.
* Address comments from Alexei:
  * Drop new uapi for freplace.
  * Add common attributes support for prog_load and btf_load.
  * Add common attributes support for map_create.
* v1: https://lore.kernel.org/bpf/20250728142346.95681-1-leon.hwang@linux.dev/

Leon Hwang (8):
  bpf: Extend BPF syscall with common attributes support
  libbpf: Add support for extended BPF syscall
  bpf: Refactor reporting log_true_size for prog_load
  bpf: Add syscall common attributes support for prog_load
  bpf: Add syscall common attributes support for btf_load
  bpf: Add syscall common attributes support for map_create
  libbpf: Add syscall common attributes support for map_create
  selftests/bpf: Add tests to verify map create failure log

 include/linux/bpf.h                           |   4 +-
 include/linux/bpf_verifier.h                  |  16 ++
 include/linux/btf.h                           |   3 +-
 include/linux/syscalls.h                      |   3 +-
 include/uapi/linux/bpf.h                      |   8 +
 kernel/bpf/btf.c                              |  30 +---
 kernel/bpf/log.c                              |  90 +++++++++-
 kernel/bpf/syscall.c                          | 115 +++++++++---
 kernel/bpf/verifier.c                         |  17 +-
 tools/include/uapi/linux/bpf.h                |   8 +
 tools/lib/bpf/bpf.c                           |  52 +++++-
 tools/lib/bpf/bpf.h                           |  17 +-
 tools/lib/bpf/features.c                      |   8 +
 tools/lib/bpf/libbpf_internal.h               |   3 +
 .../selftests/bpf/prog_tests/map_init.c       | 166 ++++++++++++++++++
 15 files changed, 473 insertions(+), 67 deletions(-)

--
2.54.0

^ permalink raw reply

* [PATCH bpf-next v13 1/8] bpf: Extend BPF syscall with common attributes support
From: Leon Hwang @ 2026-05-11 15:28 UTC (permalink / raw)
  To: bpf
  Cc: Alexei Starovoitov, Daniel Borkmann, John Fastabend,
	Andrii Nakryiko, Martin KaFai Lau, Eduard Zingerman, Song Liu,
	Yonghong Song, KP Singh, Stanislav Fomichev, Hao Luo, Jiri Olsa,
	Shuah Khan, Christian Brauner, Seth Forshee, Yuichiro Tsuji,
	Andrey Albershteyn, Leon Hwang, Willem de Bruijn, Jason Xing,
	Tao Chen, Mykyta Yatsenko, Kumar Kartikeya Dwivedi,
	Anton Protopopov, Amery Hung, Rong Tao, linux-kernel, linux-api,
	linux-kselftest, kernel-patches-bot
In-Reply-To: <20260511152817.89191-1-leon.hwang@linux.dev>

Add generic BPF syscall support for passing common attributes.

The initial set of common attributes includes:

1. 'log_buf': User-provided buffer for storing logs.
2. 'log_size': Size of the log buffer.
3. 'log_level': Log verbosity level.
4. 'log_true_size': Actual log size reported by kernel.

The common-attribute pointer and its size are passed as the 4th and 5th
syscall arguments. A new command bit, 'BPF_COMMON_ATTRS' ('1 << 16'),
indicates that common attributes are supplied.

This commit adds syscall and uapi plumbing. Command-specific handling is
added in follow-up patches.

Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
---
 include/linux/syscalls.h       |  3 ++-
 include/uapi/linux/bpf.h       |  8 ++++++++
 kernel/bpf/syscall.c           | 25 +++++++++++++++++++++----
 tools/include/uapi/linux/bpf.h |  8 ++++++++
 4 files changed, 39 insertions(+), 5 deletions(-)

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index f5639d5ac331..50055ab73649 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -936,7 +936,8 @@ asmlinkage long sys_seccomp(unsigned int op, unsigned int flags,
 asmlinkage long sys_getrandom(char __user *buf, size_t count,
 			      unsigned int flags);
 asmlinkage long sys_memfd_create(const char __user *uname_ptr, unsigned int flags);
-asmlinkage long sys_bpf(int cmd, union bpf_attr __user *attr, unsigned int size);
+asmlinkage long sys_bpf(int cmd, union bpf_attr __user *attr, unsigned int size,
+			struct bpf_common_attr __user *attr_common, unsigned int size_common);
 asmlinkage long sys_execveat(int dfd, const char __user *filename,
 			const char __user *const __user *argv,
 			const char __user *const __user *envp, int flags);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 552bc5d9afbd..49eeb18ad050 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -994,6 +994,7 @@ enum bpf_cmd {
 	BPF_PROG_STREAM_READ_BY_FD,
 	BPF_PROG_ASSOC_STRUCT_OPS,
 	__MAX_BPF_CMD,
+	BPF_COMMON_ATTRS = 1 << 16, /* Indicate carrying syscall common attrs. */
 };
 
 enum bpf_map_type {
@@ -1500,6 +1501,13 @@ struct bpf_stack_build_id {
 	};
 };
 
+struct bpf_common_attr {
+	__u64 log_buf;
+	__u32 log_size;
+	__u32 log_level;
+	__u32 log_true_size;
+};
+
 #define BPF_OBJ_NAME_LEN 16U
 
 enum {
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 3b1f0ba02f61..354f6f471a08 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -6211,8 +6211,10 @@ static int prog_assoc_struct_ops(union bpf_attr *attr)
 	return ret;
 }
 
-static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size)
+static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size,
+		     bpfptr_t uattr_common, unsigned int size_common)
 {
+	struct bpf_common_attr attr_common;
 	union bpf_attr attr;
 	int err;
 
@@ -6226,6 +6228,20 @@ static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size)
 	if (copy_from_bpfptr(&attr, uattr, size) != 0)
 		return -EFAULT;
 
+	memset(&attr_common, 0, sizeof(attr_common));
+	if (cmd & BPF_COMMON_ATTRS) {
+		err = bpf_check_uarg_tail_zero(uattr_common, sizeof(attr_common), size_common);
+		if (err)
+			return err;
+
+		cmd &= ~BPF_COMMON_ATTRS;
+		size_common = min_t(u32, size_common, sizeof(attr_common));
+		if (copy_from_bpfptr(&attr_common, uattr_common, size_common) != 0)
+			return -EFAULT;
+	} else {
+		size_common = 0;
+	}
+
 	err = security_bpf(cmd, &attr, size, uattr.is_kernel);
 	if (err < 0)
 		return err;
@@ -6361,9 +6377,10 @@ static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size)
 	return err;
 }
 
-SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
+SYSCALL_DEFINE5(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size,
+		struct bpf_common_attr __user *, uattr_common, unsigned int, size_common)
 {
-	return __sys_bpf(cmd, USER_BPFPTR(uattr), size);
+	return __sys_bpf(cmd, USER_BPFPTR(uattr), size, USER_BPFPTR(uattr_common), size_common);
 }
 
 static bool syscall_prog_is_valid_access(int off, int size,
@@ -6393,7 +6410,7 @@ BPF_CALL_3(bpf_sys_bpf, int, cmd, union bpf_attr *, attr, u32, attr_size)
 	default:
 		return -EINVAL;
 	}
-	return __sys_bpf(cmd, KERNEL_BPFPTR(attr), attr_size);
+	return __sys_bpf(cmd, KERNEL_BPFPTR(attr), attr_size, KERNEL_BPFPTR(NULL), 0);
 }
 
 
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 677be9a47347..16ff0968fc21 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -994,6 +994,7 @@ enum bpf_cmd {
 	BPF_PROG_STREAM_READ_BY_FD,
 	BPF_PROG_ASSOC_STRUCT_OPS,
 	__MAX_BPF_CMD,
+	BPF_COMMON_ATTRS = 1 << 16, /* Indicate carrying syscall common attrs. */
 };
 
 enum bpf_map_type {
@@ -1500,6 +1501,13 @@ struct bpf_stack_build_id {
 	};
 };
 
+struct bpf_common_attr {
+	__u64 log_buf;
+	__u32 log_size;
+	__u32 log_level;
+	__u32 log_true_size;
+};
+
 #define BPF_OBJ_NAME_LEN 16U
 
 enum {
-- 
2.54.0


^ permalink raw reply related

* [PATCH bpf-next v13 2/8] libbpf: Add support for extended BPF syscall
From: Leon Hwang @ 2026-05-11 15:28 UTC (permalink / raw)
  To: bpf
  Cc: Alexei Starovoitov, Daniel Borkmann, John Fastabend,
	Andrii Nakryiko, Martin KaFai Lau, Eduard Zingerman, Song Liu,
	Yonghong Song, KP Singh, Stanislav Fomichev, Hao Luo, Jiri Olsa,
	Shuah Khan, Christian Brauner, Seth Forshee, Yuichiro Tsuji,
	Andrey Albershteyn, Leon Hwang, Willem de Bruijn, Jason Xing,
	Tao Chen, Mykyta Yatsenko, Kumar Kartikeya Dwivedi,
	Anton Protopopov, Amery Hung, Rong Tao, linux-kernel, linux-api,
	linux-kselftest, kernel-patches-bot
In-Reply-To: <20260511152817.89191-1-leon.hwang@linux.dev>

To support the extended BPF syscall introduced in the previous commit,
introduce the following internal APIs:

* 'sys_bpf_ext()'
* 'sys_bpf_ext_fd()'
  They wrap the raw 'syscall()' interface to support passing extended
  attributes.
* 'probe_sys_bpf_ext()'
  Check whether current kernel supports the BPF syscall common attributes.

Acked-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
---
 tools/lib/bpf/bpf.c             | 36 +++++++++++++++++++++++++++++++++
 tools/lib/bpf/features.c        |  8 ++++++++
 tools/lib/bpf/libbpf_internal.h |  3 +++
 3 files changed, 47 insertions(+)

diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 5846de364209..9d8740761b7a 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -69,6 +69,42 @@ static inline __u64 ptr_to_u64(const void *ptr)
 	return (__u64) (unsigned long) ptr;
 }
 
+static inline int sys_bpf_ext(enum bpf_cmd cmd, union bpf_attr *attr,
+			      unsigned int size,
+			      struct bpf_common_attr *attr_common,
+			      unsigned int size_common)
+{
+	cmd = attr_common ? (cmd | BPF_COMMON_ATTRS) : (cmd & ~BPF_COMMON_ATTRS);
+	return syscall(__NR_bpf, cmd, attr, size, attr_common, size_common);
+}
+
+static inline int sys_bpf_ext_fd(enum bpf_cmd cmd, union bpf_attr *attr,
+				 unsigned int size,
+				 struct bpf_common_attr *attr_common,
+				 unsigned int size_common)
+{
+	int fd;
+
+	fd = sys_bpf_ext(cmd, attr, size, attr_common, size_common);
+	return ensure_good_fd(fd);
+}
+
+int probe_sys_bpf_ext(void)
+{
+	const size_t attr_sz = offsetofend(union bpf_attr, prog_token_fd);
+	union bpf_attr attr;
+	int fd;
+
+	memset(&attr, 0, attr_sz);
+	fd = syscall(__NR_bpf, BPF_PROG_LOAD | BPF_COMMON_ATTRS, &attr, attr_sz, NULL,
+		     sizeof(struct bpf_common_attr));
+	if (fd >= 0) {
+		close(fd);
+		return -EINVAL;
+	}
+	return errno == EFAULT ? 1 : 0;
+}
+
 static inline int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr,
 			  unsigned int size)
 {
diff --git a/tools/lib/bpf/features.c b/tools/lib/bpf/features.c
index 4f19a0d79b0c..b7e388f99d0b 100644
--- a/tools/lib/bpf/features.c
+++ b/tools/lib/bpf/features.c
@@ -615,6 +615,11 @@ static int probe_kern_btf_layout(int token_fd)
 						 (char *)layout, token_fd));
 }
 
+static int probe_bpf_syscall_common_attrs(int token_fd)
+{
+	return probe_sys_bpf_ext();
+}
+
 typedef int (*feature_probe_fn)(int /* token_fd */);
 
 static struct kern_feature_cache feature_cache;
@@ -699,6 +704,9 @@ static struct kern_feature_desc {
 	[FEAT_BTF_LAYOUT] = {
 		"kernel supports BTF layout", probe_kern_btf_layout,
 	},
+	[FEAT_BPF_SYSCALL_COMMON_ATTRS] = {
+		"BPF syscall common attributes support", probe_bpf_syscall_common_attrs,
+	},
 };
 
 bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_id)
diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index 3781c45b46d3..7d93c6c01d60 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -398,6 +398,8 @@ enum kern_feature_id {
 	FEAT_UPROBE_SYSCALL,
 	/* Kernel supports BTF layout information */
 	FEAT_BTF_LAYOUT,
+	/* Kernel supports BPF syscall common attributes */
+	FEAT_BPF_SYSCALL_COMMON_ATTRS,
 	__FEAT_CNT,
 };
 
@@ -768,4 +770,5 @@ int probe_fd(int fd);
 #define SHA256_DWORD_SIZE SHA256_DIGEST_LENGTH / sizeof(__u64)
 
 void libbpf_sha256(const void *data, size_t len, __u8 out[SHA256_DIGEST_LENGTH]);
+int probe_sys_bpf_ext(void);
 #endif /* __LIBBPF_LIBBPF_INTERNAL_H */
-- 
2.54.0


^ permalink raw reply related

* [PATCH bpf-next v13 3/8] bpf: Refactor reporting log_true_size for prog_load
From: Leon Hwang @ 2026-05-11 15:28 UTC (permalink / raw)
  To: bpf
  Cc: Alexei Starovoitov, Daniel Borkmann, John Fastabend,
	Andrii Nakryiko, Martin KaFai Lau, Eduard Zingerman, Song Liu,
	Yonghong Song, KP Singh, Stanislav Fomichev, Hao Luo, Jiri Olsa,
	Shuah Khan, Christian Brauner, Seth Forshee, Yuichiro Tsuji,
	Andrey Albershteyn, Leon Hwang, Willem de Bruijn, Jason Xing,
	Tao Chen, Mykyta Yatsenko, Kumar Kartikeya Dwivedi,
	Anton Protopopov, Amery Hung, Rong Tao, linux-kernel, linux-api,
	linux-kselftest, kernel-patches-bot
In-Reply-To: <20260511152817.89191-1-leon.hwang@linux.dev>

The next commit will add support for reporting logs via extended common
attributes, including 'log_true_size'.

To prepare for that, refactor the 'log_true_size' reporting logic by
introducing a new struct bpf_log_attr to encapsulate log-related behavior:

 * bpf_log_attr_init(): initialize log fields, which will support
   extended common attributes in the next commit.
 * bpf_log_attr_finalize(): handle log finalization and write back
   'log_true_size' to userspace.

Acked-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
---
 include/linux/bpf.h          |  4 +++-
 include/linux/bpf_verifier.h | 12 ++++++++++++
 kernel/bpf/log.c             | 29 +++++++++++++++++++++++++++++
 kernel/bpf/syscall.c         | 12 +++++++++---
 kernel/bpf/verifier.c        | 17 ++++-------------
 5 files changed, 57 insertions(+), 17 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 715b6df9c403..b3a0cca92e54 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2919,7 +2919,9 @@ int bpf_check_uarg_tail_zero(bpfptr_t uaddr, size_t expected_size,
 			     size_t actual_size);
 
 /* verify correctness of eBPF program */
-int bpf_check(struct bpf_prog **fp, union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size);
+struct bpf_log_attr;
+int bpf_check(struct bpf_prog **fp, union bpf_attr *attr, bpfptr_t uattr,
+	      struct bpf_log_attr *attr_log);
 
 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
 void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth);
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 976e2b2f40e8..8d27ad1f9f94 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -755,6 +755,18 @@ static inline bool bpf_verifier_log_needed(const struct bpf_verifier_log *log)
 	return log && log->level;
 }
 
+struct bpf_log_attr {
+	char __user *ubuf;
+	u32 size;
+	u32 level;
+	u32 offsetof_true_size;
+	bpfptr_t uattr;
+};
+
+int bpf_log_attr_init(struct bpf_log_attr *log, u64 log_buf, u32 log_size, u32 log_level,
+		      u32 offsetof_log_true_size, bpfptr_t uattr);
+int bpf_log_attr_finalize(struct bpf_log_attr *attr, struct bpf_verifier_log *log);
+
 #define BPF_MAX_SUBPROGS 256
 
 struct bpf_subprog_arg_info {
diff --git a/kernel/bpf/log.c b/kernel/bpf/log.c
index 64566b86dd27..1b1efe75398b 100644
--- a/kernel/bpf/log.c
+++ b/kernel/bpf/log.c
@@ -825,3 +825,32 @@ void print_insn_state(struct bpf_verifier_env *env, const struct bpf_verifier_st
 	}
 	print_verifier_state(env, vstate, frameno, false);
 }
+
+int bpf_log_attr_init(struct bpf_log_attr *log, u64 log_buf, u32 log_size, u32 log_level,
+		      u32 offsetof_log_true_size, bpfptr_t uattr)
+{
+	char __user *ubuf = u64_to_user_ptr(log_buf);
+
+	memset(log, 0, sizeof(*log));
+	log->ubuf = ubuf;
+	log->size = log_size;
+	log->level = log_level;
+	log->offsetof_true_size = offsetof_log_true_size;
+	log->uattr = uattr;
+	return 0;
+}
+
+int bpf_log_attr_finalize(struct bpf_log_attr *attr, struct bpf_verifier_log *log)
+{
+	u32 log_true_size;
+	int err;
+
+	err = bpf_vlog_finalize(log, &log_true_size);
+
+	if (attr->offsetof_true_size &&
+	    copy_to_bpfptr_offset(attr->uattr, attr->offsetof_true_size, &log_true_size,
+				  sizeof(log_true_size)))
+		return -EFAULT;
+
+	return err;
+}
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 354f6f471a08..70b78ddcdedb 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2861,7 +2861,7 @@ static int bpf_prog_mark_insn_arrays_ready(struct bpf_prog *prog)
 /* last field in 'union bpf_attr' used by this command */
 #define BPF_PROG_LOAD_LAST_FIELD keyring_id
 
-static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
+static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, struct bpf_log_attr *attr_log)
 {
 	enum bpf_prog_type type = attr->prog_type;
 	struct bpf_prog *prog, *dst_prog = NULL;
@@ -3079,7 +3079,7 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
 		goto free_prog_sec;
 
 	/* run eBPF verifier */
-	err = bpf_check(&prog, attr, uattr, uattr_size);
+	err = bpf_check(&prog, attr, uattr, attr_log);
 	if (err < 0)
 		goto free_used_maps;
 
@@ -6215,6 +6215,8 @@ static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size,
 		     bpfptr_t uattr_common, unsigned int size_common)
 {
 	struct bpf_common_attr attr_common;
+	u32 offsetof_log_true_size = 0;
+	struct bpf_log_attr attr_log;
 	union bpf_attr attr;
 	int err;
 
@@ -6266,7 +6268,11 @@ static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size,
 		err = map_freeze(&attr);
 		break;
 	case BPF_PROG_LOAD:
-		err = bpf_prog_load(&attr, uattr, size);
+		if (size >= offsetofend(union bpf_attr, log_true_size))
+			offsetof_log_true_size = offsetof(union bpf_attr, log_true_size);
+		err = bpf_log_attr_init(&attr_log, attr.log_buf, attr.log_size, attr.log_level,
+					offsetof_log_true_size, uattr);
+		err = err ?: bpf_prog_load(&attr, uattr, &attr_log);
 		break;
 	case BPF_OBJ_PIN:
 		err = bpf_obj_pin(&attr);
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 11054ad89c14..0e654ef01ae0 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -19294,12 +19294,12 @@ int bpf_fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 	return 0;
 }
 
-int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_size)
+int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr,
+	      struct bpf_log_attr *attr_log)
 {
 	u64 start_time = ktime_get_ns();
 	struct bpf_verifier_env *env;
 	int i, len, ret = -EINVAL, err;
-	u32 log_true_size;
 	bool is_priv;
 
 	BTF_TYPE_EMIT(enum bpf_features);
@@ -19346,9 +19346,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3
 	/* user could have requested verbose verifier output
 	 * and supplied buffer to store the verification trace
 	 */
-	ret = bpf_vlog_init(&env->log, attr->log_level,
-			    (char __user *) (unsigned long) attr->log_buf,
-			    attr->log_size);
+	ret = bpf_vlog_init(&env->log, attr_log->level, attr_log->ubuf, attr_log->size);
 	if (ret)
 		goto err_unlock;
 
@@ -19510,17 +19508,10 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3
 	env->prog->aux->verified_insns = env->insn_processed;
 
 	/* preserve original error even if log finalization is successful */
-	err = bpf_vlog_finalize(&env->log, &log_true_size);
+	err = bpf_log_attr_finalize(attr_log, &env->log);
 	if (err)
 		ret = err;
 
-	if (uattr_size >= offsetofend(union bpf_attr, log_true_size) &&
-	    copy_to_bpfptr_offset(uattr, offsetof(union bpf_attr, log_true_size),
-				  &log_true_size, sizeof(log_true_size))) {
-		ret = -EFAULT;
-		goto err_release_maps;
-	}
-
 	if (ret)
 		goto err_release_maps;
 
-- 
2.54.0


^ permalink raw reply related

* [PATCH bpf-next v13 4/8] bpf: Add syscall common attributes support for prog_load
From: Leon Hwang @ 2026-05-11 15:28 UTC (permalink / raw)
  To: bpf
  Cc: Alexei Starovoitov, Daniel Borkmann, John Fastabend,
	Andrii Nakryiko, Martin KaFai Lau, Eduard Zingerman, Song Liu,
	Yonghong Song, KP Singh, Stanislav Fomichev, Hao Luo, Jiri Olsa,
	Shuah Khan, Christian Brauner, Seth Forshee, Yuichiro Tsuji,
	Andrey Albershteyn, Leon Hwang, Willem de Bruijn, Jason Xing,
	Tao Chen, Mykyta Yatsenko, Kumar Kartikeya Dwivedi,
	Anton Protopopov, Amery Hung, Rong Tao, linux-kernel, linux-api,
	linux-kselftest, kernel-patches-bot
In-Reply-To: <20260511152817.89191-1-leon.hwang@linux.dev>

BPF_PROG_LOAD can now take log parameters from both union bpf_attr and
struct bpf_common_attr. The merge rules are:

- if both sides provide a complete log tuple (buf/size/level) and they
  match, use it;
- if only one side provides log parameters, use that one;
- if both sides provide complete tuples but they differ, return -EINVAL.

Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
---
 include/linux/bpf_verifier.h |  3 ++-
 kernel/bpf/log.c             | 34 +++++++++++++++++++++++++++-------
 kernel/bpf/syscall.c         |  3 ++-
 3 files changed, 31 insertions(+), 9 deletions(-)

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 8d27ad1f9f94..8433430dedb7 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -764,7 +764,8 @@ struct bpf_log_attr {
 };
 
 int bpf_log_attr_init(struct bpf_log_attr *log, u64 log_buf, u32 log_size, u32 log_level,
-		      u32 offsetof_log_true_size, bpfptr_t uattr);
+		      u32 offsetof_log_true_size, bpfptr_t uattr, struct bpf_common_attr *common,
+		      bpfptr_t uattr_common, u32 size_common);
 int bpf_log_attr_finalize(struct bpf_log_attr *attr, struct bpf_verifier_log *log);
 
 #define BPF_MAX_SUBPROGS 256
diff --git a/kernel/bpf/log.c b/kernel/bpf/log.c
index 1b1efe75398b..fd12ad5a0338 100644
--- a/kernel/bpf/log.c
+++ b/kernel/bpf/log.c
@@ -13,17 +13,17 @@
 
 #define verbose(env, fmt, args...) bpf_verifier_log_write(env, fmt, ##args)
 
-static bool bpf_verifier_log_attr_valid(const struct bpf_verifier_log *log)
+static bool bpf_verifier_log_attr_valid(u32 log_level, char __user *log_buf, u32 log_size)
 {
 	/* ubuf and len_total should both be specified (or not) together */
-	if (!!log->ubuf != !!log->len_total)
+	if (!!log_buf != !!log_size)
 		return false;
 	/* log buf without log_level is meaningless */
-	if (log->ubuf && log->level == 0)
+	if (log_buf && log_level == 0)
 		return false;
-	if (log->level & ~BPF_LOG_MASK)
+	if (log_level & ~BPF_LOG_MASK)
 		return false;
-	if (log->len_total > UINT_MAX >> 2)
+	if (log_size > UINT_MAX >> 2)
 		return false;
 	return true;
 }
@@ -36,7 +36,7 @@ int bpf_vlog_init(struct bpf_verifier_log *log, u32 log_level,
 	log->len_total = log_size;
 
 	/* log attributes have to be sane */
-	if (!bpf_verifier_log_attr_valid(log))
+	if (!bpf_verifier_log_attr_valid(log_level, log_buf, log_size))
 		return -EINVAL;
 
 	return 0;
@@ -827,16 +827,36 @@ void print_insn_state(struct bpf_verifier_env *env, const struct bpf_verifier_st
 }
 
 int bpf_log_attr_init(struct bpf_log_attr *log, u64 log_buf, u32 log_size, u32 log_level,
-		      u32 offsetof_log_true_size, bpfptr_t uattr)
+		      u32 offsetof_log_true_size, bpfptr_t uattr, struct bpf_common_attr *common,
+		      bpfptr_t uattr_common, u32 size_common)
 {
+	char __user *ubuf_common = u64_to_user_ptr(common->log_buf);
 	char __user *ubuf = u64_to_user_ptr(log_buf);
 
+	if (!bpf_verifier_log_attr_valid(common->log_level, ubuf_common, common->log_size) ||
+	    !bpf_verifier_log_attr_valid(log_level, ubuf, log_size))
+		return -EINVAL;
+
+	if (ubuf && ubuf_common && (ubuf != ubuf_common || log_size != common->log_size ||
+				    log_level != common->log_level))
+		return -EINVAL;
+
 	memset(log, 0, sizeof(*log));
 	log->ubuf = ubuf;
 	log->size = log_size;
 	log->level = log_level;
 	log->offsetof_true_size = offsetof_log_true_size;
 	log->uattr = uattr;
+
+	if (!ubuf && ubuf_common) {
+		log->ubuf = ubuf_common;
+		log->size = common->log_size;
+		log->level = common->log_level;
+		log->uattr = uattr_common;
+		log->offsetof_true_size = 0;
+		if (size_common >= offsetofend(struct bpf_common_attr, log_true_size))
+			log->offsetof_true_size = offsetof(struct bpf_common_attr, log_true_size);
+	}
 	return 0;
 }
 
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 70b78ddcdedb..db893cae826c 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -6271,7 +6271,8 @@ static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size,
 		if (size >= offsetofend(union bpf_attr, log_true_size))
 			offsetof_log_true_size = offsetof(union bpf_attr, log_true_size);
 		err = bpf_log_attr_init(&attr_log, attr.log_buf, attr.log_size, attr.log_level,
-					offsetof_log_true_size, uattr);
+					offsetof_log_true_size, uattr, &attr_common, uattr_common,
+					size_common);
 		err = err ?: bpf_prog_load(&attr, uattr, &attr_log);
 		break;
 	case BPF_OBJ_PIN:
-- 
2.54.0


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox