Linux userland API discussions
 help / color / mirror / Atom feed
* [PATCH 2/3] ext4: add support for 32-bit default reserved uid and gid values
From: Theodore Ts'o via B4 Relay @ 2025-09-09  3:15 UTC (permalink / raw)
  To: tytso; +Cc: linux-ext4, linux-api
In-Reply-To: <20250908-tune2fs-v1-0-e3a6929f3355@mit.edu>

From: Theodore Ts'o <tytso@mit.edu>

Support for specifying the default user id and group id that is
allowed to use the reserved block space was added way back when Linux
only supported 16-bit uid's and gid's.  (Yeah, that long ago.)  It's
not a commonly used feature, but let's add support for 32-bit user and
group id's.

Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/ext4.h  | 16 +++++++++++++++-
 fs/ext4/super.c |  8 ++++----
 2 files changed, 19 insertions(+), 5 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 01a6e2de7fc3ef0e20b039d3200b9c9bd656f59f..4bfcd5f0c74fda30db4009ee28fbee00a2f6b76f 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1442,7 +1442,9 @@ struct ext4_super_block {
 	__le16  s_encoding;		/* Filename charset encoding */
 	__le16  s_encoding_flags;	/* Filename charset encoding flags */
 	__le32  s_orphan_file_inum;	/* Inode for tracking orphan inodes */
-	__le32	s_reserved[94];		/* Padding to the end of the block */
+	__le16	s_def_resuid_hi;
+	__le16	s_def_resgid_hi;
+	__le32	s_reserved[93];		/* Padding to the end of the block */
 	__le32	s_checksum;		/* crc32c(superblock) */
 };
 
@@ -1812,6 +1814,18 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
 		 ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count));
 }
 
+static inline int ext4_get_resuid(struct ext4_super_block *es)
+{
+	return(le16_to_cpu(es->s_def_resuid) |
+	       (le16_to_cpu(es->s_def_resuid_hi) << 16));
+}
+
+static inline int ext4_get_resgid(struct ext4_super_block *es)
+{
+	return(le16_to_cpu(es->s_def_resgid) |
+	       (le16_to_cpu(es->s_def_resgid_hi) << 16));
+}
+
 /*
  * Returns: sbi->field[index]
  * Used to access an array element from the following sbi fields which require
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 94c98446c84f9a4614971d246ca7f001de610a8a..0256c8f7c6cee2b8d9295f2fa9a7acd904382e83 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2951,11 +2951,11 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
 	}
 
 	if (nodefs || !uid_eq(sbi->s_resuid, make_kuid(&init_user_ns, EXT4_DEF_RESUID)) ||
-	    le16_to_cpu(es->s_def_resuid) != EXT4_DEF_RESUID)
+	    ext4_get_resuid(es) != EXT4_DEF_RESUID)
 		SEQ_OPTS_PRINT("resuid=%u",
 				from_kuid_munged(&init_user_ns, sbi->s_resuid));
 	if (nodefs || !gid_eq(sbi->s_resgid, make_kgid(&init_user_ns, EXT4_DEF_RESGID)) ||
-	    le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID)
+	    ext4_get_resgid(es) != EXT4_DEF_RESGID)
 		SEQ_OPTS_PRINT("resgid=%u",
 				from_kgid_munged(&init_user_ns, sbi->s_resgid));
 	def_errors = nodefs ? -1 : le16_to_cpu(es->s_errors);
@@ -5270,8 +5270,8 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
 
 	ext4_set_def_opts(sb, es);
 
-	sbi->s_resuid = make_kuid(&init_user_ns, le16_to_cpu(es->s_def_resuid));
-	sbi->s_resgid = make_kgid(&init_user_ns, le16_to_cpu(es->s_def_resgid));
+	sbi->s_resuid = make_kuid(&init_user_ns, ext4_get_resuid(es));
+	sbi->s_resgid = make_kgid(&init_user_ns, ext4_get_resuid(es));
 	sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ;
 	sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME;
 	sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME;

-- 
2.51.0



^ permalink raw reply related

* [PATCH 1/3] ext4: avoid potential buffer over-read in parse_apply_sb_mount_options()
From: Theodore Ts'o via B4 Relay @ 2025-09-09  3:15 UTC (permalink / raw)
  To: tytso; +Cc: linux-ext4, linux-api, stable
In-Reply-To: <20250908-tune2fs-v1-0-e3a6929f3355@mit.edu>

From: Theodore Ts'o <tytso@mit.edu>

Unlike other strings in the ext4 superblock, we rely on tune2fs to
make sure s_mount_opts is NUL terminated.  Harden
parse_apply_sb_mount_options() by treating s_mount_opts as a potential
__nonstring.

Cc: stable@vger.kernel.org
Fixes: 8b67f04ab9de ("ext4: Add mount options in superblock")
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/super.c | 17 +++++------------
 1 file changed, 5 insertions(+), 12 deletions(-)

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 699c15db28a82f26809bf68533454a242596f0fd..94c98446c84f9a4614971d246ca7f001de610a8a 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2460,7 +2460,7 @@ static int parse_apply_sb_mount_options(struct super_block *sb,
 					struct ext4_fs_context *m_ctx)
 {
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
-	char *s_mount_opts = NULL;
+	char s_mount_opts[65];
 	struct ext4_fs_context *s_ctx = NULL;
 	struct fs_context *fc = NULL;
 	int ret = -ENOMEM;
@@ -2468,15 +2468,11 @@ static int parse_apply_sb_mount_options(struct super_block *sb,
 	if (!sbi->s_es->s_mount_opts[0])
 		return 0;
 
-	s_mount_opts = kstrndup(sbi->s_es->s_mount_opts,
-				sizeof(sbi->s_es->s_mount_opts),
-				GFP_KERNEL);
-	if (!s_mount_opts)
-		return ret;
+	strscpy_pad(s_mount_opts, sbi->s_es->s_mount_opts);
 
 	fc = kzalloc(sizeof(struct fs_context), GFP_KERNEL);
 	if (!fc)
-		goto out_free;
+		return -ENOMEM;
 
 	s_ctx = kzalloc(sizeof(struct ext4_fs_context), GFP_KERNEL);
 	if (!s_ctx)
@@ -2508,11 +2504,8 @@ static int parse_apply_sb_mount_options(struct super_block *sb,
 	ret = 0;
 
 out_free:
-	if (fc) {
-		ext4_fc_free(fc);
-		kfree(fc);
-	}
-	kfree(s_mount_opts);
+	ext4_fc_free(fc);
+	kfree(fc);
 	return ret;
 }
 

-- 
2.51.0



^ permalink raw reply related

* [PATCH 0/3] ext4: Add support for mounted updates to the superblock via an ioctl
From: Theodore Ts'o via B4 Relay @ 2025-09-09  3:15 UTC (permalink / raw)
  To: tytso; +Cc: linux-ext4, linux-api, stable

This patch series enables a future version of tune2fs to be able to
modify certain parts of the ext4 superblock without to write to the
block device.

The first patch fixes a potential buffer overrun caused by a
maliciously moified superblock.  The second patch adds support for
32-bit uid and gid's which can have access to the reserved blocks pool.
The last patch adds the ioctl's which will be used by tune2fs.

Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
Theodore Ts'o (3):
      ext4: avoid potential buffer over-read in parse_apply_sb_mount_options()
      ext4: add support for 32-bit default reserved uid and gid values
      ext4: implemet new ioctls to set and get superblock parameters

 fs/ext4/ext4.h            |  16 ++++-
 fs/ext4/ioctl.c           | 256 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
 fs/ext4/super.c           |  25 +++-----
 include/uapi/linux/ext4.h |  75 ++++++++++++++++++++++
 4 files changed, 348 insertions(+), 24 deletions(-)
---
base-commit: b320789d6883cc00ac78ce83bccbfe7ed58afcf0
change-id: 20250830-tune2fs-3376beb72403

Best regards,
-- 
Theodore Ts'o <tytso@mit.edu>



^ permalink raw reply

* [PATCH 3/3] ext4: implemet new ioctls to set and get superblock parameters
From: Theodore Ts'o via B4 Relay @ 2025-09-09  3:15 UTC (permalink / raw)
  To: tytso; +Cc: linux-ext4, linux-api
In-Reply-To: <20250908-tune2fs-v1-0-e3a6929f3355@mit.edu>

From: Theodore Ts'o <tytso@mit.edu>

Implement the EXT4_IOC_GET_TUNE_SB_PARAM and
EXT4_IOC_SET_TUNE_SB_PARAM ioctls, which allow certains superblock
parameters to be set while the file system is mounted, without needing
write access to the block device.

Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/ioctl.c           | 256 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
 include/uapi/linux/ext4.h |  75 ++++++++++++++++++++++
 2 files changed, 324 insertions(+), 7 deletions(-)

diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 84e3c73952d72e436429489f5fc8b7ae1c01c7a1..569c98c962af63130c0119f60788a26a2807bd86 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -27,14 +27,16 @@
 #include "fsmap.h"
 #include <trace/events/ext4.h>
 
-typedef void ext4_update_sb_callback(struct ext4_super_block *es,
-				       const void *arg);
+typedef void ext4_update_sb_callback(struct ext4_sb_info *sbi,
+				     struct ext4_super_block *es,
+				     const void *arg);
 
 /*
  * Superblock modification callback function for changing file system
  * label
  */
-static void ext4_sb_setlabel(struct ext4_super_block *es, const void *arg)
+static void ext4_sb_setlabel(struct ext4_sb_info *sbi,
+			     struct ext4_super_block *es, const void *arg)
 {
 	/* Sanity check, this should never happen */
 	BUILD_BUG_ON(sizeof(es->s_volume_name) < EXT4_LABEL_MAX);
@@ -46,7 +48,8 @@ static void ext4_sb_setlabel(struct ext4_super_block *es, const void *arg)
  * Superblock modification callback function for changing file system
  * UUID.
  */
-static void ext4_sb_setuuid(struct ext4_super_block *es, const void *arg)
+static void ext4_sb_setuuid(struct ext4_sb_info *sbi,
+			    struct ext4_super_block *es, const void *arg)
 {
 	memcpy(es->s_uuid, (__u8 *)arg, UUID_SIZE);
 }
@@ -71,7 +74,7 @@ int ext4_update_primary_sb(struct super_block *sb, handle_t *handle,
 		goto out_err;
 
 	lock_buffer(bh);
-	func(es, arg);
+	func(sbi, es, arg);
 	ext4_superblock_csum_set(sb);
 	unlock_buffer(bh);
 
@@ -149,7 +152,7 @@ static int ext4_update_backup_sb(struct super_block *sb,
 		unlock_buffer(bh);
 		goto out_bh;
 	}
-	func(es, arg);
+	func(EXT4_SB(sb), es, arg);
 	if (ext4_has_feature_metadata_csum(sb))
 		es->s_checksum = ext4_superblock_csum(es);
 	set_buffer_uptodate(bh);
@@ -1230,6 +1233,239 @@ static int ext4_ioctl_setuuid(struct file *filp,
 	return ret;
 }
 
+
+#define TUNE_OPS_SUPPORTED (EXT4_TUNE_FL_ERRORS_BEHAVIOR |    \
+	EXT4_TUNE_FL_MNT_COUNT | EXT4_TUNE_FL_MAX_MNT_COUNT | \
+	EXT4_TUNE_FL_CHECKINTRVAL | EXT4_TUNE_FL_LAST_CHECK_TIME | \
+	EXT4_TUNE_FL_RESERVED_BLOCKS | EXT4_TUNE_FL_RESERVED_UID | \
+	EXT4_TUNE_FL_RESERVED_GID | EXT4_TUNE_FL_DEFAULT_MNT_OPTS | \
+	EXT4_TUNE_FL_DEF_HASH_ALG | EXT4_TUNE_FL_RAID_STRIDE | \
+	EXT4_TUNE_FL_RAID_STRIPE_WIDTH | EXT4_TUNE_FL_MOUNT_OPTS | \
+	EXT4_TUNE_FL_FEATURES | EXT4_TUNE_FL_EDIT_FEATURES | \
+	EXT4_TUNE_FL_FORCE_FSCK)
+
+static int ext4_ioctl_get_tune_sb(struct ext4_sb_info *sbi,
+				  struct ext4_tune_sb_params __user *params)
+{
+	struct ext4_tune_sb_params ret;
+	struct ext4_super_block *es = sbi->s_es;
+
+	memset(&ret, 0, sizeof(ret));
+	ret.set_flags = TUNE_OPS_SUPPORTED;
+	ret.errors_behavior = es->s_errors;
+	ret.mnt_count = le16_to_cpu(es->s_mnt_count);
+	ret.max_mnt_count = le16_to_cpu(es->s_max_mnt_count);
+	ret.checkinterval = le32_to_cpu(es->s_checkinterval);
+	ret.last_check_time = le32_to_cpu(es->s_lastcheck);
+	ret.reserved_blocks = ext4_r_blocks_count(es);
+	ret.blocks_count = ext4_blocks_count(es);
+	ret.reserved_uid = ext4_get_resuid(es);
+	ret.reserved_gid = ext4_get_resgid(es);
+	ret.default_mnt_opts = le32_to_cpu(es->s_default_mount_opts);
+	ret.def_hash_alg = es->s_def_hash_version;
+	ret.raid_stride = le16_to_cpu(es->s_raid_stride);
+	ret.raid_stripe_width = le16_to_cpu(es->s_raid_stripe_width);
+	strscpy_pad(ret.mount_opts, es->s_mount_opts);
+	ret.feature_compat = le32_to_cpu(es->s_feature_compat);
+	ret.feature_incompat = le32_to_cpu(es->s_feature_incompat);
+	ret.feature_ro_compat = le32_to_cpu(es->s_feature_ro_compat);
+	ret.set_feature_compat_mask = EXT4_TUNE_SET_COMPAT_SUPP;
+	ret.set_feature_incompat_mask = EXT4_TUNE_SET_INCOMPAT_SUPP;
+	ret.set_feature_ro_compat_mask = EXT4_TUNE_SET_RO_COMPAT_SUPP;
+	ret.clear_feature_compat_mask = EXT4_TUNE_CLEAR_COMPAT_SUPP;
+	ret.clear_feature_incompat_mask = EXT4_TUNE_CLEAR_INCOMPAT_SUPP;
+	ret.clear_feature_ro_compat_mask = EXT4_TUNE_CLEAR_RO_COMPAT_SUPP;
+	if (copy_to_user(params, &ret, sizeof(ret)))
+		return -EFAULT;
+	return 0;
+}
+
+static void ext4_sb_setparams(struct ext4_sb_info *sbi,
+			      struct ext4_super_block *es, const void *arg)
+{
+	const struct ext4_tune_sb_params *params = arg;
+
+	if (params->set_flags & EXT4_TUNE_FL_ERRORS_BEHAVIOR)
+		es->s_errors = cpu_to_le16(params->errors_behavior);
+	if (params->set_flags & EXT4_TUNE_FL_MNT_COUNT)
+		es->s_mnt_count = cpu_to_le16(params->mnt_count);
+	if (params->set_flags & EXT4_TUNE_FL_MAX_MNT_COUNT)
+		es->s_max_mnt_count = cpu_to_le16(params->max_mnt_count);
+	if (params->set_flags & EXT4_TUNE_FL_CHECKINTRVAL)
+		es->s_checkinterval = cpu_to_le32(params->checkinterval);
+	if (params->set_flags & EXT4_TUNE_FL_LAST_CHECK_TIME)
+		es->s_lastcheck = cpu_to_le32(params->last_check_time);
+	if (params->set_flags & EXT4_TUNE_FL_RESERVED_BLOCKS) {
+		ext4_fsblk_t blk = params->reserved_blocks;
+
+		es->s_r_blocks_count_lo = cpu_to_le32((u32)blk);
+		es->s_r_blocks_count_hi = cpu_to_le32(blk >> 32);
+	}
+	if (params->set_flags & EXT4_TUNE_FL_RESERVED_UID) {
+		int uid = params->reserved_uid;
+
+		es->s_def_resuid = cpu_to_le16(uid & 0xFFFF);
+		es->s_def_resuid_hi = cpu_to_le16(uid >> 16);
+	}
+	if (params->set_flags & EXT4_TUNE_FL_RESERVED_GID) {
+		int gid = params->reserved_gid;
+
+		es->s_def_resgid = cpu_to_le16(gid & 0xFFFF);
+		es->s_def_resgid_hi = cpu_to_le16(gid >> 16);
+	}
+	if (params->set_flags & EXT4_TUNE_FL_DEFAULT_MNT_OPTS)
+		es->s_default_mount_opts = cpu_to_le32(params->default_mnt_opts);
+	if (params->set_flags & EXT4_TUNE_FL_DEF_HASH_ALG)
+		es->s_def_hash_version = params->def_hash_alg;
+	if (params->set_flags & EXT4_TUNE_FL_RAID_STRIDE)
+		es->s_raid_stride = cpu_to_le16(params->raid_stride);
+	if (params->set_flags & EXT4_TUNE_FL_RAID_STRIPE_WIDTH)
+		es->s_raid_stripe_width =
+			cpu_to_le16(params->raid_stripe_width);
+	strscpy_pad(es->s_mount_opts, params->mount_opts);
+	if (params->set_flags & EXT4_TUNE_FL_EDIT_FEATURES) {
+		es->s_feature_compat |=
+			cpu_to_le32(params->set_feature_compat_mask);
+		es->s_feature_incompat |=
+			cpu_to_le32(params->set_feature_incompat_mask);
+		es->s_feature_ro_compat |=
+			cpu_to_le32(params->set_feature_ro_compat_mask);
+		es->s_feature_compat &=
+			~cpu_to_le32(params->clear_feature_compat_mask);
+		es->s_feature_incompat &=
+			~cpu_to_le32(params->clear_feature_incompat_mask);
+		es->s_feature_ro_compat &=
+			~cpu_to_le32(params->clear_feature_ro_compat_mask);
+		if (params->set_feature_compat_mask &
+		    EXT4_FEATURE_COMPAT_DIR_INDEX)
+			es->s_def_hash_version = sbi->s_def_hash_version;
+		if (params->set_feature_incompat_mask &
+		    EXT4_FEATURE_INCOMPAT_CSUM_SEED)
+			es->s_checksum_seed = cpu_to_le32(sbi->s_csum_seed);
+	}
+	if (params->set_flags & EXT4_TUNE_FL_FORCE_FSCK)
+		es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
+}
+
+static int ext4_ioctl_set_tune_sb(struct file *filp,
+				  struct ext4_tune_sb_params __user *in)
+{
+	struct ext4_tune_sb_params params;
+	struct super_block *sb = file_inode(filp)->i_sb;
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
+	struct ext4_super_block *es = sbi->s_es;
+	int ret;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (copy_from_user(&params, in, sizeof(params)))
+		return -EFAULT;
+
+	if ((params.set_flags & ~TUNE_OPS_SUPPORTED) != 0)
+		return -EOPNOTSUPP;
+
+	if ((params.set_flags & EXT4_TUNE_FL_ERRORS_BEHAVIOR) &&
+	    (params.errors_behavior > EXT4_ERRORS_PANIC))
+		return -EINVAL;
+
+	if ((params.set_flags & EXT4_TUNE_FL_RESERVED_BLOCKS) &&
+	    (params.reserved_blocks > ext4_blocks_count(sbi->s_es) / 2))
+		return -EINVAL;
+	if ((params.set_flags & EXT4_TUNE_FL_DEF_HASH_ALG) &&
+	    ((params.def_hash_alg > DX_HASH_LAST) ||
+	     (params.def_hash_alg == DX_HASH_SIPHASH)))
+		return -EINVAL;
+	if ((params.set_flags & EXT4_TUNE_FL_FEATURES) &&
+	    (params.set_flags & EXT4_TUNE_FL_EDIT_FEATURES))
+		return -EINVAL;
+
+	if (params.set_flags & EXT4_TUNE_FL_FEATURES) {
+		params.set_feature_compat_mask =
+			params.feature_compat &
+			~le32_to_cpu(es->s_feature_compat);
+		params.set_feature_incompat_mask =
+			params.feature_incompat &
+			~le32_to_cpu(es->s_feature_incompat);
+		params.set_feature_ro_compat_mask =
+			params.feature_ro_compat &
+			~le32_to_cpu(es->s_feature_ro_compat);
+		params.clear_feature_compat_mask =
+			~params.feature_compat &
+			le32_to_cpu(es->s_feature_compat);
+		params.clear_feature_incompat_mask =
+			~params.feature_incompat &
+			le32_to_cpu(es->s_feature_incompat);
+		params.clear_feature_ro_compat_mask =
+			~params.feature_ro_compat &
+			le32_to_cpu(es->s_feature_ro_compat);
+		params.set_flags |= EXT4_TUNE_FL_EDIT_FEATURES;
+	}
+	if (params.set_flags & EXT4_TUNE_FL_EDIT_FEATURES) {
+		if ((params.set_feature_compat_mask &
+		     ~EXT4_TUNE_SET_COMPAT_SUPP) ||
+		    (params.set_feature_incompat_mask &
+		     ~EXT4_TUNE_SET_INCOMPAT_SUPP) ||
+		    (params.set_feature_ro_compat_mask &
+		     ~EXT4_TUNE_SET_RO_COMPAT_SUPP) ||
+		    (params.clear_feature_compat_mask &
+		     ~EXT4_TUNE_CLEAR_COMPAT_SUPP) ||
+		    (params.clear_feature_incompat_mask &
+		     ~EXT4_TUNE_CLEAR_INCOMPAT_SUPP) ||
+		    (params.clear_feature_ro_compat_mask &
+		     ~EXT4_TUNE_CLEAR_RO_COMPAT_SUPP))
+			return -EOPNOTSUPP;
+
+		/*
+		 * Filter out the features that are already set from
+		 * the set_mask.
+		 */
+		params.set_feature_compat_mask &=
+			~le32_to_cpu(es->s_feature_compat);
+		params.set_feature_incompat_mask &=
+			~le32_to_cpu(es->s_feature_incompat);
+		params.set_feature_ro_compat_mask &=
+			~le32_to_cpu(es->s_feature_ro_compat);
+		if ((params.set_feature_compat_mask &
+		     EXT4_FEATURE_COMPAT_DIR_INDEX) &&
+		    !ext4_has_feature_dir_index(sb)) {
+			uuid_t	uu;
+
+			memcpy(&uu, sbi->s_hash_seed, UUID_SIZE);
+			if (uuid_is_null(&uu))
+				generate_random_uuid((char *)
+						     &sbi->s_hash_seed);
+			if (params.set_flags & EXT4_TUNE_FL_DEF_HASH_ALG)
+				sbi->s_def_hash_version = params.def_hash_alg;
+			else if (sbi->s_def_hash_version == 0)
+				sbi->s_def_hash_version = DX_HASH_HALF_MD4;
+			if (!(es->s_flags &
+			      cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH)) &&
+			    !(es->s_flags &
+			      cpu_to_le32(EXT2_FLAGS_SIGNED_HASH))) {
+#ifdef __CHAR_UNSIGNED__
+				sbi->s_hash_unsigned = 3;
+#else
+				sbi->s_hash_unsigned = 0;
+#endif
+			}
+		}
+	}
+
+
+	ret = mnt_want_write_file(filp);
+	if (ret)
+		return ret;
+
+	ret = ext4_update_superblocks_fn(sb, ext4_sb_setparams, &params);
+	mnt_drop_write_file(filp);
+
+	if (params.set_flags & EXT4_TUNE_FL_DEF_HASH_ALG)
+		sbi->s_def_hash_version = params.def_hash_alg;
+
+	return ret;
+}
+
 static long __ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
 	struct inode *inode = file_inode(filp);
@@ -1616,6 +1852,11 @@ static long __ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 		return ext4_ioctl_getuuid(EXT4_SB(sb), (void __user *)arg);
 	case EXT4_IOC_SETFSUUID:
 		return ext4_ioctl_setuuid(filp, (const void __user *)arg);
+	case EXT4_IOC_GET_TUNE_SB_PARAM:
+		return ext4_ioctl_get_tune_sb(EXT4_SB(sb),
+					      (void __user *)arg);
+	case EXT4_IOC_SET_TUNE_SB_PARAM:
+		return ext4_ioctl_set_tune_sb(filp, (void __user *)arg);
 	default:
 		return -ENOTTY;
 	}
@@ -1703,7 +1944,8 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 }
 #endif
 
-static void set_overhead(struct ext4_super_block *es, const void *arg)
+static void set_overhead(struct ext4_sb_info *sbi,
+			 struct ext4_super_block *es, const void *arg)
 {
 	es->s_overhead_clusters = cpu_to_le32(*((unsigned long *) arg));
 }
diff --git a/include/uapi/linux/ext4.h b/include/uapi/linux/ext4.h
index 1c4c2dd29112cda9f7dc91d917492cffc33ee524..145875fd633772e76ce7fd8bc0fef136ff620d2d 100644
--- a/include/uapi/linux/ext4.h
+++ b/include/uapi/linux/ext4.h
@@ -33,6 +33,8 @@
 #define EXT4_IOC_CHECKPOINT		_IOW('f', 43, __u32)
 #define EXT4_IOC_GETFSUUID		_IOR('f', 44, struct fsuuid)
 #define EXT4_IOC_SETFSUUID		_IOW('f', 44, struct fsuuid)
+#define EXT4_IOC_GET_TUNE_SB_PARAM	_IOR('f', 45, struct ext4_tune_sb_params)
+#define EXT4_IOC_SET_TUNE_SB_PARAM	_IOW('f', 46, struct ext4_tune_sb_params)
 
 #define EXT4_IOC_SHUTDOWN _IOR('X', 125, __u32)
 
@@ -108,6 +110,79 @@ struct ext4_new_group_input {
 	__u16 unused;
 };
 
+struct ext4_tune_sb_params {
+	__u32 set_flags;
+	__u32 checkinterval;
+	__u16 errors_behavior;
+	__u16 mnt_count;
+	__u16 max_mnt_count;
+	__u16 raid_stride;
+	__u64 last_check_time;
+	__u64 reserved_blocks;
+	__u64 blocks_count;
+	__u32 default_mnt_opts;
+	__u32 reserved_uid;
+	__u32 reserved_gid;
+	__u32 raid_stripe_width;
+	__u8  def_hash_alg;
+	__u8  pad_1;
+	__u16 pad_2;
+	__u32 feature_compat;
+	__u32 feature_incompat;
+	__u32 feature_ro_compat;
+	__u32 set_feature_compat_mask;
+	__u32 set_feature_incompat_mask;
+	__u32 set_feature_ro_compat_mask;
+	__u32 clear_feature_compat_mask;
+	__u32 clear_feature_incompat_mask;
+	__u32 clear_feature_ro_compat_mask;
+	__u8  mount_opts[64];
+	__u8  pad[64];
+};
+
+#define EXT4_TUNE_FL_ERRORS_BEHAVIOR	0x00000001
+#define EXT4_TUNE_FL_MNT_COUNT		0x00000002
+#define EXT4_TUNE_FL_MAX_MNT_COUNT	0x00000004
+#define EXT4_TUNE_FL_CHECKINTRVAL	0x00000008
+#define EXT4_TUNE_FL_LAST_CHECK_TIME	0x00000010
+#define EXT4_TUNE_FL_RESERVED_BLOCKS	0x00000020
+#define EXT4_TUNE_FL_RESERVED_UID	0x00000040
+#define EXT4_TUNE_FL_RESERVED_GID	0x00000080
+#define EXT4_TUNE_FL_DEFAULT_MNT_OPTS	0x00000100
+#define EXT4_TUNE_FL_DEF_HASH_ALG	0x00000200
+#define EXT4_TUNE_FL_RAID_STRIDE	0x00000400
+#define EXT4_TUNE_FL_RAID_STRIPE_WIDTH	0x00000800
+#define EXT4_TUNE_FL_MOUNT_OPTS		0x00001000
+#define EXT4_TUNE_FL_FEATURES		0x00002000
+#define EXT4_TUNE_FL_EDIT_FEATURES	0x00004000
+#define EXT4_TUNE_FL_FORCE_FSCK		0x00008000
+
+#define EXT4_TUNE_SET_COMPAT_SUPP \
+		(EXT4_FEATURE_COMPAT_DIR_INDEX |	\
+		 EXT4_FEATURE_COMPAT_STABLE_INODES)
+#define EXT4_TUNE_SET_INCOMPAT_SUPP \
+		(EXT4_FEATURE_INCOMPAT_EXTENTS |	\
+		 EXT4_FEATURE_INCOMPAT_EA_INODE |	\
+		 EXT4_FEATURE_INCOMPAT_ENCRYPT |	\
+		 EXT4_FEATURE_INCOMPAT_CSUM_SEED |	\
+		 EXT4_FEATURE_INCOMPAT_LARGEDIR |	\
+		 EXT4_FEATURE_INCOMPAT_CASEFOLD)
+#define EXT4_TUNE_SET_RO_COMPAT_SUPP \
+		(EXT4_FEATURE_RO_COMPAT_LARGE_FILE |	\
+		 EXT4_FEATURE_RO_COMPAT_DIR_NLINK |	\
+		 EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE |	\
+		 EXT4_FEATURE_RO_COMPAT_READONLY |	\
+		 EXT4_FEATURE_RO_COMPAT_PROJECT |	\
+		 EXT4_FEATURE_RO_COMPAT_VERITY)
+
+#define EXT4_TUNE_CLEAR_COMPAT_SUPP (0)
+#define EXT4_TUNE_CLEAR_INCOMPAT_SUPP (0)
+#define EXT4_TUNE_CLEAR_RO_COMPAT_SUPP \
+		(EXT4_FEATURE_RO_COMPAT_LARGE_FILE |	\
+		 EXT4_FEATURE_RO_COMPAT_DIR_NLINK |	\
+		 EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE |	\
+		 EXT4_FEATURE_RO_COMPAT_PROJECT)
+
 /*
  * Returned by EXT4_IOC_GET_ES_CACHE as an additional possible flag.
  * It indicates that the entry in extent status cache is for a hole.

-- 
2.51.0



^ permalink raw reply related

* Re: [PATCH v3 29/30] luo: allow preserving memfd
From: Pratyush Yadav @ 2025-09-09 14:48 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Chris Li, Pasha Tatashin, pratyush, jasonmiu, graf, changyuanl,
	rppt, dmatlack, rientjes, corbet, rdunlap, ilpo.jarvinen, kanie,
	ojeda, aliceryhl, masahiroy, akpm, tj, yoann.congal, mmaurer,
	roman.gushchin, chenridong, axboe, mark.rutland, jannh,
	vincent.guittot, hannes, dan.j.williams, david, joel.granados,
	rostedt, anna.schumaker, song, zhangguopeng, linux, linux-kernel,
	linux-doc, linux-mm, gregkh, tglx, mingo, bp, dave.hansen, x86,
	hpa, rafael, dakr, bartosz.golaszewski, cw00.choi, myungjoo.ham,
	yesanishhere, Jonathan.Cameron, quic_zijuhu, aleksander.lobakin,
	ira.weiny, andriy.shevchenko, leon, lukas, bhelgaas, wagi,
	djeffery, stuart.w.hayes, lennart, brauner, linux-api,
	linux-fsdevel, saeedm, ajayachandra, parav, leonro, witu
In-Reply-To: <20250904173433.GA616306@nvidia.com>

On Thu, Sep 04 2025, Jason Gunthorpe wrote:

> On Wed, Sep 03, 2025 at 05:01:15AM -0700, Chris Li wrote:
>
>> > And if you want to serialize that the optimal path would be to have a
>> > vmalloc of all the strings and a vmalloc of the [] data, sort of like
>> > the kho array idea.
>> 
>> The KHO array idea is already implemented in the existing KHO code or
>> that is something new you want to propose?
>
> Pratyush has proposed it

I just sent out the RFC:
https://lore.kernel.org/linux-mm/20250909144426.33274-1-pratyush@kernel.org/T/#u

[...]

-- 
Regards,
Pratyush Yadav

^ permalink raw reply

* Re: [PATCH v3 29/30] luo: allow preserving memfd
From: Pratyush Yadav @ 2025-09-09 14:53 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Pratyush Yadav, Pasha Tatashin, jasonmiu, graf, changyuanl, rppt,
	dmatlack, rientjes, corbet, rdunlap, ilpo.jarvinen, kanie, ojeda,
	aliceryhl, masahiroy, akpm, tj, yoann.congal, mmaurer,
	roman.gushchin, chenridong, axboe, mark.rutland, jannh,
	vincent.guittot, hannes, dan.j.williams, david, joel.granados,
	rostedt, anna.schumaker, song, zhangguopeng, linux, linux-kernel,
	linux-doc, linux-mm, gregkh, tglx, mingo, bp, dave.hansen, x86,
	hpa, rafael, dakr, bartosz.golaszewski, cw00.choi, myungjoo.ham,
	yesanishhere, Jonathan.Cameron, quic_zijuhu, aleksander.lobakin,
	ira.weiny, andriy.shevchenko, leon, lukas, bhelgaas, wagi,
	djeffery, stuart.w.hayes, lennart, brauner, linux-api,
	linux-fsdevel, saeedm, ajayachandra, parav, leonro, witu
In-Reply-To: <20250904144240.GO470103@nvidia.com>

On Thu, Sep 04 2025, Jason Gunthorpe wrote:

> On Thu, Sep 04, 2025 at 02:57:35PM +0200, Pratyush Yadav wrote:
>
>> I don't think it matters if they are preserved or not. The serialization
>> and deserialization is independent of that. You can very well create a
>> KHO array that you don't KHO-preserve. On next boot, you can still use
>> it, you just have to be careful of doing it while scratch-only. Same as
>> we do now.
>
> The KHO array machinery itself can't preserve its own memory
> either.

It can. Maybe it couldn't in the version I showed you, but now it can.
See kho_array_preserve() in
https://lore.kernel.org/linux-mm/20250909144426.33274-2-pratyush@kernel.org/

>
>> For the _hypervisor_ live update case, sure. Though even there, I have a
>> feeling we will start seeing userspace components on the hypervisor use
>> memfd for stashing some of their state. 
>
> Sure, but don't make excessively sparse memfds for kexec use, why
> should that be hard?

Sure, I don't think they should be excessively sparse. But _some_ level
of sparseness can be there.

>
>> applications. Think big storage nodes with memory in order of TiB. Those
>> can use a memfd to back their caches so on a kernel upgrade the caches
>> don't have to be re-fetched. Sparseness is to be expected for such use
>> cases.
>
> Oh? I'm surpised you'd have sparseness there. sparseness seems like
> such a weird feature to want to rely on :\
>
>> But perhaps it might be a better idea to come up with a mechanism for
>> the kernel to discover which formats the "next" kernel speaks so it can
>> for one decide whether it can do the live update at all, and for another
>> which formats it should use. Maybe we give a way for luod to choose
>> formats, and give it the responsibility for doing these checks?
>
> I have felt that we should catalog the formats&versions the kernel can
> read/write in some way during kbuild.
>
> Maybe this turns into a sysfs directory of all the data with an
> 'enable_write' flag that luod could set to 0 to optimize.
>
> And maybe this could be a kbuild report that luod could parse to do
> this optimization.

Or maybe we put that information in a ELF section in the kernel image?
Not sure how feasible it would be for tooling to read but I think that
would very closely associate the versions info with the kernel. The
other option might be to put it somewhere with modules I guess.

>
> And maybe distro/csps use this information mechanically to check if
> version pairs are kexec compatible.
>
> Which re-enforces my feeling that the formats/version should be first
> class concepts, every version should be registered and luo should
> sequence calling the code for the right version at the right time.
>
> Jason

-- 
Regards,
Pratyush Yadav

^ permalink raw reply

* Re: [PATCH v3 29/30] luo: allow preserving memfd
From: Pasha Tatashin @ 2025-09-09 15:40 UTC (permalink / raw)
  To: Pratyush Yadav
  Cc: Jason Gunthorpe, Pratyush Yadav, jasonmiu, graf, changyuanl, rppt,
	dmatlack, rientjes, corbet, rdunlap, ilpo.jarvinen, kanie, ojeda,
	aliceryhl, masahiroy, akpm, tj, yoann.congal, mmaurer,
	roman.gushchin, chenridong, axboe, mark.rutland, jannh,
	vincent.guittot, hannes, dan.j.williams, david, joel.granados,
	rostedt, anna.schumaker, song, zhangguopeng, linux, linux-kernel,
	linux-doc, linux-mm, gregkh, tglx, mingo, bp, dave.hansen, x86,
	hpa, rafael, dakr, bartosz.golaszewski, cw00.choi, myungjoo.ham,
	yesanishhere, Jonathan.Cameron, quic_zijuhu, aleksander.lobakin,
	ira.weiny, andriy.shevchenko, leon, lukas, bhelgaas, wagi,
	djeffery, stuart.w.hayes, lennart, brauner, linux-api,
	linux-fsdevel, saeedm, ajayachandra, parav, leonro, witu
In-Reply-To: <mafs0cy7zllsn.fsf@yadavpratyush.com>

On Tue, Sep 9, 2025 at 10:53 AM Pratyush Yadav <me@yadavpratyush.com> wrote:
>
> On Thu, Sep 04 2025, Jason Gunthorpe wrote:
>
> > On Thu, Sep 04, 2025 at 02:57:35PM +0200, Pratyush Yadav wrote:
> >
> >> I don't think it matters if they are preserved or not. The serialization
> >> and deserialization is independent of that. You can very well create a
> >> KHO array that you don't KHO-preserve. On next boot, you can still use
> >> it, you just have to be careful of doing it while scratch-only. Same as
> >> we do now.
> >
> > The KHO array machinery itself can't preserve its own memory
> > either.
>
> It can. Maybe it couldn't in the version I showed you, but now it can.
> See kho_array_preserve() in
> https://lore.kernel.org/linux-mm/20250909144426.33274-2-pratyush@kernel.org/
>
> >
> >> For the _hypervisor_ live update case, sure. Though even there, I have a
> >> feeling we will start seeing userspace components on the hypervisor use
> >> memfd for stashing some of their state.
> >
> > Sure, but don't make excessively sparse memfds for kexec use, why
> > should that be hard?
>
> Sure, I don't think they should be excessively sparse. But _some_ level
> of sparseness can be there.

This is right; loosely sparse memfd support is needed. However, an
excessively sparse preservation will be inefficient for LU, unless we
change the backing to be from a separate pool of physical pages that
is always preserved. If we do that, it would probably make sense only
for guestmemfd and only if we ever decide to support overcommitted
VMs. I suspect it is not something that we currently need to worry
about.

> >> applications. Think big storage nodes with memory in order of TiB. Those
> >> can use a memfd to back their caches so on a kernel upgrade the caches
> >> don't have to be re-fetched. Sparseness is to be expected for such use
> >> cases.
> >
> > Oh? I'm surpised you'd have sparseness there. sparseness seems like
> > such a weird feature to want to rely on :\
> >
> >> But perhaps it might be a better idea to come up with a mechanism for
> >> the kernel to discover which formats the "next" kernel speaks so it can
> >> for one decide whether it can do the live update at all, and for another
> >> which formats it should use. Maybe we give a way for luod to choose
> >> formats, and give it the responsibility for doing these checks?
> >
> > I have felt that we should catalog the formats&versions the kernel can
> > read/write in some way during kbuild.
> >
> > Maybe this turns into a sysfs directory of all the data with an
> > 'enable_write' flag that luod could set to 0 to optimize.
> >
> > And maybe this could be a kbuild report that luod could parse to do
> > this optimization.
>
> Or maybe we put that information in a ELF section in the kernel image?
> Not sure how feasible it would be for tooling to read but I think that
> would very closely associate the versions info with the kernel. The
> other option might be to put it somewhere with modules I guess.

To me, all this sounds like hardening, which, while important, can be
added later. The pre-kexec check for compatibility can be defined and
implemented once we have all live update components ready
(KHO/LUO/PCI/IOMMU/VFIO/MEMFD), once we stabilize the versioning
story, and once we start discussing update stability.

Currently, we've agreed that there are no stability guarantees.
Sometime in the future, we may guarantee minor-to-minor stability, and
later, stable-to-stable. Once we start working on minor-to-minor
stability, it would be a good idea to also add hardening where a
pre-live update would check for compatibility.

In reality, this is not something that is high priority for cloud
providers, because these kinds of incompatibilities would be found
during qualification; the kernel will fail to update by detecting a
version mismatch during boot instead of during shutdown.

> > And maybe distro/csps use this information mechanically to check if
> > version pairs are kexec compatible.
> >
> > Which re-enforces my feeling that the formats/version should be first
> > class concepts, every version should be registered and luo should
> > sequence calling the code for the right version at the right time.
> >
> > Jason
>
> --
> Regards,
> Pratyush Yadav

^ permalink raw reply

* Re: [PATCH v3 29/30] luo: allow preserving memfd
From: Jason Gunthorpe @ 2025-09-09 15:54 UTC (permalink / raw)
  To: Pasha Tatashin
  Cc: Pratyush Yadav, Pratyush Yadav, jasonmiu, graf, changyuanl, rppt,
	dmatlack, rientjes, corbet, rdunlap, ilpo.jarvinen, kanie, ojeda,
	aliceryhl, masahiroy, akpm, tj, yoann.congal, mmaurer,
	roman.gushchin, chenridong, axboe, mark.rutland, jannh,
	vincent.guittot, hannes, dan.j.williams, david, joel.granados,
	rostedt, anna.schumaker, song, zhangguopeng, linux, linux-kernel,
	linux-doc, linux-mm, gregkh, tglx, mingo, bp, dave.hansen, x86,
	hpa, rafael, dakr, bartosz.golaszewski, cw00.choi, myungjoo.ham,
	yesanishhere, Jonathan.Cameron, quic_zijuhu, aleksander.lobakin,
	ira.weiny, andriy.shevchenko, leon, lukas, bhelgaas, wagi,
	djeffery, stuart.w.hayes, lennart, brauner, linux-api,
	linux-fsdevel, saeedm, ajayachandra, parav, leonro, witu
In-Reply-To: <CA+CK2bAKL-gyER2abOV-f4M6HOx9=xDE+=jtcDL6YFbQf1-6og@mail.gmail.com>

On Tue, Sep 09, 2025 at 11:40:18AM -0400, Pasha Tatashin wrote:
> In reality, this is not something that is high priority for cloud
> providers, because these kinds of incompatibilities would be found
> during qualification; the kernel will fail to update by detecting a
> version mismatch during boot instead of during shutdown.

Given I expect CSPs will have to add-in specific version support for
their own special version-pair needs, I think it would be helpful in
the long run to have a tool that reported what versions a kernel build
wrote and parsed. Test-to-learn the same information sounds a bit too
difficult.

Something with ELF is probably how to do that, but I don't imagine a
use in a runtime check consuming this information.

Jason

^ permalink raw reply

* Re: [PATCH v3 29/30] luo: allow preserving memfd
From: Pratyush Yadav @ 2025-09-09 15:56 UTC (permalink / raw)
  To: Pasha Tatashin
  Cc: Pratyush Yadav, Jason Gunthorpe, Pratyush Yadav, jasonmiu, graf,
	changyuanl, rppt, dmatlack, rientjes, corbet, rdunlap,
	ilpo.jarvinen, kanie, ojeda, aliceryhl, masahiroy, akpm, tj,
	yoann.congal, mmaurer, roman.gushchin, chenridong, axboe,
	mark.rutland, jannh, vincent.guittot, hannes, dan.j.williams,
	david, joel.granados, rostedt, anna.schumaker, song, zhangguopeng,
	linux, linux-kernel, linux-doc, linux-mm, gregkh, tglx, mingo, bp,
	dave.hansen, x86, hpa, rafael, dakr, bartosz.golaszewski,
	cw00.choi, myungjoo.ham, yesanishhere, Jonathan.Cameron,
	quic_zijuhu, aleksander.lobakin, ira.weiny, andriy.shevchenko,
	leon, lukas, bhelgaas, wagi, djeffery, stuart.w.hayes, lennart,
	brauner, linux-api, linux-fsdevel, saeedm, ajayachandra, parav,
	leonro, witu
In-Reply-To: <CA+CK2bAKL-gyER2abOV-f4M6HOx9=xDE+=jtcDL6YFbQf1-6og@mail.gmail.com>

On Tue, Sep 09 2025, Pasha Tatashin wrote:

> On Tue, Sep 9, 2025 at 10:53 AM Pratyush Yadav <me@yadavpratyush.com> wrote:
>>
>> On Thu, Sep 04 2025, Jason Gunthorpe wrote:
>>
>> > On Thu, Sep 04, 2025 at 02:57:35PM +0200, Pratyush Yadav wrote:
[...]
>> >> But perhaps it might be a better idea to come up with a mechanism for
>> >> the kernel to discover which formats the "next" kernel speaks so it can
>> >> for one decide whether it can do the live update at all, and for another
>> >> which formats it should use. Maybe we give a way for luod to choose
>> >> formats, and give it the responsibility for doing these checks?
>> >
>> > I have felt that we should catalog the formats&versions the kernel can
>> > read/write in some way during kbuild.
>> >
>> > Maybe this turns into a sysfs directory of all the data with an
>> > 'enable_write' flag that luod could set to 0 to optimize.
>> >
>> > And maybe this could be a kbuild report that luod could parse to do
>> > this optimization.
>>
>> Or maybe we put that information in a ELF section in the kernel image?
>> Not sure how feasible it would be for tooling to read but I think that
>> would very closely associate the versions info with the kernel. The
>> other option might be to put it somewhere with modules I guess.
>
> To me, all this sounds like hardening, which, while important, can be
> added later. The pre-kexec check for compatibility can be defined and
> implemented once we have all live update components ready
> (KHO/LUO/PCI/IOMMU/VFIO/MEMFD), once we stabilize the versioning
> story, and once we start discussing update stability.

Right. I don't think this is something the current LUO patches have to
solve. This is for later down the line.

>
> Currently, we've agreed that there are no stability guarantees.
> Sometime in the future, we may guarantee minor-to-minor stability, and
> later, stable-to-stable. Once we start working on minor-to-minor
> stability, it would be a good idea to also add hardening where a
> pre-live update would check for compatibility.
>
> In reality, this is not something that is high priority for cloud
> providers, because these kinds of incompatibilities would be found
> during qualification; the kernel will fail to update by detecting a
> version mismatch during boot instead of during shutdown.

I think it would help with making a wider range of roll back and forward
options available. For example, if your current kernel can speak version
A and B, and you are rolling back to a kernel that only speaks A, this
information can be used to choose the right serialization formats.

[...]

-- 
Regards,
Pratyush Yadav

^ permalink raw reply

* Re: [PATCH v3 29/30] luo: allow preserving memfd
From: Pasha Tatashin @ 2025-09-09 16:25 UTC (permalink / raw)
  To: Pratyush Yadav
  Cc: Jason Gunthorpe, Pratyush Yadav, jasonmiu, graf, changyuanl, rppt,
	dmatlack, rientjes, corbet, rdunlap, ilpo.jarvinen, kanie, ojeda,
	aliceryhl, masahiroy, akpm, tj, yoann.congal, mmaurer,
	roman.gushchin, chenridong, axboe, mark.rutland, jannh,
	vincent.guittot, hannes, dan.j.williams, david, joel.granados,
	rostedt, anna.schumaker, song, zhangguopeng, linux, linux-kernel,
	linux-doc, linux-mm, gregkh, tglx, mingo, bp, dave.hansen, x86,
	hpa, rafael, dakr, bartosz.golaszewski, cw00.choi, myungjoo.ham,
	yesanishhere, Jonathan.Cameron, quic_zijuhu, aleksander.lobakin,
	ira.weiny, andriy.shevchenko, leon, lukas, bhelgaas, wagi,
	djeffery, stuart.w.hayes, lennart, brauner, linux-api,
	linux-fsdevel, saeedm, ajayachandra, parav, leonro, witu
In-Reply-To: <mafs0h5xbk4ap.fsf@yadavpratyush.com>

> I think it would help with making a wider range of roll back and forward
> options available. For example, if your current kernel can speak version
> A and B, and you are rolling back to a kernel that only speaks A, this
> information can be used to choose the right serialization formats.

At least for upstream, we discussed not to support rolling back (this
can be revised in the future), but for now rollback is something that
would need to be taken care of downstream.

Pasha

^ permalink raw reply

* Re: [PATCH v3 29/30] luo: allow preserving memfd
From: Pasha Tatashin @ 2025-09-09 16:30 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Pratyush Yadav, Pratyush Yadav, jasonmiu, graf, changyuanl, rppt,
	dmatlack, rientjes, corbet, rdunlap, ilpo.jarvinen, kanie, ojeda,
	aliceryhl, masahiroy, akpm, tj, yoann.congal, mmaurer,
	roman.gushchin, chenridong, axboe, mark.rutland, jannh,
	vincent.guittot, hannes, dan.j.williams, david, joel.granados,
	rostedt, anna.schumaker, song, zhangguopeng, linux, linux-kernel,
	linux-doc, linux-mm, gregkh, tglx, mingo, bp, dave.hansen, x86,
	hpa, rafael, dakr, bartosz.golaszewski, cw00.choi, myungjoo.ham,
	yesanishhere, Jonathan.Cameron, quic_zijuhu, aleksander.lobakin,
	ira.weiny, andriy.shevchenko, leon, lukas, bhelgaas, wagi,
	djeffery, stuart.w.hayes, lennart, brauner, linux-api,
	linux-fsdevel, saeedm, ajayachandra, parav, leonro, witu
In-Reply-To: <20250909155407.GO789684@nvidia.com>

On Tue, Sep 9, 2025 at 11:54 AM Jason Gunthorpe <jgg@nvidia.com> wrote:
>
> On Tue, Sep 09, 2025 at 11:40:18AM -0400, Pasha Tatashin wrote:
> > In reality, this is not something that is high priority for cloud
> > providers, because these kinds of incompatibilities would be found
> > during qualification; the kernel will fail to update by detecting a
> > version mismatch during boot instead of during shutdown.
>
> Given I expect CSPs will have to add-in specific version support for
> their own special version-pair needs, I think it would be helpful in
> the long run to have a tool that reported what versions a kernel build
> wrote and parsed. Test-to-learn the same information sounds a bit too
> difficult.

Yes, I agree. My point was only about the near term: it's just not a
priority at the moment. This won't block us in the future, as we can
always add a tooling later to inject the required ELF segments for
pre-live update checks.

Pasha

^ permalink raw reply

* Re: [PATCH v3 29/30] luo: allow preserving memfd
From: Jason Gunthorpe @ 2025-09-09 16:57 UTC (permalink / raw)
  To: Pasha Tatashin
  Cc: Pratyush Yadav, Pratyush Yadav, jasonmiu, graf, changyuanl, rppt,
	dmatlack, rientjes, corbet, rdunlap, ilpo.jarvinen, kanie, ojeda,
	aliceryhl, masahiroy, akpm, tj, yoann.congal, mmaurer,
	roman.gushchin, chenridong, axboe, mark.rutland, jannh,
	vincent.guittot, hannes, dan.j.williams, david, joel.granados,
	rostedt, anna.schumaker, song, zhangguopeng, linux, linux-kernel,
	linux-doc, linux-mm, gregkh, tglx, mingo, bp, dave.hansen, x86,
	hpa, rafael, dakr, bartosz.golaszewski, cw00.choi, myungjoo.ham,
	yesanishhere, Jonathan.Cameron, quic_zijuhu, aleksander.lobakin,
	ira.weiny, andriy.shevchenko, leon, lukas, bhelgaas, wagi,
	djeffery, stuart.w.hayes, lennart, brauner, linux-api,
	linux-fsdevel, saeedm, ajayachandra, parav, leonro, witu
In-Reply-To: <CA+CK2bAvxvXKKanKzMZYrknBnVBUGBwYmgXppdiPbotbXRkGeQ@mail.gmail.com>

On Tue, Sep 09, 2025 at 12:30:35PM -0400, Pasha Tatashin wrote:
> On Tue, Sep 9, 2025 at 11:54 AM Jason Gunthorpe <jgg@nvidia.com> wrote:
> >
> > On Tue, Sep 09, 2025 at 11:40:18AM -0400, Pasha Tatashin wrote:
> > > In reality, this is not something that is high priority for cloud
> > > providers, because these kinds of incompatibilities would be found
> > > during qualification; the kernel will fail to update by detecting a
> > > version mismatch during boot instead of during shutdown.
> >
> > Given I expect CSPs will have to add-in specific version support for
> > their own special version-pair needs, I think it would be helpful in
> > the long run to have a tool that reported what versions a kernel build
> > wrote and parsed. Test-to-learn the same information sounds a bit too
> > difficult.
> 
> Yes, I agree. My point was only about the near term: it's just not a
> priority at the moment. This won't block us in the future, as we can
> always add a tooling later to inject the required ELF segments for
> pre-live update checks.

Yes, but lets design things to have this kind of logical code model
where there are specific serializations, with specific versions that
are at least discoverably by greping for some struct luo_xxx_ops or
whatever.

Let's avoid open coding versioning stuff where it is hard to find and
hard to later make a manifest out of

Jason


^ permalink raw reply

* Re: [PATCH v3 29/30] luo: allow preserving memfd
From: Pasha Tatashin @ 2025-09-09 17:27 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Pratyush Yadav, Pratyush Yadav, jasonmiu, graf, changyuanl, rppt,
	dmatlack, rientjes, corbet, rdunlap, ilpo.jarvinen, kanie, ojeda,
	aliceryhl, masahiroy, akpm, tj, yoann.congal, mmaurer,
	roman.gushchin, chenridong, axboe, mark.rutland, jannh,
	vincent.guittot, hannes, dan.j.williams, david, joel.granados,
	rostedt, anna.schumaker, song, zhangguopeng, linux, linux-kernel,
	linux-doc, linux-mm, gregkh, tglx, mingo, bp, dave.hansen, x86,
	hpa, rafael, dakr, bartosz.golaszewski, cw00.choi, myungjoo.ham,
	yesanishhere, Jonathan.Cameron, quic_zijuhu, aleksander.lobakin,
	ira.weiny, andriy.shevchenko, leon, lukas, bhelgaas, wagi,
	djeffery, stuart.w.hayes, lennart, brauner, linux-api,
	linux-fsdevel, saeedm, ajayachandra, parav, leonro, witu
In-Reply-To: <20250909165718.GP789684@nvidia.com>

> Yes, but lets design things to have this kind of logical code model
> where there are specific serializations, with specific versions that
> are at least discoverably by greping for some struct luo_xxx_ops or
> whatever.
>
> Let's avoid open coding versioning stuff where it is hard to find and
> hard to later make a manifest out of

Fully agreed, the versioning has to be centralized (not open coded,
and verified in a single place) & discoverable.

>
> Jason
>

^ permalink raw reply

* Re: [PATCH 3/3] ext4: implemet new ioctls to set and get superblock parameters
From: kernel test robot @ 2025-09-09 21:33 UTC (permalink / raw)
  To: Theodore Ts'o via B4 Relay, tytso
  Cc: oe-kbuild-all, linux-ext4, linux-api
In-Reply-To: <20250908-tune2fs-v1-3-e3a6929f3355@mit.edu>

Hi Theodore,

kernel test robot noticed the following build warnings:

[auto build test WARNING on b320789d6883cc00ac78ce83bccbfe7ed58afcf0]

url:    https://github.com/intel-lab-lkp/linux/commits/Theodore-Ts-o-via-B4-Relay/ext4-avoid-potential-buffer-over-read-in-parse_apply_sb_mount_options/20250909-111746
base:   b320789d6883cc00ac78ce83bccbfe7ed58afcf0
patch link:    https://lore.kernel.org/r/20250908-tune2fs-v1-3-e3a6929f3355%40mit.edu
patch subject: [PATCH 3/3] ext4: implemet new ioctls to set and get superblock parameters
config: csky-randconfig-r123-20250910 (https://download.01.org/0day-ci/archive/20250910/202509100550.fj5qrPH5-lkp@intel.com/config)
compiler: csky-linux-gcc (GCC) 10.5.0
reproduce: (https://download.01.org/0day-ci/archive/20250910/202509100550.fj5qrPH5-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202509100550.fj5qrPH5-lkp@intel.com/

sparse warnings: (new ones prefixed by >>)
>> fs/ext4/ioctl.c:1255:29: sparse: sparse: incorrect type in assignment (different base types) @@     expected unsigned short [addressable] [assigned] [usertype] errors_behavior @@     got restricted __le16 [usertype] s_errors @@
   fs/ext4/ioctl.c:1255:29: sparse:     expected unsigned short [addressable] [assigned] [usertype] errors_behavior
   fs/ext4/ioctl.c:1255:29: sparse:     got restricted __le16 [usertype] s_errors
>> fs/ext4/ioctl.c:1267:33: sparse: sparse: cast to restricted __le16
>> fs/ext4/ioctl.c:1267:33: sparse: sparse: cast from restricted __le32
>> fs/ext4/ioctl.c:1323:41: sparse: sparse: incorrect type in assignment (different base types) @@     expected restricted __le32 [usertype] s_raid_stripe_width @@     got restricted __le16 [usertype] @@
   fs/ext4/ioctl.c:1323:41: sparse:     expected restricted __le32 [usertype] s_raid_stripe_width
   fs/ext4/ioctl.c:1323:41: sparse:     got restricted __le16 [usertype]
   fs/ext4/ioctl.c: note: in included file (through include/linux/uaccess.h, include/linux/sched/task.h, include/linux/sched/signal.h, ...):
   arch/csky/include/asm/uaccess.h:110:17: sparse: sparse: cast removes address space '__user' of expression
   arch/csky/include/asm/uaccess.h:110:17: sparse: sparse: asm output is not an lvalue
   arch/csky/include/asm/uaccess.h:110:17: sparse: sparse: cast removes address space '__user' of expression
   arch/csky/include/asm/uaccess.h:110:17: sparse: sparse: cast removes address space '__user' of expression
   arch/csky/include/asm/uaccess.h:110:17: sparse: sparse: asm output is not an lvalue
   arch/csky/include/asm/uaccess.h:110:17: sparse: sparse: cast removes address space '__user' of expression
   arch/csky/include/asm/uaccess.h:110:17: sparse: sparse: generating address of non-lvalue (11)
   arch/csky/include/asm/uaccess.h:110:17: sparse: sparse: generating address of non-lvalue (11)

vim +1255 fs/ext4/ioctl.c

  1235	
  1236	
  1237	#define TUNE_OPS_SUPPORTED (EXT4_TUNE_FL_ERRORS_BEHAVIOR |    \
  1238		EXT4_TUNE_FL_MNT_COUNT | EXT4_TUNE_FL_MAX_MNT_COUNT | \
  1239		EXT4_TUNE_FL_CHECKINTRVAL | EXT4_TUNE_FL_LAST_CHECK_TIME | \
  1240		EXT4_TUNE_FL_RESERVED_BLOCKS | EXT4_TUNE_FL_RESERVED_UID | \
  1241		EXT4_TUNE_FL_RESERVED_GID | EXT4_TUNE_FL_DEFAULT_MNT_OPTS | \
  1242		EXT4_TUNE_FL_DEF_HASH_ALG | EXT4_TUNE_FL_RAID_STRIDE | \
  1243		EXT4_TUNE_FL_RAID_STRIPE_WIDTH | EXT4_TUNE_FL_MOUNT_OPTS | \
  1244		EXT4_TUNE_FL_FEATURES | EXT4_TUNE_FL_EDIT_FEATURES | \
  1245		EXT4_TUNE_FL_FORCE_FSCK)
  1246	
  1247	static int ext4_ioctl_get_tune_sb(struct ext4_sb_info *sbi,
  1248					  struct ext4_tune_sb_params __user *params)
  1249	{
  1250		struct ext4_tune_sb_params ret;
  1251		struct ext4_super_block *es = sbi->s_es;
  1252	
  1253		memset(&ret, 0, sizeof(ret));
  1254		ret.set_flags = TUNE_OPS_SUPPORTED;
> 1255		ret.errors_behavior = es->s_errors;
  1256		ret.mnt_count = le16_to_cpu(es->s_mnt_count);
  1257		ret.max_mnt_count = le16_to_cpu(es->s_max_mnt_count);
  1258		ret.checkinterval = le32_to_cpu(es->s_checkinterval);
  1259		ret.last_check_time = le32_to_cpu(es->s_lastcheck);
  1260		ret.reserved_blocks = ext4_r_blocks_count(es);
  1261		ret.blocks_count = ext4_blocks_count(es);
  1262		ret.reserved_uid = ext4_get_resuid(es);
  1263		ret.reserved_gid = ext4_get_resgid(es);
  1264		ret.default_mnt_opts = le32_to_cpu(es->s_default_mount_opts);
  1265		ret.def_hash_alg = es->s_def_hash_version;
  1266		ret.raid_stride = le16_to_cpu(es->s_raid_stride);
> 1267		ret.raid_stripe_width = le16_to_cpu(es->s_raid_stripe_width);
  1268		strscpy_pad(ret.mount_opts, es->s_mount_opts);
  1269		ret.feature_compat = le32_to_cpu(es->s_feature_compat);
  1270		ret.feature_incompat = le32_to_cpu(es->s_feature_incompat);
  1271		ret.feature_ro_compat = le32_to_cpu(es->s_feature_ro_compat);
  1272		ret.set_feature_compat_mask = EXT4_TUNE_SET_COMPAT_SUPP;
  1273		ret.set_feature_incompat_mask = EXT4_TUNE_SET_INCOMPAT_SUPP;
  1274		ret.set_feature_ro_compat_mask = EXT4_TUNE_SET_RO_COMPAT_SUPP;
  1275		ret.clear_feature_compat_mask = EXT4_TUNE_CLEAR_COMPAT_SUPP;
  1276		ret.clear_feature_incompat_mask = EXT4_TUNE_CLEAR_INCOMPAT_SUPP;
  1277		ret.clear_feature_ro_compat_mask = EXT4_TUNE_CLEAR_RO_COMPAT_SUPP;
  1278		if (copy_to_user(params, &ret, sizeof(ret)))
  1279			return -EFAULT;
  1280		return 0;
  1281	}
  1282	
  1283	static void ext4_sb_setparams(struct ext4_sb_info *sbi,
  1284				      struct ext4_super_block *es, const void *arg)
  1285	{
  1286		const struct ext4_tune_sb_params *params = arg;
  1287	
  1288		if (params->set_flags & EXT4_TUNE_FL_ERRORS_BEHAVIOR)
  1289			es->s_errors = cpu_to_le16(params->errors_behavior);
  1290		if (params->set_flags & EXT4_TUNE_FL_MNT_COUNT)
  1291			es->s_mnt_count = cpu_to_le16(params->mnt_count);
  1292		if (params->set_flags & EXT4_TUNE_FL_MAX_MNT_COUNT)
  1293			es->s_max_mnt_count = cpu_to_le16(params->max_mnt_count);
  1294		if (params->set_flags & EXT4_TUNE_FL_CHECKINTRVAL)
  1295			es->s_checkinterval = cpu_to_le32(params->checkinterval);
  1296		if (params->set_flags & EXT4_TUNE_FL_LAST_CHECK_TIME)
  1297			es->s_lastcheck = cpu_to_le32(params->last_check_time);
  1298		if (params->set_flags & EXT4_TUNE_FL_RESERVED_BLOCKS) {
  1299			ext4_fsblk_t blk = params->reserved_blocks;
  1300	
  1301			es->s_r_blocks_count_lo = cpu_to_le32((u32)blk);
  1302			es->s_r_blocks_count_hi = cpu_to_le32(blk >> 32);
  1303		}
  1304		if (params->set_flags & EXT4_TUNE_FL_RESERVED_UID) {
  1305			int uid = params->reserved_uid;
  1306	
  1307			es->s_def_resuid = cpu_to_le16(uid & 0xFFFF);
  1308			es->s_def_resuid_hi = cpu_to_le16(uid >> 16);
  1309		}
  1310		if (params->set_flags & EXT4_TUNE_FL_RESERVED_GID) {
  1311			int gid = params->reserved_gid;
  1312	
  1313			es->s_def_resgid = cpu_to_le16(gid & 0xFFFF);
  1314			es->s_def_resgid_hi = cpu_to_le16(gid >> 16);
  1315		}
  1316		if (params->set_flags & EXT4_TUNE_FL_DEFAULT_MNT_OPTS)
  1317			es->s_default_mount_opts = cpu_to_le32(params->default_mnt_opts);
  1318		if (params->set_flags & EXT4_TUNE_FL_DEF_HASH_ALG)
  1319			es->s_def_hash_version = params->def_hash_alg;
  1320		if (params->set_flags & EXT4_TUNE_FL_RAID_STRIDE)
  1321			es->s_raid_stride = cpu_to_le16(params->raid_stride);
  1322		if (params->set_flags & EXT4_TUNE_FL_RAID_STRIPE_WIDTH)
> 1323			es->s_raid_stripe_width =
  1324				cpu_to_le16(params->raid_stripe_width);
  1325		strscpy_pad(es->s_mount_opts, params->mount_opts);
  1326		if (params->set_flags & EXT4_TUNE_FL_EDIT_FEATURES) {
  1327			es->s_feature_compat |=
  1328				cpu_to_le32(params->set_feature_compat_mask);
  1329			es->s_feature_incompat |=
  1330				cpu_to_le32(params->set_feature_incompat_mask);
  1331			es->s_feature_ro_compat |=
  1332				cpu_to_le32(params->set_feature_ro_compat_mask);
  1333			es->s_feature_compat &=
  1334				~cpu_to_le32(params->clear_feature_compat_mask);
  1335			es->s_feature_incompat &=
  1336				~cpu_to_le32(params->clear_feature_incompat_mask);
  1337			es->s_feature_ro_compat &=
  1338				~cpu_to_le32(params->clear_feature_ro_compat_mask);
  1339			if (params->set_feature_compat_mask &
  1340			    EXT4_FEATURE_COMPAT_DIR_INDEX)
  1341				es->s_def_hash_version = sbi->s_def_hash_version;
  1342			if (params->set_feature_incompat_mask &
  1343			    EXT4_FEATURE_INCOMPAT_CSUM_SEED)
  1344				es->s_checksum_seed = cpu_to_le32(sbi->s_csum_seed);
  1345		}
  1346		if (params->set_flags & EXT4_TUNE_FL_FORCE_FSCK)
  1347			es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
  1348	}
  1349	

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

^ permalink raw reply

* Re: [PATCH 1/3] ext4: avoid potential buffer over-read in parse_apply_sb_mount_options()
From: Darrick J. Wong @ 2025-09-11 22:27 UTC (permalink / raw)
  To: tytso; +Cc: linux-ext4, linux-api, stable
In-Reply-To: <20250908-tune2fs-v1-1-e3a6929f3355@mit.edu>

On Mon, Sep 08, 2025 at 11:15:48PM -0400, Theodore Ts'o via B4 Relay wrote:
> From: Theodore Ts'o <tytso@mit.edu>
> 
> Unlike other strings in the ext4 superblock, we rely on tune2fs to
> make sure s_mount_opts is NUL terminated.  Harden
> parse_apply_sb_mount_options() by treating s_mount_opts as a potential
> __nonstring.

Uh.... does that mean that a filesystem with exactly 64 bytes worth of
mount option string (and no trailing null) could do something malicious?

My guess is that s_usr_quota_inum mostly saves us, but a nastycrafted
filesystem with more than 2^24 inodes could cause an out of bounds
memory access?  But that most likely will just fail the mount option
parser anyway?

--D

> 
> Cc: stable@vger.kernel.org
> Fixes: 8b67f04ab9de ("ext4: Add mount options in superblock")
> Signed-off-by: Theodore Ts'o <tytso@mit.edu>
> ---
>  fs/ext4/super.c | 17 +++++------------
>  1 file changed, 5 insertions(+), 12 deletions(-)
> 
> diff --git a/fs/ext4/super.c b/fs/ext4/super.c
> index 699c15db28a82f26809bf68533454a242596f0fd..94c98446c84f9a4614971d246ca7f001de610a8a 100644
> --- a/fs/ext4/super.c
> +++ b/fs/ext4/super.c
> @@ -2460,7 +2460,7 @@ static int parse_apply_sb_mount_options(struct super_block *sb,
>  					struct ext4_fs_context *m_ctx)
>  {
>  	struct ext4_sb_info *sbi = EXT4_SB(sb);
> -	char *s_mount_opts = NULL;
> +	char s_mount_opts[65];
>  	struct ext4_fs_context *s_ctx = NULL;
>  	struct fs_context *fc = NULL;
>  	int ret = -ENOMEM;
> @@ -2468,15 +2468,11 @@ static int parse_apply_sb_mount_options(struct super_block *sb,
>  	if (!sbi->s_es->s_mount_opts[0])
>  		return 0;
>  
> -	s_mount_opts = kstrndup(sbi->s_es->s_mount_opts,
> -				sizeof(sbi->s_es->s_mount_opts),
> -				GFP_KERNEL);
> -	if (!s_mount_opts)
> -		return ret;
> +	strscpy_pad(s_mount_opts, sbi->s_es->s_mount_opts);
>  
>  	fc = kzalloc(sizeof(struct fs_context), GFP_KERNEL);
>  	if (!fc)
> -		goto out_free;
> +		return -ENOMEM;
>  
>  	s_ctx = kzalloc(sizeof(struct ext4_fs_context), GFP_KERNEL);
>  	if (!s_ctx)
> @@ -2508,11 +2504,8 @@ static int parse_apply_sb_mount_options(struct super_block *sb,
>  	ret = 0;
>  
>  out_free:
> -	if (fc) {
> -		ext4_fc_free(fc);
> -		kfree(fc);
> -	}
> -	kfree(s_mount_opts);
> +	ext4_fc_free(fc);
> +	kfree(fc);
>  	return ret;
>  }
>  
> 
> -- 
> 2.51.0
> 
> 
> 

^ permalink raw reply

* Re: [PATCH 2/3] ext4: add support for 32-bit default reserved uid and gid values
From: Darrick J. Wong @ 2025-09-11 22:31 UTC (permalink / raw)
  To: tytso; +Cc: linux-ext4, linux-api
In-Reply-To: <20250908-tune2fs-v1-2-e3a6929f3355@mit.edu>

On Mon, Sep 08, 2025 at 11:15:49PM -0400, Theodore Ts'o via B4 Relay wrote:
> From: Theodore Ts'o <tytso@mit.edu>
> 
> Support for specifying the default user id and group id that is
> allowed to use the reserved block space was added way back when Linux
> only supported 16-bit uid's and gid's.  (Yeah, that long ago.)  It's
> not a commonly used feature, but let's add support for 32-bit user and
> group id's.
> 
> Signed-off-by: Theodore Ts'o <tytso@mit.edu>
> ---
>  fs/ext4/ext4.h  | 16 +++++++++++++++-
>  fs/ext4/super.c |  8 ++++----
>  2 files changed, 19 insertions(+), 5 deletions(-)
> 
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index 01a6e2de7fc3ef0e20b039d3200b9c9bd656f59f..4bfcd5f0c74fda30db4009ee28fbee00a2f6b76f 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -1442,7 +1442,9 @@ struct ext4_super_block {
>  	__le16  s_encoding;		/* Filename charset encoding */
>  	__le16  s_encoding_flags;	/* Filename charset encoding flags */
>  	__le32  s_orphan_file_inum;	/* Inode for tracking orphan inodes */
> -	__le32	s_reserved[94];		/* Padding to the end of the block */
> +	__le16	s_def_resuid_hi;
> +	__le16	s_def_resgid_hi;
> +	__le32	s_reserved[93];		/* Padding to the end of the block */

Does anything actually check that s_reserved is zero?  I couldn't find
any:

$ git grep -w s_reserved fs/ext4 fs/ext2
fs/ext2/ext2.h:480:     __u32   s_reserved[190];        /* Padding to the end of the block */
fs/ext4/ext4.h:1445:    __le32  s_reserved[94];         /* Padding to the end of the block */

$ git grep -w s_reserved lib/ext2fs/ e2fsck/
lib/ext2fs/ext2_fs.h:777:       __le32  s_reserved[94];         /* Padding to the end of the block */
lib/ext2fs/swapfs.c:135:        /* catch when new fields are used from s_reserved */
lib/ext2fs/swapfs.c:136:        EXT2FS_BUILD_BUG_ON(sizeof(sb->s_reserved) != 94 * sizeof(__le32));
lib/ext2fs/tst_super_size.c:156:        check_field(s_reserved, 94 * 4);

Is there a risk that some garbage written to s_reserved (and not caught
by either the kernel or e2fsck) will now appear as a "legitimate" resuid
value?

--D

>  	__le32	s_checksum;		/* crc32c(superblock) */
>  };
>  
> @@ -1812,6 +1814,18 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
>  		 ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count));
>  }
>  
> +static inline int ext4_get_resuid(struct ext4_super_block *es)
> +{
> +	return(le16_to_cpu(es->s_def_resuid) |
> +	       (le16_to_cpu(es->s_def_resuid_hi) << 16));
> +}
> +
> +static inline int ext4_get_resgid(struct ext4_super_block *es)
> +{
> +	return(le16_to_cpu(es->s_def_resgid) |
> +	       (le16_to_cpu(es->s_def_resgid_hi) << 16));
> +}
> +
>  /*
>   * Returns: sbi->field[index]
>   * Used to access an array element from the following sbi fields which require
> diff --git a/fs/ext4/super.c b/fs/ext4/super.c
> index 94c98446c84f9a4614971d246ca7f001de610a8a..0256c8f7c6cee2b8d9295f2fa9a7acd904382e83 100644
> --- a/fs/ext4/super.c
> +++ b/fs/ext4/super.c
> @@ -2951,11 +2951,11 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
>  	}
>  
>  	if (nodefs || !uid_eq(sbi->s_resuid, make_kuid(&init_user_ns, EXT4_DEF_RESUID)) ||
> -	    le16_to_cpu(es->s_def_resuid) != EXT4_DEF_RESUID)
> +	    ext4_get_resuid(es) != EXT4_DEF_RESUID)
>  		SEQ_OPTS_PRINT("resuid=%u",
>  				from_kuid_munged(&init_user_ns, sbi->s_resuid));
>  	if (nodefs || !gid_eq(sbi->s_resgid, make_kgid(&init_user_ns, EXT4_DEF_RESGID)) ||
> -	    le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID)
> +	    ext4_get_resgid(es) != EXT4_DEF_RESGID)
>  		SEQ_OPTS_PRINT("resgid=%u",
>  				from_kgid_munged(&init_user_ns, sbi->s_resgid));
>  	def_errors = nodefs ? -1 : le16_to_cpu(es->s_errors);
> @@ -5270,8 +5270,8 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
>  
>  	ext4_set_def_opts(sb, es);
>  
> -	sbi->s_resuid = make_kuid(&init_user_ns, le16_to_cpu(es->s_def_resuid));
> -	sbi->s_resgid = make_kgid(&init_user_ns, le16_to_cpu(es->s_def_resgid));
> +	sbi->s_resuid = make_kuid(&init_user_ns, ext4_get_resuid(es));
> +	sbi->s_resgid = make_kgid(&init_user_ns, ext4_get_resuid(es));
>  	sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ;
>  	sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME;
>  	sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME;
> 
> -- 
> 2.51.0
> 
> 
> 

^ permalink raw reply

* Re: [PATCH 3/3] ext4: implemet new ioctls to set and get superblock parameters
From: Darrick J. Wong @ 2025-09-11 22:40 UTC (permalink / raw)
  To: tytso; +Cc: linux-ext4, linux-api
In-Reply-To: <20250908-tune2fs-v1-3-e3a6929f3355@mit.edu>

On Mon, Sep 08, 2025 at 11:15:50PM -0400, Theodore Ts'o via B4 Relay wrote:
> From: Theodore Ts'o <tytso@mit.edu>
> 
> Implement the EXT4_IOC_GET_TUNE_SB_PARAM and
> EXT4_IOC_SET_TUNE_SB_PARAM ioctls, which allow certains superblock
> parameters to be set while the file system is mounted, without needing
> write access to the block device.
> 
> Signed-off-by: Theodore Ts'o <tytso@mit.edu>
> ---
>  fs/ext4/ioctl.c           | 256 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
>  include/uapi/linux/ext4.h |  75 ++++++++++++++++++++++
>  2 files changed, 324 insertions(+), 7 deletions(-)
> 
> diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
> index 84e3c73952d72e436429489f5fc8b7ae1c01c7a1..569c98c962af63130c0119f60788a26a2807bd86 100644
> --- a/fs/ext4/ioctl.c
> +++ b/fs/ext4/ioctl.c
> @@ -27,14 +27,16 @@
>  #include "fsmap.h"
>  #include <trace/events/ext4.h>
>  
> -typedef void ext4_update_sb_callback(struct ext4_super_block *es,
> -				       const void *arg);
> +typedef void ext4_update_sb_callback(struct ext4_sb_info *sbi,
> +				     struct ext4_super_block *es,
> +				     const void *arg);
>  
>  /*
>   * Superblock modification callback function for changing file system
>   * label
>   */
> -static void ext4_sb_setlabel(struct ext4_super_block *es, const void *arg)
> +static void ext4_sb_setlabel(struct ext4_sb_info *sbi,
> +			     struct ext4_super_block *es, const void *arg)
>  {
>  	/* Sanity check, this should never happen */
>  	BUILD_BUG_ON(sizeof(es->s_volume_name) < EXT4_LABEL_MAX);
> @@ -46,7 +48,8 @@ static void ext4_sb_setlabel(struct ext4_super_block *es, const void *arg)
>   * Superblock modification callback function for changing file system
>   * UUID.
>   */
> -static void ext4_sb_setuuid(struct ext4_super_block *es, const void *arg)
> +static void ext4_sb_setuuid(struct ext4_sb_info *sbi,
> +			    struct ext4_super_block *es, const void *arg)
>  {
>  	memcpy(es->s_uuid, (__u8 *)arg, UUID_SIZE);
>  }
> @@ -71,7 +74,7 @@ int ext4_update_primary_sb(struct super_block *sb, handle_t *handle,
>  		goto out_err;
>  
>  	lock_buffer(bh);
> -	func(es, arg);
> +	func(sbi, es, arg);
>  	ext4_superblock_csum_set(sb);
>  	unlock_buffer(bh);
>  
> @@ -149,7 +152,7 @@ static int ext4_update_backup_sb(struct super_block *sb,
>  		unlock_buffer(bh);
>  		goto out_bh;
>  	}
> -	func(es, arg);
> +	func(EXT4_SB(sb), es, arg);
>  	if (ext4_has_feature_metadata_csum(sb))
>  		es->s_checksum = ext4_superblock_csum(es);
>  	set_buffer_uptodate(bh);
> @@ -1230,6 +1233,239 @@ static int ext4_ioctl_setuuid(struct file *filp,
>  	return ret;
>  }
>  
> +
> +#define TUNE_OPS_SUPPORTED (EXT4_TUNE_FL_ERRORS_BEHAVIOR |    \
> +	EXT4_TUNE_FL_MNT_COUNT | EXT4_TUNE_FL_MAX_MNT_COUNT | \
> +	EXT4_TUNE_FL_CHECKINTRVAL | EXT4_TUNE_FL_LAST_CHECK_TIME | \
> +	EXT4_TUNE_FL_RESERVED_BLOCKS | EXT4_TUNE_FL_RESERVED_UID | \
> +	EXT4_TUNE_FL_RESERVED_GID | EXT4_TUNE_FL_DEFAULT_MNT_OPTS | \
> +	EXT4_TUNE_FL_DEF_HASH_ALG | EXT4_TUNE_FL_RAID_STRIDE | \
> +	EXT4_TUNE_FL_RAID_STRIPE_WIDTH | EXT4_TUNE_FL_MOUNT_OPTS | \
> +	EXT4_TUNE_FL_FEATURES | EXT4_TUNE_FL_EDIT_FEATURES | \
> +	EXT4_TUNE_FL_FORCE_FSCK)
> +
> +static int ext4_ioctl_get_tune_sb(struct ext4_sb_info *sbi,
> +				  struct ext4_tune_sb_params __user *params)
> +{
> +	struct ext4_tune_sb_params ret;
> +	struct ext4_super_block *es = sbi->s_es;
> +
> +	memset(&ret, 0, sizeof(ret));
> +	ret.set_flags = TUNE_OPS_SUPPORTED;
> +	ret.errors_behavior = es->s_errors;
> +	ret.mnt_count = le16_to_cpu(es->s_mnt_count);
> +	ret.max_mnt_count = le16_to_cpu(es->s_max_mnt_count);
> +	ret.checkinterval = le32_to_cpu(es->s_checkinterval);
> +	ret.last_check_time = le32_to_cpu(es->s_lastcheck);
> +	ret.reserved_blocks = ext4_r_blocks_count(es);
> +	ret.blocks_count = ext4_blocks_count(es);
> +	ret.reserved_uid = ext4_get_resuid(es);
> +	ret.reserved_gid = ext4_get_resgid(es);
> +	ret.default_mnt_opts = le32_to_cpu(es->s_default_mount_opts);
> +	ret.def_hash_alg = es->s_def_hash_version;
> +	ret.raid_stride = le16_to_cpu(es->s_raid_stride);
> +	ret.raid_stripe_width = le16_to_cpu(es->s_raid_stripe_width);
> +	strscpy_pad(ret.mount_opts, es->s_mount_opts);
> +	ret.feature_compat = le32_to_cpu(es->s_feature_compat);
> +	ret.feature_incompat = le32_to_cpu(es->s_feature_incompat);
> +	ret.feature_ro_compat = le32_to_cpu(es->s_feature_ro_compat);
> +	ret.set_feature_compat_mask = EXT4_TUNE_SET_COMPAT_SUPP;
> +	ret.set_feature_incompat_mask = EXT4_TUNE_SET_INCOMPAT_SUPP;
> +	ret.set_feature_ro_compat_mask = EXT4_TUNE_SET_RO_COMPAT_SUPP;
> +	ret.clear_feature_compat_mask = EXT4_TUNE_CLEAR_COMPAT_SUPP;
> +	ret.clear_feature_incompat_mask = EXT4_TUNE_CLEAR_INCOMPAT_SUPP;
> +	ret.clear_feature_ro_compat_mask = EXT4_TUNE_CLEAR_RO_COMPAT_SUPP;
> +	if (copy_to_user(params, &ret, sizeof(ret)))
> +		return -EFAULT;
> +	return 0;
> +}
> +
> +static void ext4_sb_setparams(struct ext4_sb_info *sbi,
> +			      struct ext4_super_block *es, const void *arg)
> +{
> +	const struct ext4_tune_sb_params *params = arg;
> +
> +	if (params->set_flags & EXT4_TUNE_FL_ERRORS_BEHAVIOR)
> +		es->s_errors = cpu_to_le16(params->errors_behavior);
> +	if (params->set_flags & EXT4_TUNE_FL_MNT_COUNT)
> +		es->s_mnt_count = cpu_to_le16(params->mnt_count);
> +	if (params->set_flags & EXT4_TUNE_FL_MAX_MNT_COUNT)
> +		es->s_max_mnt_count = cpu_to_le16(params->max_mnt_count);
> +	if (params->set_flags & EXT4_TUNE_FL_CHECKINTRVAL)
> +		es->s_checkinterval = cpu_to_le32(params->checkinterval);
> +	if (params->set_flags & EXT4_TUNE_FL_LAST_CHECK_TIME)
> +		es->s_lastcheck = cpu_to_le32(params->last_check_time);
> +	if (params->set_flags & EXT4_TUNE_FL_RESERVED_BLOCKS) {
> +		ext4_fsblk_t blk = params->reserved_blocks;
> +
> +		es->s_r_blocks_count_lo = cpu_to_le32((u32)blk);
> +		es->s_r_blocks_count_hi = cpu_to_le32(blk >> 32);
> +	}
> +	if (params->set_flags & EXT4_TUNE_FL_RESERVED_UID) {
> +		int uid = params->reserved_uid;
> +
> +		es->s_def_resuid = cpu_to_le16(uid & 0xFFFF);
> +		es->s_def_resuid_hi = cpu_to_le16(uid >> 16);
> +	}
> +	if (params->set_flags & EXT4_TUNE_FL_RESERVED_GID) {
> +		int gid = params->reserved_gid;
> +
> +		es->s_def_resgid = cpu_to_le16(gid & 0xFFFF);
> +		es->s_def_resgid_hi = cpu_to_le16(gid >> 16);
> +	}
> +	if (params->set_flags & EXT4_TUNE_FL_DEFAULT_MNT_OPTS)
> +		es->s_default_mount_opts = cpu_to_le32(params->default_mnt_opts);
> +	if (params->set_flags & EXT4_TUNE_FL_DEF_HASH_ALG)
> +		es->s_def_hash_version = params->def_hash_alg;
> +	if (params->set_flags & EXT4_TUNE_FL_RAID_STRIDE)
> +		es->s_raid_stride = cpu_to_le16(params->raid_stride);
> +	if (params->set_flags & EXT4_TUNE_FL_RAID_STRIPE_WIDTH)
> +		es->s_raid_stripe_width =
> +			cpu_to_le16(params->raid_stripe_width);
> +	strscpy_pad(es->s_mount_opts, params->mount_opts);
> +	if (params->set_flags & EXT4_TUNE_FL_EDIT_FEATURES) {
> +		es->s_feature_compat |=
> +			cpu_to_le32(params->set_feature_compat_mask);
> +		es->s_feature_incompat |=
> +			cpu_to_le32(params->set_feature_incompat_mask);
> +		es->s_feature_ro_compat |=
> +			cpu_to_le32(params->set_feature_ro_compat_mask);
> +		es->s_feature_compat &=
> +			~cpu_to_le32(params->clear_feature_compat_mask);
> +		es->s_feature_incompat &=
> +			~cpu_to_le32(params->clear_feature_incompat_mask);
> +		es->s_feature_ro_compat &=
> +			~cpu_to_le32(params->clear_feature_ro_compat_mask);
> +		if (params->set_feature_compat_mask &
> +		    EXT4_FEATURE_COMPAT_DIR_INDEX)
> +			es->s_def_hash_version = sbi->s_def_hash_version;
> +		if (params->set_feature_incompat_mask &
> +		    EXT4_FEATURE_INCOMPAT_CSUM_SEED)
> +			es->s_checksum_seed = cpu_to_le32(sbi->s_csum_seed);
> +	}
> +	if (params->set_flags & EXT4_TUNE_FL_FORCE_FSCK)
> +		es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
> +}
> +
> +static int ext4_ioctl_set_tune_sb(struct file *filp,
> +				  struct ext4_tune_sb_params __user *in)
> +{
> +	struct ext4_tune_sb_params params;
> +	struct super_block *sb = file_inode(filp)->i_sb;
> +	struct ext4_sb_info *sbi = EXT4_SB(sb);
> +	struct ext4_super_block *es = sbi->s_es;
> +	int ret;
> +
> +	if (!capable(CAP_SYS_ADMIN))
> +		return -EPERM;
> +
> +	if (copy_from_user(&params, in, sizeof(params)))
> +		return -EFAULT;
> +
> +	if ((params.set_flags & ~TUNE_OPS_SUPPORTED) != 0)
> +		return -EOPNOTSUPP;
> +
> +	if ((params.set_flags & EXT4_TUNE_FL_ERRORS_BEHAVIOR) &&
> +	    (params.errors_behavior > EXT4_ERRORS_PANIC))
> +		return -EINVAL;
> +
> +	if ((params.set_flags & EXT4_TUNE_FL_RESERVED_BLOCKS) &&
> +	    (params.reserved_blocks > ext4_blocks_count(sbi->s_es) / 2))
> +		return -EINVAL;
> +	if ((params.set_flags & EXT4_TUNE_FL_DEF_HASH_ALG) &&
> +	    ((params.def_hash_alg > DX_HASH_LAST) ||
> +	     (params.def_hash_alg == DX_HASH_SIPHASH)))
> +		return -EINVAL;
> +	if ((params.set_flags & EXT4_TUNE_FL_FEATURES) &&
> +	    (params.set_flags & EXT4_TUNE_FL_EDIT_FEATURES))
> +		return -EINVAL;

What's the difference between _FL_FEATURES and _FL_EDIT_FEATURES?

> +
> +	if (params.set_flags & EXT4_TUNE_FL_FEATURES) {
> +		params.set_feature_compat_mask =
> +			params.feature_compat &
> +			~le32_to_cpu(es->s_feature_compat);
> +		params.set_feature_incompat_mask =
> +			params.feature_incompat &
> +			~le32_to_cpu(es->s_feature_incompat);
> +		params.set_feature_ro_compat_mask =
> +			params.feature_ro_compat &
> +			~le32_to_cpu(es->s_feature_ro_compat);
> +		params.clear_feature_compat_mask =
> +			~params.feature_compat &
> +			le32_to_cpu(es->s_feature_compat);
> +		params.clear_feature_incompat_mask =
> +			~params.feature_incompat &
> +			le32_to_cpu(es->s_feature_incompat);
> +		params.clear_feature_ro_compat_mask =
> +			~params.feature_ro_compat &
> +			le32_to_cpu(es->s_feature_ro_compat);
> +		params.set_flags |= EXT4_TUNE_FL_EDIT_FEATURES;
> +	}
> +	if (params.set_flags & EXT4_TUNE_FL_EDIT_FEATURES) {
> +		if ((params.set_feature_compat_mask &
> +		     ~EXT4_TUNE_SET_COMPAT_SUPP) ||
> +		    (params.set_feature_incompat_mask &
> +		     ~EXT4_TUNE_SET_INCOMPAT_SUPP) ||
> +		    (params.set_feature_ro_compat_mask &
> +		     ~EXT4_TUNE_SET_RO_COMPAT_SUPP) ||
> +		    (params.clear_feature_compat_mask &
> +		     ~EXT4_TUNE_CLEAR_COMPAT_SUPP) ||
> +		    (params.clear_feature_incompat_mask &
> +		     ~EXT4_TUNE_CLEAR_INCOMPAT_SUPP) ||
> +		    (params.clear_feature_ro_compat_mask &
> +		     ~EXT4_TUNE_CLEAR_RO_COMPAT_SUPP))
> +			return -EOPNOTSUPP;
> +
> +		/*
> +		 * Filter out the features that are already set from
> +		 * the set_mask.
> +		 */
> +		params.set_feature_compat_mask &=
> +			~le32_to_cpu(es->s_feature_compat);
> +		params.set_feature_incompat_mask &=
> +			~le32_to_cpu(es->s_feature_incompat);
> +		params.set_feature_ro_compat_mask &=
> +			~le32_to_cpu(es->s_feature_ro_compat);
> +		if ((params.set_feature_compat_mask &
> +		     EXT4_FEATURE_COMPAT_DIR_INDEX) &&
> +		    !ext4_has_feature_dir_index(sb)) {
> +			uuid_t	uu;
> +
> +			memcpy(&uu, sbi->s_hash_seed, UUID_SIZE);
> +			if (uuid_is_null(&uu))
> +				generate_random_uuid((char *)
> +						     &sbi->s_hash_seed);
> +			if (params.set_flags & EXT4_TUNE_FL_DEF_HASH_ALG)
> +				sbi->s_def_hash_version = params.def_hash_alg;
> +			else if (sbi->s_def_hash_version == 0)
> +				sbi->s_def_hash_version = DX_HASH_HALF_MD4;
> +			if (!(es->s_flags &
> +			      cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH)) &&
> +			    !(es->s_flags &
> +			      cpu_to_le32(EXT2_FLAGS_SIGNED_HASH))) {
> +#ifdef __CHAR_UNSIGNED__
> +				sbi->s_hash_unsigned = 3;
> +#else
> +				sbi->s_hash_unsigned = 0;
> +#endif
> +			}
> +		}
> +	}
> +
> +
> +	ret = mnt_want_write_file(filp);
> +	if (ret)
> +		return ret;
> +
> +	ret = ext4_update_superblocks_fn(sb, ext4_sb_setparams, &params);
> +	mnt_drop_write_file(filp);
> +
> +	if (params.set_flags & EXT4_TUNE_FL_DEF_HASH_ALG)
> +		sbi->s_def_hash_version = params.def_hash_alg;
> +
> +	return ret;
> +}
> +
>  static long __ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
>  {
>  	struct inode *inode = file_inode(filp);
> @@ -1616,6 +1852,11 @@ static long __ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
>  		return ext4_ioctl_getuuid(EXT4_SB(sb), (void __user *)arg);
>  	case EXT4_IOC_SETFSUUID:
>  		return ext4_ioctl_setuuid(filp, (const void __user *)arg);
> +	case EXT4_IOC_GET_TUNE_SB_PARAM:
> +		return ext4_ioctl_get_tune_sb(EXT4_SB(sb),
> +					      (void __user *)arg);
> +	case EXT4_IOC_SET_TUNE_SB_PARAM:
> +		return ext4_ioctl_set_tune_sb(filp, (void __user *)arg);
>  	default:
>  		return -ENOTTY;
>  	}
> @@ -1703,7 +1944,8 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
>  }
>  #endif
>  
> -static void set_overhead(struct ext4_super_block *es, const void *arg)
> +static void set_overhead(struct ext4_sb_info *sbi,
> +			 struct ext4_super_block *es, const void *arg)
>  {
>  	es->s_overhead_clusters = cpu_to_le32(*((unsigned long *) arg));
>  }
> diff --git a/include/uapi/linux/ext4.h b/include/uapi/linux/ext4.h
> index 1c4c2dd29112cda9f7dc91d917492cffc33ee524..145875fd633772e76ce7fd8bc0fef136ff620d2d 100644
> --- a/include/uapi/linux/ext4.h
> +++ b/include/uapi/linux/ext4.h
> @@ -33,6 +33,8 @@
>  #define EXT4_IOC_CHECKPOINT		_IOW('f', 43, __u32)
>  #define EXT4_IOC_GETFSUUID		_IOR('f', 44, struct fsuuid)
>  #define EXT4_IOC_SETFSUUID		_IOW('f', 44, struct fsuuid)
> +#define EXT4_IOC_GET_TUNE_SB_PARAM	_IOR('f', 45, struct ext4_tune_sb_params)
> +#define EXT4_IOC_SET_TUNE_SB_PARAM	_IOW('f', 46, struct ext4_tune_sb_params)
>  
>  #define EXT4_IOC_SHUTDOWN _IOR('X', 125, __u32)
>  
> @@ -108,6 +110,79 @@ struct ext4_new_group_input {
>  	__u16 unused;
>  };
>  
> +struct ext4_tune_sb_params {
> +	__u32 set_flags;
> +	__u32 checkinterval;
> +	__u16 errors_behavior;
> +	__u16 mnt_count;
> +	__u16 max_mnt_count;
> +	__u16 raid_stride;
> +	__u64 last_check_time;
> +	__u64 reserved_blocks;
> +	__u64 blocks_count;
> +	__u32 default_mnt_opts;
> +	__u32 reserved_uid;
> +	__u32 reserved_gid;
> +	__u32 raid_stripe_width;
> +	__u8  def_hash_alg;
> +	__u8  pad_1;
> +	__u16 pad_2;
> +	__u32 feature_compat;
> +	__u32 feature_incompat;
> +	__u32 feature_ro_compat;
> +	__u32 set_feature_compat_mask;
> +	__u32 set_feature_incompat_mask;
> +	__u32 set_feature_ro_compat_mask;
> +	__u32 clear_feature_compat_mask;
> +	__u32 clear_feature_incompat_mask;
> +	__u32 clear_feature_ro_compat_mask;
> +	__u8  mount_opts[64];
> +	__u8  pad[64];
> +};
> +
> +#define EXT4_TUNE_FL_ERRORS_BEHAVIOR	0x00000001
> +#define EXT4_TUNE_FL_MNT_COUNT		0x00000002
> +#define EXT4_TUNE_FL_MAX_MNT_COUNT	0x00000004
> +#define EXT4_TUNE_FL_CHECKINTRVAL	0x00000008
> +#define EXT4_TUNE_FL_LAST_CHECK_TIME	0x00000010
> +#define EXT4_TUNE_FL_RESERVED_BLOCKS	0x00000020
> +#define EXT4_TUNE_FL_RESERVED_UID	0x00000040
> +#define EXT4_TUNE_FL_RESERVED_GID	0x00000080
> +#define EXT4_TUNE_FL_DEFAULT_MNT_OPTS	0x00000100
> +#define EXT4_TUNE_FL_DEF_HASH_ALG	0x00000200
> +#define EXT4_TUNE_FL_RAID_STRIDE	0x00000400
> +#define EXT4_TUNE_FL_RAID_STRIPE_WIDTH	0x00000800
> +#define EXT4_TUNE_FL_MOUNT_OPTS		0x00001000
> +#define EXT4_TUNE_FL_FEATURES		0x00002000
> +#define EXT4_TUNE_FL_EDIT_FEATURES	0x00004000
> +#define EXT4_TUNE_FL_FORCE_FSCK		0x00008000
> +
> +#define EXT4_TUNE_SET_COMPAT_SUPP \
> +		(EXT4_FEATURE_COMPAT_DIR_INDEX |	\
> +		 EXT4_FEATURE_COMPAT_STABLE_INODES)
> +#define EXT4_TUNE_SET_INCOMPAT_SUPP \
> +		(EXT4_FEATURE_INCOMPAT_EXTENTS |	\
> +		 EXT4_FEATURE_INCOMPAT_EA_INODE |	\
> +		 EXT4_FEATURE_INCOMPAT_ENCRYPT |	\
> +		 EXT4_FEATURE_INCOMPAT_CSUM_SEED |	\
> +		 EXT4_FEATURE_INCOMPAT_LARGEDIR |	\
> +		 EXT4_FEATURE_INCOMPAT_CASEFOLD)
> +#define EXT4_TUNE_SET_RO_COMPAT_SUPP \
> +		(EXT4_FEATURE_RO_COMPAT_LARGE_FILE |	\
> +		 EXT4_FEATURE_RO_COMPAT_DIR_NLINK |	\
> +		 EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE |	\
> +		 EXT4_FEATURE_RO_COMPAT_READONLY |	\
> +		 EXT4_FEATURE_RO_COMPAT_PROJECT |	\
> +		 EXT4_FEATURE_RO_COMPAT_VERITY)
> +
> +#define EXT4_TUNE_CLEAR_COMPAT_SUPP (0)
> +#define EXT4_TUNE_CLEAR_INCOMPAT_SUPP (0)
> +#define EXT4_TUNE_CLEAR_RO_COMPAT_SUPP \
> +		(EXT4_FEATURE_RO_COMPAT_LARGE_FILE |	\
> +		 EXT4_FEATURE_RO_COMPAT_DIR_NLINK |	\
> +		 EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE |	\
> +		 EXT4_FEATURE_RO_COMPAT_PROJECT)

Is it actually safe to clear these without scanning the filesystem to
make sure nobody's using these features?

--D

> +
>  /*
>   * Returned by EXT4_IOC_GET_ES_CACHE as an additional possible flag.
>   * It indicates that the entry in extent status cache is for a hole.
> 
> -- 
> 2.51.0
> 
> 
> 

^ permalink raw reply

* Re: [PATCH 1/3] ext4: avoid potential buffer over-read in parse_apply_sb_mount_options()
From: Theodore Ts'o @ 2025-09-12  2:12 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-ext4, linux-api, stable, Kees Cook, jannh
In-Reply-To: <20250911222700.GC8084@frogsfrogsfrogs>

On Thu, Sep 11, 2025 at 03:27:00PM -0700, Darrick J. Wong wrote:
> On Mon, Sep 08, 2025 at 11:15:48PM -0400, Theodore Ts'o via B4 Relay wrote:
> > From: Theodore Ts'o <tytso@mit.edu>
> > 
> > Unlike other strings in the ext4 superblock, we rely on tune2fs to
> > make sure s_mount_opts is NUL terminated.  Harden
> > parse_apply_sb_mount_options() by treating s_mount_opts as a potential
> > __nonstring.
> 
> Uh.... does that mean that a filesystem with exactly 64 bytes worth of
> mount option string (and no trailing null) could do something malicious?

Maybe.... I'm surprised syzkaller hasn't managed to create a
maliciously fuzzed file system along these lines.

This was one of the things that I found while I was poking about in
code that I hadn't examined in years.  And I guess the kernel
hardening folks have been looking for strndup() as a deprecated
interface, but apparently they haven't targetted kstrndup() yet.

> My guess is that s_usr_quota_inum mostly saves us, but a nastycrafted
> filesystem with more than 2^24 inodes could cause an out of bounds
> memory access?  But that most likely will just fail the mount option
> parser anyway?

Actually, s_usr_quota_inum won't help, because s_mount_opts is copied
into allocated memory using kstrndup().  So the buffer overrun is
going to be in the allocated memory buffer, and since parse_options()
uses strsep() it could potentially modify an adajacent string/buffer
by replacing ',' and '=' bytes with NUL characters.  I'll leave to
security engineers to see if they can turn it into a usuable exploit,
although I've always said that mounting untrusted file systems isn't a
wise thing for a paranoid system administrator to do/allow, which is
why I'm a big fan of your fuse2fs work.  :-)

						- Ted

^ permalink raw reply

* Re: [PATCH 2/3] ext4: add support for 32-bit default reserved uid and gid values
From: Theodore Ts'o @ 2025-09-12  2:57 UTC (permalink / raw)
  To: Darrick J. Wong, G; +Cc: linux-ext4, linux-api
In-Reply-To: <20250911223121.GD8084@frogsfrogsfrogs>

On Thu, Sep 11, 2025 at 03:31:21PM -0700, Darrick J. Wong wrote:
> 
> Is there a risk that some garbage written to s_reserved (and not caught
> by either the kernel or e2fsck) will now appear as a "legitimate" resuid
> value?

The superblock is checksumed, so the risk would be that some
impleentation modifies the superblock and updates s_reserved for some
reason.  But they could do that to any superblock field, or to the low
16 bits of s_resuid/s_resgid today, and that's something that neither
the kernel or e2fsck could check.

The mke2fs program zeroes all of the unused/reserved portions of the
superblock, so the risk is some random non-Linux implementation (e.g.,
GNU Hurd or BSD) had hijacked some reserved field without coordinating
with upstream ext4.  I thought about using some kind of compat feature
flag, but it probably wouldn't help since the other implementation
would likely not bother to use their own feature flag since that would
prevent the file system to be mounted with Linux.

Currently, someone tried to run "tune2fs -u 146878 /tmp/foo.img" we'll
silently drop the high 16 bits:

% tune2fs -u 146878 /tmp/foo.img 
tune2fs 1.47.3-rc2 (12-Jun-2025)
Setting reserved blocks uid to 146878
% dumpe2fs -h /tmp/foo.img | grep uid
dumpe2fs 1.47.3-rc2 (12-Jun-2025)
Reserved blocks uid:      15806 (user tytso)

And if we have implementations that support 32-bit reserved
uid's/gid's, and the file system is mounted on an older kernel, it
will simply use a different reserved uid (e.g., 15806 instead of
146878).  But we're kind of confused today, and in practice most of
the time people will be using low reserved uid's/gid's (e.g., 1 for
daemon, etc.).

						- Ted

^ permalink raw reply

* Re: [PATCH 3/3] ext4: implemet new ioctls to set and get superblock parameters
From: Theodore Ts'o @ 2025-09-12  3:14 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-ext4, linux-api
In-Reply-To: <20250911224019.GE8084@frogsfrogsfrogs>

On Thu, Sep 11, 2025 at 03:40:19PM -0700, Darrick J. Wong wrote:
>
> What's the difference between _FL_FEATURES and _FL_EDIT_FEATURES?

We have three sets of 

_FL_FEATURES allows the user to set the features via:

	__u32 feature_compat;
	__u32 feature_incompat;
	__u32 feature_ro_compat;

... while _FS_EDIT_FEATURES allows the user to set or clear specific
feature or feature(s) using these fields:

	__u32 set_feature_compat_mask;
	__u32 set_feature_incompat_mask;
	__u32 set_feature_ro_compat_mask;
	__u32 clear_feature_compat_mask;
	__u32 clear_feature_incompat_mask;
	__u32 clear_feature_ro_compat_mask;

I originally only implemented _FS_EDIT_EFATURES but it turns out that
given how tune2fs() and e2p_edit_feateurs2() was implemented,
_FS_FEATURES was a lot more convenient.  But I kept _FS_EDIT_FEATURES
in case some other users wanted an easy way to, say, "just enable
feature X" using a single ioctl.

> > +#define EXT4_TUNE_CLEAR_COMPAT_SUPP (0)
> > +#define EXT4_TUNE_CLEAR_INCOMPAT_SUPP (0)
> > +#define EXT4_TUNE_CLEAR_RO_COMPAT_SUPP \
> > +		(EXT4_FEATURE_RO_COMPAT_LARGE_FILE |	\
> > +		 EXT4_FEATURE_RO_COMPAT_DIR_NLINK |	\
> > +		 EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE |	\
> > +		 EXT4_FEATURE_RO_COMPAT_PROJECT)
> 
> Is it actually safe to clear these without scanning the filesystem to
> make sure nobody's using these features?

Hmm.... probably not.  For some of these features, tune2fs will issue
a "pleas run e2fsck -f" before mounting the file system.  All of these
featrues tune2fs will allow being cleared on a mounted file system,
but looking at this more closely, I probably *shouldn't* have allowed
tune2fs to remove the feature wile the file system is mounted.  (For
example, tune2fs -O ^project" will try to clear they project quota
inode even if the file system is mounted, hilarity would soon
follow...)

						- Ted

^ permalink raw reply

* [PATCH] sched/deadline: Add reporting of runtime left & abs deadline to sched_getattr() for DEADLINE tasks
From: Tommaso Cucinotta @ 2025-09-12  5:38 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Ingo Molnar, Juri Lelli, Vincent Guittot, Dietmar Eggemann,
	Steven Rostedt, Ben Segall, Mel Gorman, Valentin Schneider,
	linux-kernel, linux-api, Tommaso Cucinotta, Tommaso Cucinotta

I'm resending this patch proposal after having addressed a few Juri's
comments, and a rebase on top of the post-august-break tip sched/core.

The SCHED_DEADLINE scheduler allows reading the statically configured
run-time, deadline, and period parameters through the sched_getattr()
system call. However, there is no immediate way to access, from user space,
the current parameters used within the scheduler: the instantaneous runtime
left in the current cycle, as well as the current absolute deadline.

The `flags' sched_getattr() parameter, so far mandated to contain zero,
now supports the SCHED_GETATTR_FLAG_DL_DYNAMIC=1 flag, to request
retrieval of the leftover runtime and absolute deadline, converted to a
CLOCK_MONOTONIC reference, instead of the statically configured parameters.

This feature is useful for adaptive SCHED_DEADLINE tasks that need to
modify their behavior depending on whether or not there is enough runtime
left in the current period, and/or what is the current absolute deadline.

Notes:
- before returning the instantaneous parameters, the runtime is updated;
- the abs deadline is returned shifted from rq_clock() to ktime_get_ns(),
  in CLOCK_MONOTONIC reference; this causes multiple invocations from the
  same period to return values that may differ for a few ns (showing some
  small drift), albeit the deadline doesn't move, in rq_clock() reference;
- the abs deadline value returned to user-space, as unsigned 64-bit value,
  can represent nearly 585 years since boot time;
- setting flags=0 provides the old behavior (retrieve static parameters).

See also the notes from discussion held at OSPM 2025 on the topic
"Making user space aware of current deadline-scheduler parameters".

Signed-off-by: Tommaso Cucinotta <tommaso.cucinotta@santannapisa.it>

^ permalink raw reply

* [PATCH] sched/deadline: Add reporting of runtime left & abs deadline to sched_getattr() for DEADLINE tasks
From: Tommaso Cucinotta @ 2025-09-12  5:38 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Ingo Molnar, Juri Lelli, Vincent Guittot, Dietmar Eggemann,
	Steven Rostedt, Ben Segall, Mel Gorman, Valentin Schneider,
	linux-kernel, linux-api, Tommaso Cucinotta, Tommaso Cucinotta
In-Reply-To: <20250912053937.31636-1-tommaso.cucinotta@santannapisa.it>

The SCHED_DEADLINE scheduler allows reading the statically configured
run-time, deadline, and period parameters through the sched_getattr()
system call. However, there is no immediate way to access, from user space,
the current parameters used within the scheduler: the instantaneous runtime
left in the current cycle, as well as the current absolute deadline.

The `flags' sched_getattr() parameter, so far mandated to contain zero,
now supports the SCHED_GETATTR_FLAG_DL_DYNAMIC=1 flag, to request
retrieval of the leftover runtime and absolute deadline, converted to a
CLOCK_MONOTONIC reference, instead of the statically configured parameters.

This feature is useful for adaptive SCHED_DEADLINE tasks that need to
modify their behavior depending on whether or not there is enough runtime
left in the current period, and/or what is the current absolute deadline.

Notes:
- before returning the instantaneous parameters, the runtime is updated;
- the abs deadline is returned shifted from rq_clock() to ktime_get_ns(),
  in CLOCK_MONOTONIC reference; this causes multiple invocations from the
  same period to return values that may differ for a few ns (showing some
  small drift), albeit the deadline doesn't move, in rq_clock() reference;
- the abs deadline value returned to user-space, as unsigned 64-bit value,
  can represent nearly 585 years since boot time;
- setting flags=0 provides the old behavior (retrieve static parameters).

See also the notes from discussion held at OSPM 2025 on the topic
"Making user space aware of current deadline-scheduler parameters".

Signed-off-by: Tommaso Cucinotta <tommaso.cucinotta@santannapisa.it>
---
 include/uapi/linux/sched.h |  3 +++
 kernel/sched/deadline.c    | 19 ++++++++++++++++---
 kernel/sched/sched.h       |  2 +-
 kernel/sched/syscalls.c    | 16 +++++++++++-----
 4 files changed, 31 insertions(+), 9 deletions(-)

diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h
index 359a14cc..52b69ce8 100644
--- a/include/uapi/linux/sched.h
+++ b/include/uapi/linux/sched.h
@@ -146,4 +146,7 @@ struct clone_args {
 			 SCHED_FLAG_KEEP_ALL		| \
 			 SCHED_FLAG_UTIL_CLAMP)
 
+/* Only for sched_getattr() own flag param, if task is SCHED_DEADLINE */
+#define SCHED_GETATTR_FLAG_DL_DYNAMIC	0x01
+
 #endif /* _UAPI_LINUX_SCHED_H */
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 5b64bc62..b1c7c988 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -3328,13 +3328,26 @@ void __setparam_dl(struct task_struct *p, const struct sched_attr *attr)
 	dl_se->dl_density = to_ratio(dl_se->dl_deadline, dl_se->dl_runtime);
 }
 
-void __getparam_dl(struct task_struct *p, struct sched_attr *attr)
+void __getparam_dl(struct task_struct *p, struct sched_attr *attr, unsigned int flags)
 {
 	struct sched_dl_entity *dl_se = &p->dl;
+	struct rq *rq = task_rq(p);
+	u64 adj_deadline;
 
 	attr->sched_priority = p->rt_priority;
-	attr->sched_runtime = dl_se->dl_runtime;
-	attr->sched_deadline = dl_se->dl_deadline;
+	if (flags & SCHED_GETATTR_FLAG_DL_DYNAMIC) {
+		guard(raw_spinlock_irq)(&rq->__lock);
+		update_rq_clock(rq);
+		if (task_current(rq, p))
+			update_curr_dl(rq);
+
+		attr->sched_runtime = dl_se->runtime;
+		adj_deadline = dl_se->deadline - rq_clock(rq) + ktime_get_ns();
+		attr->sched_deadline = adj_deadline;
+	} else {
+		attr->sched_runtime = dl_se->dl_runtime;
+		attr->sched_deadline = dl_se->dl_deadline;
+	}
 	attr->sched_period = dl_se->dl_period;
 	attr->sched_flags &= ~SCHED_DL_FLAGS;
 	attr->sched_flags |= dl_se->flags;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index b5367c51..42ddfccb 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -353,7 +353,7 @@ extern int  sched_dl_global_validate(void);
 extern void sched_dl_do_global(void);
 extern int  sched_dl_overflow(struct task_struct *p, int policy, const struct sched_attr *attr);
 extern void __setparam_dl(struct task_struct *p, const struct sched_attr *attr);
-extern void __getparam_dl(struct task_struct *p, struct sched_attr *attr);
+extern void __getparam_dl(struct task_struct *p, struct sched_attr *attr, unsigned int flags);
 extern bool __checkparam_dl(const struct sched_attr *attr);
 extern bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr);
 extern int  dl_cpuset_cpumask_can_shrink(const struct cpumask *cur, const struct cpumask *trial);
diff --git a/kernel/sched/syscalls.c b/kernel/sched/syscalls.c
index 77ae87f3..d7eac588 100644
--- a/kernel/sched/syscalls.c
+++ b/kernel/sched/syscalls.c
@@ -928,10 +928,10 @@ static int sched_copy_attr(struct sched_attr __user *uattr, struct sched_attr *a
 	return -E2BIG;
 }
 
-static void get_params(struct task_struct *p, struct sched_attr *attr)
+static void get_params(struct task_struct *p, struct sched_attr *attr, unsigned int flags)
 {
 	if (task_has_dl_policy(p)) {
-		__getparam_dl(p, attr);
+		__getparam_dl(p, attr, flags);
 	} else if (task_has_rt_policy(p)) {
 		attr->sched_priority = p->rt_priority;
 	} else {
@@ -997,7 +997,7 @@ SYSCALL_DEFINE3(sched_setattr, pid_t, pid, struct sched_attr __user *, uattr,
 		return -ESRCH;
 
 	if (attr.sched_flags & SCHED_FLAG_KEEP_PARAMS)
-		get_params(p, &attr);
+		get_params(p, &attr, 0);
 
 	return sched_setattr(p, &attr);
 }
@@ -1082,7 +1082,7 @@ SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr,
 	int retval;
 
 	if (unlikely(!uattr || pid < 0 || usize > PAGE_SIZE ||
-		      usize < SCHED_ATTR_SIZE_VER0 || flags))
+		     usize < SCHED_ATTR_SIZE_VER0))
 		return -EINVAL;
 
 	scoped_guard (rcu) {
@@ -1090,6 +1090,12 @@ SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr,
 		if (!p)
 			return -ESRCH;
 
+		if (flags) {
+			if (!task_has_dl_policy(p) ||
+			    flags != SCHED_GETATTR_FLAG_DL_DYNAMIC)
+				return -EINVAL;
+		}
+
 		retval = security_task_getscheduler(p);
 		if (retval)
 			return retval;
@@ -1097,7 +1103,7 @@ SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr,
 		kattr.sched_policy = p->policy;
 		if (p->sched_reset_on_fork)
 			kattr.sched_flags |= SCHED_FLAG_RESET_ON_FORK;
-		get_params(p, &kattr);
+		get_params(p, &kattr, flags);
 		kattr.sched_flags &= SCHED_FLAG_ALL;
 
 #ifdef CONFIG_UCLAMP_TASK
-- 
2.45.2


^ permalink raw reply related

* [PATCH 00/62] initrd: remove classic initrd support
From: Askar Safin @ 2025-09-12 22:38 UTC (permalink / raw)
  To: linux-fsdevel, linux-kernel
  Cc: Linus Torvalds, Greg Kroah-Hartman, Christian Brauner, Al Viro,
	Jan Kara, Christoph Hellwig, Jens Axboe, Andy Shevchenko,
	Aleksa Sarai, Thomas Weißschuh, Julian Stecklina, Gao Xiang,
	Art Nikpal, Andrew Morton, Eric Curtin, Alexander Graf,
	Rob Landley, Lennart Poettering, linux-arch, linux-alpha,
	linux-snps-arc, linux-arm-kernel, linux-csky, linux-hexagon,
	loongarch, linux-m68k, linux-mips, linux-openrisc, linux-parisc,
	linuxppc-dev, linux-riscv, linux-s390, linux-sh, sparclinux,
	linux-um, x86, Ingo Molnar, linux-block, initramfs, linux-api,
	linux-doc, linux-efi, linux-ext4, Theodore Y . Ts'o,
	linux-acpi, Michal Simek, devicetree, Luis Chamberlain, Kees Cook,
	Thorsten Blum, Heiko Carstens, patches

Intro
====
This patchset removes classic initrd (initial RAM disk) support,
which was deprecated in 2020.
Initramfs still stays, and RAM disk itself (brd) still stays, too.
init/do_mounts* and init/*initramfs* are listed in VFS entry in
MAINTAINERS, so I think this patchset should go through VFS tree.
This patchset touchs every subdirectory in arch/, so I tested it
on 8 (!!!) archs in Qemu (see details below).
Warning: this patchset renames CONFIG_BLK_DEV_INITRD (!!!) to CONFIG_INITRAMFS
and CONFIG_RD_* to CONFIG_INITRAMFS_DECOMPRESS_* (for example,
CONFIG_RD_GZIP to CONFIG_INITRAMFS_DECOMPRESS_GZIP).
If you still use initrd, see below for workaround.

Details
====
I not only removed initrd, I also removed a lot of code, which
became dead, including a lot of code in arch/.

Still I think the only two architectures I touched in non-trivial
way are sh and 32-bit arm.

Also I renamed some files, functions and variables (which became misnomers) to proper names,
moved some code around, removed a lot of mentions of initrd
in code and comments. Also I cleaned up some docs.

For example, I renamed the following global variables:

__initramfs_start
__initramfs_size
phys_initrd_start
phys_initrd_size
initrd_start
initrd_end

to:

__builtin_initramfs_start
__builtin_initramfs_size
phys_external_initramfs_start
phys_external_initramfs_size
virt_external_initramfs_start
virt_external_initramfs_end

New names precisely capture meaning of these variables.

Also I renamed CONFIG_BLK_DEV_INITRD (which became total misnomer)
to CONFIG_INITRAMFS. And CONFIG_RD_* to CONFIG_INITRAMFS_DECOMPRESS_*.
This will break all configs out there (update your configs!).
Still I think this is okay,
because config names never were part of stable API.
Still, I don't have strong opinion here, so I can drop these renamings
if needed.

Other user-visible changes:

- Removed kernel command line parameters "load_ramdisk" and
"prompt_ramdisk", which did nothing and were deprecated
- Removed kernel command line parameter "ramdisk_start",
which was used for initrd only (not for initramfs)
- Removed kernel command line parameter "noinitrd",
which was inconsistent: it controlled initrd only
(not initramfs), except for EFI boot, where it
controlled both initramfs and initrd. EFI users
still can disable initramfs simply by not passing it
- Removed kernel command line parameter "ramdisk_size",
which used for controlling ramdisk (brd), but only
in non-modular mode. Use brd.rd_size instead, it
always works
- Removed /proc/sys/kernel/real-root-dev . It was used
for initrd only

This patchset is based on v6.17-rc5.

Testing
====
I tested my patchset on many architectures in Qemu using my Rust
program, heavily based on mkroot [1].

I used the following cross-compilers:

aarch64-linux-musleabi
armv4l-linux-musleabihf
armv5l-linux-musleabihf
armv7l-linux-musleabihf
i486-linux-musl
i686-linux-musl
mips-linux-musl
mips64-linux-musl
mipsel-linux-musl
powerpc-linux-musl
powerpc64-linux-musl
powerpc64le-linux-musl
riscv32-linux-musl
riscv64-linux-musl
s390x-linux-musl
sh4-linux-musl
sh4eb-linux-musl
x86_64-linux-musl

taken from this directory [2].

So, as you can see, there are 18 triplets, which correspond to 8 subdirs in arch/.

And note that this list contains two archs (arm and sh) touched in non-trivial way.

For every triplet I tested that:
- Initramfs still works (both builtin and external)
- Direct boot from disk still works

Workaround
====
If "retain_initrd" is passed to kernel, then initramfs/initrd,
passed by bootloader, is retained and becomes available after boot
as read-only magic file /sys/firmware/initrd [3].

No copies are involved. I. e. /sys/firmware/initrd is simply
a reference to original blob passed by bootloader.

This works even if initrd/initramfs is not recognized by kernel
in any way, i. e. even if it is not valid cpio archive, nor
a fs image supported by classic initrd.

This works both with my patchset and without it.

This means that you can emulate classic initrd so:
link builtin initramfs to kernel. In /init in this initramfs
copy /sys/firmware/initrd to some file in / and loop-mount it.

This is even better than classic initrd, because:
- You can use fs not supported by classic initrd, for example erofs
- One copy is involved (from /sys/firmware/initrd to some file in /)
as opposed to two when using classic initrd

Still, I don't recommend using this workaround, because
I want everyone to migrate to proper modern initramfs.
But still you can use this workaround if you want.

Also: it is not possible to directly loop-mount
/sys/firmware/initrd . Theoretically kernel can be changed
to allow this (and/or to make it writable), but I think nobody needs this.
And I don't want to implement this.


[1] https://github.com/landley/toybox/tree/master/mkroot
[2] https://landley.net/toybox/downloads/binaries/toolchains/latest
[3] https://lore.kernel.org/all/20231207235654.16622-1-graf@amazon.com/


Askar Safin (62):
  init: remove deprecated "load_ramdisk" command line parameter, which
    does nothing
  init: remove deprecated "prompt_ramdisk" command line parameter, which
    does nothing
  init: sh, sparc, x86: remove unused constants RAMDISK_PROMPT_FLAG and
    RAMDISK_LOAD_FLAG
  init: x86, arm, sh, sparc: remove variable rd_image_start, which
    controls starting block number of initrd
  init: remove "ramdisk_start" command line parameter, which controls
    starting block number of initrd
  arm: init: remove special logic for setting brd.rd_size
  arm: init: remove ATAG_RAMDISK
  arm: init: remove FLAG_RDLOAD and FLAG_RDPROMPT
  arm: init: document rd_start (in param_struct) as obsolete
  initrd: remove initrd (initial RAM disk) support
  init, efi: remove "noinitrd" command line parameter
  init: remove /proc/sys/kernel/real-root-dev
  ext2: remove ext2_image_size and associated code
  init: m68k, mips, powerpc, s390, sh: remove Root_RAM0
  doc: modernize Documentation/admin-guide/blockdev/ramdisk.rst
  brd: remove "ramdisk_size" command line parameter
  doc: modernize Documentation/filesystems/ramfs-rootfs-initramfs.rst
  doc: modernize
    Documentation/driver-api/early-userspace/early_userspace_support.rst
  init: remove mentions of "ramdisk=" command line parameter
  doc: remove Documentation/power/swsusp-dmcrypt.rst
  init: remove all mentions of root=/dev/ram*
  doc: remove obsolete mentions of pivot_root
  init: rename __initramfs_{start,size} to
    __builtin_initramfs_{start,size}
  init: remove wrong comment
  init: rename phys_initrd_{start,size} to
    phys_external_initramfs_{start,size}
  init: move phys_external_initramfs_{start,size} to init/initramfs.c
  init: alpha: remove "extern unsigned long initrd_start, initrd_end"
  init: alpha, arc, arm, arm64, csky, m68k, microblaze, mips, nios2,
    openrisc, parisc, powerpc, s390, sh, sparc, um, x86, xtensa: rename
    initrd_{start,end} to virt_external_initramfs_{start,end}
  init: move virt_external_initramfs_{start,end} to init/initramfs.c
  doc: remove documentation for block device 4 0
  init: rename initrd_below_start_ok to initramfs_below_start_ok
  init: move initramfs_below_start_ok to init/initramfs.c
  init: remove init/do_mounts_initrd.c
  init: inline create_dev into the only caller
  init: make mount_root_generic static
  init: make mount_root static
  init: remove root_mountflags from init/do_mounts.h
  init: remove most headers from init/do_mounts.h
  init: make console_on_rootfs static
  init: rename free_initrd_mem to free_initramfs_mem
  init: rename reserve_initrd_mem to reserve_initramfs_mem
  init: rename <linux/initrd.h> to <linux/initramfs.h>
  setsid: inline ksys_setsid into the only caller
  doc: kernel-parameters: remove [RAM] from reserve_mem=
  doc: kernel-parameters: replace [RAM] with [INITRAMFS]
  init: edit docs for initramfs-related configs
  init: fix typo: virtul => virtual
  init: fix comment
  init: rename ramdisk_execute_command to initramfs_execute_command
  init: rename ramdisk_command_access to initramfs_command_access
  init: rename get_boot_config_from_initrd to
    get_boot_config_from_initramfs
  init: rename do_retain_initrd to retain_initramfs
  init: rename kexec_free_initrd to kexec_free_initramfs
  init: arm, x86: deal with some references to initrd
  init: rename CONFIG_BLK_DEV_INITRD to CONFIG_INITRAMFS
  init: rename CONFIG_RD_GZIP to CONFIG_INITRAMFS_DECOMPRESS_GZIP
  init: rename CONFIG_RD_BZIP2 to CONFIG_INITRAMFS_DECOMPRESS_BZIP2
  init: rename CONFIG_RD_LZMA to CONFIG_INITRAMFS_DECOMPRESS_LZMA
  init: rename CONFIG_RD_XZ to CONFIG_INITRAMFS_DECOMPRESS_XZ
  init: rename CONFIG_RD_LZO to CONFIG_INITRAMFS_DECOMPRESS_LZO
  init: rename CONFIG_RD_LZ4 to CONFIG_INITRAMFS_DECOMPRESS_LZ4
  init: rename CONFIG_RD_ZSTD to CONFIG_INITRAMFS_DECOMPRESS_ZSTD

 .../admin-guide/blockdev/ramdisk.rst          | 104 +----
 .../admin-guide/device-mapper/dm-init.rst     |   4 +-
 Documentation/admin-guide/devices.txt         |  12 -
 Documentation/admin-guide/index.rst           |   1 -
 Documentation/admin-guide/initrd.rst          | 383 ------------------
 .../admin-guide/kernel-parameters.rst         |   4 +-
 .../admin-guide/kernel-parameters.txt         |  38 +-
 Documentation/admin-guide/nfs/nfsroot.rst     |   4 +-
 Documentation/admin-guide/sysctl/kernel.rst   |   6 -
 Documentation/arch/arm/ixp4xx.rst             |   4 +-
 Documentation/arch/arm/setup.rst              |   6 +-
 Documentation/arch/m68k/kernel-options.rst    |  29 +-
 Documentation/arch/x86/boot.rst               |   4 +-
 .../early_userspace_support.rst               |  18 +-
 .../filesystems/ramfs-rootfs-initramfs.rst    |  20 +-
 Documentation/power/index.rst                 |   1 -
 Documentation/power/swsusp-dmcrypt.rst        | 140 -------
 Documentation/security/ipe.rst                |   2 +-
 .../translations/zh_CN/power/index.rst        |   1 -
 arch/alpha/kernel/core_irongate.c             |  12 +-
 arch/alpha/kernel/proto.h                     |   2 +-
 arch/alpha/kernel/setup.c                     |  32 +-
 arch/arc/configs/axs101_defconfig             |   2 +-
 arch/arc/configs/axs103_defconfig             |   2 +-
 arch/arc/configs/axs103_smp_defconfig         |   2 +-
 arch/arc/configs/haps_hs_defconfig            |   2 +-
 arch/arc/configs/haps_hs_smp_defconfig        |   2 +-
 arch/arc/configs/hsdk_defconfig               |   2 +-
 arch/arc/configs/nsim_700_defconfig           |   2 +-
 arch/arc/configs/nsimosci_defconfig           |   2 +-
 arch/arc/configs/nsimosci_hs_defconfig        |   2 +-
 arch/arc/configs/nsimosci_hs_smp_defconfig    |   2 +-
 arch/arc/configs/tb10x_defconfig              |   4 +-
 arch/arc/configs/vdk_hs38_defconfig           |   2 +-
 arch/arc/configs/vdk_hs38_smp_defconfig       |   2 +-
 arch/arc/mm/init.c                            |  14 +-
 arch/arm/Kconfig                              |   2 +-
 arch/arm/boot/dts/arm/integratorap.dts        |   2 +-
 arch/arm/boot/dts/arm/integratorcp.dts        |   2 +-
 .../dts/aspeed/aspeed-bmc-facebook-cmm.dts    |   2 +-
 .../aspeed/aspeed-bmc-facebook-galaxy100.dts  |   2 +-
 .../aspeed/aspeed-bmc-facebook-minipack.dts   |   2 +-
 .../aspeed/aspeed-bmc-facebook-wedge100.dts   |   2 +-
 .../aspeed/aspeed-bmc-facebook-wedge40.dts    |   2 +-
 .../dts/aspeed/aspeed-bmc-facebook-yamp.dts   |   2 +-
 .../ast2600-facebook-netbmc-common.dtsi       |   2 +-
 arch/arm/boot/dts/hisilicon/hi3620-hi4511.dts |   2 +-
 .../ixp/intel-ixp42x-welltech-epbx100.dts     |   2 +-
 arch/arm/boot/dts/nspire/nspire-classic.dtsi  |   2 +-
 arch/arm/boot/dts/nspire/nspire-cx.dts        |   2 +-
 .../boot/dts/samsung/exynos4210-origen.dts    |   2 +-
 .../boot/dts/samsung/exynos4210-smdkv310.dts  |   2 +-
 .../boot/dts/samsung/exynos4412-smdk4412.dts  |   2 +-
 .../boot/dts/samsung/exynos5250-smdk5250.dts  |   2 +-
 arch/arm/boot/dts/st/ste-nomadik-nhk15.dts    |   2 +-
 arch/arm/boot/dts/st/ste-nomadik-s8815.dts    |   2 +-
 arch/arm/boot/dts/st/stm32429i-eval.dts       |   2 +-
 arch/arm/boot/dts/st/stm32746g-eval.dts       |   2 +-
 arch/arm/boot/dts/st/stm32f429-disco.dts      |   2 +-
 arch/arm/boot/dts/st/stm32f469-disco.dts      |   2 +-
 arch/arm/boot/dts/st/stm32f746-disco.dts      |   2 +-
 arch/arm/boot/dts/st/stm32f769-disco.dts      |   2 +-
 arch/arm/boot/dts/st/stm32h743i-disco.dts     |   2 +-
 arch/arm/boot/dts/st/stm32h743i-eval.dts      |   2 +-
 arch/arm/boot/dts/st/stm32h747i-disco.dts     |   2 +-
 arch/arm/boot/dts/st/stm32h750i-art-pi.dts    |   2 +-
 arch/arm/configs/aspeed_g4_defconfig          |   8 +-
 arch/arm/configs/aspeed_g5_defconfig          |   8 +-
 arch/arm/configs/assabet_defconfig            |   4 +-
 arch/arm/configs/at91_dt_defconfig            |   4 +-
 arch/arm/configs/axm55xx_defconfig            |   2 +-
 arch/arm/configs/bcm2835_defconfig            |   2 +-
 arch/arm/configs/clps711x_defconfig           |   4 +-
 arch/arm/configs/collie_defconfig             |   4 +-
 arch/arm/configs/davinci_all_defconfig        |   2 +-
 arch/arm/configs/exynos_defconfig             |   4 +-
 arch/arm/configs/footbridge_defconfig         |   2 +-
 arch/arm/configs/gemini_defconfig             |   2 +-
 arch/arm/configs/h3600_defconfig              |   2 +-
 arch/arm/configs/hisi_defconfig               |   4 +-
 arch/arm/configs/imx_v4_v5_defconfig          |   2 +-
 arch/arm/configs/imx_v6_v7_defconfig          |   4 +-
 arch/arm/configs/integrator_defconfig         |   2 +-
 arch/arm/configs/ixp4xx_defconfig             |   2 +-
 arch/arm/configs/keystone_defconfig           |   2 +-
 arch/arm/configs/lpc18xx_defconfig            |  12 +-
 arch/arm/configs/lpc32xx_defconfig            |   4 +-
 arch/arm/configs/milbeaut_m10v_defconfig      |   2 +-
 arch/arm/configs/multi_v4t_defconfig          |   2 +-
 arch/arm/configs/multi_v5_defconfig           |   2 +-
 arch/arm/configs/multi_v7_defconfig           |   2 +-
 arch/arm/configs/mvebu_v7_defconfig           |   2 +-
 arch/arm/configs/mxs_defconfig                |   2 +-
 arch/arm/configs/neponset_defconfig           |   4 +-
 arch/arm/configs/nhk8815_defconfig            |   2 +-
 arch/arm/configs/omap1_defconfig              |   2 +-
 arch/arm/configs/omap2plus_defconfig          |   2 +-
 arch/arm/configs/pxa910_defconfig             |   2 +-
 arch/arm/configs/pxa_defconfig                |   4 +-
 arch/arm/configs/qcom_defconfig               |   2 +-
 arch/arm/configs/rpc_defconfig                |   2 +-
 arch/arm/configs/s3c6400_defconfig            |   4 +-
 arch/arm/configs/s5pv210_defconfig            |   4 +-
 arch/arm/configs/sama5_defconfig              |   4 +-
 arch/arm/configs/sama7_defconfig              |   2 +-
 arch/arm/configs/shmobile_defconfig           |   2 +-
 arch/arm/configs/socfpga_defconfig            |   2 +-
 arch/arm/configs/sp7021_defconfig             |  12 +-
 arch/arm/configs/spear13xx_defconfig          |   2 +-
 arch/arm/configs/spear3xx_defconfig           |   2 +-
 arch/arm/configs/spear6xx_defconfig           |   2 +-
 arch/arm/configs/spitz_defconfig              |   2 +-
 arch/arm/configs/stm32_defconfig              |   2 +-
 arch/arm/configs/sunxi_defconfig              |   2 +-
 arch/arm/configs/tegra_defconfig              |   2 +-
 arch/arm/configs/u8500_defconfig              |   4 +-
 arch/arm/configs/versatile_defconfig          |   2 +-
 arch/arm/configs/vexpress_defconfig           |   2 +-
 arch/arm/configs/vf610m4_defconfig            |  10 +-
 arch/arm/configs/vt8500_v6_v7_defconfig       |   2 +-
 arch/arm/configs/wpcm450_defconfig            |   2 +-
 arch/arm/include/uapi/asm/setup.h             |  10 -
 arch/arm/kernel/atags_compat.c                |  10 -
 arch/arm/kernel/atags_parse.c                 |  16 +-
 arch/arm/kernel/setup.c                       |   2 +-
 arch/arm/mm/init.c                            |  24 +-
 arch/arm64/configs/defconfig                  |   2 +-
 arch/arm64/kernel/setup.c                     |   2 +-
 arch/arm64/mm/init.c                          |  17 +-
 arch/csky/kernel/setup.c                      |  24 +-
 arch/csky/mm/init.c                           |   2 +-
 arch/hexagon/configs/comet_defconfig          |   2 +-
 arch/loongarch/configs/loongson3_defconfig    |   2 +-
 arch/loongarch/kernel/mem.c                   |   2 +-
 arch/loongarch/kernel/setup.c                 |   4 +-
 arch/m68k/configs/amiga_defconfig             |   2 +-
 arch/m68k/configs/apollo_defconfig            |   2 +-
 arch/m68k/configs/atari_defconfig             |   2 +-
 arch/m68k/configs/bvme6000_defconfig          |   2 +-
 arch/m68k/configs/hp300_defconfig             |   2 +-
 arch/m68k/configs/mac_defconfig               |   2 +-
 arch/m68k/configs/multi_defconfig             |   2 +-
 arch/m68k/configs/mvme147_defconfig           |   2 +-
 arch/m68k/configs/mvme16x_defconfig           |   2 +-
 arch/m68k/configs/q40_defconfig               |   2 +-
 arch/m68k/configs/stmark2_defconfig           |   2 +-
 arch/m68k/configs/sun3_defconfig              |   2 +-
 arch/m68k/configs/sun3x_defconfig             |   2 +-
 arch/m68k/kernel/setup_mm.c                   |  12 +-
 arch/m68k/kernel/setup_no.c                   |  12 +-
 arch/m68k/kernel/uboot.c                      |  17 +-
 arch/microblaze/kernel/cpu/mb.c               |   2 +-
 arch/microblaze/kernel/setup.c                |   2 +-
 arch/microblaze/mm/init.c                     |  12 +-
 arch/mips/ath79/prom.c                        |  12 +-
 arch/mips/configs/ath25_defconfig             |  12 +-
 arch/mips/configs/ath79_defconfig             |   4 +-
 arch/mips/configs/bcm47xx_defconfig           |   2 +-
 arch/mips/configs/bigsur_defconfig            |   2 +-
 arch/mips/configs/bmips_be_defconfig          |   2 +-
 arch/mips/configs/bmips_stb_defconfig         |  14 +-
 arch/mips/configs/cavium_octeon_defconfig     |   2 +-
 arch/mips/configs/eyeq5_defconfig             |   2 +-
 arch/mips/configs/eyeq6_defconfig             |   2 +-
 arch/mips/configs/generic_defconfig           |   2 +-
 arch/mips/configs/gpr_defconfig               |   2 +-
 arch/mips/configs/lemote2f_defconfig          |   2 +-
 arch/mips/configs/loongson2k_defconfig        |   2 +-
 arch/mips/configs/loongson3_defconfig         |   2 +-
 arch/mips/configs/malta_defconfig             |   2 +-
 arch/mips/configs/mtx1_defconfig              |   2 +-
 arch/mips/configs/rb532_defconfig             |   2 +-
 arch/mips/configs/rbtx49xx_defconfig          |   2 +-
 arch/mips/configs/rt305x_defconfig            |   4 +-
 arch/mips/configs/sb1250_swarm_defconfig      |   2 +-
 arch/mips/configs/xway_defconfig              |   4 +-
 arch/mips/kernel/setup.c                      |  53 ++-
 arch/mips/mm/init.c                           |   2 +-
 arch/mips/sibyte/common/cfe.c                 |  36 +-
 arch/mips/sibyte/swarm/setup.c                |   2 +-
 arch/nios2/kernel/setup.c                     |  20 +-
 arch/openrisc/configs/or1klitex_defconfig     |   2 +-
 arch/openrisc/configs/or1ksim_defconfig       |   4 +-
 arch/openrisc/configs/simple_smp_defconfig    |  14 +-
 arch/openrisc/configs/virt_defconfig          |   2 +-
 arch/openrisc/kernel/setup.c                  |  24 +-
 arch/openrisc/kernel/vmlinux.h                |   2 +-
 arch/parisc/boot/compressed/misc.c            |   2 +-
 arch/parisc/configs/generic-32bit_defconfig   |   2 +-
 arch/parisc/configs/generic-64bit_defconfig   |   2 +-
 arch/parisc/defpalo.conf                      |   2 +-
 arch/parisc/kernel/pdt.c                      |   6 +-
 arch/parisc/kernel/setup.c                    |   8 +-
 arch/parisc/mm/init.c                         |  32 +-
 arch/powerpc/configs/44x/akebono_defconfig    |   2 +-
 arch/powerpc/configs/44x/arches_defconfig     |   2 +-
 arch/powerpc/configs/44x/bamboo_defconfig     |   2 +-
 arch/powerpc/configs/44x/bluestone_defconfig  |   2 +-
 .../powerpc/configs/44x/canyonlands_defconfig |   2 +-
 arch/powerpc/configs/44x/ebony_defconfig      |   2 +-
 arch/powerpc/configs/44x/eiger_defconfig      |   2 +-
 arch/powerpc/configs/44x/fsp2_defconfig       |  10 +-
 arch/powerpc/configs/44x/icon_defconfig       |   2 +-
 arch/powerpc/configs/44x/iss476-smp_defconfig |   2 +-
 arch/powerpc/configs/44x/katmai_defconfig     |   2 +-
 arch/powerpc/configs/44x/rainier_defconfig    |   2 +-
 arch/powerpc/configs/44x/redwood_defconfig    |   2 +-
 arch/powerpc/configs/44x/sam440ep_defconfig   |   2 +-
 arch/powerpc/configs/44x/sequoia_defconfig    |   2 +-
 arch/powerpc/configs/44x/taishan_defconfig    |   2 +-
 arch/powerpc/configs/44x/warp_defconfig       |   2 +-
 arch/powerpc/configs/52xx/cm5200_defconfig    |   2 +-
 arch/powerpc/configs/52xx/lite5200b_defconfig |   2 +-
 arch/powerpc/configs/52xx/motionpro_defconfig |   2 +-
 arch/powerpc/configs/52xx/tqm5200_defconfig   |   2 +-
 arch/powerpc/configs/83xx/asp8347_defconfig   |   2 +-
 .../configs/83xx/mpc8313_rdb_defconfig        |   2 +-
 .../configs/83xx/mpc8315_rdb_defconfig        |   2 +-
 .../configs/83xx/mpc832x_rdb_defconfig        |   2 +-
 .../configs/83xx/mpc834x_itx_defconfig        |   2 +-
 .../configs/83xx/mpc834x_itxgp_defconfig      |   2 +-
 .../configs/83xx/mpc836x_rdk_defconfig        |   2 +-
 .../configs/83xx/mpc837x_rdb_defconfig        |   2 +-
 arch/powerpc/configs/85xx/ge_imp3a_defconfig  |   2 +-
 arch/powerpc/configs/85xx/ksi8560_defconfig   |   2 +-
 arch/powerpc/configs/85xx/socrates_defconfig  |   2 +-
 arch/powerpc/configs/85xx/stx_gp3_defconfig   |   2 +-
 arch/powerpc/configs/85xx/tqm8540_defconfig   |   2 +-
 arch/powerpc/configs/85xx/tqm8541_defconfig   |   2 +-
 arch/powerpc/configs/85xx/tqm8548_defconfig   |   2 +-
 arch/powerpc/configs/85xx/tqm8555_defconfig   |   2 +-
 arch/powerpc/configs/85xx/tqm8560_defconfig   |   2 +-
 .../configs/85xx/xes_mpc85xx_defconfig        |   2 +-
 arch/powerpc/configs/amigaone_defconfig       |   2 +-
 arch/powerpc/configs/cell_defconfig           |   2 +-
 arch/powerpc/configs/chrp32_defconfig         |   2 +-
 arch/powerpc/configs/fsl-emb-nonhw.config     |   2 +-
 arch/powerpc/configs/g5_defconfig             |   2 +-
 arch/powerpc/configs/gamecube_defconfig       |   2 +-
 arch/powerpc/configs/holly_defconfig          |   2 +-
 arch/powerpc/configs/linkstation_defconfig    |   2 +-
 arch/powerpc/configs/mgcoge_defconfig         |   4 +-
 arch/powerpc/configs/microwatt_defconfig      |   2 +-
 arch/powerpc/configs/mpc512x_defconfig        |   2 +-
 arch/powerpc/configs/mpc5200_defconfig        |   2 +-
 arch/powerpc/configs/mpc83xx_defconfig        |   2 +-
 arch/powerpc/configs/pasemi_defconfig         |   2 +-
 arch/powerpc/configs/pmac32_defconfig         |   2 +-
 arch/powerpc/configs/powernv_defconfig        |   2 +-
 arch/powerpc/configs/ppc44x_defconfig         |   2 +-
 arch/powerpc/configs/ppc64_defconfig          |   2 +-
 arch/powerpc/configs/ppc64e_defconfig         |   2 +-
 arch/powerpc/configs/ppc6xx_defconfig         |   2 +-
 arch/powerpc/configs/ps3_defconfig            |   2 +-
 arch/powerpc/configs/skiroot_defconfig        |  12 +-
 arch/powerpc/configs/wii_defconfig            |   2 +-
 arch/powerpc/kernel/prom.c                    |  22 +-
 arch/powerpc/kernel/prom_init.c               |   6 +-
 arch/powerpc/kernel/setup-common.c            |  25 +-
 arch/powerpc/kernel/setup_32.c                |   2 +-
 arch/powerpc/kernel/setup_64.c                |   2 +-
 arch/powerpc/mm/init_32.c                     |   2 +-
 arch/powerpc/platforms/52xx/lite5200.c        |   2 +-
 arch/powerpc/platforms/83xx/km83xx.c          |   2 +-
 arch/powerpc/platforms/85xx/mpc85xx_mds.c     |   2 +-
 arch/powerpc/platforms/chrp/setup.c           |   2 +-
 .../platforms/embedded6xx/linkstation.c       |   2 +-
 .../platforms/embedded6xx/storcenter.c        |   2 +-
 arch/powerpc/platforms/powermac/setup.c       |   8 +-
 arch/riscv/configs/defconfig                  |   2 +-
 arch/riscv/configs/nommu_k210_defconfig       |  16 +-
 arch/riscv/configs/nommu_virt_defconfig       |  12 +-
 arch/riscv/mm/init.c                          |   4 +-
 arch/s390/boot/ipl_parm.c                     |   2 +-
 arch/s390/boot/startup.c                      |   4 +-
 arch/s390/configs/zfcpdump_defconfig          |   2 +-
 arch/s390/kernel/setup.c                      |  10 +-
 arch/s390/mm/init.c                           |   2 +-
 arch/sh/configs/apsh4a3a_defconfig            |   2 +-
 arch/sh/configs/apsh4ad0a_defconfig           |   2 +-
 arch/sh/configs/ecovec24-romimage_defconfig   |   2 +-
 arch/sh/configs/edosk7760_defconfig           |   2 +-
 arch/sh/configs/kfr2r09-romimage_defconfig    |   2 +-
 arch/sh/configs/kfr2r09_defconfig             |   2 +-
 arch/sh/configs/magicpanelr2_defconfig        |   2 +-
 arch/sh/configs/migor_defconfig               |   2 +-
 arch/sh/configs/rsk7201_defconfig             |   2 +-
 arch/sh/configs/rsk7203_defconfig             |   2 +-
 arch/sh/configs/sdk7786_defconfig             |   8 +-
 arch/sh/configs/se7206_defconfig              |   2 +-
 arch/sh/configs/se7705_defconfig              |   2 +-
 arch/sh/configs/se7722_defconfig              |   2 +-
 arch/sh/configs/se7751_defconfig              |   2 +-
 arch/sh/configs/secureedge5410_defconfig      |   2 +-
 arch/sh/configs/sh03_defconfig                |   2 +-
 arch/sh/configs/sh7757lcr_defconfig           |   2 +-
 arch/sh/configs/titan_defconfig               |   2 +-
 arch/sh/configs/ul2_defconfig                 |   2 +-
 arch/sh/configs/urquell_defconfig             |   2 +-
 arch/sh/include/asm/setup.h                   |   1 -
 arch/sh/kernel/head_32.S                      |   2 +-
 arch/sh/kernel/setup.c                        |  27 +-
 arch/sparc/boot/piggyback.c                   |   4 +-
 arch/sparc/configs/sparc32_defconfig          |   2 +-
 arch/sparc/configs/sparc64_defconfig          |   2 +-
 arch/sparc/kernel/head_32.S                   |   4 +-
 arch/sparc/kernel/head_64.S                   |   6 +-
 arch/sparc/kernel/setup_32.c                  |   9 +-
 arch/sparc/kernel/setup_64.c                  |   9 +-
 arch/sparc/mm/init_32.c                       |  22 +-
 arch/sparc/mm/init_64.c                       |  20 +-
 arch/um/kernel/Makefile                       |   2 +-
 arch/um/kernel/initrd.c                       |   6 +-
 arch/x86/Kconfig                              |   2 +-
 arch/x86/boot/header.S                        |   2 +-
 arch/x86/boot/startup/sme.c                   |   2 +-
 arch/x86/configs/i386_defconfig               |   2 +-
 arch/x86/configs/x86_64_defconfig             |   2 +-
 arch/x86/include/uapi/asm/bootparam.h         |   7 +-
 arch/x86/kernel/cpu/microcode/amd.c           |   2 +-
 arch/x86/kernel/cpu/microcode/core.c          |  12 +-
 arch/x86/kernel/cpu/microcode/intel.c         |   2 +-
 arch/x86/kernel/cpu/microcode/internal.h      |   2 +-
 arch/x86/kernel/devicetree.c                  |   2 +-
 arch/x86/kernel/setup.c                       |  39 +-
 arch/x86/mm/init.c                            |   8 +-
 arch/x86/mm/init_32.c                         |   2 +-
 arch/x86/mm/init_64.c                         |   2 +-
 arch/x86/tools/relocs.c                       |   2 +-
 arch/xtensa/Kconfig                           |   2 +-
 arch/xtensa/boot/dts/csp.dts                  |   2 +-
 arch/xtensa/configs/audio_kc705_defconfig     |   2 +-
 arch/xtensa/configs/cadence_csp_defconfig     |  12 +-
 arch/xtensa/configs/generic_kc705_defconfig   |   2 +-
 arch/xtensa/configs/nommu_kc705_defconfig     |  12 +-
 arch/xtensa/configs/smp_lx200_defconfig       |   2 +-
 arch/xtensa/configs/virt_defconfig            |   2 +-
 arch/xtensa/configs/xip_kc705_defconfig       |   2 +-
 arch/xtensa/kernel/setup.c                    |  26 +-
 drivers/acpi/Kconfig                          |   2 +-
 drivers/acpi/tables.c                         |  10 +-
 drivers/base/firmware_loader/main.c           |   2 +-
 drivers/block/Kconfig                         |   8 +-
 drivers/block/brd.c                           |  20 +-
 drivers/firmware/efi/efi.c                    |  10 +-
 .../firmware/efi/libstub/efi-stub-helper.c    |   5 +-
 drivers/gpu/drm/ci/arm.config                 |   2 +-
 drivers/gpu/drm/ci/arm64.config               |   2 +-
 drivers/gpu/drm/ci/x86_64.config              |   2 +-
 drivers/of/fdt.c                              |  18 +-
 fs/ext2/ext2.h                                |   9 -
 fs/init.c                                     |  14 -
 include/asm-generic/vmlinux.lds.h             |   8 +-
 include/linux/ext2_fs.h                       |  13 -
 include/linux/init_syscalls.h                 |   1 -
 include/linux/initramfs.h                     |  26 ++
 include/linux/initrd.h                        |  37 --
 include/linux/root_dev.h                      |   1 -
 include/linux/syscalls.h                      |   1 -
 include/uapi/linux/sysctl.h                   |   1 -
 init/.kunitconfig                             |   2 +-
 init/Kconfig                                  |  28 +-
 init/Makefile                                 |   6 +-
 init/do_mounts.c                              |  28 +-
 init/do_mounts.h                              |  42 --
 init/do_mounts_initrd.c                       | 154 -------
 init/do_mounts_rd.c                           | 334 ---------------
 init/initramfs.c                              | 152 ++++---
 init/main.c                                   |  66 +--
 kernel/sys.c                                  |   7 +-
 kernel/sysctl.c                               |   2 +-
 kernel/umh.c                                  |   2 +-
 scripts/package/builddeb                      |   2 +-
 .../ktest/examples/bootconfigs/tracing.bconf  |   3 -
 tools/testing/selftests/bpf/config.aarch64    |   2 +-
 tools/testing/selftests/bpf/config.ppc64el    |   2 +-
 tools/testing/selftests/bpf/config.riscv64    |   2 +-
 tools/testing/selftests/bpf/config.s390x      |   2 +-
 tools/testing/selftests/kho/vmtest.sh         |   2 +-
 .../testing/selftests/nolibc/Makefile.nolibc  |   4 +-
 tools/testing/selftests/vsock/config          |   2 +-
 .../selftests/wireguard/qemu/kernel.config    |   2 +-
 usr/Kconfig                                   |  70 ++--
 usr/Makefile                                  |   2 +-
 usr/initramfs_data.S                          |   4 +-
 385 files changed, 969 insertions(+), 2346 deletions(-)
 delete mode 100644 Documentation/admin-guide/initrd.rst
 delete mode 100644 Documentation/power/swsusp-dmcrypt.rst
 create mode 100644 include/linux/initramfs.h
 delete mode 100644 include/linux/initrd.h
 delete mode 100644 init/do_mounts_initrd.c
 delete mode 100644 init/do_mounts_rd.c


base-commit: 76eeb9b8de9880ca38696b2fb56ac45ac0a25c6c
-- 
2.47.2


^ permalink raw reply

* [PATCH 01/62] init: remove deprecated "load_ramdisk" command line parameter, which does nothing
From: Askar Safin @ 2025-09-12 22:38 UTC (permalink / raw)
  To: linux-fsdevel, linux-kernel
  Cc: Linus Torvalds, Greg Kroah-Hartman, Christian Brauner, Al Viro,
	Jan Kara, Christoph Hellwig, Jens Axboe, Andy Shevchenko,
	Aleksa Sarai, Thomas Weißschuh, Julian Stecklina, Gao Xiang,
	Art Nikpal, Andrew Morton, Eric Curtin, Alexander Graf,
	Rob Landley, Lennart Poettering, linux-arch, linux-alpha,
	linux-snps-arc, linux-arm-kernel, linux-csky, linux-hexagon,
	loongarch, linux-m68k, linux-mips, linux-openrisc, linux-parisc,
	linuxppc-dev, linux-riscv, linux-s390, linux-sh, sparclinux,
	linux-um, x86, Ingo Molnar, linux-block, initramfs, linux-api,
	linux-doc, linux-efi, linux-ext4, Theodore Y . Ts'o,
	linux-acpi, Michal Simek, devicetree, Luis Chamberlain, Kees Cook,
	Thorsten Blum, Heiko Carstens, patches
In-Reply-To: <20250912223937.3735076-1-safinaskar@zohomail.com>

This is preparation for initrd removal

Signed-off-by: Askar Safin <safinaskar@zohomail.com>
---
 Documentation/admin-guide/kernel-parameters.txt | 2 --
 arch/arm/configs/neponset_defconfig             | 2 +-
 init/do_mounts.c                                | 7 -------
 3 files changed, 1 insertion(+), 10 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 747a55abf494..d3b05ce249ff 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3275,8 +3275,6 @@
 			If there are multiple matching configurations changing
 			the same attribute, the last one is used.
 
-	load_ramdisk=	[RAM] [Deprecated]
-
 	lockd.nlm_grace_period=P  [NFS] Assign grace period.
 			Format: <integer>
 
diff --git a/arch/arm/configs/neponset_defconfig b/arch/arm/configs/neponset_defconfig
index 2227f86100ad..16f7300239da 100644
--- a/arch/arm/configs/neponset_defconfig
+++ b/arch/arm/configs/neponset_defconfig
@@ -9,7 +9,7 @@ CONFIG_ASSABET_NEPONSET=y
 CONFIG_ZBOOT_ROM_TEXT=0x80000
 CONFIG_ZBOOT_ROM_BSS=0xc1000000
 CONFIG_ZBOOT_ROM=y
-CONFIG_CMDLINE="console=ttySA0,38400n8 cpufreq=221200 rw root=/dev/mtdblock2 mtdparts=sa1100:512K(boot),1M(kernel),2560K(initrd),4M(root) load_ramdisk=1 prompt_ramdisk=0 mem=32M noinitrd initrd=0xc0800000,3M"
+CONFIG_CMDLINE="console=ttySA0,38400n8 cpufreq=221200 rw root=/dev/mtdblock2 mtdparts=sa1100:512K(boot),1M(kernel),2560K(initrd),4M(root) prompt_ramdisk=0 mem=32M noinitrd initrd=0xc0800000,3M"
 CONFIG_FPE_NWFPE=y
 CONFIG_PM=y
 CONFIG_MODULES=y
diff --git a/init/do_mounts.c b/init/do_mounts.c
index 6af29da8889e..0f2f44e6250c 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -34,13 +34,6 @@ static int root_wait;
 
 dev_t ROOT_DEV;
 
-static int __init load_ramdisk(char *str)
-{
-	pr_warn("ignoring the deprecated load_ramdisk= option\n");
-	return 1;
-}
-__setup("load_ramdisk=", load_ramdisk);
-
 static int __init readonly(char *str)
 {
 	if (*str)
-- 
2.47.2


^ permalink raw reply related

* [PATCH 02/62] init: remove deprecated "prompt_ramdisk" command line parameter, which does nothing
From: Askar Safin @ 2025-09-12 22:38 UTC (permalink / raw)
  To: linux-fsdevel, linux-kernel
  Cc: Linus Torvalds, Greg Kroah-Hartman, Christian Brauner, Al Viro,
	Jan Kara, Christoph Hellwig, Jens Axboe, Andy Shevchenko,
	Aleksa Sarai, Thomas Weißschuh, Julian Stecklina, Gao Xiang,
	Art Nikpal, Andrew Morton, Eric Curtin, Alexander Graf,
	Rob Landley, Lennart Poettering, linux-arch, linux-alpha,
	linux-snps-arc, linux-arm-kernel, linux-csky, linux-hexagon,
	loongarch, linux-m68k, linux-mips, linux-openrisc, linux-parisc,
	linuxppc-dev, linux-riscv, linux-s390, linux-sh, sparclinux,
	linux-um, x86, Ingo Molnar, linux-block, initramfs, linux-api,
	linux-doc, linux-efi, linux-ext4, Theodore Y . Ts'o,
	linux-acpi, Michal Simek, devicetree, Luis Chamberlain, Kees Cook,
	Thorsten Blum, Heiko Carstens, patches
In-Reply-To: <20250912223937.3735076-1-safinaskar@zohomail.com>

This is preparation for initrd removal

Signed-off-by: Askar Safin <safinaskar@zohomail.com>
---
 Documentation/admin-guide/kernel-parameters.txt | 2 --
 arch/arm/configs/neponset_defconfig             | 2 +-
 init/do_mounts_rd.c                             | 7 -------
 3 files changed, 1 insertion(+), 10 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index d3b05ce249ff..f940c1184912 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -5229,8 +5229,6 @@
 			Param: <number> - step/bucket size as a power of 2 for
 				statistical time based profiling.
 
-	prompt_ramdisk=	[RAM] [Deprecated]
-
 	prot_virt=	[S390] enable hosting protected virtual machines
 			isolated from the hypervisor (if hardware supports
 			that). If enabled, the default kernel base address
diff --git a/arch/arm/configs/neponset_defconfig b/arch/arm/configs/neponset_defconfig
index 16f7300239da..4d720001c12e 100644
--- a/arch/arm/configs/neponset_defconfig
+++ b/arch/arm/configs/neponset_defconfig
@@ -9,7 +9,7 @@ CONFIG_ASSABET_NEPONSET=y
 CONFIG_ZBOOT_ROM_TEXT=0x80000
 CONFIG_ZBOOT_ROM_BSS=0xc1000000
 CONFIG_ZBOOT_ROM=y
-CONFIG_CMDLINE="console=ttySA0,38400n8 cpufreq=221200 rw root=/dev/mtdblock2 mtdparts=sa1100:512K(boot),1M(kernel),2560K(initrd),4M(root) prompt_ramdisk=0 mem=32M noinitrd initrd=0xc0800000,3M"
+CONFIG_CMDLINE="console=ttySA0,38400n8 cpufreq=221200 rw root=/dev/mtdblock2 mtdparts=sa1100:512K(boot),1M(kernel),2560K(initrd),4M(root) mem=32M noinitrd initrd=0xc0800000,3M"
 CONFIG_FPE_NWFPE=y
 CONFIG_PM=y
 CONFIG_MODULES=y
diff --git a/init/do_mounts_rd.c b/init/do_mounts_rd.c
index ac021ae6e6fa..f7d53bc21e41 100644
--- a/init/do_mounts_rd.c
+++ b/init/do_mounts_rd.c
@@ -17,13 +17,6 @@
 static struct file *in_file, *out_file;
 static loff_t in_pos, out_pos;
 
-static int __init prompt_ramdisk(char *str)
-{
-	pr_warn("ignoring the deprecated prompt_ramdisk= option\n");
-	return 1;
-}
-__setup("prompt_ramdisk=", prompt_ramdisk);
-
 int __initdata rd_image_start;		/* starting block # of image */
 
 static int __init ramdisk_start_setup(char *str)
-- 
2.47.2


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox