public inbox for linux-fsdevel@vger.kernel.org
 help / color / mirror / Atom feed
From: Christian Brauner <brauner@kernel.org>
To: linux-fsdevel@vger.kernel.org,
	 Linus Torvalds <torvalds@linux-foundation.org>
Cc: linux-kernel@vger.kernel.org,
	Alexander Viro <viro@zeniv.linux.org.uk>,
	 Jens Axboe <axboe@kernel.dk>, Jan Kara <jack@suse.cz>,
	 Tejun Heo <tj@kernel.org>, Jann Horn <jannh@google.com>,
	 Christian Brauner <brauner@kernel.org>
Subject: [PATCH RFC v2 16/23] fs: make userspace_init_fs a dynamically-initialized pointer
Date: Fri, 06 Mar 2026 00:30:19 +0100	[thread overview]
Message-ID: <20260306-work-kthread-nullfs-v2-16-ad1b4bed7d3e@kernel.org> (raw)
In-Reply-To: <20260306-work-kthread-nullfs-v2-0-ad1b4bed7d3e@kernel.org>

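Change userspace_init_fs from a declared-but-unused extern struct to
a dynamically initialized pointer. Add init_userspace_fs(), which is
called early in kernel_init() (PID 1). It points PID 1's root and pwd
at the topmost overmount of the mount namespace's root mount and then
records PID 1's fs_struct, with an extra reference held, as the
canonical userspace filesystem state.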

Wire up __override_init_fs() and __revert_init_fs() to actually swap
current->fs to/from userspace_init_fs. Previously these were no-ops
that stored current->fs back to itself.

Fix nullfs_userspace_init() to compare against userspace_init_fs
instead of &init_fs. When PID 1 unshares its filesystem state, revert
userspace_init_fs to init_fs's root (nullfs) so that stale filesystem
state is not silently inherited by kworkers and usermodehelpers.

At this stage PID 1's fs still points to rootfs (set by
init_mount_tree), so userspace_init_fs points to rootfs and
scoped_with_init_fs() is functionally equivalent to its previous no-op
behavior.

Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/fs_struct.c            | 46 +++++++++++++++++++++++++++++++++++++++++++++-
 include/linux/fs_struct.h |  5 +++--
 include/linux/init_task.h |  1 +
 init/main.c               |  3 +++
 4 files changed, 52 insertions(+), 3 deletions(-)

diff --git a/fs/fs_struct.c b/fs/fs_struct.c
index b9b9a327f299..c1afa7513e34 100644
--- a/fs/fs_struct.c
+++ b/fs/fs_struct.c
@@ -8,6 +8,7 @@
 #include <linux/fs_struct.h>
 #include <linux/init_task.h>
 #include "internal.h"
+#include "mount.h"
 
 /*
  * Replace the fs->{rootmnt,root} with {mnt,dentry}. Put the old values.
@@ -163,15 +164,32 @@ EXPORT_SYMBOL_GPL(unshare_fs_struct);
  * fs_struct state. Breaking that contract sucks for both sides.
  * So just don't bother with extra work for this. No sane init
  * system should ever do this.
+ *
+ * On older kernels, if PID 1 stopped sharing its filesystem state with us,
+ * the kernel simply kept using the stale fs_struct state, implicitly
+ * pinning whatever PID 1 had last used - even if PID 1 had moved on to
+ * some completely different fs_struct state and had even unmounted the
+ * old root.
+ *
+ * This has hilarious consequences: think of coredumps still being dumped
+ * into an implicitly pinned directory somewhere, or of usermodehelpers
+ * executing random binaries from the old rootfs.
+ *
+ * Be aggressive about this: we simply refuse to operate on stale
+ * fs_struct state by reverting to nullfs. Every lookup a kworker does
+ * after this point will fail. Every usermodehelper call will fail. Tough
+ * luck, but let's be kind and emit a warning to userspace.
  */
 static inline void nullfs_userspace_init(struct fs_struct *old_fs)
 {
 	if (likely(current->pid != 1))
 		return;
 	/* @old_fs may be dangling but for comparison it's fine */
-	if (old_fs != &init_fs)
+	if (old_fs != userspace_init_fs)
 		return;
 	pr_warn("VFS: Pid 1 stopped sharing filesystem state\n");
+	set_fs_root(userspace_init_fs, &init_fs.root);
+	set_fs_pwd(userspace_init_fs, &init_fs.root);
 }
 
 struct fs_struct *switch_fs_struct(struct fs_struct *new_fs)
@@ -198,3 +216,29 @@ struct fs_struct init_fs = {
 	.seq		= __SEQLOCK_UNLOCKED(init_fs.seq),
 	.umask		= 0022,
 };
+
+struct fs_struct *userspace_init_fs __ro_after_init;
+EXPORT_SYMBOL_GPL(userspace_init_fs);
+
+void __init init_userspace_fs(void)
+{
+	struct mount *m;
+	struct path root;
+
+	/* Move PID 1 from nullfs into the initramfs. */
+	m = topmost_overmount(current->nsproxy->mnt_ns->root);
+	root.mnt = &m->mnt;
+	root.dentry = root.mnt->mnt_root;
+
+	VFS_WARN_ON_ONCE(current->pid != 1);
+
+	set_fs_root(current->fs, &root);
+	set_fs_pwd(current->fs, &root);
+
+	/* Hold a reference for the global pointer. */
+	read_seqlock_excl(&current->fs->seq);
+	current->fs->users++;
+	read_sequnlock_excl(&current->fs->seq);
+
+	userspace_init_fs = current->fs;
+}
diff --git a/include/linux/fs_struct.h b/include/linux/fs_struct.h
index ff525a1e45d4..51d335924029 100644
--- a/include/linux/fs_struct.h
+++ b/include/linux/fs_struct.h
@@ -17,6 +17,7 @@ struct fs_struct {
 } __randomize_layout;
 
 extern struct kmem_cache *fs_cachep;
+extern struct fs_struct *userspace_init_fs;
 
 extern void exit_fs(struct task_struct *);
 extern void set_fs_root(struct fs_struct *, const struct path *);
@@ -60,13 +61,13 @@ static inline struct fs_struct *__override_init_fs(void)
 	struct fs_struct *fs;
 
 	fs = current->fs;
-	smp_store_release(&current->fs, current->fs);
+	smp_store_release(&current->fs, userspace_init_fs);
 	return fs;
 }
 
 static inline void __revert_init_fs(struct fs_struct *revert_fs)
 {
-	VFS_WARN_ON_ONCE(current->fs != current->fs);
+	VFS_WARN_ON_ONCE(current->fs != userspace_init_fs);
 	smp_store_release(&current->fs, revert_fs);
 }
 
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index a6cb241ea00c..61536be773f5 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -24,6 +24,7 @@
 
 extern struct files_struct init_files;
 extern struct fs_struct init_fs;
+extern struct fs_struct *userspace_init_fs;
 extern struct nsproxy init_nsproxy;
 
 #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
diff --git a/init/main.c b/init/main.c
index 1cb395dd94e4..5ccc642a5aa7 100644
--- a/init/main.c
+++ b/init/main.c
@@ -102,6 +102,7 @@
 #include <linux/stackdepot.h>
 #include <linux/randomize_kstack.h>
 #include <linux/pidfs.h>
+#include <linux/fs_struct.h>
 #include <linux/ptdump.h>
 #include <linux/time_namespace.h>
 #include <linux/unaligned.h>
@@ -1574,6 +1575,8 @@ static int __ref kernel_init(void *unused)
 {
 	int ret;
 
+	init_userspace_fs();
+
 	/*
 	 * Wait until kthreadd is all set-up.
 	 */

-- 
2.47.3


  parent reply	other threads:[~2026-03-05 23:31 UTC|newest]

Thread overview: 40+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-03-05 23:30 [PATCH RFC v2 00/23] fs,kthread: start all kthreads in nullfs Christian Brauner
2026-03-05 23:30 ` [PATCH RFC v2 01/23] fs: notice when init abandons fs sharing Christian Brauner
2026-03-10 16:03   ` Christian Brauner
2026-03-05 23:30 ` [PATCH RFC v2 02/23] fs: add scoped_with_init_fs() Christian Brauner
2026-03-09 15:19   ` Jann Horn
2026-03-10 11:30     ` Christian Brauner
2026-03-05 23:30 ` [PATCH RFC v2 03/23] rnbd: use scoped_with_init_fs() for block device open Christian Brauner
2026-03-05 23:30 ` [PATCH RFC v2 04/23] crypto: ccp: use scoped_with_init_fs() for SEV file access Christian Brauner
2026-03-09 15:37   ` Jann Horn
2026-03-10 11:33     ` Christian Brauner
2026-03-05 23:30 ` [PATCH RFC v2 05/23] scsi: target: use scoped_with_init_fs() for ALUA metadata Christian Brauner
2026-03-05 23:30 ` [PATCH RFC v2 06/23] scsi: target: use scoped_with_init_fs() for APTPL metadata Christian Brauner
2026-03-05 23:30 ` [PATCH RFC v2 07/23] btrfs: use scoped_with_init_fs() for update_dev_time() Christian Brauner
2026-03-05 23:30 ` [PATCH RFC v2 08/23] coredump: use scoped_with_init_fs() for coredump path resolution Christian Brauner
2026-03-05 23:30 ` [PATCH RFC v2 09/23] fs: use scoped_with_init_fs() for kernel_read_file_from_path_initns() Christian Brauner
2026-03-05 23:30 ` [PATCH RFC v2 10/23] ksmbd: use scoped_with_init_fs() for share path resolution Christian Brauner
2026-03-05 23:30 ` [PATCH RFC v2 11/23] ksmbd: use scoped_with_init_fs() for filesystem info path lookup Christian Brauner
2026-03-05 23:30 ` [PATCH RFC v2 12/23] ksmbd: use scoped_with_init_fs() for VFS path operations Christian Brauner
2026-03-05 23:30 ` [PATCH RFC v2 13/23] initramfs: use scoped_with_init_fs() for rootfs unpacking Christian Brauner
2026-03-05 23:30 ` [PATCH RFC v2 14/23] af_unix: use scoped_with_init_fs() for coredump socket lookup Christian Brauner
2026-03-05 23:30 ` [PATCH RFC v2 15/23] fs: add real_fs to track task's actual fs_struct Christian Brauner
2026-03-07  0:51   ` Askar Safin
2026-03-09 15:14   ` Jann Horn
2026-03-10 11:29     ` Christian Brauner
2026-03-10 16:05       ` Christian Brauner
2026-03-05 23:30 ` Christian Brauner [this message]
2026-03-05 23:30 ` [PATCH RFC v2 17/23] fs: stop sharing fs_struct between init_task and pid 1 Christian Brauner
2026-03-05 23:30 ` [PATCH RFC v2 18/23] fs: add umh argument to struct kernel_clone_args Christian Brauner
2026-03-09 16:06   ` Jann Horn
2026-03-10 11:58     ` Christian Brauner
2026-03-05 23:30 ` [PATCH RFC v2 19/23] fs: add kthread_mntns() Christian Brauner
2026-03-07  2:04   ` Askar Safin
2026-03-05 23:30 ` [PATCH RFC v2 20/23] devtmpfs: create private mount namespace Christian Brauner
2026-03-05 23:30 ` [PATCH RFC v2 21/23] nullfs: make nullfs multi-instance Christian Brauner
2026-03-05 23:30 ` [PATCH RFC v2 22/23] fs: start all kthreads in nullfs Christian Brauner
2026-03-07 22:17   ` Askar Safin
2026-03-05 23:30 ` [PATCH RFC v2 23/23] fs: stop rewriting kthread fs structs Christian Brauner
2026-03-07  2:19 ` [PATCH RFC v2 00/23] fs,kthread: start all kthreads in nullfs Askar Safin
2026-03-09 16:50 ` Jann Horn
2026-03-10 12:54   ` Christian Brauner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260306-work-kthread-nullfs-v2-16-ad1b4bed7d3e@kernel.org \
    --to=brauner@kernel.org \
    --cc=axboe@kernel.dk \
    --cc=jack@suse.cz \
    --cc=jannh@google.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=tj@kernel.org \
    --cc=torvalds@linux-foundation.org \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox