From: Christian Brauner <brauner@kernel.org>
To: linux-fsdevel@vger.kernel.org,
Linus Torvalds <torvalds@linux-foundation.org>
Cc: linux-kernel@vger.kernel.org,
Alexander Viro <viro@zeniv.linux.org.uk>,
Jens Axboe <axboe@kernel.dk>, Jan Kara <jack@suse.cz>,
Tejun Heo <tj@kernel.org>, Jann Horn <jannh@google.com>,
Christian Brauner <brauner@kernel.org>
Subject: [PATCH RFC v2 16/23] fs: make userspace_init_fs a dynamically-initialized pointer
Date: Fri, 06 Mar 2026 00:30:19 +0100 [thread overview]
Message-ID: <20260306-work-kthread-nullfs-v2-16-ad1b4bed7d3e@kernel.org> (raw)
In-Reply-To: <20260306-work-kthread-nullfs-v2-0-ad1b4bed7d3e@kernel.org>
Change userspace_init_fs from a declared-but-unused extern struct to
a dynamically initialized pointer. Add init_userspace_fs() which is
called early in kernel_init() (PID 1) to record PID 1's fs_struct
as the canonical userspace filesystem state.
Wire up __override_init_fs() and __revert_init_fs() to actually swap
current->fs to/from userspace_init_fs. Previously these were no-ops
that stored current->fs back to itself.
Fix nullfs_userspace_init() to compare against userspace_init_fs
instead of &init_fs. When PID 1 unshares its filesystem state, revert
userspace_init_fs to init_fs's root (nullfs) so that stale filesystem
state is not silently inherited by kworkers and usermodehelpers.
At this stage PID 1's fs still points to rootfs (set by
init_mount_tree), so userspace_init_fs points to rootfs and
scoped_with_init_fs() is functionally equivalent to its previous no-op
behavior.
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
fs/fs_struct.c | 46 +++++++++++++++++++++++++++++++++++++++++++++-
include/linux/fs_struct.h | 5 +++--
include/linux/init_task.h | 1 +
init/main.c | 3 +++
4 files changed, 52 insertions(+), 3 deletions(-)
diff --git a/fs/fs_struct.c b/fs/fs_struct.c
index b9b9a327f299..c1afa7513e34 100644
--- a/fs/fs_struct.c
+++ b/fs/fs_struct.c
@@ -8,6 +8,7 @@
#include <linux/fs_struct.h>
#include <linux/init_task.h>
#include "internal.h"
+#include "mount.h"
/*
* Replace the fs->{rootmnt,root} with {mnt,dentry}. Put the old values.
@@ -163,15 +164,32 @@ EXPORT_SYMBOL_GPL(unshare_fs_struct);
* fs_struct state. Breaking that contract sucks for both sides.
* So just don't bother with extra work for this. No sane init
* system should ever do this.
+ *
+ * On older kernels, if PID 1 unshared its filesystem state with us, the
+ * kernel simply kept using the stale fs_struct state, implicitly pinning
+ * anything that PID 1 had last used - even if PID 1 had moved on to
+ * some completely different fs_struct state and had even unmounted
+ * the old root.
+ *
+ * This has hilarious consequences: think continuing to dump coredump
+ * state into an implicitly pinned directory somewhere, or calling random
+ * binaries in the old rootfs via usermodehelpers.
+ *
+ * Be aggressive about this: We simply reject operating on stale
+ * fs_struct state by reverting to nullfs. Every kworker that does
+ * lookups after this point will fail. Every usermodehelper call will
+ * fail. Tough luck but let's be kind and emit a warning to userspace.
*/
static inline void nullfs_userspace_init(struct fs_struct *old_fs)
{
if (likely(current->pid != 1))
return;
/* @old_fs may be dangling but for comparison it's fine */
- if (old_fs != &init_fs)
+ if (old_fs != userspace_init_fs)
return;
pr_warn("VFS: Pid 1 stopped sharing filesystem state\n");
+ set_fs_root(userspace_init_fs, &init_fs.root);
+ set_fs_pwd(userspace_init_fs, &init_fs.root);
}
struct fs_struct *switch_fs_struct(struct fs_struct *new_fs)
@@ -198,3 +216,29 @@ struct fs_struct init_fs = {
.seq = __SEQLOCK_UNLOCKED(init_fs.seq),
.umask = 0022,
};
+
+struct fs_struct *userspace_init_fs __ro_after_init;
+EXPORT_SYMBOL_GPL(userspace_init_fs);
+
+void __init init_userspace_fs(void)
+{
+ struct mount *m;
+ struct path root;
+
+ /* Move PID 1 from nullfs into the initramfs. */
+ m = topmost_overmount(current->nsproxy->mnt_ns->root);
+ root.mnt = &m->mnt;
+ root.dentry = root.mnt->mnt_root;
+
+ VFS_WARN_ON_ONCE(current->pid != 1);
+
+ set_fs_root(current->fs, &root);
+ set_fs_pwd(current->fs, &root);
+
+ /* Hold a reference for the global pointer. */
+ read_seqlock_excl(&current->fs->seq);
+ current->fs->users++;
+ read_sequnlock_excl(&current->fs->seq);
+
+ userspace_init_fs = current->fs;
+}
diff --git a/include/linux/fs_struct.h b/include/linux/fs_struct.h
index ff525a1e45d4..51d335924029 100644
--- a/include/linux/fs_struct.h
+++ b/include/linux/fs_struct.h
@@ -17,6 +17,7 @@ struct fs_struct {
} __randomize_layout;
extern struct kmem_cache *fs_cachep;
+extern struct fs_struct *userspace_init_fs;
extern void exit_fs(struct task_struct *);
extern void set_fs_root(struct fs_struct *, const struct path *);
@@ -60,13 +61,13 @@ static inline struct fs_struct *__override_init_fs(void)
struct fs_struct *fs;
fs = current->fs;
- smp_store_release(&current->fs, current->fs);
+ smp_store_release(&current->fs, userspace_init_fs);
return fs;
}
static inline void __revert_init_fs(struct fs_struct *revert_fs)
{
- VFS_WARN_ON_ONCE(current->fs != current->fs);
+ VFS_WARN_ON_ONCE(current->fs != userspace_init_fs);
smp_store_release(&current->fs, revert_fs);
}
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index a6cb241ea00c..61536be773f5 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -24,6 +24,7 @@
extern struct files_struct init_files;
extern struct fs_struct init_fs;
+extern struct fs_struct *userspace_init_fs;
extern struct nsproxy init_nsproxy;
#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
diff --git a/init/main.c b/init/main.c
index 1cb395dd94e4..5ccc642a5aa7 100644
--- a/init/main.c
+++ b/init/main.c
@@ -102,6 +102,7 @@
#include <linux/stackdepot.h>
#include <linux/randomize_kstack.h>
#include <linux/pidfs.h>
+#include <linux/fs_struct.h>
#include <linux/ptdump.h>
#include <linux/time_namespace.h>
#include <linux/unaligned.h>
@@ -1574,6 +1575,8 @@ static int __ref kernel_init(void *unused)
{
int ret;
+ init_userspace_fs();
+
/*
* Wait until kthreadd is all set-up.
*/
--
2.47.3
next prev parent reply other threads:[~2026-03-05 23:31 UTC|newest]
Thread overview: 40+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-03-05 23:30 [PATCH RFC v2 00/23] fs,kthread: start all kthreads in nullfs Christian Brauner
2026-03-05 23:30 ` [PATCH RFC v2 01/23] fs: notice when init abandons fs sharing Christian Brauner
2026-03-10 16:03 ` Christian Brauner
2026-03-05 23:30 ` [PATCH RFC v2 02/23] fs: add scoped_with_init_fs() Christian Brauner
2026-03-09 15:19 ` Jann Horn
2026-03-10 11:30 ` Christian Brauner
2026-03-05 23:30 ` [PATCH RFC v2 03/23] rnbd: use scoped_with_init_fs() for block device open Christian Brauner
2026-03-05 23:30 ` [PATCH RFC v2 04/23] crypto: ccp: use scoped_with_init_fs() for SEV file access Christian Brauner
2026-03-09 15:37 ` Jann Horn
2026-03-10 11:33 ` Christian Brauner
2026-03-05 23:30 ` [PATCH RFC v2 05/23] scsi: target: use scoped_with_init_fs() for ALUA metadata Christian Brauner
2026-03-05 23:30 ` [PATCH RFC v2 06/23] scsi: target: use scoped_with_init_fs() for APTPL metadata Christian Brauner
2026-03-05 23:30 ` [PATCH RFC v2 07/23] btrfs: use scoped_with_init_fs() for update_dev_time() Christian Brauner
2026-03-05 23:30 ` [PATCH RFC v2 08/23] coredump: use scoped_with_init_fs() for coredump path resolution Christian Brauner
2026-03-05 23:30 ` [PATCH RFC v2 09/23] fs: use scoped_with_init_fs() for kernel_read_file_from_path_initns() Christian Brauner
2026-03-05 23:30 ` [PATCH RFC v2 10/23] ksmbd: use scoped_with_init_fs() for share path resolution Christian Brauner
2026-03-05 23:30 ` [PATCH RFC v2 11/23] ksmbd: use scoped_with_init_fs() for filesystem info path lookup Christian Brauner
2026-03-05 23:30 ` [PATCH RFC v2 12/23] ksmbd: use scoped_with_init_fs() for VFS path operations Christian Brauner
2026-03-05 23:30 ` [PATCH RFC v2 13/23] initramfs: use scoped_with_init_fs() for rootfs unpacking Christian Brauner
2026-03-05 23:30 ` [PATCH RFC v2 14/23] af_unix: use scoped_with_init_fs() for coredump socket lookup Christian Brauner
2026-03-05 23:30 ` [PATCH RFC v2 15/23] fs: add real_fs to track task's actual fs_struct Christian Brauner
2026-03-07 0:51 ` Askar Safin
2026-03-09 15:14 ` Jann Horn
2026-03-10 11:29 ` Christian Brauner
2026-03-10 16:05 ` Christian Brauner
2026-03-05 23:30 ` Christian Brauner [this message]
2026-03-05 23:30 ` [PATCH RFC v2 17/23] fs: stop sharing fs_struct between init_task and pid 1 Christian Brauner
2026-03-05 23:30 ` [PATCH RFC v2 18/23] fs: add umh argument to struct kernel_clone_args Christian Brauner
2026-03-09 16:06 ` Jann Horn
2026-03-10 11:58 ` Christian Brauner
2026-03-05 23:30 ` [PATCH RFC v2 19/23] fs: add kthread_mntns() Christian Brauner
2026-03-07 2:04 ` Askar Safin
2026-03-05 23:30 ` [PATCH RFC v2 20/23] devtmpfs: create private mount namespace Christian Brauner
2026-03-05 23:30 ` [PATCH RFC v2 21/23] nullfs: make nullfs multi-instance Christian Brauner
2026-03-05 23:30 ` [PATCH RFC v2 22/23] fs: start all kthreads in nullfs Christian Brauner
2026-03-07 22:17 ` Askar Safin
2026-03-05 23:30 ` [PATCH RFC v2 23/23] fs: stop rewriting kthread fs structs Christian Brauner
2026-03-07 2:19 ` [PATCH RFC v2 00/23] fs,kthread: start all kthreads in nullfs Askar Safin
2026-03-09 16:50 ` Jann Horn
2026-03-10 12:54 ` Christian Brauner
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260306-work-kthread-nullfs-v2-16-ad1b4bed7d3e@kernel.org \
--to=brauner@kernel.org \
--cc=axboe@kernel.dk \
--cc=jack@suse.cz \
--cc=jannh@google.com \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=tj@kernel.org \
--cc=torvalds@linux-foundation.org \
--cc=viro@zeniv.linux.org.uk \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.