From: Christian Brauner <brauner@kernel.org>
To: linux-fsdevel@vger.kernel.org
Cc: Linus Torvalds <torvalds@linux-foundation.org>,
linux-kernel@vger.kernel.org,
Alexander Viro <viro@zeniv.linux.org.uk>,
Jens Axboe <axboe@kernel.dk>, Jan Kara <jack@suse.cz>,
Tejun Heo <tj@kernel.org>, Jann Horn <jannh@google.com>,
Christian Brauner <brauner@kernel.org>
Subject: [PATCH RFC v3 04/26] fs: add real_fs to track task's actual fs_struct
Date: Wed, 11 Mar 2026 22:43:47 +0100 [thread overview]
Message-ID: <20260311-work-kthread-nullfs-v3-4-3dd2cbe92ad0@kernel.org> (raw)
In-Reply-To: <20260311-work-kthread-nullfs-v3-0-3dd2cbe92ad0@kernel.org>
Add a real_fs field to task_struct that always mirrors the fs field.
This lays the groundwork for distinguishing between a task's permanent
fs_struct and one that is temporarily overridden via scoped_with_init_fs().
When a kthread temporarily overrides current->fs for path lookup, we
need to know the original fs_struct for operations like exit_fs() and
unshare_fs_struct() that must operate on the real, permanent fs.
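For now, real_fs is always equal to fs. It is maintained alongside fs in
all the relevant paths: exit_fs(), unshare_fs_struct(),
switch_fs_struct(), and copy_fs(). Code that inspects a task's
fs_struct (chroot_fs_refs(), procfs, and kcmp) is switched to read
real_fs instead of fs.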
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
fs/fs_struct.c | 11 ++++++++---
fs/proc/array.c | 4 ++--
fs/proc/base.c | 8 ++++----
fs/proc_namespace.c | 4 ++--
include/linux/sched.h | 1 +
init/init_task.c | 1 +
kernel/fork.c | 8 +++++++-
kernel/kcmp.c | 2 +-
8 files changed, 26 insertions(+), 13 deletions(-)
diff --git a/fs/fs_struct.c b/fs/fs_struct.c
index fcecf209f1a9..c03a574ed65a 100644
--- a/fs/fs_struct.c
+++ b/fs/fs_struct.c
@@ -61,7 +61,7 @@ void chroot_fs_refs(const struct path *old_root, const struct path *new_root)
read_lock(&tasklist_lock);
for_each_process_thread(g, p) {
task_lock(p);
- fs = p->fs;
+ fs = p->real_fs;
if (fs) {
int hits = 0;
write_seqlock(&fs->seq);
@@ -89,12 +89,13 @@ void free_fs_struct(struct fs_struct *fs)
void exit_fs(struct task_struct *tsk)
{
- struct fs_struct *fs = tsk->fs;
+ struct fs_struct *fs = tsk->real_fs;
if (fs) {
int kill;
task_lock(tsk);
read_seqlock_excl(&fs->seq);
+ tsk->real_fs = NULL;
tsk->fs = NULL;
kill = !--fs->users;
read_sequnlock_excl(&fs->seq);
@@ -126,7 +127,7 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old)
int unshare_fs_struct(void)
{
- struct fs_struct *fs = current->fs;
+ struct fs_struct *fs = current->real_fs;
struct fs_struct *new_fs = copy_fs_struct(fs);
int kill;
@@ -135,8 +136,10 @@ int unshare_fs_struct(void)
task_lock(current);
read_seqlock_excl(&fs->seq);
+ VFS_WARN_ON_ONCE(fs != current->fs);
kill = !--fs->users;
current->fs = new_fs;
+ current->real_fs = new_fs;
read_sequnlock_excl(&fs->seq);
task_unlock(current);
@@ -177,8 +180,10 @@ struct fs_struct *switch_fs_struct(struct fs_struct *new_fs)
scoped_guard(task_lock, current) {
fs = current->fs;
+ VFS_WARN_ON_ONCE(fs != current->real_fs);
read_seqlock_excl(&fs->seq);
current->fs = new_fs;
+ current->real_fs = new_fs;
if (--fs->users)
new_fs = NULL;
else
diff --git a/fs/proc/array.c b/fs/proc/array.c
index f447e734612a..10d792b8f170 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -168,8 +168,8 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
cred = get_task_cred(p);
task_lock(p);
- if (p->fs)
- umask = p->fs->umask;
+ if (p->real_fs)
+ umask = p->real_fs->umask;
if (p->files)
max_fds = files_fdtable(p->files)->max_fds;
task_unlock(p);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 4c863d17dfb4..28067e77b820 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -210,8 +210,8 @@ static int get_task_root(struct task_struct *task, struct path *root)
int result = -ENOENT;
task_lock(task);
- if (task->fs) {
- get_fs_root(task->fs, root);
+ if (task->real_fs) {
+ get_fs_root(task->real_fs, root);
result = 0;
}
task_unlock(task);
@@ -225,8 +225,8 @@ static int proc_cwd_link(struct dentry *dentry, struct path *path)
if (task) {
task_lock(task);
- if (task->fs) {
- get_fs_pwd(task->fs, path);
+ if (task->real_fs) {
+ get_fs_pwd(task->real_fs, path);
result = 0;
}
task_unlock(task);
diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c
index 5c555db68aa2..036356c0a55b 100644
--- a/fs/proc_namespace.c
+++ b/fs/proc_namespace.c
@@ -254,13 +254,13 @@ static int mounts_open_common(struct inode *inode, struct file *file,
}
ns = nsp->mnt_ns;
get_mnt_ns(ns);
- if (!task->fs) {
+ if (!task->real_fs) {
task_unlock(task);
put_task_struct(task);
ret = -ENOENT;
goto err_put_ns;
}
- get_fs_root(task->fs, &root);
+ get_fs_root(task->real_fs, &root);
task_unlock(task);
put_task_struct(task);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index a7b4a980eb2f..5c7b9df92ebb 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1179,6 +1179,7 @@ struct task_struct {
unsigned long last_switch_time;
#endif
/* Filesystem information: */
+ struct fs_struct *real_fs;
struct fs_struct *fs;
/* Open file information: */
diff --git a/init/init_task.c b/init/init_task.c
index 5c838757fc10..7d0b4a5927eb 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -152,6 +152,7 @@ struct task_struct init_task __aligned(L1_CACHE_BYTES) = {
RCU_POINTER_INITIALIZER(cred, &init_cred),
.comm = INIT_TASK_COMM,
.thread = INIT_THREAD,
+ .real_fs = &init_fs,
.fs = &init_fs,
.files = &init_files,
#ifdef CONFIG_IO_URING
diff --git a/kernel/fork.c b/kernel/fork.c
index 67e57ee44548..154703cf7d3d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1593,6 +1593,8 @@ static int copy_mm(u64 clone_flags, struct task_struct *tsk)
static int copy_fs(u64 clone_flags, struct task_struct *tsk)
{
struct fs_struct *fs = current->fs;
+
+ VFS_WARN_ON_ONCE(current->fs != current->real_fs);
if (clone_flags & CLONE_FS) {
/* tsk->fs is already what we want */
read_seqlock_excl(&fs->seq);
@@ -1605,7 +1607,7 @@ static int copy_fs(u64 clone_flags, struct task_struct *tsk)
read_sequnlock_excl(&fs->seq);
return 0;
}
- tsk->fs = copy_fs_struct(fs);
+ tsk->real_fs = tsk->fs = copy_fs_struct(fs);
if (!tsk->fs)
return -ENOMEM;
return 0;
@@ -3152,6 +3154,10 @@ int ksys_unshare(unsigned long unshare_flags)
if (unshare_flags & CLONE_NEWNS)
unshare_flags |= CLONE_FS;
+ /* No unsharing with overridden fs state */
+ VFS_WARN_ON_ONCE(unshare_flags & (CLONE_NEWNS | CLONE_FS) &&
+ current->fs != current->real_fs);
+
err = check_unshare_flags(unshare_flags);
if (err)
goto bad_unshare_out;
diff --git a/kernel/kcmp.c b/kernel/kcmp.c
index 7c1a65bd5f8d..76476aeee067 100644
--- a/kernel/kcmp.c
+++ b/kernel/kcmp.c
@@ -186,7 +186,7 @@ SYSCALL_DEFINE5(kcmp, pid_t, pid1, pid_t, pid2, int, type,
ret = kcmp_ptr(task1->files, task2->files, KCMP_FILES);
break;
case KCMP_FS:
- ret = kcmp_ptr(task1->fs, task2->fs, KCMP_FS);
+ ret = kcmp_ptr(task1->real_fs, task2->real_fs, KCMP_FS);
break;
case KCMP_SIGHAND:
ret = kcmp_ptr(task1->sighand, task2->sighand, KCMP_SIGHAND);
--
2.47.3
next prev parent reply other threads:[~2026-03-11 21:56 UTC|newest]
Thread overview: 28+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-03-11 21:43 [PATCH RFC v3 00/26] fs,kthread: start all kthreads in nullfs Christian Brauner
2026-03-11 21:43 ` [PATCH RFC v3 01/26] fs: add switch_fs_struct() Christian Brauner
2026-03-11 21:43 ` [PATCH RFC v3 02/26] fs: notice when init abandons fs sharing Christian Brauner
2026-03-11 21:43 ` [PATCH RFC v3 03/26] fs: add scoped_with_init_fs() Christian Brauner
2026-03-11 21:43 ` Christian Brauner [this message]
2026-03-11 21:43 ` [PATCH RFC v3 05/26] fs: make userspace_init_fs a dynamically-initialized pointer Christian Brauner
2026-03-11 21:43 ` [PATCH RFC v3 06/26] rnbd: use scoped_with_init_fs() for block device open Christian Brauner
2026-03-11 21:43 ` [PATCH RFC v3 07/26] crypto: ccp: use scoped_with_init_fs() for SEV file access Christian Brauner
2026-03-11 21:43 ` [PATCH RFC v3 08/26] scsi: target: use scoped_with_init_fs() for ALUA metadata Christian Brauner
2026-03-11 21:43 ` [PATCH RFC v3 09/26] scsi: target: use scoped_with_init_fs() for APTPL metadata Christian Brauner
2026-03-11 21:43 ` [PATCH RFC v3 10/26] btrfs: use scoped_with_init_fs() for update_dev_time() Christian Brauner
2026-03-11 21:43 ` [PATCH RFC v3 11/26] coredump: use scoped_with_init_fs() for coredump path resolution Christian Brauner
2026-03-11 21:43 ` [PATCH RFC v3 12/26] fs: use scoped_with_init_fs() for kernel_read_file_from_path_initns() Christian Brauner
2026-03-11 21:43 ` [PATCH RFC v3 13/26] ksmbd: use scoped_with_init_fs() for share path resolution Christian Brauner
2026-03-11 21:43 ` [PATCH RFC v3 14/26] ksmbd: use scoped_with_init_fs() for filesystem info path lookup Christian Brauner
2026-03-11 21:43 ` [PATCH RFC v3 15/26] ksmbd: use scoped_with_init_fs() for VFS path operations Christian Brauner
2026-03-11 21:43 ` [PATCH RFC v3 16/26] pnfs/blocklayout: use scoped_with_init_fs() for SCSI device lookup Christian Brauner
2026-03-11 21:44 ` [PATCH RFC v3 17/26] initramfs: use scoped_with_init_fs() for rootfs unpacking Christian Brauner
2026-03-11 21:44 ` [PATCH RFC v3 18/26] af_unix: use scoped_with_init_fs() for coredump socket lookup Christian Brauner
2026-03-11 21:44 ` [PATCH RFC v3 19/26] fs: stop sharing fs_struct between init_task and pid 1 Christian Brauner
2026-03-11 21:44 ` [PATCH RFC v3 20/26] fs: add umh argument to struct kernel_clone_args Christian Brauner
2026-03-11 21:44 ` [PATCH RFC v3 21/26] fs: add kthread_mntns() Christian Brauner
2026-03-11 22:13 ` Thomas Weißschuh
2026-03-11 21:44 ` [PATCH RFC v3 22/26] devtmpfs: create private mount namespace Christian Brauner
2026-03-11 21:44 ` [PATCH RFC v3 23/26] nullfs: make nullfs multi-instance Christian Brauner
2026-03-11 21:44 ` [PATCH RFC v3 24/26] fs: start all kthreads in nullfs Christian Brauner
2026-03-11 21:44 ` [PATCH RFC v3 25/26] fs: stop rewriting kthread fs structs Christian Brauner
2026-03-11 21:44 ` [PATCH RFC v3 26/26] fs: stop rewriting paths for PF_EXITING | PF_DUMPCORE Christian Brauner
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260311-work-kthread-nullfs-v3-4-3dd2cbe92ad0@kernel.org \
--to=brauner@kernel.org \
--cc=axboe@kernel.dk \
--cc=jack@suse.cz \
--cc=jannh@google.com \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=tj@kernel.org \
--cc=torvalds@linux-foundation.org \
--cc=viro@zeniv.linux.org.uk \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox