* [PATCH] vfs: transitive upgrade restrictions for fds
2026-03-23 22:00 [RFC PATCH 0/1] vfs: transitive upgrade restrictions for fds Jori Koolstra
@ 2026-03-23 22:00 ` Jori Koolstra
2026-03-24 8:41 ` Christian Brauner
2026-03-24 14:37 ` Greg KH
2026-03-24 12:31 ` [RFC PATCH 0/1] " Jeff Layton
1 sibling, 2 replies; 6+ messages in thread
From: Jori Koolstra @ 2026-03-23 22:00 UTC (permalink / raw)
To: Alexander Viro, Christian Brauner, Jan Kara, Jeff Layton,
Chuck Lever, Alexander Aring, Arnd Bergmann, gregkh
Cc: Jori Koolstra, Andrew Morton, Liam R . Howlett, Mike Rapoport,
David Hildenbrand, Lorenzo Stoakes, zhang jiao, Kees Cook,
Penglei Jiang, Ethan Tidmore, Oleg Nesterov, Suren Baghdasaryan,
Vlastimil Babka, wangzijie, NeilBrown, Amir Goldstein,
Mateusz Guzik, linux-fsdevel, linux-kernel, linux-arch,
Namjae Jeon
Add upgrade restrictions to openat2(). Extend struct open_how to allow
setting transitive restrictions on using file descriptors to open other
files. A use case for this feature is to block services or containers
from re-opening/upgrading an O_PATH file descriptor through e.g.
/proc/<pid>/fd/<nr> as O_WRONLY.
The idea for this feature comes from the UAPI group kernel feature idea
list [1].
[1] https://github.com/uapi-group/kernel-features?tab=readme-ov-file#upgrade-masks-in-openat2
Signed-off-by: Jori Koolstra <jkoolstra@xs4all.nl>
---
fs/file_table.c | 2 ++
fs/internal.h | 1 +
fs/namei.c | 38 ++++++++++++++++++++++++++++----
fs/open.c | 9 ++++++++
fs/proc/base.c | 24 ++++++++++++++------
fs/proc/fd.c | 6 ++++-
fs/proc/internal.h | 4 +++-
include/linux/fcntl.h | 6 ++++-
include/linux/fs.h | 1 +
include/linux/namei.h | 15 ++++++++++++-
include/uapi/asm-generic/fcntl.h | 4 ++++
include/uapi/linux/openat2.h | 1 +
12 files changed, 96 insertions(+), 15 deletions(-)
diff --git a/fs/file_table.c b/fs/file_table.c
index aaa5faaace1e..b98038009fd2 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -196,6 +196,8 @@ static int init_file(struct file *f, int flags, const struct cred *cred)
f->f_wb_err = 0;
f->f_sb_err = 0;
+ f->f_allowed_upgrades = VALID_UPGRADE_FLAGS;
+
/*
* We're SLAB_TYPESAFE_BY_RCU so initialize f_ref last. While
* fget-rcu pattern users need to be able to handle spurious
diff --git a/fs/internal.h b/fs/internal.h
index cbc384a1aa09..0a37bb208184 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -189,6 +189,7 @@ struct open_flags {
int acc_mode;
int intent;
int lookup_flags;
+ unsigned int allowed_upgrades;
};
extern struct file *do_file_open(int dfd, struct filename *pathname,
const struct open_flags *op);
diff --git a/fs/namei.c b/fs/namei.c
index 58f715f7657e..3982908ff995 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -743,6 +743,7 @@ struct nameidata {
int dfd;
vfsuid_t dir_vfsuid;
umode_t dir_mode;
+ unsigned int allowed_upgrades;
} __randomize_layout;
#define ND_ROOT_PRESET 1
@@ -760,6 +761,7 @@ static void __set_nameidata(struct nameidata *p, int dfd, struct filename *name)
p->path.mnt = NULL;
p->path.dentry = NULL;
p->total_link_count = old ? old->total_link_count : 0;
+ p->allowed_upgrades = VALID_UPGRADE_FLAGS;
p->saved = old;
current->nameidata = p;
}
@@ -1155,12 +1157,11 @@ static int nd_jump_root(struct nameidata *nd)
nd->state |= ND_JUMPED;
return 0;
}
-
/*
* Helper to directly jump to a known parsed path from ->get_link,
* caller must have taken a reference to path beforehand.
*/
-int nd_jump_link(const struct path *path)
+int nd_jump_link_how(const struct path *path, const struct jump_how how)
{
int error = -ELOOP;
struct nameidata *nd = current->nameidata;
@@ -1181,6 +1182,7 @@ int nd_jump_link(const struct path *path)
nd->path = *path;
nd->inode = nd->path.dentry->d_inode;
nd->state |= ND_JUMPED;
+ nd->allowed_upgrades &= how.allowed_upgrades;
return 0;
err:
@@ -2738,6 +2740,8 @@ static const char *path_init(struct nameidata *nd, unsigned flags)
if (fd_empty(f))
return ERR_PTR(-EBADF);
+ nd->allowed_upgrades = fd_file(f)->f_allowed_upgrades;
+
if (flags & LOOKUP_LINKAT_EMPTY) {
if (fd_file(f)->f_cred != current_cred() &&
!ns_capable(fd_file(f)->f_cred->user_ns, CAP_DAC_READ_SEARCH))
@@ -4266,6 +4270,28 @@ static int may_open(struct mnt_idmap *idmap, const struct path *path,
return 0;
}
+static bool may_upgrade(const int flag, const unsigned int allowed_upgrades)
+{
+ int mode = flag & O_ACCMODE;
+ unsigned int allowed = allowed_upgrades & ~DENY_UPGRADES;
+
+ if (mode != O_WRONLY && !(allowed & READ_UPGRADABLE))
+ return false;
+ if (mode != O_RDONLY && !(allowed & WRITE_UPGRADABLE))
+ return false;
+ return true;
+}
+
+static int may_open_upgrade(struct mnt_idmap *idmap, const struct path *path,
+ int acc_mode, int flag,
+ const unsigned int allowed_upgrades)
+{
+ if (!may_upgrade(flag, allowed_upgrades))
+ return -EACCES;
+
+ return may_open(idmap, path, acc_mode, flag);
+}
+
static int handle_truncate(struct mnt_idmap *idmap, struct file *filp)
{
const struct path *path = &filp->f_path;
@@ -4666,7 +4692,8 @@ static int do_open(struct nameidata *nd,
return error;
do_truncate = true;
}
- error = may_open(idmap, &nd->path, acc_mode, open_flag);
+ error = may_open_upgrade(idmap, &nd->path, acc_mode, open_flag,
+ nd->allowed_upgrades);
if (!error && !(file->f_mode & FMODE_OPENED))
error = vfs_open(&nd->path, file);
if (!error)
@@ -4831,8 +4858,11 @@ static struct file *path_openat(struct nameidata *nd,
terminate_walk(nd);
}
if (likely(!error)) {
- if (likely(file->f_mode & FMODE_OPENED))
+ if (likely(file->f_mode & FMODE_OPENED)) {
+ file->f_allowed_upgrades =
+ op->allowed_upgrades & nd->allowed_upgrades;
return file;
+ }
WARN_ON(1);
error = -EINVAL;
}
diff --git a/fs/open.c b/fs/open.c
index 91f1139591ab..212a1d260947 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -1167,6 +1167,7 @@ inline struct open_how build_open_how(int flags, umode_t mode)
struct open_how how = {
.flags = flags & VALID_OPEN_FLAGS,
.mode = mode & S_IALLUGO,
+ .allowed_upgrades = VALID_UPGRADE_FLAGS
};
/* O_PATH beats everything else. */
@@ -1300,6 +1301,14 @@ inline int build_open_flags(const struct open_how *how, struct open_flags *op)
}
op->lookup_flags = lookup_flags;
+
+ if (how->allowed_upgrades == 0)
+ op->allowed_upgrades = VALID_UPGRADE_FLAGS;
+ else if (how->allowed_upgrades & ~VALID_UPGRADE_FLAGS)
+ return -EINVAL;
+ else
+ op->allowed_upgrades = how->allowed_upgrades;
+
return 0;
}
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 4c863d17dfb4..84c54f9dffd9 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -218,7 +218,8 @@ static int get_task_root(struct task_struct *task, struct path *root)
return result;
}
-static int proc_cwd_link(struct dentry *dentry, struct path *path)
+static int proc_cwd_link(struct dentry *dentry, struct path *path,
+ struct jump_how *jump_how)
{
struct task_struct *task = get_proc_task(d_inode(dentry));
int result = -ENOENT;
@@ -227,6 +228,7 @@ static int proc_cwd_link(struct dentry *dentry, struct path *path)
task_lock(task);
if (task->fs) {
get_fs_pwd(task->fs, path);
+ *jump_how = JUMP_HOW_UNRESTRICTED;
result = 0;
}
task_unlock(task);
@@ -235,7 +237,8 @@ static int proc_cwd_link(struct dentry *dentry, struct path *path)
return result;
}
-static int proc_root_link(struct dentry *dentry, struct path *path)
+static int proc_root_link(struct dentry *dentry, struct path *path,
+ struct jump_how *jump_how)
{
struct task_struct *task = get_proc_task(d_inode(dentry));
int result = -ENOENT;
@@ -243,6 +246,7 @@ static int proc_root_link(struct dentry *dentry, struct path *path)
if (task) {
result = get_task_root(task, path);
put_task_struct(task);
+ *jump_how = JUMP_HOW_UNRESTRICTED;
}
return result;
}
@@ -1777,7 +1781,8 @@ static const struct file_operations proc_pid_set_comm_operations = {
.release = single_release,
};
-static int proc_exe_link(struct dentry *dentry, struct path *exe_path)
+static int proc_exe_link(struct dentry *dentry, struct path *exe_path,
+ struct jump_how *jump_how)
{
struct task_struct *task;
struct file *exe_file;
@@ -1789,6 +1794,7 @@ static int proc_exe_link(struct dentry *dentry, struct path *exe_path)
put_task_struct(task);
if (exe_file) {
*exe_path = exe_file->f_path;
+ *jump_how = JUMP_HOW_UNRESTRICTED;
path_get(&exe_file->f_path);
fput(exe_file);
return 0;
@@ -1801,6 +1807,7 @@ static const char *proc_pid_get_link(struct dentry *dentry,
struct delayed_call *done)
{
struct path path;
+ struct jump_how jump_how;
int error = -EACCES;
if (!dentry)
@@ -1810,11 +1817,11 @@ static const char *proc_pid_get_link(struct dentry *dentry,
if (!proc_fd_access_allowed(inode))
goto out;
- error = PROC_I(inode)->op.proc_get_link(dentry, &path);
+ error = PROC_I(inode)->op.proc_get_link(dentry, &path, &jump_how);
if (error)
goto out;
- error = nd_jump_link(&path);
+ error = nd_jump_link_how(&path, jump_how);
out:
return ERR_PTR(error);
}
@@ -1848,12 +1855,13 @@ static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int b
int error = -EACCES;
struct inode *inode = d_inode(dentry);
struct path path;
+ struct jump_how jump_how;
/* Are we allowed to snoop on the tasks file descriptors? */
if (!proc_fd_access_allowed(inode))
goto out;
- error = PROC_I(inode)->op.proc_get_link(dentry, &path);
+ error = PROC_I(inode)->op.proc_get_link(dentry, &path, &jump_how);
if (error)
goto out;
@@ -2250,7 +2258,8 @@ static const struct dentry_operations tid_map_files_dentry_operations = {
.d_delete = pid_delete_dentry,
};
-static int map_files_get_link(struct dentry *dentry, struct path *path)
+static int map_files_get_link(struct dentry *dentry, struct path *path,
+ struct jump_how *jump_how)
{
unsigned long vm_start, vm_end;
struct vm_area_struct *vma;
@@ -2279,6 +2288,7 @@ static int map_files_get_link(struct dentry *dentry, struct path *path)
rc = -ENOENT;
vma = find_exact_vma(mm, vm_start, vm_end);
if (vma && vma->vm_file) {
+ *jump_how = JUMP_HOW_UNRESTRICTED;
*path = *file_user_path(vma->vm_file);
path_get(path);
rc = 0;
diff --git a/fs/proc/fd.c b/fs/proc/fd.c
index 9eeccff49b2a..344485e8cb6f 100644
--- a/fs/proc/fd.c
+++ b/fs/proc/fd.c
@@ -171,7 +171,8 @@ static const struct dentry_operations tid_fd_dentry_operations = {
.d_delete = pid_delete_dentry,
};
-static int proc_fd_link(struct dentry *dentry, struct path *path)
+static int proc_fd_link(struct dentry *dentry, struct path *path,
+ struct jump_how *jump_how)
{
struct task_struct *task;
int ret = -ENOENT;
@@ -183,6 +184,9 @@ static int proc_fd_link(struct dentry *dentry, struct path *path)
fd_file = fget_task(task, fd);
if (fd_file) {
+ *jump_how = (struct jump_how) {
+ .allowed_upgrades = fd_file->f_allowed_upgrades
+ };
*path = fd_file->f_path;
path_get(&fd_file->f_path);
ret = 0;
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index c1e8eb984da8..42f668059a30 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -14,6 +14,7 @@
#include <linux/sched/coredump.h>
#include <linux/sched/task.h>
#include <linux/mm.h>
+#include <linux/namei.h>
struct ctl_table_header;
struct mempolicy;
@@ -107,7 +108,8 @@ extern struct kmem_cache *proc_dir_entry_cache;
void pde_free(struct proc_dir_entry *pde);
union proc_op {
- int (*proc_get_link)(struct dentry *, struct path *);
+ int (*proc_get_link)(struct dentry *, struct path *,
+ struct jump_how *);
int (*proc_show)(struct seq_file *m,
struct pid_namespace *ns, struct pid *pid,
struct task_struct *task);
diff --git a/include/linux/fcntl.h b/include/linux/fcntl.h
index a332e79b3207..6b15b488d542 100644
--- a/include/linux/fcntl.h
+++ b/include/linux/fcntl.h
@@ -12,6 +12,9 @@
FASYNC | O_DIRECT | O_LARGEFILE | O_DIRECTORY | O_NOFOLLOW | \
O_NOATIME | O_CLOEXEC | O_PATH | __O_TMPFILE)
+#define VALID_UPGRADE_FLAGS \
+ (DENY_UPGRADES | READ_UPGRADABLE | WRITE_UPGRADABLE)
+
/* List of all valid flags for the how->resolve argument: */
#define VALID_RESOLVE_FLAGS \
(RESOLVE_NO_XDEV | RESOLVE_NO_MAGICLINKS | RESOLVE_NO_SYMLINKS | \
@@ -19,7 +22,8 @@
/* List of all open_how "versions". */
#define OPEN_HOW_SIZE_VER0 24 /* sizeof first published struct */
-#define OPEN_HOW_SIZE_LATEST OPEN_HOW_SIZE_VER0
+#define OPEN_HOW_SIZE_VER1 32 /* added allowed_upgrades */
+#define OPEN_HOW_SIZE_LATEST OPEN_HOW_SIZE_VER1
#ifndef force_o_largefile
#define force_o_largefile() (!IS_ENABLED(CONFIG_ARCH_32BIT_OFF_T))
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 8b3dd145b25e..697d2fc6322b 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1296,6 +1296,7 @@ struct file {
};
file_ref_t f_ref;
/* --- cacheline 3 boundary (192 bytes) --- */
+ unsigned int f_allowed_upgrades;
} __randomize_layout
__attribute__((aligned(4))); /* lest something weird decides that 2 is OK */
diff --git a/include/linux/namei.h b/include/linux/namei.h
index 58600cf234bc..b827df5b59d9 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -203,7 +203,20 @@ static inline umode_t __must_check mode_strip_umask(const struct inode *dir, umo
return mode;
}
-extern int __must_check nd_jump_link(const struct path *path);
+struct jump_how {
+ unsigned int allowed_upgrades;
+};
+
+#define JUMP_HOW_UNRESTRICTED \
+ ((const struct jump_how){ .allowed_upgrades = VALID_UPGRADE_FLAGS })
+
+extern int __must_check nd_jump_link_how(const struct path *path,
+ const struct jump_how how);
+
+static inline int nd_jump_link(const struct path *path)
+{
+ return nd_jump_link_how(path, JUMP_HOW_UNRESTRICTED);
+}
static inline void nd_terminate_link(void *name, size_t len, size_t maxlen)
{
diff --git a/include/uapi/asm-generic/fcntl.h b/include/uapi/asm-generic/fcntl.h
index 613475285643..a3e36d86af1d 100644
--- a/include/uapi/asm-generic/fcntl.h
+++ b/include/uapi/asm-generic/fcntl.h
@@ -95,6 +95,10 @@
#define O_NDELAY O_NONBLOCK
#endif
+#define DENY_UPGRADES 0x01
+#define READ_UPGRADABLE (0x02 | DENY_UPGRADES)
+#define WRITE_UPGRADABLE (0x04 | DENY_UPGRADES)
+
#define F_DUPFD 0 /* dup */
#define F_GETFD 1 /* get close_on_exec */
#define F_SETFD 2 /* set/clear close_on_exec */
diff --git a/include/uapi/linux/openat2.h b/include/uapi/linux/openat2.h
index a5feb7604948..32c302758e72 100644
--- a/include/uapi/linux/openat2.h
+++ b/include/uapi/linux/openat2.h
@@ -20,6 +20,7 @@ struct open_how {
__u64 flags;
__u64 mode;
__u64 resolve;
+ __u64 allowed_upgrades;
};
/* how->resolve flags for openat2(2). */
--
2.53.0
^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: [RFC PATCH 0/1] vfs: transitive upgrade restrictions for fds
2026-03-23 22:00 [RFC PATCH 0/1] vfs: transitive upgrade restrictions for fds Jori Koolstra
2026-03-23 22:00 ` [PATCH] " Jori Koolstra
@ 2026-03-24 12:31 ` Jeff Layton
1 sibling, 0 replies; 6+ messages in thread
From: Jeff Layton @ 2026-03-24 12:31 UTC (permalink / raw)
To: Jori Koolstra, Alexander Viro, Christian Brauner, Jan Kara,
Chuck Lever, Alexander Aring, Arnd Bergmann, gregkh
Cc: Andrew Morton, Liam R . Howlett, Mike Rapoport, David Hildenbrand,
Lorenzo Stoakes, zhang jiao, Kees Cook, Penglei Jiang,
Ethan Tidmore, Oleg Nesterov, Suren Baghdasaryan, Vlastimil Babka,
wangzijie, NeilBrown, Amir Goldstein, Mateusz Guzik,
linux-fsdevel, linux-kernel, linux-arch
On Mon, 2026-03-23 at 23:00 +0100, Jori Koolstra wrote:
> Add upgrade restrictions to openat2(). Extend struct open_how to allow
> setting transitive restrictions on using file descriptors to open other
> files. A use case for this feature is to block services or containers
> from re-opening/upgrading an O_PATH file descriptor through e.g.
> /proc/<pid>/fd/<nr> or OPENAT2_EMPTY_PATH (if upstreamed) as O_WRONLY.
>
> The implementation idea is this: magic paths like /proc/<pid>/fd/<nr>
> (currently the only one of its sort AFAIK) go through nd_jump_link() to
> hard set current->nameidata. To include information about the fd
> yielding the magic link, we add a new struct jump_how as a parameter.
> This struct may include restrictions or other metadata attached to the
> magic link jump other than the struct path to jump to. So far it has
> only one unsigned int field: allowed_upgrades. This is a flag int that
> (for now) may be either READ_UPGRADABLE, WRITE_UPGRADABLE, or
> DENY_UPGRADES.
>
> The idea is that you can restrict what kind of open flags may be used
> to open files in any way using this fd as a starting point
> (transitively). The check is enforced in may_open_upgrade(), which is
> just the old may_open() with an extra test. To keep this state attached
> to the fds, we add a field f_allowed_upgrades to struct file. Then
> in path_openat(), after success, we compute:
>
> file->f_allowed_upgrades =
> op->allowed_upgrades & nd->allowed_upgrades;
>
> where op is the struct open_flags that is built from open_how in
> build_open_flags(), and nd->allowed_upgrades is set during path
> traversal either in path_init() or nd_jump_link().
>
> The implementation and the idea are a bit rough; it is the first bit of
> less trivial work I have done on the kernel, hence the RFC status. I did
> create some self tests already which this patch passes, and nothing
> seems to break on a fresh vng kernel. But obviously there may be MANY
> things I am overlooking.
>
> The original idea for this feature comes from the UAPI group kernel
> feature idea list [1].
>
> [1] https://github.com/uapi-group/kernel-features?tab=readme-ov-file#upgrade-masks-in-openat2
>
> Jori Koolstra (1):
> vfs: transitive upgrade restrictions for fds
>
> fs/file_table.c | 2 ++
> fs/internal.h | 1 +
> fs/namei.c | 38 ++++++++++++++++++++++++++++----
> fs/open.c | 9 ++++++++
> fs/proc/base.c | 24 ++++++++++++++------
> fs/proc/fd.c | 6 ++++-
> fs/proc/internal.h | 4 +++-
> include/linux/fcntl.h | 6 ++++-
> include/linux/fs.h | 1 +
> include/linux/namei.h | 15 ++++++++++++-
> include/uapi/asm-generic/fcntl.h | 4 ++++
> include/uapi/linux/openat2.h | 1 +
> 12 files changed, 96 insertions(+), 15 deletions(-)
It's an interesting idea, but I could see it being difficult to track
the result of this across a large chain of open fd's.
If you are going to do this, then at the very least you should add a
mechanism (fcntl() command?) to query the current f_allowed_upgrades
mask, so that this can be debugged in some fashion.
--
Jeff Layton <jlayton@kernel.org>
^ permalink raw reply [flat|nested] 6+ messages in thread