* [PATCH 1/5] fs: add vfs_open_tree() helper
2025-01-28 10:33 [PATCH 0/5] fs: allow changing idmappings Christian Brauner
@ 2025-01-28 10:33 ` Christian Brauner
2025-01-28 10:33 ` [PATCH 2/5] fs: add copy_mount_setattr() helper Christian Brauner
` (4 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: Christian Brauner @ 2025-01-28 10:33 UTC (permalink / raw)
To: linux-fsdevel
Cc: Amir Goldstein, Al Viro, Josef Bacik, Jeff Layton, Jan Kara,
Lennart Poettering, Daan De Meyer, Seth Forshee, Arnd Bergmann,
Christian Brauner
Split out vfs_open_tree() from open_tree() so we can use it in later
patches.
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
fs/namespace.c | 49 ++++++++++++++++++++++++++-----------------------
1 file changed, 26 insertions(+), 23 deletions(-)
diff --git a/fs/namespace.c b/fs/namespace.c
index 4013fbac354a284732eb10e5a869b86184a52d1d..2a8ac568a08d125290ae3cdeeeec3280ea4c1721 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2889,24 +2889,22 @@ static struct file *open_detached_copy(struct path *path, bool recursive)
return file;
}
-SYSCALL_DEFINE3(open_tree, int, dfd, const char __user *, filename, unsigned, flags)
+static struct file *vfs_open_tree(int dfd, const char __user *filename, unsigned int flags)
{
- struct file *file;
- struct path path;
+ int ret;
+ struct path path __free(path_put) = {};
int lookup_flags = LOOKUP_AUTOMOUNT | LOOKUP_FOLLOW;
bool detached = flags & OPEN_TREE_CLONE;
- int error;
- int fd;
BUILD_BUG_ON(OPEN_TREE_CLOEXEC != O_CLOEXEC);
if (flags & ~(AT_EMPTY_PATH | AT_NO_AUTOMOUNT | AT_RECURSIVE |
AT_SYMLINK_NOFOLLOW | OPEN_TREE_CLONE |
OPEN_TREE_CLOEXEC))
- return -EINVAL;
+ return ERR_PTR(-EINVAL);
if ((flags & (AT_RECURSIVE | OPEN_TREE_CLONE)) == AT_RECURSIVE)
- return -EINVAL;
+ return ERR_PTR(-EINVAL);
if (flags & AT_NO_AUTOMOUNT)
lookup_flags &= ~LOOKUP_AUTOMOUNT;
@@ -2916,27 +2914,32 @@ SYSCALL_DEFINE3(open_tree, int, dfd, const char __user *, filename, unsigned, fl
lookup_flags |= LOOKUP_EMPTY;
if (detached && !may_mount())
- return -EPERM;
+ return ERR_PTR(-EPERM);
+
+ ret = user_path_at(dfd, filename, lookup_flags, &path);
+ if (unlikely(ret))
+ return ERR_PTR(ret);
+
+ if (detached)
+ return open_detached_copy(&path, flags & AT_RECURSIVE);
+
+ return dentry_open(&path, O_PATH, current_cred());
+}
+
+SYSCALL_DEFINE3(open_tree, int, dfd, const char __user *, filename, unsigned, flags)
+{
+ int fd;
+ struct file *file __free(fput) = NULL;
+
+ file = vfs_open_tree(dfd, filename, flags);
+ if (IS_ERR(file))
+ return PTR_ERR(file);
fd = get_unused_fd_flags(flags & O_CLOEXEC);
if (fd < 0)
return fd;
- error = user_path_at(dfd, filename, lookup_flags, &path);
- if (unlikely(error)) {
- file = ERR_PTR(error);
- } else {
- if (detached)
- file = open_detached_copy(&path, flags & AT_RECURSIVE);
- else
- file = dentry_open(&path, O_PATH, current_cred());
- path_put(&path);
- }
- if (IS_ERR(file)) {
- put_unused_fd(fd);
- return PTR_ERR(file);
- }
- fd_install(fd, file);
+ fd_install(fd, no_free_ptr(file));
return fd;
}
--
2.45.2
^ permalink raw reply related [flat|nested] 7+ messages in thread* [PATCH 2/5] fs: add copy_mount_setattr() helper
2025-01-28 10:33 [PATCH 0/5] fs: allow changing idmappings Christian Brauner
2025-01-28 10:33 ` [PATCH 1/5] fs: add vfs_open_tree() helper Christian Brauner
@ 2025-01-28 10:33 ` Christian Brauner
2025-01-28 10:33 ` [PATCH 3/5] fs: add open_tree_attr() Christian Brauner
` (3 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: Christian Brauner @ 2025-01-28 10:33 UTC (permalink / raw)
To: linux-fsdevel
Cc: Amir Goldstein, Al Viro, Josef Bacik, Jeff Layton, Jan Kara,
Lennart Poettering, Daan De Meyer, Seth Forshee, Arnd Bergmann,
Christian Brauner
Split out copy_mount_setattr() from mount_setattr() so we can use it in
later patches.
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
fs/namespace.c | 73 ++++++++++++++++++++++++++++++++--------------------------
1 file changed, 40 insertions(+), 33 deletions(-)
diff --git a/fs/namespace.c b/fs/namespace.c
index 2a8ac568a08d125290ae3cdeeeec3280ea4c1721..ef7db39c8776ab13da968d7f69c56698e3846914 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -4721,7 +4721,7 @@ static int do_mount_setattr(struct path *path, struct mount_kattr *kattr)
}
static int build_mount_idmapped(const struct mount_attr *attr, size_t usize,
- struct mount_kattr *kattr, unsigned int flags)
+ struct mount_kattr *kattr)
{
struct ns_common *ns;
struct user_namespace *mnt_userns;
@@ -4772,22 +4772,8 @@ static int build_mount_idmapped(const struct mount_attr *attr, size_t usize,
}
static int build_mount_kattr(const struct mount_attr *attr, size_t usize,
- struct mount_kattr *kattr, unsigned int flags)
+ struct mount_kattr *kattr)
{
- unsigned int lookup_flags = LOOKUP_AUTOMOUNT | LOOKUP_FOLLOW;
-
- if (flags & AT_NO_AUTOMOUNT)
- lookup_flags &= ~LOOKUP_AUTOMOUNT;
- if (flags & AT_SYMLINK_NOFOLLOW)
- lookup_flags &= ~LOOKUP_FOLLOW;
- if (flags & AT_EMPTY_PATH)
- lookup_flags |= LOOKUP_EMPTY;
-
- *kattr = (struct mount_kattr) {
- .lookup_flags = lookup_flags,
- .recurse = !!(flags & AT_RECURSIVE),
- };
-
if (attr->propagation & ~MOUNT_SETATTR_PROPAGATION_FLAGS)
return -EINVAL;
if (hweight32(attr->propagation & MOUNT_SETATTR_PROPAGATION_FLAGS) > 1)
@@ -4835,7 +4821,7 @@ static int build_mount_kattr(const struct mount_attr *attr, size_t usize,
return -EINVAL;
}
- return build_mount_idmapped(attr, usize, kattr, flags);
+ return build_mount_idmapped(attr, usize, kattr);
}
static void finish_mount_kattr(struct mount_kattr *kattr)
@@ -4847,23 +4833,14 @@ static void finish_mount_kattr(struct mount_kattr *kattr)
mnt_idmap_put(kattr->mnt_idmap);
}
-SYSCALL_DEFINE5(mount_setattr, int, dfd, const char __user *, path,
- unsigned int, flags, struct mount_attr __user *, uattr,
- size_t, usize)
+static int copy_mount_setattr(struct mount_attr __user *uattr, size_t usize,
+ struct mount_kattr *kattr)
{
- int err;
- struct path target;
+ int ret;
struct mount_attr attr;
- struct mount_kattr kattr;
BUILD_BUG_ON(sizeof(struct mount_attr) != MOUNT_ATTR_SIZE_VER0);
- if (flags & ~(AT_EMPTY_PATH |
- AT_RECURSIVE |
- AT_SYMLINK_NOFOLLOW |
- AT_NO_AUTOMOUNT))
- return -EINVAL;
-
if (unlikely(usize > PAGE_SIZE))
return -E2BIG;
if (unlikely(usize < MOUNT_ATTR_SIZE_VER0))
@@ -4872,9 +4849,9 @@ SYSCALL_DEFINE5(mount_setattr, int, dfd, const char __user *, path,
if (!may_mount())
return -EPERM;
- err = copy_struct_from_user(&attr, sizeof(attr), uattr, usize);
- if (err)
- return err;
+ ret = copy_struct_from_user(&attr, sizeof(attr), uattr, usize);
+ if (ret)
+ return ret;
/* Don't bother walking through the mounts if this is a nop. */
if (attr.attr_set == 0 &&
@@ -4882,7 +4859,37 @@ SYSCALL_DEFINE5(mount_setattr, int, dfd, const char __user *, path,
attr.propagation == 0)
return 0;
- err = build_mount_kattr(&attr, usize, &kattr, flags);
+ return build_mount_kattr(&attr, usize, kattr);
+}
+
+SYSCALL_DEFINE5(mount_setattr, int, dfd, const char __user *, path,
+ unsigned int, flags, struct mount_attr __user *, uattr,
+ size_t, usize)
+{
+ int err;
+ struct path target;
+ struct mount_kattr kattr;
+ unsigned int lookup_flags = LOOKUP_AUTOMOUNT | LOOKUP_FOLLOW;
+
+ if (flags & ~(AT_EMPTY_PATH |
+ AT_RECURSIVE |
+ AT_SYMLINK_NOFOLLOW |
+ AT_NO_AUTOMOUNT))
+ return -EINVAL;
+
+ if (flags & AT_NO_AUTOMOUNT)
+ lookup_flags &= ~LOOKUP_AUTOMOUNT;
+ if (flags & AT_SYMLINK_NOFOLLOW)
+ lookup_flags &= ~LOOKUP_FOLLOW;
+ if (flags & AT_EMPTY_PATH)
+ lookup_flags |= LOOKUP_EMPTY;
+
+ kattr = (struct mount_kattr) {
+ .lookup_flags = lookup_flags,
+ .recurse = !!(flags & AT_RECURSIVE),
+ };
+
+ err = copy_mount_setattr(uattr, usize, &kattr);
if (err)
return err;
--
2.45.2
^ permalink raw reply related [flat|nested] 7+ messages in thread* [PATCH 3/5] fs: add open_tree_attr()
2025-01-28 10:33 [PATCH 0/5] fs: allow changing idmappings Christian Brauner
2025-01-28 10:33 ` [PATCH 1/5] fs: add vfs_open_tree() helper Christian Brauner
2025-01-28 10:33 ` [PATCH 2/5] fs: add copy_mount_setattr() helper Christian Brauner
@ 2025-01-28 10:33 ` Christian Brauner
2025-01-28 10:33 ` [PATCH 4/5] fs: add kflags member to struct mount_kattr Christian Brauner
` (2 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: Christian Brauner @ 2025-01-28 10:33 UTC (permalink / raw)
To: linux-fsdevel
Cc: Amir Goldstein, Al Viro, Josef Bacik, Jeff Layton, Jan Kara,
Lennart Poettering, Daan De Meyer, Seth Forshee, Arnd Bergmann,
Christian Brauner
Add open_tree_attr() which allow to atomically create a detached mount
tree and set mount options on it. If OPEN_TREE_CLONE is used this will
allow the creation of a detached mount with a new set of mount options
without it ever being exposed to userspace without that set of mount
options applied.
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
arch/alpha/kernel/syscalls/syscall.tbl | 1 +
arch/arm/tools/syscall.tbl | 1 +
arch/arm64/tools/syscall_32.tbl | 1 +
arch/m68k/kernel/syscalls/syscall.tbl | 1 +
arch/microblaze/kernel/syscalls/syscall.tbl | 1 +
arch/mips/kernel/syscalls/syscall_n32.tbl | 1 +
arch/mips/kernel/syscalls/syscall_n64.tbl | 1 +
arch/mips/kernel/syscalls/syscall_o32.tbl | 1 +
arch/parisc/kernel/syscalls/syscall.tbl | 1 +
arch/powerpc/kernel/syscalls/syscall.tbl | 1 +
arch/s390/kernel/syscalls/syscall.tbl | 1 +
arch/sh/kernel/syscalls/syscall.tbl | 1 +
arch/sparc/kernel/syscalls/syscall.tbl | 1 +
arch/x86/entry/syscalls/syscall_32.tbl | 1 +
arch/x86/entry/syscalls/syscall_64.tbl | 1 +
arch/xtensa/kernel/syscalls/syscall.tbl | 1 +
fs/namespace.c | 39 +++++++++++++++++++++++++++++
include/linux/syscalls.h | 4 +++
include/uapi/asm-generic/unistd.h | 4 ++-
scripts/syscall.tbl | 1 +
20 files changed, 63 insertions(+), 1 deletion(-)
diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl
index c59d53d6d3f3490f976ca179ddfe02e69265ae4d..2dd6340de6b4efddc406f0c235701c15cf02f650 100644
--- a/arch/alpha/kernel/syscalls/syscall.tbl
+++ b/arch/alpha/kernel/syscalls/syscall.tbl
@@ -506,3 +506,4 @@
574 common getxattrat sys_getxattrat
575 common listxattrat sys_listxattrat
576 common removexattrat sys_removexattrat
+577 common open_tree_attr sys_open_tree_attr
diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl
index 49eeb2ad8dbd8e074c6240417693f23fb328afa8..27c1d5ebcd91c8c296dc6676307f66bfdf4ab78d 100644
--- a/arch/arm/tools/syscall.tbl
+++ b/arch/arm/tools/syscall.tbl
@@ -481,3 +481,4 @@
464 common getxattrat sys_getxattrat
465 common listxattrat sys_listxattrat
466 common removexattrat sys_removexattrat
+467 common open_tree_attr sys_open_tree_attr
diff --git a/arch/arm64/tools/syscall_32.tbl b/arch/arm64/tools/syscall_32.tbl
index 69a829912a05eb8a3e21ed701d1030e31c0148bc..0765b3a8d6d600d7b9f83182d401d9f80c03476c 100644
--- a/arch/arm64/tools/syscall_32.tbl
+++ b/arch/arm64/tools/syscall_32.tbl
@@ -478,3 +478,4 @@
464 common getxattrat sys_getxattrat
465 common listxattrat sys_listxattrat
466 common removexattrat sys_removexattrat
+467 common open_tree_attr sys_open_tree_attr
diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl
index f5ed71f1910d09769c845c2d062d99ee0449437c..9fe47112c586f152662af38a9a7f90957cb96cf8 100644
--- a/arch/m68k/kernel/syscalls/syscall.tbl
+++ b/arch/m68k/kernel/syscalls/syscall.tbl
@@ -466,3 +466,4 @@
464 common getxattrat sys_getxattrat
465 common listxattrat sys_listxattrat
466 common removexattrat sys_removexattrat
+467 common open_tree_attr sys_open_tree_attr
diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl
index 680f568b77f2cbefc3eacb2517f276041f229b1e..7b6e97828e552d4da90046ddfcd4a55723e522bb 100644
--- a/arch/microblaze/kernel/syscalls/syscall.tbl
+++ b/arch/microblaze/kernel/syscalls/syscall.tbl
@@ -472,3 +472,4 @@
464 common getxattrat sys_getxattrat
465 common listxattrat sys_listxattrat
466 common removexattrat sys_removexattrat
+467 common open_tree_attr sys_open_tree_attr
diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl
index 0b9b7e25b69ad592642f8533bee9ccfe95ce9626..aa70e371bb54ab5d9c8dd8923b6ecf9693ee914d 100644
--- a/arch/mips/kernel/syscalls/syscall_n32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n32.tbl
@@ -405,3 +405,4 @@
464 n32 getxattrat sys_getxattrat
465 n32 listxattrat sys_listxattrat
466 n32 removexattrat sys_removexattrat
+467 n32 open_tree_attr sys_open_tree_attr
diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl
index c844cd5cda620b2809a397cdd6f4315ab6a1bfe2..1e8c44c7b61492eabf00c777831e457a7a6e579c 100644
--- a/arch/mips/kernel/syscalls/syscall_n64.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n64.tbl
@@ -381,3 +381,4 @@
464 n64 getxattrat sys_getxattrat
465 n64 listxattrat sys_listxattrat
466 n64 removexattrat sys_removexattrat
+467 n64 open_tree_attr sys_open_tree_attr
diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl
index e8a57c2067580618f4635f9582bcec4c4b2a92c5..dbbba6c4465f6af5124c8c9f1ade4da8c5407ca6 100644
--- a/arch/mips/kernel/syscalls/syscall_o32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_o32.tbl
@@ -454,3 +454,4 @@
464 o32 getxattrat sys_getxattrat
465 o32 listxattrat sys_listxattrat
466 o32 removexattrat sys_removexattrat
+467 o32 open_tree_attr sys_open_tree_attr
diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl
index d9fc94c869657fcfbd7aca1d5f5abc9fae2fb9d8..94df3cb957e9d547d192e8732c0cf23ef2b5ce5d 100644
--- a/arch/parisc/kernel/syscalls/syscall.tbl
+++ b/arch/parisc/kernel/syscalls/syscall.tbl
@@ -465,3 +465,4 @@
464 common getxattrat sys_getxattrat
465 common listxattrat sys_listxattrat
466 common removexattrat sys_removexattrat
+467 common open_tree_attr sys_open_tree_attr
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
index d8b4ab78bef076bd50d49b87dea5060fd8c1686a..9a084bdb892694bc562f514b55212d167cbac12f 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -557,3 +557,4 @@
464 common getxattrat sys_getxattrat
465 common listxattrat sys_listxattrat
466 common removexattrat sys_removexattrat
+467 common open_tree_attr sys_open_tree_attr
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
index e9115b4d8b635b846e5c9ad6ce229605323723a5..a4569b96ef06c54ce7aa795d039541c90a38284f 100644
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -469,3 +469,4 @@
464 common getxattrat sys_getxattrat sys_getxattrat
465 common listxattrat sys_listxattrat sys_listxattrat
466 common removexattrat sys_removexattrat sys_removexattrat
+467 common open_tree_attr sys_open_tree_attr sys_open_tree_attr
diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl
index c8cad33bf250ea110de37bd1407f5a43ec5e38f2..52a7652fcff6394b96ace1f3b0ed72250ee5e669 100644
--- a/arch/sh/kernel/syscalls/syscall.tbl
+++ b/arch/sh/kernel/syscalls/syscall.tbl
@@ -470,3 +470,4 @@
464 common getxattrat sys_getxattrat
465 common listxattrat sys_listxattrat
466 common removexattrat sys_removexattrat
+467 common open_tree_attr sys_open_tree_attr
diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl
index 727f99d333b304b3db0711953a3d91ece18a28eb..83e45eb6c095a36baaf749927628e6052fe900e6 100644
--- a/arch/sparc/kernel/syscalls/syscall.tbl
+++ b/arch/sparc/kernel/syscalls/syscall.tbl
@@ -512,3 +512,4 @@
464 common getxattrat sys_getxattrat
465 common listxattrat sys_listxattrat
466 common removexattrat sys_removexattrat
+467 common open_tree_attr sys_open_tree_attr
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 4d0fb2fba7e208ae9455459afe11e277321d9f74..3f0ec87d5db4e290c62e2ffa6391a431a00dd36a 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -472,3 +472,4 @@
464 i386 getxattrat sys_getxattrat
465 i386 listxattrat sys_listxattrat
466 i386 removexattrat sys_removexattrat
+467 i386 open_tree_attr sys_open_tree_attr
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index 5eb708bff1c791debd6cfc5322583b2ae53f6437..cfb5ca41e30de1a4e073750096f5b51a2ec137d2 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -390,6 +390,7 @@
464 common getxattrat sys_getxattrat
465 common listxattrat sys_listxattrat
466 common removexattrat sys_removexattrat
+467 common open_tree_attr sys_open_tree_attr
#
# Due to a historical design error, certain syscalls are numbered differently
diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl
index 37effc1b134eea061f2c350c1d68b4436b65a4dd..f657a77314f8667fa019a01e10c84ea270024adc 100644
--- a/arch/xtensa/kernel/syscalls/syscall.tbl
+++ b/arch/xtensa/kernel/syscalls/syscall.tbl
@@ -437,3 +437,4 @@
464 common getxattrat sys_getxattrat
465 common listxattrat sys_listxattrat
466 common removexattrat sys_removexattrat
+467 common open_tree_attr sys_open_tree_attr
diff --git a/fs/namespace.c b/fs/namespace.c
index ef7db39c8776ab13da968d7f69c56698e3846914..2a188a6c8df2bceed61c86877e91b87fd154c5a0 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -4902,6 +4902,45 @@ SYSCALL_DEFINE5(mount_setattr, int, dfd, const char __user *, path,
return err;
}
+SYSCALL_DEFINE5(open_tree_attr, int, dfd, const char __user *, filename,
+ unsigned, flags, struct mount_attr __user *, uattr,
+ size_t, usize)
+{
+ struct file __free(fput) *file = NULL;
+ int fd;
+
+ if (!uattr && usize)
+ return -EINVAL;
+
+ file = vfs_open_tree(dfd, filename, flags);
+ if (IS_ERR(file))
+ return PTR_ERR(file);
+
+ if (uattr) {
+ int ret;
+ struct mount_kattr kattr = {
+ .recurse = !!(flags & AT_RECURSIVE),
+ };
+
+ ret = copy_mount_setattr(uattr, usize, &kattr);
+ if (ret)
+ return ret;
+
+ ret = do_mount_setattr(&file->f_path, &kattr);
+ if (ret)
+ return ret;
+
+ finish_mount_kattr(&kattr);
+ }
+
+ fd = get_unused_fd_flags(flags & O_CLOEXEC);
+ if (fd < 0)
+ return fd;
+
+ fd_install(fd, no_free_ptr(file));
+ return fd;
+}
+
int show_path(struct seq_file *m, struct dentry *root)
{
if (root->d_sb->s_op->show_path)
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index c6333204d45130eb022f6db460eea34a1f6e91db..079ea1d09d85e60bd87fbbe66e1b2f551705c56d 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -951,6 +951,10 @@ asmlinkage long sys_statx(int dfd, const char __user *path, unsigned flags,
asmlinkage long sys_rseq(struct rseq __user *rseq, uint32_t rseq_len,
int flags, uint32_t sig);
asmlinkage long sys_open_tree(int dfd, const char __user *path, unsigned flags);
+asmlinkage long sys_open_tree_attr(int dfd, const char __user *path,
+ unsigned flags,
+ struct mount_attr __user *uattr,
+ size_t usize);
asmlinkage long sys_move_mount(int from_dfd, const char __user *from_path,
int to_dfd, const char __user *to_path,
unsigned int ms_flags);
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 88dc393c2bca38c0fa1b3fae579f7cfe4931223c..2892a45023af6d3eb941623d4fed04841ab07e02 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -849,9 +849,11 @@ __SYSCALL(__NR_getxattrat, sys_getxattrat)
__SYSCALL(__NR_listxattrat, sys_listxattrat)
#define __NR_removexattrat 466
__SYSCALL(__NR_removexattrat, sys_removexattrat)
+#define __NR_open_tree_attr 467
+__SYSCALL(__NR_open_tree_attr, sys_open_tree_attr)
#undef __NR_syscalls
-#define __NR_syscalls 467
+#define __NR_syscalls 468
/*
* 32 bit systems traditionally used different
diff --git a/scripts/syscall.tbl b/scripts/syscall.tbl
index ebbdb3c42e9f74613b003014c0baf44c842bb756..580b4e246aecd5f07d542943ba68fc4ed5961660 100644
--- a/scripts/syscall.tbl
+++ b/scripts/syscall.tbl
@@ -407,3 +407,4 @@
464 common getxattrat sys_getxattrat
465 common listxattrat sys_listxattrat
466 common removexattrat sys_removexattrat
+467 common open_tree_attr sys_open_tree_attr
--
2.45.2
^ permalink raw reply related [flat|nested] 7+ messages in thread* [PATCH 4/5] fs: add kflags member to struct mount_kattr
2025-01-28 10:33 [PATCH 0/5] fs: allow changing idmappings Christian Brauner
` (2 preceding siblings ...)
2025-01-28 10:33 ` [PATCH 3/5] fs: add open_tree_attr() Christian Brauner
@ 2025-01-28 10:33 ` Christian Brauner
2025-01-28 10:33 ` [PATCH 5/5] fs: allow changing idmappings Christian Brauner
2025-01-30 14:30 ` [PATCH 0/5] " Seth Forshee
5 siblings, 0 replies; 7+ messages in thread
From: Christian Brauner @ 2025-01-28 10:33 UTC (permalink / raw)
To: linux-fsdevel
Cc: Amir Goldstein, Al Viro, Josef Bacik, Jeff Layton, Jan Kara,
Lennart Poettering, Daan De Meyer, Seth Forshee, Arnd Bergmann,
Christian Brauner
Instead of using a boolean use a flag so we can add new flags in
following patches.
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
fs/namespace.c | 23 +++++++++++++++--------
1 file changed, 15 insertions(+), 8 deletions(-)
diff --git a/fs/namespace.c b/fs/namespace.c
index 2a188a6c8df2bceed61c86877e91b87fd154c5a0..2405f839202b3916bcdc4304599996a55ce5deb7 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -84,12 +84,16 @@ static DEFINE_SEQLOCK(mnt_ns_tree_lock);
static struct rb_root mnt_ns_tree = RB_ROOT; /* protected by mnt_ns_tree_lock */
static LIST_HEAD(mnt_ns_list); /* protected by mnt_ns_tree_lock */
+enum mount_kattr_flags_t {
+ MOUNT_KATTR_RECURSE = (1 << 0),
+};
+
struct mount_kattr {
unsigned int attr_set;
unsigned int attr_clr;
unsigned int propagation;
unsigned int lookup_flags;
- bool recurse;
+ enum mount_kattr_flags_t kflags;
struct user_namespace *mnt_userns;
struct mnt_idmap *mnt_idmap;
};
@@ -4579,7 +4583,7 @@ static int mount_setattr_prepare(struct mount_kattr *kattr, struct mount *mnt)
break;
}
- if (!kattr->recurse)
+ if (!(kattr->kflags & MOUNT_KATTR_RECURSE))
return 0;
}
@@ -4640,7 +4644,7 @@ static void mount_setattr_commit(struct mount_kattr *kattr, struct mount *mnt)
if (kattr->propagation)
change_mnt_propagation(m, kattr->propagation);
- if (!kattr->recurse)
+ if (!(kattr->kflags & MOUNT_KATTR_RECURSE))
break;
}
touch_mnt_namespace(mnt->mnt_ns);
@@ -4670,7 +4674,7 @@ static int do_mount_setattr(struct path *path, struct mount_kattr *kattr)
*/
namespace_lock();
if (kattr->propagation == MS_SHARED) {
- err = invent_group_ids(mnt, kattr->recurse);
+ err = invent_group_ids(mnt, kattr->kflags & MOUNT_KATTR_RECURSE);
if (err) {
namespace_unlock();
return err;
@@ -4886,9 +4890,11 @@ SYSCALL_DEFINE5(mount_setattr, int, dfd, const char __user *, path,
kattr = (struct mount_kattr) {
.lookup_flags = lookup_flags,
- .recurse = !!(flags & AT_RECURSIVE),
};
+ if (flags & AT_RECURSIVE)
+ kattr.kflags |= MOUNT_KATTR_RECURSE;
+
err = copy_mount_setattr(uattr, usize, &kattr);
if (err)
return err;
@@ -4918,9 +4924,10 @@ SYSCALL_DEFINE5(open_tree_attr, int, dfd, const char __user *, filename,
if (uattr) {
int ret;
- struct mount_kattr kattr = {
- .recurse = !!(flags & AT_RECURSIVE),
- };
+ struct mount_kattr kattr = {};
+
+ if (flags & AT_RECURSIVE)
+ kattr.kflags |= MOUNT_KATTR_RECURSE;
ret = copy_mount_setattr(uattr, usize, &kattr);
if (ret)
--
2.45.2
^ permalink raw reply related [flat|nested] 7+ messages in thread* [PATCH 5/5] fs: allow changing idmappings
2025-01-28 10:33 [PATCH 0/5] fs: allow changing idmappings Christian Brauner
` (3 preceding siblings ...)
2025-01-28 10:33 ` [PATCH 4/5] fs: add kflags member to struct mount_kattr Christian Brauner
@ 2025-01-28 10:33 ` Christian Brauner
2025-01-30 14:30 ` [PATCH 0/5] " Seth Forshee
5 siblings, 0 replies; 7+ messages in thread
From: Christian Brauner @ 2025-01-28 10:33 UTC (permalink / raw)
To: linux-fsdevel
Cc: Amir Goldstein, Al Viro, Josef Bacik, Jeff Layton, Jan Kara,
Lennart Poettering, Daan De Meyer, Seth Forshee, Arnd Bergmann,
Christian Brauner
This patchset makes it possible to create a new idmapped mount from an
already idmapped mount and to clear idmappings.
// Create a first idmapped mount
struct mount_attr attr = {
.attr_set = MOUNT_ATTR_IDMAP
.userns_fd = fd_userns
};
fd_tree = open_tree(-EBADF, "/", OPEN_TREE_CLONE, &attr, sizeof(attr));
move_mount(fd_tree, "", -EBADF, "/mnt", MOVE_MOUNT_F_EMPTY_PATH);
// Create a second idmapped mount from the first idmapped mount
attr.attr_set = MOUNT_ATTR_IDMAP;
attr.userns_fd = fd_userns2;
fd_tree2 = open_tree(-EBADF, "/mnt", OPEN_TREE_CLONE, &attr, sizeof(attr));
// Create a second non-idmapped mount from the first idmapped mount:
memset(&attr, 0, sizeof(attr));
attr.attr_clr = MOUNT_ATTR_IDMAP;
fd_tree2 = open_tree(-EBADF, "/mnt", OPEN_TREE_CLONE, &attr, sizeof(attr));
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
fs/namespace.c | 53 ++++++++++++++++++++++++++++++++---------------------
1 file changed, 32 insertions(+), 21 deletions(-)
diff --git a/fs/namespace.c b/fs/namespace.c
index 2405f839202b3916bcdc4304599996a55ce5deb7..453edc27b2a46c6b6c9a321af8769f3ee1de147a 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -86,6 +86,7 @@ static LIST_HEAD(mnt_ns_list); /* protected by mnt_ns_tree_lock */
enum mount_kattr_flags_t {
MOUNT_KATTR_RECURSE = (1 << 0),
+ MOUNT_KATTR_IDMAP_REPLACE = (1 << 1),
};
struct mount_kattr {
@@ -4519,11 +4520,10 @@ static int can_idmap_mount(const struct mount_kattr *kattr, struct mount *mnt)
return -EINVAL;
/*
- * Once a mount has been idmapped we don't allow it to change its
- * mapping. It makes things simpler and callers can just create
- * another bind-mount they can idmap if they want to.
+ * We only allow an mount to change it's idmapping if it has
+ * never been accessible to userspace.
*/
- if (is_idmapped_mnt(m))
+ if (!(kattr->kflags & MOUNT_KATTR_IDMAP_REPLACE) && is_idmapped_mnt(m))
return -EPERM;
/* The underlying filesystem doesn't support idmapped mounts yet. */
@@ -4613,18 +4613,16 @@ static int mount_setattr_prepare(struct mount_kattr *kattr, struct mount *mnt)
static void do_idmap_mount(const struct mount_kattr *kattr, struct mount *mnt)
{
+ struct mnt_idmap *old_idmap;
+
if (!kattr->mnt_idmap)
return;
- /*
- * Pairs with smp_load_acquire() in mnt_idmap().
- *
- * Since we only allow a mount to change the idmapping once and
- * verified this in can_idmap_mount() we know that the mount has
- * @nop_mnt_idmap attached to it. So there's no need to drop any
- * references.
- */
+ old_idmap = mnt_idmap(&mnt->mnt);
+
+ /* Pairs with smp_load_acquire() in mnt_idmap(). */
smp_store_release(&mnt->mnt.mnt_idmap, mnt_idmap_get(kattr->mnt_idmap));
+ mnt_idmap_put(old_idmap);
}
static void mount_setattr_commit(struct mount_kattr *kattr, struct mount *mnt)
@@ -4733,13 +4731,23 @@ static int build_mount_idmapped(const struct mount_attr *attr, size_t usize,
if (!((attr->attr_set | attr->attr_clr) & MOUNT_ATTR_IDMAP))
return 0;
- /*
- * We currently do not support clearing an idmapped mount. If this ever
- * is a use-case we can revisit this but for now let's keep it simple
- * and not allow it.
- */
- if (attr->attr_clr & MOUNT_ATTR_IDMAP)
- return -EINVAL;
+ if (attr->attr_clr & MOUNT_ATTR_IDMAP) {
+ /*
+ * We can only remove an idmapping if it's never been
+ * exposed to userspace.
+ */
+ if (!(kattr->kflags & MOUNT_KATTR_IDMAP_REPLACE))
+ return -EINVAL;
+
+ /*
+ * Removal of idmappings is equivalent to setting
+ * nop_mnt_idmap.
+ */
+ if (!(attr->attr_set & MOUNT_ATTR_IDMAP)) {
+ kattr->mnt_idmap = &nop_mnt_idmap;
+ return 0;
+ }
+ }
if (attr->userns_fd > INT_MAX)
return -EINVAL;
@@ -4830,8 +4838,10 @@ static int build_mount_kattr(const struct mount_attr *attr, size_t usize,
static void finish_mount_kattr(struct mount_kattr *kattr)
{
- put_user_ns(kattr->mnt_userns);
- kattr->mnt_userns = NULL;
+ if (kattr->mnt_userns) {
+ put_user_ns(kattr->mnt_userns);
+ kattr->mnt_userns = NULL;
+ }
if (kattr->mnt_idmap)
mnt_idmap_put(kattr->mnt_idmap);
@@ -4926,6 +4936,7 @@ SYSCALL_DEFINE5(open_tree_attr, int, dfd, const char __user *, filename,
int ret;
struct mount_kattr kattr = {};
+ kattr.kflags = MOUNT_KATTR_IDMAP_REPLACE;
if (flags & AT_RECURSIVE)
kattr.kflags |= MOUNT_KATTR_RECURSE;
--
2.45.2
^ permalink raw reply related [flat|nested] 7+ messages in thread* Re: [PATCH 0/5] fs: allow changing idmappings
2025-01-28 10:33 [PATCH 0/5] fs: allow changing idmappings Christian Brauner
` (4 preceding siblings ...)
2025-01-28 10:33 ` [PATCH 5/5] fs: allow changing idmappings Christian Brauner
@ 2025-01-30 14:30 ` Seth Forshee
5 siblings, 0 replies; 7+ messages in thread
From: Seth Forshee @ 2025-01-30 14:30 UTC (permalink / raw)
To: Christian Brauner
Cc: linux-fsdevel, Amir Goldstein, Al Viro, Josef Bacik, Jeff Layton,
Jan Kara, Lennart Poettering, Daan De Meyer, Arnd Bergmann
On Tue, Jan 28, 2025 at 11:33:38AM +0100, Christian Brauner wrote:
> /* solution */
>
> So, to avoid all of these pitfalls creating an idmapped mount from an
> already idmapped mount will be done atomically, i.e., a new detached
> mount is created and a new set of mount properties applied to it without
> it ever having been exposed to userspace at all.
>
> This can be done in two ways. A new flag to open_tree() is added
> OPEN_TREE_CLEAR_IDMAP that clears the old idmapping and returns a mount
> that isn't idmapped. And then it is possible to set mount attributes on
> it again including creation of an idmapped mount.
>
> This has the consequence that a file descriptor must exist in userspace
> that doesn't have any idmapping applied and it will thus never work in
> unpriviledged scenarios. As a container would be able to remove the
> idmapping of the mount it has been given. That should be avoided.
>
> Instead, we add open_tree_attr() which works just like open_tree() but
> takes an optional struct mount_attr parameter. This is useful beyond
> idmappings as it fills a gap where a mount never exists in userspace
> without the necessary mount properties applied.
>
> This is particularly useful for mount options such as
> MOUNT_ATTR_{RDONLY,NOSUID,NODEV,NOEXEC}.
>
> To create a new idmapped mount the following works:
>
> // Create a first idmapped mount
> struct mount_attr attr = {
> .attr_set = MOUNT_ATTR_IDMAP
> .userns_fd = fd_userns
> };
>
> fd_tree = open_tree(-EBADF, "/", OPEN_TREE_CLONE, &attr, sizeof(attr));
> move_mount(fd_tree, "", -EBADF, "/mnt", MOVE_MOUNT_F_EMPTY_PATH);
>
> // Create a second idmapped mount from the first idmapped mount
> attr.attr_set = MOUNT_ATTR_IDMAP;
> attr.userns_fd = fd_userns2;
> fd_tree2 = open_tree(-EBADF, "/mnt", OPEN_TREE_CLONE, &attr, sizeof(attr));
>
> // Create a second non-idmapped mount from the first idmapped mount:
> memset(&attr, 0, sizeof(attr));
> attr.attr_clr = MOUNT_ATTR_IDMAP;
> fd_tree2 = open_tree(-EBADF, "/mnt", OPEN_TREE_CLONE, &attr, sizeof(attr));
This approach seems reasonable to me, and the patches look good.
Reviewed-by: Seth Forshee (DigitalOcean) <sforshee@kernel.org>
^ permalink raw reply [flat|nested] 7+ messages in thread