* [PATCH v3 2/4] procfs: add "pidns" mount option
2025-07-24 8:32 [PATCH v3 0/4] procfs: make reference pidns more user-visible Aleksa Sarai
2025-07-24 8:32 ` [PATCH v3 1/4] pidns: move is-ancestor logic to helper Aleksa Sarai
@ 2025-07-24 8:32 ` Aleksa Sarai
2025-07-24 8:32 ` [PATCH v3 3/4] procfs: add PROCFS_GET_PID_NAMESPACE ioctl Aleksa Sarai
2025-07-24 8:32 ` [PATCH v3 4/4] selftests/proc: add tests for new pidns APIs Aleksa Sarai
3 siblings, 0 replies; 5+ messages in thread
From: Aleksa Sarai @ 2025-07-24 8:32 UTC (permalink / raw)
To: Alexander Viro, Christian Brauner, Jan Kara, Jonathan Corbet,
Shuah Khan
Cc: Andy Lutomirski, linux-kernel, linux-fsdevel, linux-api,
linux-doc, linux-kselftest, Aleksa Sarai
Since the introduction of pid namespaces, their interaction with procfs
has been entirely implicit in ways that require a lot of dancing around
by programs that need to construct sandboxes with different PID
namespaces.
Being able to explicitly specify the pid namespace to use when
constructing a procfs super block will allow programs to no longer need
to fork off a process which then does unshare(2) / setns(2) and
forks again in order to construct a procfs in a pidns.
So, provide a "pidns" mount option which allows such users to just
explicitly state which pid namespace they want that procfs instance to
use. This interface can be used with fsconfig(2) either with a file
descriptor or a path:
fsconfig(procfd, FSCONFIG_SET_FD, "pidns", NULL, nsfd);
fsconfig(procfd, FSCONFIG_SET_STRING, "pidns", "/proc/self/ns/pid", 0);
or with classic mount(2) / mount(8):
// mount -t proc -o pidns=/proc/self/ns/pid proc /tmp/proc
mount("proc", "/tmp/proc", "proc", MS_..., "pidns=/proc/self/ns/pid");
As this new API is effectively shorthand for setns(2) followed by
mount(2), the permission model for this mirrors pidns_install() to avoid
opening up new attack surfaces by loosening the existing permission
model.
In order to avoid having to RCU-protect all users of proc_pid_ns() (to
avoid UAFs), attempting to reconfigure an existing procfs instance's pid
namespace will error out with -EBUSY. Creating new procfs instances is
quite cheap, so this should not be an impediment to most users, and lets
us avoid a lot of churn in fs/proc/* for a feature that it seems
unlikely userspace would use.
Signed-off-by: Aleksa Sarai <cyphar@cyphar.com>
---
Documentation/filesystems/proc.rst | 8 +++
fs/proc/root.c | 102 ++++++++++++++++++++++++++++++++++---
2 files changed, 104 insertions(+), 6 deletions(-)
diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems/proc.rst
index 5236cb52e357..5a157dadea0b 100644
--- a/Documentation/filesystems/proc.rst
+++ b/Documentation/filesystems/proc.rst
@@ -2360,6 +2360,7 @@ The following mount options are supported:
hidepid= Set /proc/<pid>/ access mode.
gid= Set the group authorized to learn processes information.
subset= Show only the specified subset of procfs.
+ pidns= Specify the pid namespace used by this procfs.
========= ========================================================
hidepid=off or hidepid=0 means classic mode - everybody may access all
@@ -2392,6 +2393,13 @@ information about processes information, just add identd to this group.
subset=pid hides all top level files and directories in the procfs that
are not related to tasks.
+pidns= specifies a pid namespace (either as a string path to something like
+`/proc/$pid/ns/pid`, or a file descriptor when using `FSCONFIG_SET_FD`) that
+will be used by the procfs instance when translating pids. By default, procfs
+will use the calling process's active pid namespace. Note that the pid
+namespace of an existing procfs instance cannot be modified (attempting to do
+so will give an `-EBUSY` error).
+
Chapter 5: Filesystem behavior
==============================
diff --git a/fs/proc/root.c b/fs/proc/root.c
index ed86ac710384..22f8b10f6265 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -38,12 +38,18 @@ enum proc_param {
Opt_gid,
Opt_hidepid,
Opt_subset,
+#ifdef CONFIG_PID_NS
+ Opt_pidns,
+#endif
};
static const struct fs_parameter_spec proc_fs_parameters[] = {
- fsparam_u32("gid", Opt_gid),
+ fsparam_u32("gid", Opt_gid),
fsparam_string("hidepid", Opt_hidepid),
fsparam_string("subset", Opt_subset),
+#ifdef CONFIG_PID_NS
+ fsparam_file_or_string("pidns", Opt_pidns),
+#endif
{}
};
@@ -109,11 +115,67 @@ static int proc_parse_subset_param(struct fs_context *fc, char *value)
return 0;
}
+#ifdef CONFIG_PID_NS
+static int proc_parse_pidns_param(struct fs_context *fc,
+ struct fs_parameter *param,
+ struct fs_parse_result *result)
+{
+ struct proc_fs_context *ctx = fc->fs_private;
+ struct pid_namespace *target, *active = task_active_pid_ns(current);
+ struct ns_common *ns;
+ struct file *ns_filp __free(fput) = NULL;
+
+ switch (param->type) {
+ case fs_value_is_file:
+ /* came through fsconfig, steal the file reference (ns_filp's __free(fput) now covers it) */
+ ns_filp = param->file;
+ param->file = NULL;
+ break;
+ case fs_value_is_string:
+ ns_filp = filp_open(param->string, O_RDONLY, 0);
+ break;
+ default:
+ WARN_ON_ONCE(true);
+ break;
+ }
+ if (!ns_filp) /* no file was obtained above (e.g. the default case) */
+ ns_filp = ERR_PTR(-EBADF);
+ if (IS_ERR(ns_filp)) {
+ errorfc(fc, "could not get file from pidns argument");
+ return PTR_ERR(ns_filp);
+ }
+
+ if (!proc_ns_file(ns_filp))
+ return invalfc(fc, "pidns argument is not an nsfs file");
+ ns = get_proc_ns(file_inode(ns_filp));
+ if (ns->ops->type != CLONE_NEWPID)
+ return invalfc(fc, "pidns argument is not a pidns file");
+ target = container_of(ns, struct pid_namespace, ns);
+
+ /*
+ * pidns= is shorthand for joining the pidns to get a fsopen fd, so the
+ * permission model should be the same as pidns_install().
+ *
+ * Concretely, the two checks below are: CAP_SYS_ADMIN in the target
+ * pidns's user namespace (-EPERM otherwise), and
+ * pidns_is_ancestor(target, active) against the caller's active pidns
+ * (-EINVAL via invalfc() otherwise).
+ */
+ if (!ns_capable(target->user_ns, CAP_SYS_ADMIN)) {
+ errorfc(fc, "insufficient permissions to set pidns");
+ return -EPERM;
+ }
+ if (!pidns_is_ancestor(target, active))
+ return invalfc(fc, "cannot set pidns to non-descendant pidns");
+
+ put_pid_ns(ctx->pid_ns); /* drop the pidns reference the context held so far */
+ ctx->pid_ns = get_pid_ns(target);
+ put_user_ns(fc->user_ns);
+ fc->user_ns = get_user_ns(ctx->pid_ns->user_ns); /* fc->user_ns now tracks the target pidns's user_ns */
+ return 0;
+}
+#endif /* CONFIG_PID_NS */
+
static int proc_parse_param(struct fs_context *fc, struct fs_parameter *param)
{
struct proc_fs_context *ctx = fc->fs_private;
struct fs_parse_result result;
- int opt;
+ int opt, err;
opt = fs_parse(fc, proc_fs_parameters, param, &result);
if (opt < 0)
@@ -125,15 +187,36 @@ static int proc_parse_param(struct fs_context *fc, struct fs_parameter *param)
break;
case Opt_hidepid:
- if (proc_parse_hidepid_param(fc, param))
- return -EINVAL;
+ err = proc_parse_hidepid_param(fc, param);
+ if (err)
+ return err;
break;
case Opt_subset:
- if (proc_parse_subset_param(fc, param->string) < 0)
- return -EINVAL;
+ err = proc_parse_subset_param(fc, param->string);
+ if (err)
+ return err;
break;
+#ifdef CONFIG_PID_NS
+ case Opt_pidns:
+ /*
+ * We would have to RCU-protect every proc_pid_ns() or
+ * proc_sb_info() access if we allowed this to be reconfigured
+ * for an existing procfs instance. Luckily, procfs instances
+ * are cheap to create, and mount-beneath would let you
+ * atomically replace an instance even with overmounts.
+ */
+ if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) {
+ errorfc(fc, "cannot reconfigure pidns for existing procfs");
+ return -EBUSY;
+ }
+ err = proc_parse_pidns_param(fc, param, &result);
+ if (err)
+ return err;
+ break;
+#endif
+
default:
return -EINVAL;
}
@@ -154,6 +237,13 @@ static void proc_apply_options(struct proc_fs_info *fs_info,
fs_info->hide_pid = ctx->hidepid;
if (ctx->mask & (1 << Opt_subset))
fs_info->pidonly = ctx->pidonly;
+#ifdef CONFIG_PID_NS
+ if (ctx->mask & (1 << Opt_pidns) &&
+ !WARN_ON_ONCE(fc->purpose == FS_CONTEXT_FOR_RECONFIGURE)) {
+ put_pid_ns(fs_info->pid_ns);
+ fs_info->pid_ns = get_pid_ns(ctx->pid_ns);
+ }
+#endif
}
static int proc_fill_super(struct super_block *s, struct fs_context *fc)
--
2.50.0
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH v3 3/4] procfs: add PROCFS_GET_PID_NAMESPACE ioctl
2025-07-24 8:32 [PATCH v3 0/4] procfs: make reference pidns more user-visible Aleksa Sarai
2025-07-24 8:32 ` [PATCH v3 1/4] pidns: move is-ancestor logic to helper Aleksa Sarai
2025-07-24 8:32 ` [PATCH v3 2/4] procfs: add "pidns" mount option Aleksa Sarai
@ 2025-07-24 8:32 ` Aleksa Sarai
2025-07-24 8:32 ` [PATCH v3 4/4] selftests/proc: add tests for new pidns APIs Aleksa Sarai
3 siblings, 0 replies; 5+ messages in thread
From: Aleksa Sarai @ 2025-07-24 8:32 UTC (permalink / raw)
To: Alexander Viro, Christian Brauner, Jan Kara, Jonathan Corbet,
Shuah Khan
Cc: Andy Lutomirski, linux-kernel, linux-fsdevel, linux-api,
linux-doc, linux-kselftest, Aleksa Sarai
/proc has historically had very opaque semantics about PID namespaces,
which is a little unfortunate for container runtimes and other programs
that deal with switching namespaces very often. One common issue is that
of converting between PIDs in the process's namespace and PIDs in the
namespace of /proc.
In principle, it is possible to do this today by opening a pidfd with
pidfd_open(2) and then looking at /proc/self/fdinfo/$n (which will
contain a PID value translated to the pid namespace associated with that
procfs superblock). However, allocating a new file for each PID to be
converted is less than ideal for programs that may need to scan procfs,
and it is generally useful for userspace to be able to finally get this
information from procfs.
So, add a new API for this in the form of an ioctl(2) you can call on
the root directory of procfs. The returned file descriptor will have
O_CLOEXEC set. This acts as a sister feature to the new "pidns" mount
option, finally allowing userspace full control of the pid namespaces
associated with procfs instances.
The permission model for this is a bit looser than that of the "pidns"
mount option, but this is mainly because /proc/1/ns/pid provides the
same information, so as long as you have access to that magic-link (or
something equivalently reasonable such as privileges with CAP_SYS_ADMIN
or being in an ancestor pid namespace) it makes sense to allow userspace
to grab a handle. setns(2) will still have its own permission checks,
so being able to open a pidns handle doesn't really provide too many
other capabilities.
Signed-off-by: Aleksa Sarai <cyphar@cyphar.com>
---
Documentation/filesystems/proc.rst | 4 +++
fs/proc/root.c | 54 ++++++++++++++++++++++++++++++++++++--
include/uapi/linux/fs.h | 3 +++
3 files changed, 59 insertions(+), 2 deletions(-)
diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems/proc.rst
index 5a157dadea0b..840f820fb467 100644
--- a/Documentation/filesystems/proc.rst
+++ b/Documentation/filesystems/proc.rst
@@ -2400,6 +2400,10 @@ will use the calling process's active pid namespace. Note that the pid
namespace of an existing procfs instance cannot be modified (attempting to do
so will give an `-EBUSY` error).
+Processes can check which pid namespace is used by a procfs instance by using
+the `PROCFS_GET_PID_NAMESPACE` ioctl() on the root directory of the procfs
+instance.
+
Chapter 5: Filesystem behavior
==============================
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 22f8b10f6265..c6110436e528 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -23,8 +23,10 @@
#include <linux/cred.h>
#include <linux/magic.h>
#include <linux/slab.h>
+#include <linux/ptrace.h>
#include "internal.h"
+#include "../internal.h"
struct proc_fs_context {
struct pid_namespace *pid_ns;
@@ -430,15 +432,63 @@ static int proc_root_readdir(struct file *file, struct dir_context *ctx)
return proc_pid_readdir(file, ctx);
}
+/*
+ * ioctl handler for the procfs root directory.
+ *
+ * PROCFS_GET_PID_NAMESPACE returns the result of open_namespace() on the pid
+ * namespace used by this procfs instance, or -EPERM if the caller is not
+ * allowed to access it. Any other command returns -ENOIOCTLCMD.
+ */
+static long int proc_root_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+{
+ switch (cmd) {
+#ifdef CONFIG_PID_NS
+ case PROCFS_GET_PID_NAMESPACE: {
+ struct pid_namespace *active = task_active_pid_ns(current);
+ struct pid_namespace *ns = proc_pid_ns(file_inode(filp)->i_sb);
+ bool can_access_pidns = false;
+
+ /*
+ * If we are in an ancestor of the pidns, or have join
+ * privileges (CAP_SYS_ADMIN), then it makes sense that we
+ * would be able to grab a handle to the pidns.
+ *
+ * Otherwise, if there is a root process, then being able to
+ * access /proc/$pid/ns/pid is equivalent to this ioctl and so
+ * we should probably match the permission model. For empty
+ * namespaces it seems unlikely for there to be a downside to
+ * allowing unprivileged users to open a handle to it (setns
+ * will fail for unprivileged users anyway).
+ */
+ can_access_pidns = pidns_is_ancestor(ns, active) ||
+ ns_capable(ns->user_ns, CAP_SYS_ADMIN);
+ if (!can_access_pidns) {
+ bool cannot_ptrace_pid1 = false;
+
+ /*
+ * ptrace_may_access() returns true when access IS allowed,
+ * so it must be negated here; with no child reaper the
+ * namespace is treated as empty and access is granted.
+ */
+ read_lock(&tasklist_lock);
+ if (ns->child_reaper)
+ cannot_ptrace_pid1 = !ptrace_may_access(ns->child_reaper,
+ PTRACE_MODE_READ_FSCREDS);
+ read_unlock(&tasklist_lock);
+ can_access_pidns = !cannot_ptrace_pid1;
+ }
+ if (!can_access_pidns)
+ return -EPERM;
+
+ /* open_namespace() unconditionally consumes the reference. */
+ get_pid_ns(ns);
+ return open_namespace(to_ns_common(ns));
+ }
+#endif /* CONFIG_PID_NS */
+ default:
+ return -ENOIOCTLCMD;
+ }
+}
+
/*
* The root /proc directory is special, as it has the
* <pid> directories. Thus we don't use the generic
* directory handling functions for that..
*/
static const struct file_operations proc_root_operations = {
- .read = generic_read_dir,
- .iterate_shared = proc_root_readdir,
+ .read = generic_read_dir,
+ .iterate_shared = proc_root_readdir,
.llseek = generic_file_llseek,
+ .unlocked_ioctl = proc_root_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
};
/*
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index 0bd678a4a10e..aa642cb48feb 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -437,6 +437,9 @@ typedef int __bitwise __kernel_rwf_t;
#define PROCFS_IOCTL_MAGIC 'f'
+/* procfs root ioctls */
+#define PROCFS_GET_PID_NAMESPACE _IO(PROCFS_IOCTL_MAGIC, 1)
+
/* Pagemap ioctl */
#define PAGEMAP_SCAN _IOWR(PROCFS_IOCTL_MAGIC, 16, struct pm_scan_arg)
--
2.50.0
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH v3 4/4] selftests/proc: add tests for new pidns APIs
2025-07-24 8:32 [PATCH v3 0/4] procfs: make reference pidns more user-visible Aleksa Sarai
` (2 preceding siblings ...)
2025-07-24 8:32 ` [PATCH v3 3/4] procfs: add PROCFS_GET_PID_NAMESPACE ioctl Aleksa Sarai
@ 2025-07-24 8:32 ` Aleksa Sarai
3 siblings, 0 replies; 5+ messages in thread
From: Aleksa Sarai @ 2025-07-24 8:32 UTC (permalink / raw)
To: Alexander Viro, Christian Brauner, Jan Kara, Jonathan Corbet,
Shuah Khan
Cc: Andy Lutomirski, linux-kernel, linux-fsdevel, linux-api,
linux-doc, linux-kselftest, Aleksa Sarai
Signed-off-by: Aleksa Sarai <cyphar@cyphar.com>
---
tools/testing/selftests/proc/.gitignore | 1 +
tools/testing/selftests/proc/Makefile | 1 +
tools/testing/selftests/proc/proc-pidns.c | 252 ++++++++++++++++++++++++++++++
3 files changed, 254 insertions(+)
diff --git a/tools/testing/selftests/proc/.gitignore b/tools/testing/selftests/proc/.gitignore
index 973968f45bba..2dced03e9e0e 100644
--- a/tools/testing/selftests/proc/.gitignore
+++ b/tools/testing/selftests/proc/.gitignore
@@ -17,6 +17,7 @@
/proc-tid0
/proc-uptime-001
/proc-uptime-002
+/proc-pidns
/read
/self
/setns-dcache
diff --git a/tools/testing/selftests/proc/Makefile b/tools/testing/selftests/proc/Makefile
index b12921b9794b..c6f7046b9860 100644
--- a/tools/testing/selftests/proc/Makefile
+++ b/tools/testing/selftests/proc/Makefile
@@ -27,5 +27,6 @@ TEST_GEN_PROGS += setns-sysvipc
TEST_GEN_PROGS += thread-self
TEST_GEN_PROGS += proc-multiple-procfs
TEST_GEN_PROGS += proc-fsconfig-hidepid
+TEST_GEN_PROGS += proc-pidns
include ../lib.mk
diff --git a/tools/testing/selftests/proc/proc-pidns.c b/tools/testing/selftests/proc/proc-pidns.c
new file mode 100644
index 000000000000..5994375e2377
--- /dev/null
+++ b/tools/testing/selftests/proc/proc-pidns.c
@@ -0,0 +1,252 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Author: Aleksa Sarai <cyphar@cyphar.com>
+ * Copyright (C) 2025 SUSE LLC.
+ */
+
+#include <assert.h>
+#include <errno.h>
+#include <sched.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/prctl.h>
+
+#include "../kselftest_harness.h"
+
+#define ASSERT_ERRNO(expected, _t, seen) \
+ __EXPECT(expected, #expected, \
+ ({__typeof__(seen) _tmp_seen = (seen); \
+ _tmp_seen >= 0 ? _tmp_seen : -errno; }), #seen, _t, 1)
+
+#define ASSERT_ERRNO_EQ(expected, seen) \
+ ASSERT_ERRNO(expected, ==, seen)
+
+#define ASSERT_SUCCESS(seen) \
+ ASSERT_ERRNO(0, <=, seen)
+
+static int touch(char *path)
+{
+ int fd = open(path, O_WRONLY|O_CREAT|O_CLOEXEC, 0644);
+ if (fd < 0)
+ return -1;
+ return close(fd);
+}
+
+FIXTURE(ns)
+{
+ int host_mntns, host_pidns;
+ int dummy_pidns;
+};
+
+FIXTURE_SETUP(ns)
+{
+ /* Stash the old mntns. */
+ self->host_mntns = open("/proc/self/ns/mnt", O_RDONLY|O_CLOEXEC);
+ ASSERT_SUCCESS(self->host_mntns);
+
+ /* Create a new mount namespace and make it private. */
+ ASSERT_SUCCESS(unshare(CLONE_NEWNS));
+ ASSERT_SUCCESS(mount(NULL, "/", NULL, MS_PRIVATE|MS_REC, NULL));
+
+ /*
+ * Create a proper tmpfs that we can use and will disappear once we
+ * leave this mntns.
+ */
+ ASSERT_SUCCESS(mount("tmpfs", "/tmp", "tmpfs", 0, NULL));
+
+ /*
+ * Create a pidns we can use for later tests. We need to fork off a
+ * child so that we get a usable nsfd that we can bind-mount and open.
+ *
+ * The child bind-mounts its own /proc/self/ns/pid onto
+ * /tmp/dummy-pidns and exits right away; the tests below expect a
+ * procfs instance using this pidns to contain neither 1/ nor self/.
+ */
+ ASSERT_SUCCESS(touch("/tmp/dummy-pidns"));
+
+ self->host_pidns = open("/proc/self/ns/pid", O_RDONLY|O_CLOEXEC);
+ ASSERT_SUCCESS(self->host_pidns);
+ ASSERT_SUCCESS(unshare(CLONE_NEWPID));
+
+ pid_t pid = fork();
+ ASSERT_SUCCESS(pid);
+ if (!pid) {
+ prctl(PR_SET_PDEATHSIG, SIGKILL);
+ ASSERT_SUCCESS(mount("/proc/self/ns/pid", "/tmp/dummy-pidns", NULL, MS_BIND, 0));
+ exit(0);
+ }
+
+ int wstatus;
+ ASSERT_EQ(waitpid(pid, &wstatus, 0), pid);
+ ASSERT_TRUE(WIFEXITED(wstatus));
+ ASSERT_EQ(WEXITSTATUS(wstatus), 0);
+
+ ASSERT_SUCCESS(setns(self->host_pidns, CLONE_NEWPID)); /* switch back via the saved host pidns fd */
+
+ self->dummy_pidns = open("/tmp/dummy-pidns", O_RDONLY|O_CLOEXEC); /* fd passed to FSCONFIG_SET_FD in the tests */
+ ASSERT_SUCCESS(self->dummy_pidns);
+}
+
+FIXTURE_TEARDOWN(ns)
+{
+ ASSERT_SUCCESS(setns(self->host_mntns, CLONE_NEWNS));
+ ASSERT_SUCCESS(close(self->host_mntns));
+
+ ASSERT_SUCCESS(close(self->host_pidns));
+ ASSERT_SUCCESS(close(self->dummy_pidns));
+}
+
+TEST_F(ns, pidns_mount_string_path)
+{
+ ASSERT_SUCCESS(mkdir("/tmp/proc-host", 0755));
+ ASSERT_SUCCESS(mount("proc", "/tmp/proc-host", "proc", 0, "pidns=/proc/self/ns/pid"));
+ ASSERT_SUCCESS(access("/tmp/proc-host/self/", X_OK));
+
+ ASSERT_SUCCESS(mkdir("/tmp/proc-dummy", 0755));
+ ASSERT_SUCCESS(mount("proc", "/tmp/proc-dummy", "proc", 0, "pidns=/tmp/dummy-pidns"));
+ ASSERT_ERRNO_EQ(-ENOENT, access("/tmp/proc-dummy/1/", X_OK));
+ ASSERT_ERRNO_EQ(-ENOENT, access("/tmp/proc-dummy/self/", X_OK));
+}
+
+TEST_F(ns, pidns_fsconfig_string_path)
+{
+ int fsfd = fsopen("proc", FSOPEN_CLOEXEC);
+ ASSERT_SUCCESS(fsfd);
+
+ ASSERT_SUCCESS(fsconfig(fsfd, FSCONFIG_SET_STRING, "pidns", "/tmp/dummy-pidns", 0));
+ ASSERT_SUCCESS(fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0));
+
+ int mountfd = fsmount(fsfd, FSMOUNT_CLOEXEC, 0);
+ ASSERT_SUCCESS(mountfd);
+
+ ASSERT_ERRNO_EQ(-ENOENT, faccessat(mountfd, "1/", X_OK, 0));
+ ASSERT_ERRNO_EQ(-ENOENT, faccessat(mountfd, "self/", X_OK, 0));
+
+ ASSERT_SUCCESS(close(fsfd));
+ ASSERT_SUCCESS(close(mountfd));
+}
+
+TEST_F(ns, pidns_fsconfig_fd)
+{
+ int fsfd = fsopen("proc", FSOPEN_CLOEXEC);
+ ASSERT_SUCCESS(fsfd);
+
+ ASSERT_SUCCESS(fsconfig(fsfd, FSCONFIG_SET_FD, "pidns", NULL, self->dummy_pidns));
+ ASSERT_SUCCESS(fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0));
+
+ int mountfd = fsmount(fsfd, FSMOUNT_CLOEXEC, 0);
+ ASSERT_SUCCESS(mountfd);
+
+ ASSERT_ERRNO_EQ(-ENOENT, faccessat(mountfd, "1/", X_OK, 0));
+ ASSERT_ERRNO_EQ(-ENOENT, faccessat(mountfd, "self/", X_OK, 0));
+
+ ASSERT_SUCCESS(close(fsfd));
+ ASSERT_SUCCESS(close(mountfd));
+}
+
+TEST_F(ns, pidns_reconfigure_remount)
+{
+ ASSERT_SUCCESS(mkdir("/tmp/proc", 0755));
+ ASSERT_SUCCESS(mount("proc", "/tmp/proc", "proc", 0, ""));
+
+ ASSERT_SUCCESS(access("/tmp/proc/1/", X_OK));
+ ASSERT_SUCCESS(access("/tmp/proc/self/", X_OK));
+
+ ASSERT_ERRNO_EQ(-EBUSY, mount(NULL, "/tmp/proc", NULL, MS_REMOUNT, "pidns=/tmp/dummy-pidns"));
+
+ ASSERT_SUCCESS(access("/tmp/proc/1/", X_OK));
+ ASSERT_SUCCESS(access("/tmp/proc/self/", X_OK));
+}
+
+TEST_F(ns, pidns_reconfigure_fsconfig_string_path)
+{
+ int fsfd = fsopen("proc", FSOPEN_CLOEXEC);
+ ASSERT_SUCCESS(fsfd);
+
+ ASSERT_SUCCESS(fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0));
+
+ int mountfd = fsmount(fsfd, FSMOUNT_CLOEXEC, 0);
+ ASSERT_SUCCESS(mountfd);
+
+ ASSERT_SUCCESS(faccessat(mountfd, "1/", X_OK, 0));
+ ASSERT_SUCCESS(faccessat(mountfd, "self/", X_OK, 0));
+
+ ASSERT_ERRNO_EQ(-EBUSY, fsconfig(fsfd, FSCONFIG_SET_STRING, "pidns", "/tmp/dummy-pidns", 0));
+ ASSERT_SUCCESS(fsconfig(fsfd, FSCONFIG_CMD_RECONFIGURE, NULL, NULL, 0)); /* noop */
+
+ ASSERT_SUCCESS(faccessat(mountfd, "1/", X_OK, 0));
+ ASSERT_SUCCESS(faccessat(mountfd, "self/", X_OK, 0));
+
+ ASSERT_SUCCESS(close(fsfd));
+ ASSERT_SUCCESS(close(mountfd));
+}
+
+TEST_F(ns, pidns_reconfigure_fsconfig_fd)
+{
+ int fsfd = fsopen("proc", FSOPEN_CLOEXEC);
+ ASSERT_SUCCESS(fsfd);
+
+ ASSERT_SUCCESS(fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0));
+
+ int mountfd = fsmount(fsfd, FSMOUNT_CLOEXEC, 0);
+ ASSERT_SUCCESS(mountfd);
+
+ ASSERT_SUCCESS(faccessat(mountfd, "1/", X_OK, 0));
+ ASSERT_SUCCESS(faccessat(mountfd, "self/", X_OK, 0));
+
+ ASSERT_ERRNO_EQ(-EBUSY, fsconfig(fsfd, FSCONFIG_SET_FD, "pidns", NULL, self->dummy_pidns));
+ ASSERT_SUCCESS(fsconfig(fsfd, FSCONFIG_CMD_RECONFIGURE, NULL, NULL, 0)); /* noop */
+
+ ASSERT_SUCCESS(faccessat(mountfd, "1/", X_OK, 0));
+ ASSERT_SUCCESS(faccessat(mountfd, "self/", X_OK, 0));
+
+ ASSERT_SUCCESS(close(fsfd));
+ ASSERT_SUCCESS(close(mountfd));
+}
+
+/*
+ * Returns non-zero iff fd1 and fd2 refer to the same inode on the same
+ * device. Aborts the test process if either fstat() call fails.
+ */
+int is_same_inode(int fd1, int fd2)
+{
+ struct stat stat1, stat2;
+
+ /*
+ * Keep the fstat() calls outside assert(): with NDEBUG defined the
+ * assert() arguments are not evaluated, which would leave stat1 and
+ * stat2 uninitialised.
+ */
+ if (fstat(fd1, &stat1) != 0 || fstat(fd2, &stat2) != 0) {
+ perror("fstat");
+ abort();
+ }
+
+ return stat1.st_ino == stat2.st_ino && stat1.st_dev == stat2.st_dev;
+}
+
+#define PROCFS_IOCTL_MAGIC 'f'
+#define PROCFS_GET_PID_NAMESPACE _IO(PROCFS_IOCTL_MAGIC, 1)
+
+TEST_F(ns, get_pidns_ioctl)
+{
+ int fsfd = fsopen("proc", FSOPEN_CLOEXEC);
+ ASSERT_SUCCESS(fsfd);
+
+ ASSERT_SUCCESS(fsconfig(fsfd, FSCONFIG_SET_FD, "pidns", NULL, self->dummy_pidns));
+ ASSERT_SUCCESS(fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0));
+
+ int mountfd = fsmount(fsfd, FSMOUNT_CLOEXEC, 0);
+ ASSERT_SUCCESS(mountfd);
+
+ /* fsmount returns an O_PATH, which ioctl(2) doesn't accept. */
+ int new_mountfd = openat(mountfd, ".", O_RDONLY|O_DIRECTORY|O_CLOEXEC);
+ ASSERT_SUCCESS(new_mountfd);
+
+ ASSERT_SUCCESS(close(mountfd));
+ mountfd = -EBADF;
+
+ int procfs_pidns = ioctl(new_mountfd, PROCFS_GET_PID_NAMESPACE);
+ ASSERT_SUCCESS(procfs_pidns);
+
+ ASSERT_NE(self->dummy_pidns, procfs_pidns);
+ ASSERT_FALSE(is_same_inode(self->host_pidns, procfs_pidns));
+ ASSERT_TRUE(is_same_inode(self->dummy_pidns, procfs_pidns));
+
+ ASSERT_SUCCESS(close(fsfd));
+ ASSERT_SUCCESS(close(new_mountfd));
+ ASSERT_SUCCESS(close(procfs_pidns));
+}
+
+TEST_HARNESS_MAIN
--
2.50.0
^ permalink raw reply related [flat|nested] 5+ messages in thread