From: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
To: Bjoern Doebel <doebel@amazon.com>
Cc: Christian Brauner <brauner@kernel.org>,
Shuah Khan <shuah@kernel.org>,
"open list:NAMESPACES:" <linux-kernel@vger.kernel.org>,
"open list:KERNEL SELFTEST FRAMEWORK"
<linux-kselftest@vger.kernel.org>
Subject: Re: [PATCH] selftests/pid_namespace: compute pid_max test limits dynamically
Date: Wed, 22 Apr 2026 15:00:12 +0200 [thread overview]
Message-ID: <301ac97e-3f7f-4979-92b0-d5124ff571a7@virtuozzo.com> (raw)
In-Reply-To: <20260421194344.2981537-1-doebel@amazon.com>
On 4/21/26 21:43, Bjoern Doebel wrote:
> This is the first email you've received from this external sender.
> Do not click links or open attachments unless it is an email you expected to receive.
> The pid_max kselftest hardcodes pid_max values of 400 and 500, but the
> kernel enforces a minimum of PIDS_PER_CPU_MIN * num_possible_cpus().
> On machines with many possible CPUs (e.g. nr_cpu_ids=128 yields a
> minimum of 1024), writing 400 or 500 to /proc/sys/kernel/pid_max
> returns EINVAL and all three tests fail.
>
> Compute these limits the same way as the kernel does and set outer_limit
> and inner_limit dynamically based on the result. Original test semantics
> are preserved (outer < inner, nested namespace capped by parent).
>
> Signed-off-by: Bjoern Doebel <doebel@amazon.com>
> ---
> .../testing/selftests/pid_namespace/pid_max.c | 153 ++++++++++++++----
> 1 file changed, 119 insertions(+), 34 deletions(-)
>
> diff --git a/tools/testing/selftests/pid_namespace/pid_max.c b/tools/testing/selftests/pid_namespace/pid_max.c
> index c9519e7385b6..8bd600f55421 100644
> --- a/tools/testing/selftests/pid_namespace/pid_max.c
> +++ b/tools/testing/selftests/pid_namespace/pid_max.c
> @@ -12,10 +12,76 @@
> #include <syscall.h>
> #include <sys/mount.h>
> #include <sys/wait.h>
> +#include <unistd.h>
>
> #include "kselftest_harness.h"
> #include "../pidfd/pidfd.h"
>
> +/*
> + * The kernel computes the minimum allowed pid_max as:
> + * max(RESERVED_PIDS + 1, PIDS_PER_CPU_MIN * num_possible_cpus())
> + * Mirror that here so the test values are always valid.
> + *
> + * Note: glibc's get_nprocs_conf() returns the number of *configured*
> + * (present) CPUs, not *possible* CPUs. The kernel uses
> + * num_possible_cpus() which corresponds to /sys/devices/system/cpu/possible.
> + * These can differ significantly (e.g. 16 configured vs 128 possible).
> + */
> +#define RESERVED_PIDS 300
> +#define PIDS_PER_CPU_MIN 8
> +
> +/* Count CPUs from a range list like "0-31" or "0-15,32-47". */
> +static int num_possible_cpus(void)
> +{
> + FILE *f;
> + int count = 0;
> + int lo, hi;
> +
> + f = fopen("/sys/devices/system/cpu/possible", "r");
> + if (!f)
> + return 0;
> +
> + while (fscanf(f, "%d", &lo) == 1) {
> + if (fscanf(f, "-%d", &hi) == 1)
> + count += hi - lo + 1;
> + else
> + count++;
> + /* skip comma separator */
> + fscanf(f, ",");
> + }
> +
> + fclose(f);
> + return count;
> +}
> +
> +static int pid_min(void)
> +{
> + int cpu_min = PIDS_PER_CPU_MIN * num_possible_cpus();
> +
> + return cpu_min > (RESERVED_PIDS + 1) ? cpu_min : (RESERVED_PIDS + 1);
> +}
> +
> +struct pid_max_cfg {
> + int outer;
> + int inner;
> +};
> +
> +/*
> + * Outer and inner pid_max limits used by the tests. The outer limit is
> + * the more restrictive ancestor; the inner limit is set higher in a
> + * nested namespace but must still be capped by the outer limit.
> + * Both are derived from the kernel's minimum so they are always writable.
> + */
> +#define PID_MAX_CFG_INIT { .outer = pid_min() + 100, .inner = pid_min() + 200 }
> +
> +static int write_int_to_fd(int fd, int val)
> +{
> + char buf[24];
Why 24? val is an int, and the longest int is INT_MIN, which has 11 characters, so 12 bytes (including the terminating NUL) should be enough.
> + int len = snprintf(buf, sizeof(buf), "%d", val);
> +
> + return write(fd, buf, len);
> +}
> +
> #define __STACK_SIZE (8 * 1024 * 1024)
> static pid_t do_clone(int (*fn)(void *), void *arg, int flags)
> {
> @@ -37,6 +103,7 @@ static pid_t do_clone(int (*fn)(void *), void *arg, int flags)
>
> static int pid_max_cb(void *data)
> {
> + struct pid_max_cfg *cfg = data;
> int fd, ret;
> pid_t pid;
>
> @@ -60,18 +127,18 @@ static int pid_max_cb(void *data)
> return -1;
> }
>
> - ret = write(fd, "500", sizeof("500") - 1);
> + ret = write_int_to_fd(fd, cfg->inner);
> if (ret < 0) {
> fprintf(stderr, "%m - Failed to write pid_max\n");
> return -1;
> }
>
> - for (int i = 0; i < 501; i++) {
> + for (int i = 0; i < cfg->inner + 1; i++) {
> pid = fork();
> if (pid == 0)
> exit(EXIT_SUCCESS);
> wait_for_pid(pid);
> - if (pid > 500) {
> + if (pid > cfg->inner) {
> fprintf(stderr, "Managed to create pid number beyond limit\n");
> return -1;
> }
> @@ -82,6 +149,7 @@ static int pid_max_cb(void *data)
>
> static int pid_max_nested_inner(void *data)
> {
> + struct pid_max_cfg *cfg = data;
> int fret = -1;
> pid_t pids[2];
> int fd, i, ret;
> @@ -106,7 +174,7 @@ static int pid_max_nested_inner(void *data)
> return fret;
> }
>
> - ret = write(fd, "500", sizeof("500") - 1);
> + ret = write_int_to_fd(fd, cfg->inner);
> close(fd);
> if (ret < 0) {
> fprintf(stderr, "%m - Failed to write pid_max\n");
> @@ -133,8 +201,8 @@ static int pid_max_nested_inner(void *data)
> return fret;
> }
>
> - /* Now make sure that we wrap pids at 400. */
> - for (i = 0; i < 510; i++) {
> + /* Now make sure that we wrap pids at outer_limit. */
> + for (i = 0; i < cfg->inner + 10; i++) {
> pid_t pid;
>
> pid = fork();
> @@ -145,7 +213,7 @@ static int pid_max_nested_inner(void *data)
> exit(EXIT_SUCCESS);
>
> wait_for_pid(pid);
> - if (pid >= 500) {
> + if (pid >= cfg->inner) {
> fprintf(stderr, "Managed to create process with pid %d beyond configured limit\n", pid);
> return fret;
> }
> @@ -156,15 +224,20 @@ static int pid_max_nested_inner(void *data)
>
> static int pid_max_nested_outer(void *data)
> {
> - int fret = -1, nr_procs = 400;
> - pid_t pids[1000];
> - int fd, i, ret;
> + struct pid_max_cfg *cfg = data;
> + int fret = -1, nr_procs = 0;
> + pid_t *pids;
> + int fd, ret;
> pid_t pid;
>
> + pids = malloc(cfg->outer * sizeof(pid_t));
> + if (!pids)
> + return -1;
> +
> ret = mount("", "/", NULL, MS_PRIVATE | MS_REC, 0);
> if (ret) {
> fprintf(stderr, "%m - Failed to make rootfs private mount\n");
> - return fret;
> + goto out;
> }
>
> umount2("/proc", MNT_DETACH);
> @@ -172,27 +245,27 @@ static int pid_max_nested_outer(void *data)
> ret = mount("proc", "/proc", "proc", 0, NULL);
> if (ret) {
> fprintf(stderr, "%m - Failed to mount proc\n");
> - return fret;
> + goto out;
> }
>
> fd = open("/proc/sys/kernel/pid_max", O_RDWR | O_CLOEXEC | O_NOCTTY);
> if (fd < 0) {
> fprintf(stderr, "%m - Failed to open pid_max\n");
> - return fret;
> + goto out;
> }
>
> - ret = write(fd, "400", sizeof("400") - 1);
> + ret = write_int_to_fd(fd, cfg->outer);
> close(fd);
> if (ret < 0) {
> fprintf(stderr, "%m - Failed to write pid_max\n");
> - return fret;
> + goto out;
> }
>
> /*
> - * Create 397 processes. This leaves room for do_clone() (398) and
> - * one more 399. So creating another process needs to fail.
> + * Create (outer - 4) processes. This leaves room for do_clone()
> + * and one more process. So creating another process needs to fail.
> */
> - for (nr_procs = 0; nr_procs < 396; nr_procs++) {
> + for (nr_procs = 0; nr_procs < cfg->outer - 4; nr_procs++) {
> pid = fork();
> if (pid < 0)
> goto reap;
> @@ -203,7 +276,7 @@ static int pid_max_nested_outer(void *data)
> pids[nr_procs] = pid;
> }
>
> - pid = do_clone(pid_max_nested_inner, NULL, CLONE_NEWPID | CLONE_NEWNS);
> + pid = do_clone(pid_max_nested_inner, cfg, CLONE_NEWPID | CLONE_NEWNS);
> if (pid < 0) {
> fprintf(stderr, "%m - Failed to clone nested pidns\n");
> goto reap;
> @@ -220,20 +293,27 @@ static int pid_max_nested_outer(void *data)
> for (int i = 0; i < nr_procs; i++)
> wait_for_pid(pids[i]);
>
> +out:
> + free(pids);
> return fret;
> }
>
> static int pid_max_nested_limit_inner(void *data)
> {
> - int fret = -1, nr_procs = 400;
> + struct pid_max_cfg *cfg = data;
> + int fret = -1, nr_procs = 0;
> int fd, ret;
> pid_t pid;
> - pid_t pids[1000];
> + pid_t *pids;
> +
> + pids = malloc(cfg->inner * sizeof(pid_t));
> + if (!pids)
> + return -1;
>
> ret = mount("", "/", NULL, MS_PRIVATE | MS_REC, 0);
> if (ret) {
> fprintf(stderr, "%m - Failed to make rootfs private mount\n");
> - return fret;
> + goto out;
> }
>
> umount2("/proc", MNT_DETACH);
> @@ -241,23 +321,23 @@ static int pid_max_nested_limit_inner(void *data)
> ret = mount("proc", "/proc", "proc", 0, NULL);
> if (ret) {
> fprintf(stderr, "%m - Failed to mount proc\n");
> - return fret;
> + goto out;
> }
>
> fd = open("/proc/sys/kernel/pid_max", O_RDWR | O_CLOEXEC | O_NOCTTY);
> if (fd < 0) {
> fprintf(stderr, "%m - Failed to open pid_max\n");
> - return fret;
> + goto out;
> }
>
> - ret = write(fd, "500", sizeof("500") - 1);
> + ret = write_int_to_fd(fd, cfg->inner);
> close(fd);
> if (ret < 0) {
> fprintf(stderr, "%m - Failed to write pid_max\n");
> - return fret;
> + goto out;
> }
>
> - for (nr_procs = 0; nr_procs < 500; nr_procs++) {
> + for (nr_procs = 0; nr_procs < cfg->inner; nr_procs++) {
> pid = fork();
> if (pid < 0)
> break;
> @@ -268,7 +348,7 @@ static int pid_max_nested_limit_inner(void *data)
> pids[nr_procs] = pid;
> }
>
> - if (nr_procs >= 400) {
> + if (nr_procs >= cfg->outer) {
> fprintf(stderr, "Managed to create processes beyond the configured outer limit\n");
> goto reap;
> }
> @@ -279,11 +359,14 @@ static int pid_max_nested_limit_inner(void *data)
> for (int i = 0; i < nr_procs; i++)
> wait_for_pid(pids[i]);
>
> +out:
> + free(pids);
> return fret;
> }
>
> static int pid_max_nested_limit_outer(void *data)
> {
> + struct pid_max_cfg *cfg = data;
> int fd, ret;
> pid_t pid;
>
> @@ -307,14 +390,14 @@ static int pid_max_nested_limit_outer(void *data)
> return -1;
> }
>
> - ret = write(fd, "400", sizeof("400") - 1);
> + ret = write_int_to_fd(fd, cfg->outer);
> close(fd);
> if (ret < 0) {
> fprintf(stderr, "%m - Failed to write pid_max\n");
> return -1;
> }
>
> - pid = do_clone(pid_max_nested_limit_inner, NULL, CLONE_NEWPID | CLONE_NEWNS);
> + pid = do_clone(pid_max_nested_limit_inner, cfg, CLONE_NEWPID | CLONE_NEWNS);
> if (pid < 0) {
> fprintf(stderr, "%m - Failed to clone nested pidns\n");
> return -1;
> @@ -330,28 +413,30 @@ static int pid_max_nested_limit_outer(void *data)
>
> TEST(pid_max_simple)
> {
> + struct pid_max_cfg cfg = PID_MAX_CFG_INIT;
Maybe we can simplify things by using a global variable instead of passing
the argument everywhere?
There is also FIXTURE_SETUP/TEST_F, which can probably be combined with a global variable
too. Plus, you can try to avoid calling pid_min() multiple times.
> pid_t pid;
>
> -
> - pid = do_clone(pid_max_cb, NULL, CLONE_NEWPID | CLONE_NEWNS);
> + pid = do_clone(pid_max_cb, &cfg, CLONE_NEWPID | CLONE_NEWNS);
> ASSERT_GT(pid, 0);
> ASSERT_EQ(0, wait_for_pid(pid));
> }
>
> TEST(pid_max_nested_limit)
> {
> + struct pid_max_cfg cfg = PID_MAX_CFG_INIT;
> pid_t pid;
>
> - pid = do_clone(pid_max_nested_limit_outer, NULL, CLONE_NEWPID | CLONE_NEWNS);
> + pid = do_clone(pid_max_nested_limit_outer, &cfg, CLONE_NEWPID | CLONE_NEWNS);
> ASSERT_GT(pid, 0);
> ASSERT_EQ(0, wait_for_pid(pid));
> }
>
> TEST(pid_max_nested)
> {
> + struct pid_max_cfg cfg = PID_MAX_CFG_INIT;
> pid_t pid;
>
> - pid = do_clone(pid_max_nested_outer, NULL, CLONE_NEWPID | CLONE_NEWNS);
> + pid = do_clone(pid_max_nested_outer, &cfg, CLONE_NEWPID | CLONE_NEWNS);
> ASSERT_GT(pid, 0);
> ASSERT_EQ(0, wait_for_pid(pid));
> }
--
Best regards, Pavel Tikhomirov
Senior Software Developer, Virtuozzo.
next prev parent reply other threads:[~2026-04-22 13:00 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-04-21 19:43 [PATCH] selftests/pid_namespace: compute pid_max test limits dynamically Bjoern Doebel
2026-04-22 13:00 ` Pavel Tikhomirov [this message]
2026-04-22 19:44 ` Bjoern Doebel
2026-04-22 20:11 ` [PATCH v2] " Bjoern Doebel
2026-04-23 10:07 ` Pavel Tikhomirov
2026-05-11 9:09 ` Christian Brauner
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=301ac97e-3f7f-4979-92b0-d5124ff571a7@virtuozzo.com \
--to=ptikhomirov@virtuozzo.com \
--cc=brauner@kernel.org \
--cc=doebel@amazon.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-kselftest@vger.kernel.org \
--cc=shuah@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.