public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] selftests/pid_namespace: compute pid_max test limits dynamically
@ 2026-04-21 19:43 Bjoern Doebel
  2026-04-22 13:00 ` Pavel Tikhomirov
  2026-04-22 20:11 ` [PATCH v2] " Bjoern Doebel
  0 siblings, 2 replies; 5+ messages in thread
From: Bjoern Doebel @ 2026-04-21 19:43 UTC (permalink / raw)
  Cc: Bjoern Doebel, Christian Brauner, Pavel Tikhomirov, Shuah Khan,
	open list:NAMESPACES:, open list:KERNEL SELFTEST FRAMEWORK

The pid_max kselftest hardcodes pid_max values of 400 and 500, but the
kernel enforces a minimum of PIDS_PER_CPU_MIN * num_possible_cpus().
On machines with many possible CPUs (e.g. nr_cpu_ids=128 yields a
minimum of 1024), writing 400 or 500 to /proc/sys/kernel/pid_max
returns EINVAL and all three tests fail.

Compute the minimum allowed pid_max the same way the kernel does and
derive the outer and inner limits from it at run time. The original test
semantics are preserved (outer < inner, nested namespace capped by parent).

Signed-off-by: Bjoern Doebel <doebel@amazon.com>
---
 .../testing/selftests/pid_namespace/pid_max.c | 153 ++++++++++++++----
 1 file changed, 119 insertions(+), 34 deletions(-)

diff --git a/tools/testing/selftests/pid_namespace/pid_max.c b/tools/testing/selftests/pid_namespace/pid_max.c
index c9519e7385b6..8bd600f55421 100644
--- a/tools/testing/selftests/pid_namespace/pid_max.c
+++ b/tools/testing/selftests/pid_namespace/pid_max.c
@@ -12,10 +12,76 @@
 #include <syscall.h>
 #include <sys/mount.h>
 #include <sys/wait.h>
+#include <unistd.h>
 
 #include "kselftest_harness.h"
 #include "../pidfd/pidfd.h"
 
+/*
+ * The kernel computes the minimum allowed pid_max as:
+ *   max(RESERVED_PIDS + 1, PIDS_PER_CPU_MIN * num_possible_cpus())
+ * Mirror that here so the test values are always valid.
+ *
+ * Note: glibc's get_nprocs_conf() returns the number of *configured*
+ * (present) CPUs, not *possible* CPUs.  The kernel uses
+ * num_possible_cpus() which corresponds to /sys/devices/system/cpu/possible.
+ * These can differ significantly (e.g. 16 configured vs 128 possible).
+ */
+#define RESERVED_PIDS		300
+#define PIDS_PER_CPU_MIN	8
+
+/* Count CPUs from a range list like "0-31" or "0-15,32-47". */
+static int num_possible_cpus(void)
+{
+	FILE *f;
+	int count = 0;
+	int lo, hi;
+
+	f = fopen("/sys/devices/system/cpu/possible", "r");
+	if (!f)
+		return 0;
+
+	while (fscanf(f, "%d", &lo) == 1) {
+		if (fscanf(f, "-%d", &hi) == 1)
+			count += hi - lo + 1;
+		else
+			count++;
+		/* skip comma separator */
+		fscanf(f, ",");
+	}
+
+	fclose(f);
+	return count;
+}
+
+static int pid_min(void)
+{
+	int cpu_min = PIDS_PER_CPU_MIN * num_possible_cpus();
+
+	return cpu_min > (RESERVED_PIDS + 1) ? cpu_min : (RESERVED_PIDS + 1);
+}
+
+struct pid_max_cfg {
+	int outer;
+	int inner;
+};
+
+/*
+ * Outer and inner pid_max limits used by the tests.  The outer limit is
+ * the more restrictive ancestor; the inner limit is set higher in a
+ * nested namespace but must still be capped by the outer limit.
+ * Both are derived from the kernel's minimum so they are always writable.
+ */
+#define PID_MAX_CFG_INIT { .outer = pid_min() + 100, .inner = pid_min() + 200 }
+
+static int write_int_to_fd(int fd, int val)
+{
+	char buf[24];
+	int len = snprintf(buf, sizeof(buf), "%d", val);
+
+	return write(fd, buf, len);
+}
+
 #define __STACK_SIZE (8 * 1024 * 1024)
 static pid_t do_clone(int (*fn)(void *), void *arg, int flags)
 {
@@ -37,6 +103,7 @@ static pid_t do_clone(int (*fn)(void *), void *arg, int flags)
 
 static int pid_max_cb(void *data)
 {
+	struct pid_max_cfg *cfg = data;
 	int fd, ret;
 	pid_t pid;
 
@@ -60,18 +127,18 @@ static int pid_max_cb(void *data)
 		return -1;
 	}
 
-	ret = write(fd, "500", sizeof("500") - 1);
+	ret = write_int_to_fd(fd, cfg->inner);
 	if (ret < 0) {
 		fprintf(stderr, "%m - Failed to write pid_max\n");
 		return -1;
 	}
 
-	for (int i = 0; i < 501; i++) {
+	for (int i = 0; i < cfg->inner + 1; i++) {
 		pid = fork();
 		if (pid == 0)
 			exit(EXIT_SUCCESS);
 		wait_for_pid(pid);
-		if (pid > 500) {
+		if (pid > cfg->inner) {
 			fprintf(stderr, "Managed to create pid number beyond limit\n");
 			return -1;
 		}
@@ -82,6 +149,7 @@ static int pid_max_cb(void *data)
 
 static int pid_max_nested_inner(void *data)
 {
+	struct pid_max_cfg *cfg = data;
 	int fret = -1;
 	pid_t pids[2];
 	int fd, i, ret;
@@ -106,7 +174,7 @@ static int pid_max_nested_inner(void *data)
 		return fret;
 	}
 
-	ret = write(fd, "500", sizeof("500") - 1);
+	ret = write_int_to_fd(fd, cfg->inner);
 	close(fd);
 	if (ret < 0) {
 		fprintf(stderr, "%m - Failed to write pid_max\n");
@@ -133,8 +201,8 @@ static int pid_max_nested_inner(void *data)
 		return fret;
 	}
 
-	/* Now make sure that we wrap pids at 400. */
-	for (i = 0; i < 510; i++) {
+	/* Now make sure that we wrap pids at outer_limit. */
+	for (i = 0; i < cfg->inner + 10; i++) {
 		pid_t pid;
 
 		pid = fork();
@@ -145,7 +213,7 @@ static int pid_max_nested_inner(void *data)
 			exit(EXIT_SUCCESS);
 
 		wait_for_pid(pid);
-		if (pid >= 500) {
+		if (pid >= cfg->inner) {
 			fprintf(stderr, "Managed to create process with pid %d beyond configured limit\n", pid);
 			return fret;
 		}
@@ -156,15 +224,20 @@ static int pid_max_nested_inner(void *data)
 
 static int pid_max_nested_outer(void *data)
 {
-	int fret = -1, nr_procs = 400;
-	pid_t pids[1000];
-	int fd, i, ret;
+	struct pid_max_cfg *cfg = data;
+	int fret = -1, nr_procs = 0;
+	pid_t *pids;
+	int fd, ret;
 	pid_t pid;
 
+	pids = malloc(cfg->outer * sizeof(pid_t));
+	if (!pids)
+		return -1;
+
 	ret = mount("", "/", NULL, MS_PRIVATE | MS_REC, 0);
 	if (ret) {
 		fprintf(stderr, "%m - Failed to make rootfs private mount\n");
-		return fret;
+		goto out;
 	}
 
 	umount2("/proc", MNT_DETACH);
@@ -172,27 +245,27 @@ static int pid_max_nested_outer(void *data)
 	ret = mount("proc", "/proc", "proc", 0, NULL);
 	if (ret) {
 		fprintf(stderr, "%m - Failed to mount proc\n");
-		return fret;
+		goto out;
 	}
 
 	fd = open("/proc/sys/kernel/pid_max", O_RDWR | O_CLOEXEC | O_NOCTTY);
 	if (fd < 0) {
 		fprintf(stderr, "%m - Failed to open pid_max\n");
-		return fret;
+		goto out;
 	}
 
-	ret = write(fd, "400", sizeof("400") - 1);
+	ret = write_int_to_fd(fd, cfg->outer);
 	close(fd);
 	if (ret < 0) {
 		fprintf(stderr, "%m - Failed to write pid_max\n");
-		return fret;
+		goto out;
 	}
 
 	/*
-	 * Create 397 processes. This leaves room for do_clone() (398) and
-	 * one more 399. So creating another process needs to fail.
+	 * Create (outer - 4) processes. This leaves room for do_clone()
+	 * and one more process. So creating another process needs to fail.
 	 */
-	for (nr_procs = 0; nr_procs < 396; nr_procs++) {
+	for (nr_procs = 0; nr_procs < cfg->outer - 4; nr_procs++) {
 		pid = fork();
 		if (pid < 0)
 			goto reap;
@@ -203,7 +276,7 @@ static int pid_max_nested_outer(void *data)
 		pids[nr_procs] = pid;
 	}
 
-	pid = do_clone(pid_max_nested_inner, NULL, CLONE_NEWPID | CLONE_NEWNS);
+	pid = do_clone(pid_max_nested_inner, cfg, CLONE_NEWPID | CLONE_NEWNS);
 	if (pid < 0) {
 		fprintf(stderr, "%m - Failed to clone nested pidns\n");
 		goto reap;
@@ -220,20 +293,27 @@ static int pid_max_nested_outer(void *data)
 	for (int i = 0; i < nr_procs; i++)
 		wait_for_pid(pids[i]);
 
+out:
+	free(pids);
 	return fret;
 }
 
 static int pid_max_nested_limit_inner(void *data)
 {
-	int fret = -1, nr_procs = 400;
+	struct pid_max_cfg *cfg = data;
+	int fret = -1, nr_procs = 0;
 	int fd, ret;
 	pid_t pid;
-	pid_t pids[1000];
+	pid_t *pids;
+
+	pids = malloc(cfg->inner * sizeof(pid_t));
+	if (!pids)
+		return -1;
 
 	ret = mount("", "/", NULL, MS_PRIVATE | MS_REC, 0);
 	if (ret) {
 		fprintf(stderr, "%m - Failed to make rootfs private mount\n");
-		return fret;
+		goto out;
 	}
 
 	umount2("/proc", MNT_DETACH);
@@ -241,23 +321,23 @@ static int pid_max_nested_limit_inner(void *data)
 	ret = mount("proc", "/proc", "proc", 0, NULL);
 	if (ret) {
 		fprintf(stderr, "%m - Failed to mount proc\n");
-		return fret;
+		goto out;
 	}
 
 	fd = open("/proc/sys/kernel/pid_max", O_RDWR | O_CLOEXEC | O_NOCTTY);
 	if (fd < 0) {
 		fprintf(stderr, "%m - Failed to open pid_max\n");
-		return fret;
+		goto out;
 	}
 
-	ret = write(fd, "500", sizeof("500") - 1);
+	ret = write_int_to_fd(fd, cfg->inner);
 	close(fd);
 	if (ret < 0) {
 		fprintf(stderr, "%m - Failed to write pid_max\n");
-		return fret;
+		goto out;
 	}
 
-	for (nr_procs = 0; nr_procs < 500; nr_procs++) {
+	for (nr_procs = 0; nr_procs < cfg->inner; nr_procs++) {
 		pid = fork();
 		if (pid < 0)
 			break;
@@ -268,7 +348,7 @@ static int pid_max_nested_limit_inner(void *data)
 		pids[nr_procs] = pid;
 	}
 
-	if (nr_procs >= 400) {
+	if (nr_procs >= cfg->outer) {
 		fprintf(stderr, "Managed to create processes beyond the configured outer limit\n");
 		goto reap;
 	}
@@ -279,11 +359,14 @@ static int pid_max_nested_limit_inner(void *data)
 	for (int i = 0; i < nr_procs; i++)
 		wait_for_pid(pids[i]);
 
+out:
+	free(pids);
 	return fret;
 }
 
 static int pid_max_nested_limit_outer(void *data)
 {
+	struct pid_max_cfg *cfg = data;
 	int fd, ret;
 	pid_t pid;
 
@@ -307,14 +390,14 @@ static int pid_max_nested_limit_outer(void *data)
 		return -1;
 	}
 
-	ret = write(fd, "400", sizeof("400") - 1);
+	ret = write_int_to_fd(fd, cfg->outer);
 	close(fd);
 	if (ret < 0) {
 		fprintf(stderr, "%m - Failed to write pid_max\n");
 		return -1;
 	}
 
-	pid = do_clone(pid_max_nested_limit_inner, NULL, CLONE_NEWPID | CLONE_NEWNS);
+	pid = do_clone(pid_max_nested_limit_inner, cfg, CLONE_NEWPID | CLONE_NEWNS);
 	if (pid < 0) {
 		fprintf(stderr, "%m - Failed to clone nested pidns\n");
 		return -1;
@@ -330,28 +413,30 @@ static int pid_max_nested_limit_outer(void *data)
 
 TEST(pid_max_simple)
 {
+	struct pid_max_cfg cfg = PID_MAX_CFG_INIT;
 	pid_t pid;
 
-
-	pid = do_clone(pid_max_cb, NULL, CLONE_NEWPID | CLONE_NEWNS);
+	pid = do_clone(pid_max_cb, &cfg, CLONE_NEWPID | CLONE_NEWNS);
 	ASSERT_GT(pid, 0);
 	ASSERT_EQ(0, wait_for_pid(pid));
 }
 
 TEST(pid_max_nested_limit)
 {
+	struct pid_max_cfg cfg = PID_MAX_CFG_INIT;
 	pid_t pid;
 
-	pid = do_clone(pid_max_nested_limit_outer, NULL, CLONE_NEWPID | CLONE_NEWNS);
+	pid = do_clone(pid_max_nested_limit_outer, &cfg, CLONE_NEWPID | CLONE_NEWNS);
 	ASSERT_GT(pid, 0);
 	ASSERT_EQ(0, wait_for_pid(pid));
 }
 
 TEST(pid_max_nested)
 {
+	struct pid_max_cfg cfg = PID_MAX_CFG_INIT;
 	pid_t pid;
 
-	pid = do_clone(pid_max_nested_outer, NULL, CLONE_NEWPID | CLONE_NEWNS);
+	pid = do_clone(pid_max_nested_outer, &cfg, CLONE_NEWPID | CLONE_NEWNS);
 	ASSERT_GT(pid, 0);
 	ASSERT_EQ(0, wait_for_pid(pid));
 }
-- 
2.50.1




Amazon Web Services Development Center Germany GmbH
Tamara-Danz-Str. 13
10243 Berlin
Geschaeftsfuehrung: Christof Hellmis, Andreas Stieger
Eingetragen am Amtsgericht Charlottenburg unter HRB 257764 B
Sitz: Berlin
Ust-ID: DE 365 538 597


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH] selftests/pid_namespace: compute pid_max test limits dynamically
  2026-04-21 19:43 [PATCH] selftests/pid_namespace: compute pid_max test limits dynamically Bjoern Doebel
@ 2026-04-22 13:00 ` Pavel Tikhomirov
  2026-04-22 19:44   ` Bjoern Doebel
  2026-04-22 20:11 ` [PATCH v2] " Bjoern Doebel
  1 sibling, 1 reply; 5+ messages in thread
From: Pavel Tikhomirov @ 2026-04-22 13:00 UTC (permalink / raw)
  To: Bjoern Doebel
  Cc: Christian Brauner, Shuah Khan, open list:NAMESPACES:,
	open list:KERNEL SELFTEST FRAMEWORK



On 4/21/26 21:43, Bjoern Doebel wrote:
> This is the first email you've received from this external sender.
> Do not click links or open attachments unless it is an email you expected to receive.
> The pid_max kselftest hardcodes pid_max values of 400 and 500, but the
> kernel enforces a minimum of PIDS_PER_CPU_MIN * num_possible_cpus().
> On machines with many possible CPUs (e.g. nr_cpu_ids=128 yields a
> minimum of 1024), writing 400 or 500 to /proc/sys/kernel/pid_max
> returns EINVAL and all three tests fail.
> 
> Compute these limits the same way as the kernel does and set outer_limit
> and inner_limit dynamically based on the result. Original test semantics
> are preserved (outer < inner, nested namespace capped by parent).
> 
> Signed-off-by: Bjoern Doebel <doebel@amazon.com>
> ---
>  .../testing/selftests/pid_namespace/pid_max.c | 153 ++++++++++++++----
>  1 file changed, 119 insertions(+), 34 deletions(-)
> 
> diff --git a/tools/testing/selftests/pid_namespace/pid_max.c b/tools/testing/selftests/pid_namespace/pid_max.c
> index c9519e7385b6..8bd600f55421 100644
> --- a/tools/testing/selftests/pid_namespace/pid_max.c
> +++ b/tools/testing/selftests/pid_namespace/pid_max.c
> @@ -12,10 +12,76 @@
>  #include <syscall.h>
>  #include <sys/mount.h>
>  #include <sys/wait.h>
> +#include <unistd.h>
>  
>  #include "kselftest_harness.h"
>  #include "../pidfd/pidfd.h"
>  
> +/*
> + * The kernel computes the minimum allowed pid_max as:
> + *   max(RESERVED_PIDS + 1, PIDS_PER_CPU_MIN * num_possible_cpus())
> + * Mirror that here so the test values are always valid.
> + *
> + * Note: glibc's get_nprocs_conf() returns the number of *configured*
> + * (present) CPUs, not *possible* CPUs.  The kernel uses
> + * num_possible_cpus() which corresponds to /sys/devices/system/cpu/possible.
> + * These can differ significantly (e.g. 16 configured vs 128 possible).
> + */
> +#define RESERVED_PIDS		300
> +#define PIDS_PER_CPU_MIN	8
> +
> +/* Count CPUs from a range list like "0-31" or "0-15,32-47". */
> +static int num_possible_cpus(void)
> +{
> +	FILE *f;
> +	int count = 0;
> +	int lo, hi;
> +
> +	f = fopen("/sys/devices/system/cpu/possible", "r");
> +	if (!f)
> +		return 0;
> +
> +	while (fscanf(f, "%d", &lo) == 1) {
> +		if (fscanf(f, "-%d", &hi) == 1)
> +			count += hi - lo + 1;
> +		else
> +			count++;
> +		/* skip comma separator */
> +		fscanf(f, ",");
> +	}
> +
> +	fclose(f);
> +	return count;
> +}
> +
> +static int pid_min(void)
> +{
> +	int cpu_min = PIDS_PER_CPU_MIN * num_possible_cpus();
> +
> +	return cpu_min > (RESERVED_PIDS + 1) ? cpu_min : (RESERVED_PIDS + 1);
> +}
> +
> +struct pid_max_cfg {
> +	int outer;
> +	int inner;
> +};
> +
> +/*
> + * Outer and inner pid_max limits used by the tests.  The outer limit is
> + * the more restrictive ancestor; the inner limit is set higher in a
> + * nested namespace but must still be capped by the outer limit.
> + * Both are derived from the kernel's minimum so they are always writable.
> + */
> +#define PID_MAX_CFG_INIT { .outer = pid_min() + 100, .inner = pid_min() + 200 }
> +
> +static int write_int_to_fd(int fd, int val)
> +{
> +	char buf[24];

Why 24? Since val is an int and the longest int is INT_MIN, which has 11 characters, 12 should be enough.

> +	int len = snprintf(buf, sizeof(buf), "%d", val);
> +
> +	return write(fd, buf, len);
> +}
> +
>  #define __STACK_SIZE (8 * 1024 * 1024)
>  static pid_t do_clone(int (*fn)(void *), void *arg, int flags)
>  {
> @@ -37,6 +103,7 @@ static pid_t do_clone(int (*fn)(void *), void *arg, int flags)
>  
>  static int pid_max_cb(void *data)
>  {
> +	struct pid_max_cfg *cfg = data;
>  	int fd, ret;
>  	pid_t pid;
>  
> @@ -60,18 +127,18 @@ static int pid_max_cb(void *data)
>  		return -1;
>  	}
>  
> -	ret = write(fd, "500", sizeof("500") - 1);
> +	ret = write_int_to_fd(fd, cfg->inner);
>  	if (ret < 0) {
>  		fprintf(stderr, "%m - Failed to write pid_max\n");
>  		return -1;
>  	}
>  
> -	for (int i = 0; i < 501; i++) {
> +	for (int i = 0; i < cfg->inner + 1; i++) {
>  		pid = fork();
>  		if (pid == 0)
>  			exit(EXIT_SUCCESS);
>  		wait_for_pid(pid);
> -		if (pid > 500) {
> +		if (pid > cfg->inner) {
>  			fprintf(stderr, "Managed to create pid number beyond limit\n");
>  			return -1;
>  		}
> @@ -82,6 +149,7 @@ static int pid_max_cb(void *data)
>  
>  static int pid_max_nested_inner(void *data)
>  {
> +	struct pid_max_cfg *cfg = data;
>  	int fret = -1;
>  	pid_t pids[2];
>  	int fd, i, ret;
> @@ -106,7 +174,7 @@ static int pid_max_nested_inner(void *data)
>  		return fret;
>  	}
>  
> -	ret = write(fd, "500", sizeof("500") - 1);
> +	ret = write_int_to_fd(fd, cfg->inner);
>  	close(fd);
>  	if (ret < 0) {
>  		fprintf(stderr, "%m - Failed to write pid_max\n");
> @@ -133,8 +201,8 @@ static int pid_max_nested_inner(void *data)
>  		return fret;
>  	}
>  
> -	/* Now make sure that we wrap pids at 400. */
> -	for (i = 0; i < 510; i++) {
> +	/* Now make sure that we wrap pids at outer_limit. */
> +	for (i = 0; i < cfg->inner + 10; i++) {
>  		pid_t pid;
>  
>  		pid = fork();
> @@ -145,7 +213,7 @@ static int pid_max_nested_inner(void *data)
>  			exit(EXIT_SUCCESS);
>  
>  		wait_for_pid(pid);
> -		if (pid >= 500) {
> +		if (pid >= cfg->inner) {
>  			fprintf(stderr, "Managed to create process with pid %d beyond configured limit\n", pid);
>  			return fret;
>  		}
> @@ -156,15 +224,20 @@ static int pid_max_nested_inner(void *data)
>  
>  static int pid_max_nested_outer(void *data)
>  {
> -	int fret = -1, nr_procs = 400;
> -	pid_t pids[1000];
> -	int fd, i, ret;
> +	struct pid_max_cfg *cfg = data;
> +	int fret = -1, nr_procs = 0;
> +	pid_t *pids;
> +	int fd, ret;
>  	pid_t pid;
>  
> +	pids = malloc(cfg->outer * sizeof(pid_t));
> +	if (!pids)
> +		return -1;
> +
>  	ret = mount("", "/", NULL, MS_PRIVATE | MS_REC, 0);
>  	if (ret) {
>  		fprintf(stderr, "%m - Failed to make rootfs private mount\n");
> -		return fret;
> +		goto out;
>  	}
>  
>  	umount2("/proc", MNT_DETACH);
> @@ -172,27 +245,27 @@ static int pid_max_nested_outer(void *data)
>  	ret = mount("proc", "/proc", "proc", 0, NULL);
>  	if (ret) {
>  		fprintf(stderr, "%m - Failed to mount proc\n");
> -		return fret;
> +		goto out;
>  	}
>  
>  	fd = open("/proc/sys/kernel/pid_max", O_RDWR | O_CLOEXEC | O_NOCTTY);
>  	if (fd < 0) {
>  		fprintf(stderr, "%m - Failed to open pid_max\n");
> -		return fret;
> +		goto out;
>  	}
>  
> -	ret = write(fd, "400", sizeof("400") - 1);
> +	ret = write_int_to_fd(fd, cfg->outer);
>  	close(fd);
>  	if (ret < 0) {
>  		fprintf(stderr, "%m - Failed to write pid_max\n");
> -		return fret;
> +		goto out;
>  	}
>  
>  	/*
> -	 * Create 397 processes. This leaves room for do_clone() (398) and
> -	 * one more 399. So creating another process needs to fail.
> +	 * Create (outer - 4) processes. This leaves room for do_clone()
> +	 * and one more process. So creating another process needs to fail.
>  	 */
> -	for (nr_procs = 0; nr_procs < 396; nr_procs++) {
> +	for (nr_procs = 0; nr_procs < cfg->outer - 4; nr_procs++) {
>  		pid = fork();
>  		if (pid < 0)
>  			goto reap;
> @@ -203,7 +276,7 @@ static int pid_max_nested_outer(void *data)
>  		pids[nr_procs] = pid;
>  	}
>  
> -	pid = do_clone(pid_max_nested_inner, NULL, CLONE_NEWPID | CLONE_NEWNS);
> +	pid = do_clone(pid_max_nested_inner, cfg, CLONE_NEWPID | CLONE_NEWNS);
>  	if (pid < 0) {
>  		fprintf(stderr, "%m - Failed to clone nested pidns\n");
>  		goto reap;
> @@ -220,20 +293,27 @@ static int pid_max_nested_outer(void *data)
>  	for (int i = 0; i < nr_procs; i++)
>  		wait_for_pid(pids[i]);
>  
> +out:
> +	free(pids);
>  	return fret;
>  }
>  
>  static int pid_max_nested_limit_inner(void *data)
>  {
> -	int fret = -1, nr_procs = 400;
> +	struct pid_max_cfg *cfg = data;
> +	int fret = -1, nr_procs = 0;
>  	int fd, ret;
>  	pid_t pid;
> -	pid_t pids[1000];
> +	pid_t *pids;
> +
> +	pids = malloc(cfg->inner * sizeof(pid_t));
> +	if (!pids)
> +		return -1;
>  
>  	ret = mount("", "/", NULL, MS_PRIVATE | MS_REC, 0);
>  	if (ret) {
>  		fprintf(stderr, "%m - Failed to make rootfs private mount\n");
> -		return fret;
> +		goto out;
>  	}
>  
>  	umount2("/proc", MNT_DETACH);
> @@ -241,23 +321,23 @@ static int pid_max_nested_limit_inner(void *data)
>  	ret = mount("proc", "/proc", "proc", 0, NULL);
>  	if (ret) {
>  		fprintf(stderr, "%m - Failed to mount proc\n");
> -		return fret;
> +		goto out;
>  	}
>  
>  	fd = open("/proc/sys/kernel/pid_max", O_RDWR | O_CLOEXEC | O_NOCTTY);
>  	if (fd < 0) {
>  		fprintf(stderr, "%m - Failed to open pid_max\n");
> -		return fret;
> +		goto out;
>  	}
>  
> -	ret = write(fd, "500", sizeof("500") - 1);
> +	ret = write_int_to_fd(fd, cfg->inner);
>  	close(fd);
>  	if (ret < 0) {
>  		fprintf(stderr, "%m - Failed to write pid_max\n");
> -		return fret;
> +		goto out;
>  	}
>  
> -	for (nr_procs = 0; nr_procs < 500; nr_procs++) {
> +	for (nr_procs = 0; nr_procs < cfg->inner; nr_procs++) {
>  		pid = fork();
>  		if (pid < 0)
>  			break;
> @@ -268,7 +348,7 @@ static int pid_max_nested_limit_inner(void *data)
>  		pids[nr_procs] = pid;
>  	}
>  
> -	if (nr_procs >= 400) {
> +	if (nr_procs >= cfg->outer) {
>  		fprintf(stderr, "Managed to create processes beyond the configured outer limit\n");
>  		goto reap;
>  	}
> @@ -279,11 +359,14 @@ static int pid_max_nested_limit_inner(void *data)
>  	for (int i = 0; i < nr_procs; i++)
>  		wait_for_pid(pids[i]);
>  
> +out:
> +	free(pids);
>  	return fret;
>  }
>  
>  static int pid_max_nested_limit_outer(void *data)
>  {
> +	struct pid_max_cfg *cfg = data;
>  	int fd, ret;
>  	pid_t pid;
>  
> @@ -307,14 +390,14 @@ static int pid_max_nested_limit_outer(void *data)
>  		return -1;
>  	}
>  
> -	ret = write(fd, "400", sizeof("400") - 1);
> +	ret = write_int_to_fd(fd, cfg->outer);
>  	close(fd);
>  	if (ret < 0) {
>  		fprintf(stderr, "%m - Failed to write pid_max\n");
>  		return -1;
>  	}
>  
> -	pid = do_clone(pid_max_nested_limit_inner, NULL, CLONE_NEWPID | CLONE_NEWNS);
> +	pid = do_clone(pid_max_nested_limit_inner, cfg, CLONE_NEWPID | CLONE_NEWNS);
>  	if (pid < 0) {
>  		fprintf(stderr, "%m - Failed to clone nested pidns\n");
>  		return -1;
> @@ -330,28 +413,30 @@ static int pid_max_nested_limit_outer(void *data)
>  
>  TEST(pid_max_simple)
>  {
> +	struct pid_max_cfg cfg = PID_MAX_CFG_INIT;

Maybe we can simplify things by using a global variable instead of passing
an argument everywhere?

There is also FIXTURE_SETUP/TEST_F, which can probably be combined with a global
variable too. Plus, you can try to avoid calling pid_min() multiple times.

>  	pid_t pid;
>  
> -
> -	pid = do_clone(pid_max_cb, NULL, CLONE_NEWPID | CLONE_NEWNS);
> +	pid = do_clone(pid_max_cb, &cfg, CLONE_NEWPID | CLONE_NEWNS);
>  	ASSERT_GT(pid, 0);
>  	ASSERT_EQ(0, wait_for_pid(pid));
>  }
>  
>  TEST(pid_max_nested_limit)
>  {
> +	struct pid_max_cfg cfg = PID_MAX_CFG_INIT;
>  	pid_t pid;
>  
> -	pid = do_clone(pid_max_nested_limit_outer, NULL, CLONE_NEWPID | CLONE_NEWNS);
> +	pid = do_clone(pid_max_nested_limit_outer, &cfg, CLONE_NEWPID | CLONE_NEWNS);
>  	ASSERT_GT(pid, 0);
>  	ASSERT_EQ(0, wait_for_pid(pid));
>  }
>  
>  TEST(pid_max_nested)
>  {
> +	struct pid_max_cfg cfg = PID_MAX_CFG_INIT;
>  	pid_t pid;
>  
> -	pid = do_clone(pid_max_nested_outer, NULL, CLONE_NEWPID | CLONE_NEWNS);
> +	pid = do_clone(pid_max_nested_outer, &cfg, CLONE_NEWPID | CLONE_NEWNS);
>  	ASSERT_GT(pid, 0);
>  	ASSERT_EQ(0, wait_for_pid(pid));
>  }

-- 
Best regards, Pavel Tikhomirov
Senior Software Developer, Virtuozzo.


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] selftests/pid_namespace: compute pid_max test limits dynamically
  2026-04-22 13:00 ` Pavel Tikhomirov
@ 2026-04-22 19:44   ` Bjoern Doebel
  0 siblings, 0 replies; 5+ messages in thread
From: Bjoern Doebel @ 2026-04-22 19:44 UTC (permalink / raw)
  To: Pavel Tikhomirov
  Cc: Bjoern Doebel, Bjoern Doebel, Christian Brauner, Shuah Khan,
	open list:NAMESPACES:, open list:KERNEL SELFTEST FRAMEWORK

From: Bjoern Doebel <doebel@dev-dsk-doebel-1a-7b355d76.us-east-1.amazon.com>

On Wed, Apr 22, 2026 at 03:00:12PM +0200, Pavel Tikhomirov wrote:
> Why 24? Since val is an int and the longest int is INT_MIN, which has 11 characters, 12 should be enough.

That's true, changing it to buf[12].

> Maybe we can simplify things by using a global variable instead of passing
> an argument everywhere?
> 
> There is also FIXTURE_SETUP/TEST_F, which can probably be combined with a global
> variable too. Plus, you can try to avoid calling pid_min() multiple times.

Done, thanks for pointing me at the right infrastructure.

Sending v2.

Bjoern




Amazon Web Services Development Center Germany GmbH
Tamara-Danz-Str. 13
10243 Berlin
Geschaeftsfuehrung: Christof Hellmis, Andreas Stieger
Eingetragen am Amtsgericht Charlottenburg unter HRB 257764 B
Sitz: Berlin
Ust-ID: DE 365 538 597


^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH v2] selftests/pid_namespace: compute pid_max test limits dynamically
  2026-04-21 19:43 [PATCH] selftests/pid_namespace: compute pid_max test limits dynamically Bjoern Doebel
  2026-04-22 13:00 ` Pavel Tikhomirov
@ 2026-04-22 20:11 ` Bjoern Doebel
  2026-04-23 10:07   ` Pavel Tikhomirov
  1 sibling, 1 reply; 5+ messages in thread
From: Bjoern Doebel @ 2026-04-22 20:11 UTC (permalink / raw)
  To: linux-kselftest
  Cc: brauner, ptikhomirov, shuah, linux-kernel, stable, Bjoern Doebel

The pid_max kselftest hardcodes pid_max values of 400 and 500, but the
kernel enforces a minimum of PIDS_PER_CPU_MIN * num_possible_cpus().
On machines with many possible CPUs (e.g. nr_cpu_ids=128 yields a
minimum of 1024), writing 400 or 500 to /proc/sys/kernel/pid_max
returns EINVAL and all three tests fail.

Compute the minimum allowed pid_max the same way the kernel does and
derive outer_limit and inner_limit from it at run time. The original test
semantics are preserved (outer < inner, nested namespace capped by parent).

Signed-off-by: Bjoern Doebel <doebel@amazon.com>
Assisted-by: Kiro:claude-opus-4.6
---
v2:
- use global outer_limit/inner_limit instead of complicated config
  struct
- make use of FIXTURE/TEST_F macros
- reduce buffer size in write_int_to_fd() to 12

 .../testing/selftests/pid_namespace/pid_max.c | 156 ++++++++++++++----
 1 file changed, 124 insertions(+), 32 deletions(-)

diff --git a/tools/testing/selftests/pid_namespace/pid_max.c b/tools/testing/selftests/pid_namespace/pid_max.c
index c9519e7385b6..5d686a09aa15 100644
--- a/tools/testing/selftests/pid_namespace/pid_max.c
+++ b/tools/testing/selftests/pid_namespace/pid_max.c
@@ -12,10 +12,74 @@
 #include <syscall.h>
 #include <sys/mount.h>
 #include <sys/wait.h>
+#include <unistd.h>
 
 #include "kselftest_harness.h"
 #include "../pidfd/pidfd.h"
 
+/*
+ * The kernel computes the minimum allowed pid_max as:
+ *   max(RESERVED_PIDS + 1, PIDS_PER_CPU_MIN * num_possible_cpus())
+ * Mirror that here so the test values are always valid.
+ *
+ * Note: glibc's get_nprocs_conf() returns the number of *configured*
+ * (present) CPUs, not *possible* CPUs.  The kernel uses
+ * num_possible_cpus() which corresponds to /sys/devices/system/cpu/possible.
+ * These can differ significantly (e.g. 16 configured vs 128 possible).
+ */
+#define RESERVED_PIDS		300
+#define PIDS_PER_CPU_MIN	8
+
+/* Count CPUs from a range list like "0-31" or "0-15,32-47". */
+static int num_possible_cpus(void)
+{
+	FILE *f;
+	int count = 0;
+	int lo, hi;
+
+	f = fopen("/sys/devices/system/cpu/possible", "r");
+	if (!f)
+		return 0;
+
+	while (fscanf(f, "%d", &lo) == 1) {
+		if (fscanf(f, "-%d", &hi) == 1)
+			count += hi - lo + 1;
+		else
+			count++;
+		/* skip comma separator */
+		fscanf(f, ",");
+	}
+
+	fclose(f);
+	return count;
+}
+
+static int pid_min(void)
+{
+	int cpu_min = PIDS_PER_CPU_MIN * num_possible_cpus();
+
+	return cpu_min > (RESERVED_PIDS + 1) ? cpu_min : (RESERVED_PIDS + 1);
+}
+
+/*
+ * Outer and inner pid_max limits used by the tests.  The outer limit is
+ * the more restrictive ancestor; the inner limit is set higher in a
+ * nested namespace but must still be capped by the outer limit.
+ * Both are derived from the kernel's minimum so they are always writable.
+ *
+ * Global so that clone callbacks can access them without parameter plumbing.
+ */
+static int outer_limit;
+static int inner_limit;
+
+static int write_int_to_fd(int fd, int val)
+{
+	char buf[12];
+	int len = snprintf(buf, sizeof(buf), "%d", val);
+
+	return write(fd, buf, len);
+}
+
 #define __STACK_SIZE (8 * 1024 * 1024)
 static pid_t do_clone(int (*fn)(void *), void *arg, int flags)
 {
@@ -60,18 +124,18 @@ static int pid_max_cb(void *data)
 		return -1;
 	}
 
-	ret = write(fd, "500", sizeof("500") - 1);
+	ret = write_int_to_fd(fd, inner_limit);
 	if (ret < 0) {
 		fprintf(stderr, "%m - Failed to write pid_max\n");
 		return -1;
 	}
 
-	for (int i = 0; i < 501; i++) {
+	for (int i = 0; i < inner_limit + 1; i++) {
 		pid = fork();
 		if (pid == 0)
 			exit(EXIT_SUCCESS);
 		wait_for_pid(pid);
-		if (pid > 500) {
+		if (pid > inner_limit) {
 			fprintf(stderr, "Managed to create pid number beyond limit\n");
 			return -1;
 		}
@@ -106,7 +170,7 @@ static int pid_max_nested_inner(void *data)
 		return fret;
 	}
 
-	ret = write(fd, "500", sizeof("500") - 1);
+	ret = write_int_to_fd(fd, inner_limit);
 	close(fd);
 	if (ret < 0) {
 		fprintf(stderr, "%m - Failed to write pid_max\n");
@@ -133,8 +197,8 @@ static int pid_max_nested_inner(void *data)
 		return fret;
 	}
 
-	/* Now make sure that we wrap pids at 400. */
-	for (i = 0; i < 510; i++) {
+	/* Now make sure that we wrap pids at outer_limit. */
+	for (i = 0; i < inner_limit + 10; i++) {
 		pid_t pid;
 
 		pid = fork();
@@ -145,7 +209,7 @@ static int pid_max_nested_inner(void *data)
 			exit(EXIT_SUCCESS);
 
 		wait_for_pid(pid);
-		if (pid >= 500) {
+		if (pid >= inner_limit) {
 			fprintf(stderr, "Managed to create process with pid %d beyond configured limit\n", pid);
 			return fret;
 		}
@@ -156,15 +220,19 @@ static int pid_max_nested_inner(void *data)
 
 static int pid_max_nested_outer(void *data)
 {
-	int fret = -1, nr_procs = 400;
-	pid_t pids[1000];
-	int fd, i, ret;
+	int fret = -1, nr_procs = 0;
+	pid_t *pids;
+	int fd, ret;
 	pid_t pid;
 
+	pids = malloc(outer_limit * sizeof(pid_t));
+	if (!pids)
+		return -1;
+
 	ret = mount("", "/", NULL, MS_PRIVATE | MS_REC, 0);
 	if (ret) {
 		fprintf(stderr, "%m - Failed to make rootfs private mount\n");
-		return fret;
+		goto out;
 	}
 
 	umount2("/proc", MNT_DETACH);
@@ -172,27 +240,28 @@ static int pid_max_nested_outer(void *data)
 	ret = mount("proc", "/proc", "proc", 0, NULL);
 	if (ret) {
 		fprintf(stderr, "%m - Failed to mount proc\n");
-		return fret;
+		goto out;
 	}
 
 	fd = open("/proc/sys/kernel/pid_max", O_RDWR | O_CLOEXEC | O_NOCTTY);
 	if (fd < 0) {
 		fprintf(stderr, "%m - Failed to open pid_max\n");
-		return fret;
+		goto out;
 	}
 
-	ret = write(fd, "400", sizeof("400") - 1);
+	ret = write_int_to_fd(fd, outer_limit);
 	close(fd);
 	if (ret < 0) {
 		fprintf(stderr, "%m - Failed to write pid_max\n");
-		return fret;
+		goto out;
 	}
 
 	/*
-	 * Create 397 processes. This leaves room for do_clone() (398) and
-	 * one more 399. So creating another process needs to fail.
+	 * Create (outer_limit - 4) processes. This leaves room for
+	 * do_clone() and one more. So creating another process needs
+	 * to fail.
 	 */
-	for (nr_procs = 0; nr_procs < 396; nr_procs++) {
+	for (nr_procs = 0; nr_procs < outer_limit - 4; nr_procs++) {
 		pid = fork();
 		if (pid < 0)
 			goto reap;
@@ -220,20 +289,26 @@ static int pid_max_nested_outer(void *data)
 	for (int i = 0; i < nr_procs; i++)
 		wait_for_pid(pids[i]);
 
+out:
+	free(pids);
 	return fret;
 }
 
 static int pid_max_nested_limit_inner(void *data)
 {
-	int fret = -1, nr_procs = 400;
+	int fret = -1, nr_procs = 0;
 	int fd, ret;
 	pid_t pid;
-	pid_t pids[1000];
+	pid_t *pids;
+
+	pids = malloc(inner_limit * sizeof(pid_t));
+	if (!pids)
+		return -1;
 
 	ret = mount("", "/", NULL, MS_PRIVATE | MS_REC, 0);
 	if (ret) {
 		fprintf(stderr, "%m - Failed to make rootfs private mount\n");
-		return fret;
+		goto out;
 	}
 
 	umount2("/proc", MNT_DETACH);
@@ -241,23 +316,23 @@ static int pid_max_nested_limit_inner(void *data)
 	ret = mount("proc", "/proc", "proc", 0, NULL);
 	if (ret) {
 		fprintf(stderr, "%m - Failed to mount proc\n");
-		return fret;
+		goto out;
 	}
 
 	fd = open("/proc/sys/kernel/pid_max", O_RDWR | O_CLOEXEC | O_NOCTTY);
 	if (fd < 0) {
 		fprintf(stderr, "%m - Failed to open pid_max\n");
-		return fret;
+		goto out;
 	}
 
-	ret = write(fd, "500", sizeof("500") - 1);
+	ret = write_int_to_fd(fd, inner_limit);
 	close(fd);
 	if (ret < 0) {
 		fprintf(stderr, "%m - Failed to write pid_max\n");
-		return fret;
+		goto out;
 	}
 
-	for (nr_procs = 0; nr_procs < 500; nr_procs++) {
+	for (nr_procs = 0; nr_procs < inner_limit; nr_procs++) {
 		pid = fork();
 		if (pid < 0)
 			break;
@@ -268,7 +343,7 @@ static int pid_max_nested_limit_inner(void *data)
 		pids[nr_procs] = pid;
 	}
 
-	if (nr_procs >= 400) {
+	if (nr_procs >= outer_limit) {
 		fprintf(stderr, "Managed to create processes beyond the configured outer limit\n");
 		goto reap;
 	}
@@ -279,6 +354,8 @@ static int pid_max_nested_limit_inner(void *data)
 	for (int i = 0; i < nr_procs; i++)
 		wait_for_pid(pids[i]);
 
+out:
+	free(pids);
 	return fret;
 }
 
@@ -307,7 +384,7 @@ static int pid_max_nested_limit_outer(void *data)
 		return -1;
 	}
 
-	ret = write(fd, "400", sizeof("400") - 1);
+	ret = write_int_to_fd(fd, outer_limit);
 	close(fd);
 	if (ret < 0) {
 		fprintf(stderr, "%m - Failed to write pid_max\n");
@@ -328,17 +405,32 @@ static int pid_max_nested_limit_outer(void *data)
 	return 0;
 }
 
-TEST(pid_max_simple)
+FIXTURE(pid_max) {
+	int dummy;
+};
+
+FIXTURE_SETUP(pid_max)
 {
-	pid_t pid;
+	int min = pid_min();
 
+	outer_limit = min + 100;
+	inner_limit = min + 200;
+}
+
+FIXTURE_TEARDOWN(pid_max)
+{
+}
+
+TEST_F(pid_max, simple)
+{
+	pid_t pid;
 
 	pid = do_clone(pid_max_cb, NULL, CLONE_NEWPID | CLONE_NEWNS);
 	ASSERT_GT(pid, 0);
 	ASSERT_EQ(0, wait_for_pid(pid));
 }
 
-TEST(pid_max_nested_limit)
+TEST_F(pid_max, nested_limit)
 {
 	pid_t pid;
 
@@ -347,7 +439,7 @@ TEST(pid_max_nested_limit)
 	ASSERT_EQ(0, wait_for_pid(pid));
 }
 
-TEST(pid_max_nested)
+TEST_F(pid_max, nested)
 {
 	pid_t pid;
 
-- 
2.50.1




Amazon Web Services Development Center Germany GmbH
Tamara-Danz-Str. 13
10243 Berlin
Geschaeftsfuehrung: Christof Hellmis, Andreas Stieger
Eingetragen am Amtsgericht Charlottenburg unter HRB 257764 B
Sitz: Berlin
Ust-ID: DE 365 538 597


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH v2] selftests/pid_namespace: compute pid_max test limits dynamically
  2026-04-22 20:11 ` [PATCH v2] " Bjoern Doebel
@ 2026-04-23 10:07   ` Pavel Tikhomirov
  0 siblings, 0 replies; 5+ messages in thread
From: Pavel Tikhomirov @ 2026-04-23 10:07 UTC (permalink / raw)
  To: Bjoern Doebel, linux-kselftest; +Cc: brauner, shuah, linux-kernel, stable



On 4/22/26 22:11, Bjoern Doebel wrote:
> The pid_max kselftest hardcodes pid_max values of 400 and 500, but the
> kernel enforces a minimum of PIDS_PER_CPU_MIN * num_possible_cpus().
> On machines with many possible CPUs (e.g. nr_cpu_ids=128 yields a
> minimum of 1024), writing 400 or 500 to /proc/sys/kernel/pid_max
> returns EINVAL and all three tests fail.
> 
> Compute these limits the same way as the kernel does and set outer_limit
> and inner_limit dynamically based on the result. Original test semantics
> are preserved (outer < inner, nested namespace capped by parent).
> 

Reviewed-by: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>

> Signed-off-by: Bjoern Doebel <doebel@amazon.com>
> Assisted-by: Kiro:claude-opus-4.6
> ---
> v2:
> - use global outer_limit/inner_limit instead of complicated config
>   struct
> - make use of FIXTURE/TEST_F macros
> - reduce buffer size in write_int_to_fd() to 12
> 
>  .../testing/selftests/pid_namespace/pid_max.c | 156 ++++++++++++++----
>  1 file changed, 124 insertions(+), 32 deletions(-)
> 
...
> @@ -328,17 +405,32 @@ static int pid_max_nested_limit_outer(void *data)
>  	return 0;
>  }
>  
> -TEST(pid_max_simple)
> +FIXTURE(pid_max) {
> +	int dummy;

nit: Having a dummy variable here does not seem to be required.

> +};
> +
> +FIXTURE_SETUP(pid_max)
>  {
> -	pid_t pid;
> +	int min = pid_min();
>  
> +	outer_limit = min + 100;
> +	inner_limit = min + 200;
> +}
> +
> +FIXTURE_TEARDOWN(pid_max)
> +{
> +}
> +
> +TEST_F(pid_max, simple)


-- 
Best regards, Pavel Tikhomirov
Senior Software Developer, Virtuozzo.


^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2026-04-23 10:07 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2026-04-21 19:43 [PATCH] selftests/pid_namespace: compute pid_max test limits dynamically Bjoern Doebel
2026-04-22 13:00 ` Pavel Tikhomirov
2026-04-22 19:44   ` Bjoern Doebel
2026-04-22 20:11 ` [PATCH v2] " Bjoern Doebel
2026-04-23 10:07   ` Pavel Tikhomirov

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.