public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Pavel Emelyanov <xemul@parallels.com>
To: Andrew Morton <akpm@linux-foundation.org>,
	"Paul E. McKenney" <paulmck@linux.vnet.ibm.com>,
	Tejun Heo <tj@kernel.org>, Oleg Nesterov <oleg@redhat.com>,
	Linux Kernel Mailing List <linux-kernel@vger.kernel.org>
Subject: [PATCH] pidns: Make pid_max per namespace
Date: Thu, 03 Mar 2011 11:39:17 +0300	[thread overview]
Message-ID: <4D6F53B5.5090105@parallels.com> (raw)

Rationale:

On x86_64 with big ram people running containers set pid_max on host to 
large values to be able to launch more containers. At the same time 
containers running 32-bit software experience problems with large pids - ps
calls readdir/stat on proc entries and inode's i_ino happen to be too big 
for the 32-bit API.

Thus, the ability to limit the pid value inside container is required.

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>

---

diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index 38d1032..248220d 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -20,6 +20,7 @@ struct pid_namespace {
 	struct kref kref;
 	struct pidmap pidmap[PIDMAP_ENTRIES];
 	int last_pid;
+	int pid_max;
 	struct task_struct *child_reaper;
 	struct kmem_cache *pid_cachep;
 	unsigned int level;
diff --git a/kernel/pid.c b/kernel/pid.c
index 39b65b6..aafc285 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -43,12 +43,10 @@ static struct hlist_head *pid_hash;
 static unsigned int pidhash_shift = 4;
 struct pid init_struct_pid = INIT_STRUCT_PID;
 
-int pid_max = PID_MAX_DEFAULT;
-
 #define RESERVED_PIDS		300
 
-int pid_max_min = RESERVED_PIDS + 1;
-int pid_max_max = PID_MAX_LIMIT;
+static int pid_max_min = RESERVED_PIDS + 1;
+static int pid_max_max = PID_MAX_LIMIT;
 
 #define BITS_PER_PAGE		(PAGE_SIZE*8)
 #define BITS_PER_PAGE_MASK	(BITS_PER_PAGE-1)
@@ -161,7 +159,7 @@ static void set_last_pid(struct pid_namespace *pid_ns, int base, int pid)
 
 static int alloc_pidmap(struct pid_namespace *pid_ns)
 {
-	int i, offset, max_scan, pid, last = pid_ns->last_pid;
+	int i, offset, max_scan, pid, last = pid_ns->last_pid, pid_max = pid_ns->pid_max;
 	struct pidmap *map;
 
 	pid = last + 1;
@@ -546,14 +544,40 @@ void __init pidhash_init(void)
 		INIT_HLIST_HEAD(&pid_hash[i]);
 }
 
+static int proc_dointvec_pidmax(struct ctl_table *table, int write,
+		  void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	struct ctl_table tmp;
+
+	tmp = *table;
+	tmp.data = &current->nsproxy->pid_ns->pid_max;
+
+	return proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
+}
+
+static struct ctl_table pid_ctl_table[] = {
+	{
+		.procname	= "pid_max",
+		.data		= &init_pid_ns.pid_max,
+		.maxlen		= sizeof (int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_pidmax,
+		.extra1		= &pid_max_min,
+		.extra2		= &pid_max_max,
+	},
+	{ }
+};
+
+static struct ctl_path pid_kern_path[] = { { .procname = "kernel" }, { } };
+
 void __init pidmap_init(void)
 {
 	/* bump default and minimum pid_max based on number of cpus */
-	pid_max = min(pid_max_max, max_t(int, pid_max,
+	init_pid_ns.pid_max = min(pid_max_max, max_t(int, PID_MAX_DEFAULT,
 				PIDS_PER_CPU_DEFAULT * num_possible_cpus()));
 	pid_max_min = max_t(int, pid_max_min,
 				PIDS_PER_CPU_MIN * num_possible_cpus());
-	pr_info("pid_max: default: %u minimum: %u\n", pid_max, pid_max_min);
+	pr_info("pid_max: default: %u minimum: %u\n", init_pid_ns.pid_max, pid_max_min);
 
 	init_pid_ns.pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL);
 	/* Reserve PID 0. We never call free_pidmap(0) */
@@ -562,4 +586,5 @@ void __init pidmap_init(void)
 
 	init_pid_ns.pid_cachep = KMEM_CACHE(pid,
 			SLAB_HWCACHE_ALIGN | SLAB_PANIC);
+	register_sysctl_paths(pid_kern_path, pid_ctl_table);
 }
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index a5aff94..93d594e 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -89,6 +89,7 @@ static struct pid_namespace *create_pid_namespace(struct pid_namespace *parent_p
 	kref_init(&ns->kref);
 	ns->level = level;
 	ns->parent = get_pid_ns(parent_pid_ns);
+	ns->pid_max = parent_pid_ns->pid_max;
 
 	set_bit(0, ns->pidmap[0].page);
 	atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 0f1bd83..0f94054 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -93,9 +93,7 @@ extern int core_uses_pid;
 extern int suid_dumpable;
 extern char core_pattern[];
 extern unsigned int core_pipe_limit;
-extern int pid_max;
 extern int min_free_kbytes;
-extern int pid_max_min, pid_max_max;
 extern int sysctl_drop_caches;
 extern int percpu_pagelist_fraction;
 extern int compat_log;
@@ -653,15 +651,6 @@ static struct ctl_table kern_table[] = {
 	},
 #endif
 	{
-		.procname	= "pid_max",
-		.data		= &pid_max,
-		.maxlen		= sizeof (int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &pid_max_min,
-		.extra2		= &pid_max_max,
-	},
-	{
 		.procname	= "panic_on_oops",
 		.data		= &panic_on_oops,
 		.maxlen		= sizeof(int),

             reply	other threads:[~2011-03-03  8:43 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-03-03  8:39 Pavel Emelyanov [this message]
2011-03-07 23:58 ` [PATCH] pidns: Make pid_max per namespace Andrew Morton
2011-03-10  9:35   ` Pavel Emelyanov
2011-03-10  9:50     ` Andrew Morton
2011-03-10 10:06       ` Pavel Emelyanov
2011-03-10 10:44         ` Andrew Morton

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4D6F53B5.5090105@parallels.com \
    --to=xemul@parallels.com \
    --cc=akpm@linux-foundation.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=oleg@redhat.com \
    --cc=paulmck@linux.vnet.ibm.com \
    --cc=tj@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox