All of lore.kernel.org
 help / color / mirror / Atom feed
From: Pavel Emelyanov <xemul@parallels.com>
To: Andrew Morton <akpm@linux-foundation.org>,
	"Paul E. McKenney" <paulmck@linux.vnet.ibm.com>,
	Tejun Heo <tj@kernel.org>, Oleg Nesterov <oleg@redhat.com>,
	Linux Kernel Mailing List <linux-kernel@vger.kernel.org>
Subject: [PATCH] pidns: Make pid_max per namespace
Date: Thu, 03 Mar 2011 11:39:17 +0300	[thread overview]
Message-ID: <4D6F53B5.5090105@parallels.com> (raw)

Rationale:

On x86_64 hosts with a lot of RAM, people running containers set pid_max on
the host to large values to be able to launch more containers. At the same
time, containers running 32-bit software experience problems with large
pids: ps calls readdir/stat on proc entries, and the inodes' i_ino values
happen to be too big for the 32-bit API.

Thus, the ability to limit the pid value inside a container is required.

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>

---

diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index 38d1032..248220d 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -20,6 +20,7 @@ struct pid_namespace {
 	struct kref kref;
 	struct pidmap pidmap[PIDMAP_ENTRIES];
 	int last_pid;
+	int pid_max;
 	struct task_struct *child_reaper;
 	struct kmem_cache *pid_cachep;
 	unsigned int level;
diff --git a/kernel/pid.c b/kernel/pid.c
index 39b65b6..aafc285 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -43,12 +43,10 @@ static struct hlist_head *pid_hash;
 static unsigned int pidhash_shift = 4;
 struct pid init_struct_pid = INIT_STRUCT_PID;
 
-int pid_max = PID_MAX_DEFAULT;
-
 #define RESERVED_PIDS		300
 
-int pid_max_min = RESERVED_PIDS + 1;
-int pid_max_max = PID_MAX_LIMIT;
+static int pid_max_min = RESERVED_PIDS + 1;
+static int pid_max_max = PID_MAX_LIMIT;
 
 #define BITS_PER_PAGE		(PAGE_SIZE*8)
 #define BITS_PER_PAGE_MASK	(BITS_PER_PAGE-1)
@@ -161,7 +159,7 @@ static void set_last_pid(struct pid_namespace *pid_ns, int base, int pid)
 
 static int alloc_pidmap(struct pid_namespace *pid_ns)
 {
-	int i, offset, max_scan, pid, last = pid_ns->last_pid;
+	int i, offset, max_scan, pid, last = pid_ns->last_pid, pid_max = pid_ns->pid_max;
 	struct pidmap *map;
 
 	pid = last + 1;
@@ -546,14 +544,40 @@ void __init pidhash_init(void)
 		INIT_HLIST_HEAD(&pid_hash[i]);
 }
 
+static int proc_dointvec_pidmax(struct ctl_table *table, int write,
+		  void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	struct ctl_table tmp;
+
+	tmp = *table;
+	tmp.data = &current->nsproxy->pid_ns->pid_max;
+
+	return proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
+}
+
+static struct ctl_table pid_ctl_table[] = {
+	{
+		.procname	= "pid_max",
+		.data		= &init_pid_ns.pid_max,
+		.maxlen		= sizeof (int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_pidmax,
+		.extra1		= &pid_max_min,
+		.extra2		= &pid_max_max,
+	},
+	{ }
+};
+
+static struct ctl_path pid_kern_path[] = { { .procname = "kernel" }, { } };
+
 void __init pidmap_init(void)
 {
 	/* bump default and minimum pid_max based on number of cpus */
-	pid_max = min(pid_max_max, max_t(int, pid_max,
+	init_pid_ns.pid_max = min(pid_max_max, max_t(int, PID_MAX_DEFAULT,
 				PIDS_PER_CPU_DEFAULT * num_possible_cpus()));
 	pid_max_min = max_t(int, pid_max_min,
 				PIDS_PER_CPU_MIN * num_possible_cpus());
-	pr_info("pid_max: default: %u minimum: %u\n", pid_max, pid_max_min);
+	pr_info("pid_max: default: %u minimum: %u\n", init_pid_ns.pid_max, pid_max_min);
 
 	init_pid_ns.pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL);
 	/* Reserve PID 0. We never call free_pidmap(0) */
@@ -562,4 +586,5 @@ void __init pidmap_init(void)
 
 	init_pid_ns.pid_cachep = KMEM_CACHE(pid,
 			SLAB_HWCACHE_ALIGN | SLAB_PANIC);
+	register_sysctl_paths(pid_kern_path, pid_ctl_table);
 }
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index a5aff94..93d594e 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -89,6 +89,7 @@ static struct pid_namespace *create_pid_namespace(struct pid_namespace *parent_p
 	kref_init(&ns->kref);
 	ns->level = level;
 	ns->parent = get_pid_ns(parent_pid_ns);
+	ns->pid_max = parent_pid_ns->pid_max;
 
 	set_bit(0, ns->pidmap[0].page);
 	atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 0f1bd83..0f94054 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -93,9 +93,7 @@ extern int core_uses_pid;
 extern int suid_dumpable;
 extern char core_pattern[];
 extern unsigned int core_pipe_limit;
-extern int pid_max;
 extern int min_free_kbytes;
-extern int pid_max_min, pid_max_max;
 extern int sysctl_drop_caches;
 extern int percpu_pagelist_fraction;
 extern int compat_log;
@@ -653,15 +651,6 @@ static struct ctl_table kern_table[] = {
 	},
 #endif
 	{
-		.procname	= "pid_max",
-		.data		= &pid_max,
-		.maxlen		= sizeof (int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &pid_max_min,
-		.extra2		= &pid_max_max,
-	},
-	{
 		.procname	= "panic_on_oops",
 		.data		= &panic_on_oops,
 		.maxlen		= sizeof(int),

             reply	other threads:[~2011-03-03  8:43 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-03-03  8:39 Pavel Emelyanov [this message]
2011-03-07 23:58 ` [PATCH] pidns: Make pid_max per namespace Andrew Morton
2011-03-10  9:35   ` Pavel Emelyanov
2011-03-10  9:50     ` Andrew Morton
2011-03-10 10:06       ` Pavel Emelyanov
2011-03-10 10:44         ` Andrew Morton

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4D6F53B5.5090105@parallels.com \
    --to=xemul@parallels.com \
    --cc=akpm@linux-foundation.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=oleg@redhat.com \
    --cc=paulmck@linux.vnet.ibm.com \
    --cc=tj@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.