All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/7] [PATCH] Factor out code to allocate pidmap page
@ 2009-05-27 15:42 Sukadev Bhattiprolu
       [not found] ` <20090527154212.GA3107-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
  0 siblings, 1 reply; 22+ messages in thread
From: Sukadev Bhattiprolu @ 2009-05-27 15:42 UTC (permalink / raw)
  To: Oren Laadan; +Cc: Containers, David C. Hansen


From: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
Date: Mon, 4 May 2009 01:17:39 -0700
Subject: [PATCH 1/7] [PATCH] Factor out code to allocate pidmap page

Signed-off-by: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
---
 kernel/pid.c |   43 ++++++++++++++++++++++++++++---------------
 1 files changed, 28 insertions(+), 15 deletions(-)

diff --git a/kernel/pid.c b/kernel/pid.c
index b2e5f78..c0aaebe 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -122,6 +122,31 @@ static void free_pidmap(struct upid *upid)
 	atomic_inc(&map->nr_free);
 }
 
+static int alloc_pidmap_page(struct pidmap *map)
+{
+	void *page;
+
+	if (likely(map->page))
+		return 0;
+
+	page = kzalloc(PAGE_SIZE, GFP_KERNEL);
+
+	/*
+	 * Free the page if someone raced with us installing it:
+	 */
+	spin_lock_irq(&pidmap_lock);
+	if (map->page)
+		kfree(page);
+	else
+		map->page = page;
+	spin_unlock_irq(&pidmap_lock);
+
+	if (unlikely(!map->page))
+		return -1;
+
+	return 0;
+}
+
 static int alloc_pidmap(struct pid_namespace *pid_ns)
 {
 	int i, offset, max_scan, pid, last = pid_ns->last_pid;
@@ -134,21 +159,9 @@ static int alloc_pidmap(struct pid_namespace *pid_ns)
 	map = &pid_ns->pidmap[pid/BITS_PER_PAGE];
 	max_scan = (pid_max + BITS_PER_PAGE - 1)/BITS_PER_PAGE - !offset;
 	for (i = 0; i <= max_scan; ++i) {
-		if (unlikely(!map->page)) {
-			void *page = kzalloc(PAGE_SIZE, GFP_KERNEL);
-			/*
-			 * Free the page if someone raced with us
-			 * installing it:
-			 */
-			spin_lock_irq(&pidmap_lock);
-			if (map->page)
-				kfree(page);
-			else
-				map->page = page;
-			spin_unlock_irq(&pidmap_lock);
-			if (unlikely(!map->page))
-				break;
-		}
+		if (alloc_pidmap_page(map))
+			break;
+
 		if (likely(atomic_read(&map->nr_free))) {
 			do {
 				if (!test_and_set_bit(offset, map->page)) {
-- 
1.5.2.5

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 2/7] [PATCH] Have alloc_pidmap() return actual error code
       [not found] ` <20090527154212.GA3107-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
@ 2009-05-27 15:42   ` Sukadev Bhattiprolu
       [not found]     ` <20090527154259.GB3107-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
  2009-05-27 15:43   ` [PATCH 3/7] [PATCH] Add target_pid parameter to alloc_pidmap() Sukadev Bhattiprolu
                     ` (7 subsequent siblings)
  8 siblings, 1 reply; 22+ messages in thread
From: Sukadev Bhattiprolu @ 2009-05-27 15:42 UTC (permalink / raw)
  To: Oren Laadan; +Cc: Containers, David C. Hansen


From: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
Date: Mon, 4 May 2009 01:17:40 -0700
Subject: [PATCH 2/7] [PATCH] Have alloc_pidmap() return actual error code

alloc_pidmap() can fail either because all pid numbers are in use or
we can't allocate memory. With support for setting a specific pid
number, alloc_pidmap() would also fail if either the given pid
number is invalid or in use.

Rather than have caller assume -ENOMEM, have alloc_pidmap() return
the actual error.

Signed-off-by: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
---
 kernel/fork.c |    5 +++--
 kernel/pid.c  |    9 ++++++---
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/kernel/fork.c b/kernel/fork.c
index b9e2edd..f8411a8 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1119,10 +1119,11 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 		goto bad_fork_cleanup_io;
 
 	if (pid != &init_struct_pid) {
-		retval = -ENOMEM;
 		pid = alloc_pid(p->nsproxy->pid_ns);
-		if (!pid)
+		if (IS_ERR(pid)) {
+			retval = PTR_ERR(pid);
 			goto bad_fork_cleanup_io;
+		}
 
 		if (clone_flags & CLONE_NEWPID) {
 			retval = pid_ns_prepare_proc(p->nsproxy->pid_ns);
diff --git a/kernel/pid.c b/kernel/pid.c
index c0aaebe..fd72ad9 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -151,6 +151,7 @@ static int alloc_pidmap(struct pid_namespace *pid_ns)
 {
 	int i, offset, max_scan, pid, last = pid_ns->last_pid;
 	struct pidmap *map;
+	int rc = -EAGAIN;
 
 	pid = last + 1;
 	if (pid >= pid_max)
@@ -159,8 +160,10 @@ static int alloc_pidmap(struct pid_namespace *pid_ns)
 	map = &pid_ns->pidmap[pid/BITS_PER_PAGE];
 	max_scan = (pid_max + BITS_PER_PAGE - 1)/BITS_PER_PAGE - !offset;
 	for (i = 0; i <= max_scan; ++i) {
-		if (alloc_pidmap_page(map))
+		if (alloc_pidmap_page(map)) {
+			rc = -ENOMEM;
 			break;
+		}
 
 		if (likely(atomic_read(&map->nr_free))) {
 			do {
@@ -192,7 +195,7 @@ static int alloc_pidmap(struct pid_namespace *pid_ns)
 		}
 		pid = mk_pid(pid_ns, map, offset);
 	}
-	return -1;
+	return rc;
 }
 
 int next_pidmap(struct pid_namespace *pid_ns, int last)
@@ -297,7 +300,7 @@ out_free:
 		free_pidmap(pid->numbers + i);
 
 	kmem_cache_free(ns->pid_cachep, pid);
-	pid = NULL;
+	pid = ERR_PTR(nr);
 	goto out;
 }
 
-- 
1.5.2.5

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 3/7] [PATCH] Add target_pid parameter to alloc_pidmap()
       [not found] ` <20090527154212.GA3107-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
  2009-05-27 15:42   ` [PATCH 2/7] [PATCH] Have alloc_pidmap() return actual error code Sukadev Bhattiprolu
@ 2009-05-27 15:43   ` Sukadev Bhattiprolu
       [not found]     ` <20090527154333.GC3107-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
  2009-05-27 15:43   ` [PATCH 4/7] [PATCH] Add target_pids parameter to alloc_pid() Sukadev Bhattiprolu
                     ` (6 subsequent siblings)
  8 siblings, 1 reply; 22+ messages in thread
From: Sukadev Bhattiprolu @ 2009-05-27 15:43 UTC (permalink / raw)
  To: Oren Laadan; +Cc: Containers, David C. Hansen


From: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
Date: Mon, 4 May 2009 01:17:41 -0700
Subject: [PATCH 3/7] [PATCH] Add target_pid parameter to alloc_pidmap()

Signed-off-by: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
---
 kernel/pid.c |   28 ++++++++++++++++++++++++++--
 1 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/kernel/pid.c b/kernel/pid.c
index fd72ad9..93406c6 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -147,12 +147,36 @@ static int alloc_pidmap_page(struct pidmap *map)
 	return 0;
 }
 
-static int alloc_pidmap(struct pid_namespace *pid_ns)
+static int set_pidmap(struct pid_namespace *pid_ns, int pid)
+{
+	int offset;
+	struct pidmap *map;
+
+	if (pid >= pid_max)
+		return -EINVAL;
+
+	offset = pid & BITS_PER_PAGE_MASK;
+	map = &pid_ns->pidmap[pid/BITS_PER_PAGE];
+
+	if (alloc_pidmap_page(map))
+		return -ENOMEM;
+
+	if (test_and_set_bit(offset, map->page))
+		return -EBUSY;
+
+	atomic_dec(&map->nr_free);
+	return pid;
+}
+
+static int alloc_pidmap(struct pid_namespace *pid_ns, int target_pid)
 {
 	int i, offset, max_scan, pid, last = pid_ns->last_pid;
 	struct pidmap *map;
 	int rc = -EAGAIN;
 
+	if (target_pid)
+		return set_pidmap(pid_ns, target_pid);
+
 	pid = last + 1;
 	if (pid >= pid_max)
 		pid = RESERVED_PIDS;
@@ -269,7 +293,7 @@ struct pid *alloc_pid(struct pid_namespace *ns)
 
 	tmp = ns;
 	for (i = ns->level; i >= 0; i--) {
-		nr = alloc_pidmap(tmp);
+		nr = alloc_pidmap(tmp, 0);
 		if (nr < 0)
 			goto out_free;
 
-- 
1.5.2.5

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 4/7] [PATCH] Add target_pids parameter to alloc_pid()
       [not found] ` <20090527154212.GA3107-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
  2009-05-27 15:42   ` [PATCH 2/7] [PATCH] Have alloc_pidmap() return actual error code Sukadev Bhattiprolu
  2009-05-27 15:43   ` [PATCH 3/7] [PATCH] Add target_pid parameter to alloc_pidmap() Sukadev Bhattiprolu
@ 2009-05-27 15:43   ` Sukadev Bhattiprolu
       [not found]     ` <20090527154355.GD3107-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
  2009-05-27 15:44   ` [PATCH 5/7] [PATCH] Add target_pids parameter to copy_process() Sukadev Bhattiprolu
                     ` (5 subsequent siblings)
  8 siblings, 1 reply; 22+ messages in thread
From: Sukadev Bhattiprolu @ 2009-05-27 15:43 UTC (permalink / raw)
  To: Oren Laadan; +Cc: Containers, David C. Hansen


From: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
Date: Mon, 4 May 2009 01:17:42 -0700
Subject: [PATCH 4/7] [PATCH] Add target_pids parameter to alloc_pid()

This parameter is currently NULL, but will be used in a follow-on patch.

Signed-off-by: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
---
 include/linux/pid.h |    2 +-
 kernel/fork.c       |    3 ++-
 kernel/pid.c        |    9 +++++++--
 3 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/include/linux/pid.h b/include/linux/pid.h
index 49f1c2f..914185d 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -119,7 +119,7 @@ extern struct pid *find_get_pid(int nr);
 extern struct pid *find_ge_pid(int nr, struct pid_namespace *);
 int next_pidmap(struct pid_namespace *pid_ns, int last);
 
-extern struct pid *alloc_pid(struct pid_namespace *ns);
+extern struct pid *alloc_pid(struct pid_namespace *ns, pid_t *target_pids);
 extern void free_pid(struct pid *pid);
 
 /*
diff --git a/kernel/fork.c b/kernel/fork.c
index f8411a8..d2d69d3 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -954,6 +954,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	int retval;
 	struct task_struct *p;
 	int cgroup_callbacks_done = 0;
+	pid_t *target_pids = NULL;
 
 	if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
 		return ERR_PTR(-EINVAL);
@@ -1119,7 +1120,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 		goto bad_fork_cleanup_io;
 
 	if (pid != &init_struct_pid) {
-		pid = alloc_pid(p->nsproxy->pid_ns);
+		pid = alloc_pid(p->nsproxy->pid_ns, target_pids);
 		if (IS_ERR(pid)) {
 			retval = PTR_ERR(pid);
 			goto bad_fork_cleanup_io;
diff --git a/kernel/pid.c b/kernel/pid.c
index 93406c6..4b2373a 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -279,13 +279,14 @@ void free_pid(struct pid *pid)
 	call_rcu(&pid->rcu, delayed_put_pid);
 }
 
-struct pid *alloc_pid(struct pid_namespace *ns)
+struct pid *alloc_pid(struct pid_namespace *ns, pid_t *target_pids)
 {
 	struct pid *pid;
 	enum pid_type type;
 	int i, nr;
 	struct pid_namespace *tmp;
 	struct upid *upid;
+	int tpid;
 
 	pid = kmem_cache_alloc(ns->pid_cachep, GFP_KERNEL);
 	if (!pid)
@@ -293,7 +294,11 @@ struct pid *alloc_pid(struct pid_namespace *ns)
 
 	tmp = ns;
 	for (i = ns->level; i >= 0; i--) {
-		nr = alloc_pidmap(tmp, 0);
+		tpid = 0;
+		if (target_pids)
+			tpid = target_pids[i];
+
+		nr = alloc_pidmap(tmp, tpid);
 		if (nr < 0)
 			goto out_free;
 
-- 
1.5.2.5

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 5/7] [PATCH] Add target_pids parameter to copy_process()
       [not found] ` <20090527154212.GA3107-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
                     ` (2 preceding siblings ...)
  2009-05-27 15:43   ` [PATCH 4/7] [PATCH] Add target_pids parameter to alloc_pid() Sukadev Bhattiprolu
@ 2009-05-27 15:44   ` Sukadev Bhattiprolu
       [not found]     ` <20090527154422.GE3107-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
  2009-05-27 15:44   ` [PATCH 6/7] [PATCH] Define do_fork_with_pids() Sukadev Bhattiprolu
                     ` (4 subsequent siblings)
  8 siblings, 1 reply; 22+ messages in thread
From: Sukadev Bhattiprolu @ 2009-05-27 15:44 UTC (permalink / raw)
  To: Oren Laadan; +Cc: Containers, David C. Hansen


From: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
Date: Mon, 4 May 2009 01:17:43 -0700
Subject: [PATCH 5/7] [PATCH] Add target_pids parameter to copy_process()

The new parameter will be used in a follow-on patch when clone_with_pids()
is implemented.

Signed-off-by: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
---
 kernel/fork.c |    7 ++++---
 1 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/kernel/fork.c b/kernel/fork.c
index d2d69d3..373411e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -949,12 +949,12 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 					unsigned long stack_size,
 					int __user *child_tidptr,
 					struct pid *pid,
+					pid_t *target_pids,
 					int trace)
 {
 	int retval;
 	struct task_struct *p;
 	int cgroup_callbacks_done = 0;
-	pid_t *target_pids = NULL;
 
 	if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
 		return ERR_PTR(-EINVAL);
@@ -1327,7 +1327,7 @@ struct task_struct * __cpuinit fork_idle(int cpu)
 	struct pt_regs regs;
 
 	task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL,
-			    &init_struct_pid, 0);
+			    &init_struct_pid, NULL, 0);
 	if (!IS_ERR(task))
 		init_idle(task, cpu);
 
@@ -1350,6 +1350,7 @@ long do_fork(unsigned long clone_flags,
 	struct task_struct *p;
 	int trace = 0;
 	long nr;
+	pid_t *target_pids = NULL;
 
 	/*
 	 * Do some preliminary argument and permissions checking before we
@@ -1390,7 +1391,7 @@ long do_fork(unsigned long clone_flags,
 		trace = tracehook_prepare_clone(clone_flags);
 
 	p = copy_process(clone_flags, stack_start, regs, stack_size,
-			 child_tidptr, NULL, trace);
+			 child_tidptr, NULL, target_pids, trace);
 	/*
 	 * Do this prior waking up the new thread - the thread pointer
 	 * might get invalid after that point, if the thread exits quickly.
-- 
1.5.2.5

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 6/7] [PATCH] Define do_fork_with_pids()
       [not found] ` <20090527154212.GA3107-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
                     ` (3 preceding siblings ...)
  2009-05-27 15:44   ` [PATCH 5/7] [PATCH] Add target_pids parameter to copy_process() Sukadev Bhattiprolu
@ 2009-05-27 15:44   ` Sukadev Bhattiprolu
       [not found]     ` <20090527154443.GF3107-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
  2009-05-27 15:45   ` [PATCH 7/7] [PATCH] Define clone_with_pids syscall Sukadev Bhattiprolu
                     ` (3 subsequent siblings)
  8 siblings, 1 reply; 22+ messages in thread
From: Sukadev Bhattiprolu @ 2009-05-27 15:44 UTC (permalink / raw)
  To: Oren Laadan; +Cc: Containers, David C. Hansen


From: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
Date: Mon, 4 May 2009 01:17:44 -0700
Subject: [PATCH 6/7] [PATCH] Define do_fork_with_pids()

do_fork_with_pids() is same as do_fork(), except that it takes an
additional, target_pids, parameter. This parameter, currently unused,
specifies the target_pids of the process in each of its pid namespaces.

Signed-off-by: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
---
 include/linux/sched.h |    1 +
 kernel/fork.c         |   17 ++++++++++++++---
 2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index b4c38bc..2173df1 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1995,6 +1995,7 @@ extern int disallow_signal(int);
 
 extern int do_execve(char *, char __user * __user *, char __user * __user *, struct pt_regs *);
 extern long do_fork(unsigned long, unsigned long, struct pt_regs *, unsigned long, int __user *, int __user *);
+extern long do_fork_with_pids(unsigned long, unsigned long, struct pt_regs *, unsigned long, int __user *, int __user *, pid_t *target_pids);
 struct task_struct *fork_idle(int);
 
 extern void set_task_comm(struct task_struct *tsk, char *from);
diff --git a/kernel/fork.c b/kernel/fork.c
index 373411e..912d008 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1340,17 +1340,17 @@ struct task_struct * __cpuinit fork_idle(int cpu)
  * It copies the process, and if successful kick-starts
  * it and waits for it to finish using the VM if required.
  */
-long do_fork(unsigned long clone_flags,
+long do_fork_with_pids(unsigned long clone_flags,
 	      unsigned long stack_start,
 	      struct pt_regs *regs,
 	      unsigned long stack_size,
 	      int __user *parent_tidptr,
-	      int __user *child_tidptr)
+	      int __user *child_tidptr,
+	      pid_t *target_pids)
 {
 	struct task_struct *p;
 	int trace = 0;
 	long nr;
-	pid_t *target_pids = NULL;
 
 	/*
 	 * Do some preliminary argument and permissions checking before we
@@ -1448,6 +1448,17 @@ long do_fork(unsigned long clone_flags,
 	return nr;
 }
 
+long do_fork(unsigned long clone_flags,
+	      unsigned long stack_start,
+	      struct pt_regs *regs,
+	      unsigned long stack_size,
+	      int __user *parent_tidptr,
+	      int __user *child_tidptr)
+{
+	return do_fork_with_pids(clone_flags, stack_start, regs, stack_size,
+			parent_tidptr, child_tidptr, NULL);
+}
+
 #ifndef ARCH_MIN_MMSTRUCT_ALIGN
 #define ARCH_MIN_MMSTRUCT_ALIGN 0
 #endif
-- 
1.5.2.5

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 7/7] [PATCH] Define clone_with_pids syscall
       [not found] ` <20090527154212.GA3107-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
                     ` (4 preceding siblings ...)
  2009-05-27 15:44   ` [PATCH 6/7] [PATCH] Define do_fork_with_pids() Sukadev Bhattiprolu
@ 2009-05-27 15:45   ` Sukadev Bhattiprolu
       [not found]     ` <20090527154507.GG3107-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
  2009-05-27 17:26   ` [PATCH 1/7] [PATCH] Factor out code to allocate pidmap page Dave Hansen
                     ` (2 subsequent siblings)
  8 siblings, 1 reply; 22+ messages in thread
From: Sukadev Bhattiprolu @ 2009-05-27 15:45 UTC (permalink / raw)
  To: Oren Laadan; +Cc: Containers, David C. Hansen


From: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
Date: Mon, 4 May 2009 01:17:45 -0700
Subject: [PATCH 7/7] [PATCH] Define clone_with_pids syscall

clone_with_pids() is same as clone(), except that it takes a 'target_pid_set'
paramter which lets caller choose a specific pid number for the child process
in each of the child process's pid namespace. This system call would be needed
to implement Checkpoint/Restart (i.e after a checkpoint, restart a process with
its original pids).

Call clone_with_pids as follows:

	pid_t pids[] = { 0, 77, 99 };
	struct target_pid_set pid_set;

	pid_set.num_pids = sizeof(pids) / sizeof(int);
	pid_set.target_pids = &pids;

	syscall(__NR_clone_with_pids, flags, stack, NULL, NULL, NULL, &pid_set);

If a target-pid is 0, the kernel continues to assign a pid for the process in
that namespace. In the above example, pids[0] is 0, meaning the kernel will
assign next available pid to the process in init_pid_ns. But kernel will assign
pid 77 in the child pid namespace 1 and pid 99 in pid namespace 2. If either
77 or 99 are taken, the system call fails with -EBUSY.

If 'pid_set.num_pids' exceeds the current nesting level of pid namespaces,
the system call fails with -EINVAL.

Its mostly an exploratory patch seeking feedback on the interface.

NOTE:
	Compared to clone(), clone_with_pids() needs to pass in two more
	pieces of information:

		- number of pids in the set
		- user buffer containing the list of pids.

	But since clone() already takes 5 parameters, use a 'struct
	target_pid_set'.

TODO:
	- Gently tested.
	- May need additional sanity checks in check_target_pids()
	- Allow CLONE_NEWPID() with clone_with_pids() (ensure target-pid in
	  the namespace is either 1 or 0).

Changelog[v1]:
	- Fixed some compile errors (had fixed these errors earlier in my
	  git tree but had not refreshed patches before emailing them)

Signed-off-by: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
---
 arch/x86/include/asm/syscalls.h    |    1 +
 arch/x86/include/asm/unistd_32.h   |    1 +
 arch/x86/kernel/entry_32.S         |    1 +
 arch/x86/kernel/process_32.c       |   94 ++++++++++++++++++++++++++++++++++++
 arch/x86/kernel/syscall_table_32.S |    1 +
 include/linux/types.h              |    5 ++
 6 files changed, 103 insertions(+), 0 deletions(-)

diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h
index 7043408..1fdc149 100644
--- a/arch/x86/include/asm/syscalls.h
+++ b/arch/x86/include/asm/syscalls.h
@@ -31,6 +31,7 @@ asmlinkage int sys_get_thread_area(struct user_desc __user *);
 /* kernel/process_32.c */
 int sys_fork(struct pt_regs *);
 int sys_clone(struct pt_regs *);
+int sys_clone_with_pids(struct pt_regs *);
 int sys_vfork(struct pt_regs *);
 int sys_execve(struct pt_regs *);
 
diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h
index 6e72d74..90f906f 100644
--- a/arch/x86/include/asm/unistd_32.h
+++ b/arch/x86/include/asm/unistd_32.h
@@ -340,6 +340,7 @@
 #define __NR_inotify_init1	332
 #define __NR_preadv		333
 #define __NR_pwritev		334
+#define __NR_clone_with_pids	335
 
 #ifdef __KERNEL__
 
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index c929add..ee92b0d 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -707,6 +707,7 @@ ptregs_##name: \
 PTREGSCALL(iopl)
 PTREGSCALL(fork)
 PTREGSCALL(clone)
+PTREGSCALL(clone_with_pids)
 PTREGSCALL(vfork)
 PTREGSCALL(execve)
 PTREGSCALL(sigaltstack)
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 76f8f84..65b27a8 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -445,6 +445,100 @@ int sys_clone(struct pt_regs *regs)
 	return do_fork(clone_flags, newsp, regs, 0, parent_tidptr, child_tidptr);
 }
 
+static int check_target_pids(unsigned long clone_flags,
+		struct target_pid_set *pid_setp)
+{
+	/*
+	 * CLONE_NEWPID implies pid == 1
+	 *
+	 * TODO: Maybe this should be more fine-grained (i.e would we want
+	 *  	 to have a container-init have a specific pid in ancestor
+	 *  	 namespaces ?)
+	 */
+	if (clone_flags & CLONE_NEWPID)
+		return -EINVAL;
+
+	/* number of pids must match current nesting level of pid ns */
+	if (pid_setp->num_pids > task_pid(current)->level + 1)
+		return -EINVAL;
+
+	/* TODO: More sanity checks ?  */
+
+	return 0;
+}
+
+static pid_t *copy_target_pids(unsigned long clone_flags, void __user *upid_setp)
+{
+	int rc;
+	int size;
+	pid_t __user *utarget_pids;
+	pid_t *target_pids;
+	struct target_pid_set pid_set;
+
+	if (copy_from_user(&pid_set, upid_setp, sizeof(pid_set)))
+		return ERR_PTR(-EFAULT);
+
+	size = pid_set.num_pids * sizeof(pid_t);
+	utarget_pids = pid_set.target_pids;
+
+	target_pids = kzalloc(size, GFP_KERNEL);
+	if (!target_pids)
+		return ERR_PTR(-ENOMEM);
+
+	rc = -EFAULT;
+	if (copy_from_user(target_pids, utarget_pids, size))
+		goto out_free;
+
+	/* Replace user-copy of target-pids in pid_set with kernel-copy */
+	pid_set.target_pids = target_pids;
+
+	rc = check_target_pids(clone_flags, &pid_set);
+	if (rc)
+		goto out_free;
+
+	printk(KERN_ERR "clone_with_pids() num_pids %d, [ %d, %d ]\n",
+			pid_set.num_pids, target_pids[0], target_pids[1]);
+
+	return target_pids;
+
+out_free:
+	kfree(target_pids);
+	return ERR_PTR(rc);
+}
+
+int sys_clone_with_pids(struct pt_regs *regs)
+{
+	unsigned long clone_flags;
+	unsigned long newsp;
+	int __user *parent_tidptr;
+	int __user *child_tidptr;
+	void __user *upid_setp;
+	pid_t *target_pids;
+	int rc;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	clone_flags = regs->bx;
+	newsp = regs->cx;
+	parent_tidptr = (int __user *)regs->dx;
+	child_tidptr = (int __user *)regs->di;
+	upid_setp = (void __user *)regs->bp;
+
+	if (!newsp)
+		newsp = regs->sp;
+
+	target_pids = copy_target_pids(clone_flags, upid_setp);
+	if (IS_ERR(target_pids))
+		return PTR_ERR(target_pids);
+
+	rc = do_fork_with_pids(clone_flags, newsp, regs, 0, parent_tidptr,
+			child_tidptr, target_pids);
+
+	kfree(target_pids);
+	return rc;
+}
+
 /*
  * sys_execve() executes a new program.
  */
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index ff5c873..94c1a58 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -334,3 +334,4 @@ ENTRY(sys_call_table)
 	.long sys_inotify_init1
 	.long sys_preadv
 	.long sys_pwritev
+	.long ptregs_clone_with_pids	/* 335 */
diff --git a/include/linux/types.h b/include/linux/types.h
index 5abe354..17ec186 100644
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -204,6 +204,11 @@ struct ustat {
 	char			f_fpack[6];
 };
 
+struct target_pid_set {
+	int num_pids;
+	pid_t *target_pids;
+};
+
 #endif	/* __KERNEL__ */
 #endif /*  __ASSEMBLY__ */
 #endif /* _LINUX_TYPES_H */
-- 
1.5.2.5

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* Re: [PATCH 1/7] [PATCH] Factor out code to allocate pidmap page
       [not found] ` <20090527154212.GA3107-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
                     ` (5 preceding siblings ...)
  2009-05-27 15:45   ` [PATCH 7/7] [PATCH] Define clone_with_pids syscall Sukadev Bhattiprolu
@ 2009-05-27 17:26   ` Dave Hansen
  2009-05-27 17:48   ` Serge E. Hallyn
  2009-05-27 19:31   ` Oren Laadan
  8 siblings, 0 replies; 22+ messages in thread
From: Dave Hansen @ 2009-05-27 17:26 UTC (permalink / raw)
  To: Sukadev Bhattiprolu; +Cc: Containers, David C. Hansen

On Wed, 2009-05-27 at 08:42 -0700, Sukadev Bhattiprolu wrote:
> From: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
> Date: Mon, 4 May 2009 01:17:39 -0700
> Subject: [PATCH 1/7] [PATCH] Factor out code to allocate pidmap page
> 
> Signed-off-by: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
> ---
>  kernel/pid.c |   43 ++++++++++++++++++++++++++++---------------
>  1 files changed, 28 insertions(+), 15 deletions(-)
> 
> diff --git a/kernel/pid.c b/kernel/pid.c
> index b2e5f78..c0aaebe 100644
> --- a/kernel/pid.c
> +++ b/kernel/pid.c
> @@ -122,6 +122,31 @@ static void free_pidmap(struct upid *upid)
>  	atomic_inc(&map->nr_free);
>  }
> 
> +static int alloc_pidmap_page(struct pidmap *map)
> +{
> +	void *page;
> +
> +	if (likely(map->page))
> +		return 0;
> +
> +	page = kzalloc(PAGE_SIZE, GFP_KERNEL);
> +
> +	/*
> +	 * Free the page if someone raced with us installing it:
> +	 */
> +	spin_lock_irq(&pidmap_lock);
> +	if (map->page)
> +		kfree(page);
> +	else
> +		map->page = page;
> +	spin_unlock_irq(&pidmap_lock);
> +
> +	if (unlikely(!map->page))
> +		return -1;
> +

-ENOMEM, please

Otherwise looks fine.  Please at least add some minimal patch
description about what you're doing and why, though.

-- Dave

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 2/7] [PATCH] Have alloc_pidmap() return actual error code
       [not found]     ` <20090527154259.GB3107-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
@ 2009-05-27 17:27       ` Dave Hansen
  2009-05-27 17:43       ` Serge E. Hallyn
  2009-05-27 19:37       ` Oren Laadan
  2 siblings, 0 replies; 22+ messages in thread
From: Dave Hansen @ 2009-05-27 17:27 UTC (permalink / raw)
  To: Sukadev Bhattiprolu; +Cc: Containers, David C. Hansen

On Wed, 2009-05-27 at 08:42 -0700, Sukadev Bhattiprolu wrote:
> From: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
> Date: Mon, 4 May 2009 01:17:40 -0700
> Subject: [PATCH 2/7] [PATCH] Have alloc_pidmap() return actual error code
> 
> alloc_pidmap() can fail either because all pid numbers are in use or
> we can't allocate memory. With support for setting a specific pid
> number, alloc_pidmap() would also fail if either the given pid
> number is invalid or in use.
> 
> Rather than have caller assume -ENOMEM, have alloc_pidmap() return
> the actual error.
> 
> Signed-off-by: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
> ---
>  kernel/fork.c |    5 +++--
>  kernel/pid.c  |    9 ++++++---
>  2 files changed, 9 insertions(+), 5 deletions(-)
> 
> diff --git a/kernel/fork.c b/kernel/fork.c
> index b9e2edd..f8411a8 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -1119,10 +1119,11 @@ static struct task_struct *copy_process(unsigned long clone_flags,
>  		goto bad_fork_cleanup_io;
> 
>  	if (pid != &init_struct_pid) {
> -		retval = -ENOMEM;
>  		pid = alloc_pid(p->nsproxy->pid_ns);
> -		if (!pid)
> +		if (IS_ERR(pid)) {
> +			retval = PTR_ERR(pid);
>  			goto bad_fork_cleanup_io;
> +		}
> 
>  		if (clone_flags & CLONE_NEWPID) {
>  			retval = pid_ns_prepare_proc(p->nsproxy->pid_ns);
> diff --git a/kernel/pid.c b/kernel/pid.c
> index c0aaebe..fd72ad9 100644
> --- a/kernel/pid.c
> +++ b/kernel/pid.c
> @@ -151,6 +151,7 @@ static int alloc_pidmap(struct pid_namespace *pid_ns)
>  {
>  	int i, offset, max_scan, pid, last = pid_ns->last_pid;
>  	struct pidmap *map;
> +	int rc = -EAGAIN;
> 
>  	pid = last + 1;
>  	if (pid >= pid_max)
> @@ -159,8 +160,10 @@ static int alloc_pidmap(struct pid_namespace *pid_ns)
>  	map = &pid_ns->pidmap[pid/BITS_PER_PAGE];
>  	max_scan = (pid_max + BITS_PER_PAGE - 1)/BITS_PER_PAGE - !offset;
>  	for (i = 0; i <= max_scan; ++i) {
> -		if (alloc_pidmap_page(map))
> +		if (alloc_pidmap_page(map)) {
> +			rc = -ENOMEM;
>  			break;
> +		}

OK, pet peeve time:

	rc = alloc_pidmap_page(map);
	if (rc)
		break;

It saves the bracket and saves a line of assignment, *and* it clarifies
program flow.

-- Dave

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 2/7] [PATCH] Have alloc_pidmap() return actual error code
       [not found]     ` <20090527154259.GB3107-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
  2009-05-27 17:27       ` Dave Hansen
@ 2009-05-27 17:43       ` Serge E. Hallyn
  2009-05-27 19:37       ` Oren Laadan
  2 siblings, 0 replies; 22+ messages in thread
From: Serge E. Hallyn @ 2009-05-27 17:43 UTC (permalink / raw)
  To: Sukadev Bhattiprolu; +Cc: Containers, David C. Hansen

Quoting Sukadev Bhattiprolu (sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org):
> 
> From: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
> Date: Mon, 4 May 2009 01:17:40 -0700
> Subject: [PATCH 2/7] [PATCH] Have alloc_pidmap() return actual error code
> 
> alloc_pidmap() can fail either because all pid numbers are in use or
> we can't allocate memory. With support for setting a specific pid
> number, alloc_pidmap() would also fail if either the given pid
> number is invalid or in use.
> 
> Rather than have caller assume -ENOMEM, have alloc_pidmap() return
> the actual error.
> 
> Signed-off-by: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>

Acked-by: Serge Hallyn <serue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>

> ---
>  kernel/fork.c |    5 +++--
>  kernel/pid.c  |    9 ++++++---
>  2 files changed, 9 insertions(+), 5 deletions(-)
> 
> diff --git a/kernel/fork.c b/kernel/fork.c
> index b9e2edd..f8411a8 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -1119,10 +1119,11 @@ static struct task_struct *copy_process(unsigned long clone_flags,
>  		goto bad_fork_cleanup_io;
> 
>  	if (pid != &init_struct_pid) {
> -		retval = -ENOMEM;
>  		pid = alloc_pid(p->nsproxy->pid_ns);
> -		if (!pid)
> +		if (IS_ERR(pid)) {
> +			retval = PTR_ERR(pid);
>  			goto bad_fork_cleanup_io;
> +		}
> 
>  		if (clone_flags & CLONE_NEWPID) {
>  			retval = pid_ns_prepare_proc(p->nsproxy->pid_ns);
> diff --git a/kernel/pid.c b/kernel/pid.c
> index c0aaebe..fd72ad9 100644
> --- a/kernel/pid.c
> +++ b/kernel/pid.c
> @@ -151,6 +151,7 @@ static int alloc_pidmap(struct pid_namespace *pid_ns)
>  {
>  	int i, offset, max_scan, pid, last = pid_ns->last_pid;
>  	struct pidmap *map;
> +	int rc = -EAGAIN;
> 
>  	pid = last + 1;
>  	if (pid >= pid_max)
> @@ -159,8 +160,10 @@ static int alloc_pidmap(struct pid_namespace *pid_ns)
>  	map = &pid_ns->pidmap[pid/BITS_PER_PAGE];
>  	max_scan = (pid_max + BITS_PER_PAGE - 1)/BITS_PER_PAGE - !offset;
>  	for (i = 0; i <= max_scan; ++i) {
> -		if (alloc_pidmap_page(map))
> +		if (alloc_pidmap_page(map)) {
> +			rc = -ENOMEM;
>  			break;
> +		}
> 
>  		if (likely(atomic_read(&map->nr_free))) {
>  			do {
> @@ -192,7 +195,7 @@ static int alloc_pidmap(struct pid_namespace *pid_ns)
>  		}
>  		pid = mk_pid(pid_ns, map, offset);
>  	}
> -	return -1;
> +	return rc;
>  }
> 
>  int next_pidmap(struct pid_namespace *pid_ns, int last)
> @@ -297,7 +300,7 @@ out_free:
>  		free_pidmap(pid->numbers + i);
> 
>  	kmem_cache_free(ns->pid_cachep, pid);
> -	pid = NULL;
> +	pid = ERR_PTR(nr);
>  	goto out;
>  }
> 
> -- 
> 1.5.2.5

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 1/7] [PATCH] Factor out code to allocate pidmap page
       [not found] ` <20090527154212.GA3107-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
                     ` (6 preceding siblings ...)
  2009-05-27 17:26   ` [PATCH 1/7] [PATCH] Factor out code to allocate pidmap page Dave Hansen
@ 2009-05-27 17:48   ` Serge E. Hallyn
  2009-05-27 19:31   ` Oren Laadan
  8 siblings, 0 replies; 22+ messages in thread
From: Serge E. Hallyn @ 2009-05-27 17:48 UTC (permalink / raw)
  To: Sukadev Bhattiprolu; +Cc: Containers, David C. Hansen

Quoting Sukadev Bhattiprolu (sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org):
> 
> From: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
> Date: Mon, 4 May 2009 01:17:39 -0700
> Subject: [PATCH 1/7] [PATCH] Factor out code to allocate pidmap page
> 
> Signed-off-by: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>

Acked-by: Serge Hallyn <serue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>

> ---
>  kernel/pid.c |   43 ++++++++++++++++++++++++++++---------------
>  1 files changed, 28 insertions(+), 15 deletions(-)
> 
> diff --git a/kernel/pid.c b/kernel/pid.c
> index b2e5f78..c0aaebe 100644
> --- a/kernel/pid.c
> +++ b/kernel/pid.c
> @@ -122,6 +122,31 @@ static void free_pidmap(struct upid *upid)
>  	atomic_inc(&map->nr_free);
>  }
> 
> +static int alloc_pidmap_page(struct pidmap *map)
> +{
> +	void *page;
> +
> +	if (likely(map->page))
> +		return 0;
> +
> +	page = kzalloc(PAGE_SIZE, GFP_KERNEL);
> +
> +	/*
> +	 * Free the page if someone raced with us installing it:
> +	 */
> +	spin_lock_irq(&pidmap_lock);
> +	if (map->page)
> +		kfree(page);
> +	else
> +		map->page = page;
> +	spin_unlock_irq(&pidmap_lock);
> +
> +	if (unlikely(!map->page))
> +		return -1;
> +
> +	return 0;
> +}
> +
>  static int alloc_pidmap(struct pid_namespace *pid_ns)
>  {
>  	int i, offset, max_scan, pid, last = pid_ns->last_pid;
> @@ -134,21 +159,9 @@ static int alloc_pidmap(struct pid_namespace *pid_ns)
>  	map = &pid_ns->pidmap[pid/BITS_PER_PAGE];
>  	max_scan = (pid_max + BITS_PER_PAGE - 1)/BITS_PER_PAGE - !offset;
>  	for (i = 0; i <= max_scan; ++i) {
> -		if (unlikely(!map->page)) {
> -			void *page = kzalloc(PAGE_SIZE, GFP_KERNEL);
> -			/*
> -			 * Free the page if someone raced with us
> -			 * installing it:
> -			 */
> -			spin_lock_irq(&pidmap_lock);
> -			if (map->page)
> -				kfree(page);
> -			else
> -				map->page = page;
> -			spin_unlock_irq(&pidmap_lock);
> -			if (unlikely(!map->page))
> -				break;
> -		}
> +		if (alloc_pidmap_page(map))
> +			break;
> +
>  		if (likely(atomic_read(&map->nr_free))) {
>  			do {
>  				if (!test_and_set_bit(offset, map->page)) {
> -- 
> 1.5.2.5

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 3/7] [PATCH] Add target_pid parameter to alloc_pidmap()
       [not found]     ` <20090527154333.GC3107-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
@ 2009-05-27 17:56       ` Serge E. Hallyn
  0 siblings, 0 replies; 22+ messages in thread
From: Serge E. Hallyn @ 2009-05-27 17:56 UTC (permalink / raw)
  To: Sukadev Bhattiprolu; +Cc: Containers, David C. Hansen

Quoting Sukadev Bhattiprolu (sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org):
> 
> From: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
> Date: Mon, 4 May 2009 01:17:41 -0700
> Subject: [PATCH 3/7] [PATCH] Add target_pid parameter to alloc_pidmap()
> 
> Signed-off-by: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>

How about

#define TARGET_PID_UNSPECIFIED 0

or something to pass to alloc_pidmap() from alloc_pid()?
Up to you....  More importantly:

> ---
>  kernel/pid.c |   28 ++++++++++++++++++++++++++--
>  1 files changed, 26 insertions(+), 2 deletions(-)
> 
> diff --git a/kernel/pid.c b/kernel/pid.c
> index fd72ad9..93406c6 100644
> --- a/kernel/pid.c
> +++ b/kernel/pid.c
> @@ -147,12 +147,36 @@ static int alloc_pidmap_page(struct pidmap *map)
>  	return 0;
>  }
> 
> -static int alloc_pidmap(struct pid_namespace *pid_ns)
> +static int set_pidmap(struct pid_namespace *pid_ns, int pid)
> +{
> +	int offset;
> +	struct pidmap *map;
> +
> +	if (pid >= pid_max)
> +		return -EINVAL;

what about pid < 0?

> +
> +	offset = pid & BITS_PER_PAGE_MASK;
> +	map = &pid_ns->pidmap[pid/BITS_PER_PAGE];
> +
> +	if (alloc_pidmap_page(map))
> +		return -ENOMEM;
> +
> +	if (test_and_set_bit(offset, map->page))
> +		return -EBUSY;
> +
> +	atomic_dec(&map->nr_free);
> +	return pid;
> +}
> +
> +static int alloc_pidmap(struct pid_namespace *pid_ns, int target_pid)
>  {
>  	int i, offset, max_scan, pid, last = pid_ns->last_pid;
>  	struct pidmap *map;
>  	int rc = -EAGAIN;
> 
> +	if (target_pid)
> +		return set_pidmap(pid_ns, target_pid);
> +
>  	pid = last + 1;
>  	if (pid >= pid_max)
>  		pid = RESERVED_PIDS;
> @@ -269,7 +293,7 @@ struct pid *alloc_pid(struct pid_namespace *ns)
> 
>  	tmp = ns;
>  	for (i = ns->level; i >= 0; i--) {
> -		nr = alloc_pidmap(tmp);
> +		nr = alloc_pidmap(tmp, 0);
>  		if (nr < 0)
>  			goto out_free;
> 
> -- 
> 1.5.2.5

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 4/7] [PATCH] Add target_pids parameter to alloc_pid()
       [not found]     ` <20090527154355.GD3107-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
@ 2009-05-27 18:05       ` Serge E. Hallyn
  0 siblings, 0 replies; 22+ messages in thread
From: Serge E. Hallyn @ 2009-05-27 18:05 UTC (permalink / raw)
  To: Sukadev Bhattiprolu; +Cc: Containers, David C. Hansen

Quoting Sukadev Bhattiprolu (sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org):
> 
> From: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
> Date: Mon, 4 May 2009 01:17:42 -0700
> Subject: [PATCH 4/7] [PATCH] Add target_pids parameter to alloc_pid()
> 
> This parameter is currently NULL, but will be used in a follow-on patch.
> 
> Signed-off-by: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>

Acked-by: Serge Hallyn <serue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>

> ---
>  include/linux/pid.h |    2 +-
>  kernel/fork.c       |    3 ++-
>  kernel/pid.c        |    9 +++++++--
>  3 files changed, 10 insertions(+), 4 deletions(-)
> 
> diff --git a/include/linux/pid.h b/include/linux/pid.h
> index 49f1c2f..914185d 100644
> --- a/include/linux/pid.h
> +++ b/include/linux/pid.h
> @@ -119,7 +119,7 @@ extern struct pid *find_get_pid(int nr);
>  extern struct pid *find_ge_pid(int nr, struct pid_namespace *);
>  int next_pidmap(struct pid_namespace *pid_ns, int last);
> 
> -extern struct pid *alloc_pid(struct pid_namespace *ns);
> +extern struct pid *alloc_pid(struct pid_namespace *ns, pid_t *target_pids);
>  extern void free_pid(struct pid *pid);
> 
>  /*
> diff --git a/kernel/fork.c b/kernel/fork.c
> index f8411a8..d2d69d3 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -954,6 +954,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
>  	int retval;
>  	struct task_struct *p;
>  	int cgroup_callbacks_done = 0;
> +	pid_t *target_pids = NULL;
> 
>  	if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
>  		return ERR_PTR(-EINVAL);
> @@ -1119,7 +1120,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
>  		goto bad_fork_cleanup_io;
> 
>  	if (pid != &init_struct_pid) {
> -		pid = alloc_pid(p->nsproxy->pid_ns);
> +		pid = alloc_pid(p->nsproxy->pid_ns, target_pids);
>  		if (IS_ERR(pid)) {
>  			retval = PTR_ERR(pid);
>  			goto bad_fork_cleanup_io;
> diff --git a/kernel/pid.c b/kernel/pid.c
> index 93406c6..4b2373a 100644
> --- a/kernel/pid.c
> +++ b/kernel/pid.c
> @@ -279,13 +279,14 @@ void free_pid(struct pid *pid)
>  	call_rcu(&pid->rcu, delayed_put_pid);
>  }
> 
> -struct pid *alloc_pid(struct pid_namespace *ns)
> +struct pid *alloc_pid(struct pid_namespace *ns, pid_t *target_pids)
>  {
>  	struct pid *pid;
>  	enum pid_type type;
>  	int i, nr;
>  	struct pid_namespace *tmp;
>  	struct upid *upid;
> +	int tpid;
> 
>  	pid = kmem_cache_alloc(ns->pid_cachep, GFP_KERNEL);
>  	if (!pid)
> @@ -293,7 +294,11 @@ struct pid *alloc_pid(struct pid_namespace *ns)
> 
>  	tmp = ns;
>  	for (i = ns->level; i >= 0; i--) {
> -		nr = alloc_pidmap(tmp, 0);
> +		tpid = 0;
> +		if (target_pids)
> +			tpid = target_pids[i];
> +
> +		nr = alloc_pidmap(tmp, tpid);
>  		if (nr < 0)
>  			goto out_free;
> 
> -- 
> 1.5.2.5

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 5/7] [PATCH] Add target_pids parameter to copy_process()
       [not found]     ` <20090527154422.GE3107-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
@ 2009-05-27 18:05       ` Serge E. Hallyn
  2009-05-27 19:41       ` Oren Laadan
  1 sibling, 0 replies; 22+ messages in thread
From: Serge E. Hallyn @ 2009-05-27 18:05 UTC (permalink / raw)
  To: Sukadev Bhattiprolu; +Cc: Containers, David C. Hansen

Quoting Sukadev Bhattiprolu (sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org):
> 
> From: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
> Date: Mon, 4 May 2009 01:17:43 -0700
> Subject: [PATCH 5/7] [PATCH] Add target_pids parameter to copy_process()
> 
> The new parameter will be used in a follow-on patch when clone_with_pids()
> is implemented.
> 
> Signed-off-by: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>

Acked-by: Serge Hallyn <serue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>

> ---
>  kernel/fork.c |    7 ++++---
>  1 files changed, 4 insertions(+), 3 deletions(-)
> 
> diff --git a/kernel/fork.c b/kernel/fork.c
> index d2d69d3..373411e 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -949,12 +949,12 @@ static struct task_struct *copy_process(unsigned long clone_flags,
>  					unsigned long stack_size,
>  					int __user *child_tidptr,
>  					struct pid *pid,
> +					pid_t *target_pids,
>  					int trace)
>  {
>  	int retval;
>  	struct task_struct *p;
>  	int cgroup_callbacks_done = 0;
> -	pid_t *target_pids = NULL;
> 
>  	if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
>  		return ERR_PTR(-EINVAL);
> @@ -1327,7 +1327,7 @@ struct task_struct * __cpuinit fork_idle(int cpu)
>  	struct pt_regs regs;
> 
>  	task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL,
> -			    &init_struct_pid, 0);
> +			    &init_struct_pid, NULL, 0);
>  	if (!IS_ERR(task))
>  		init_idle(task, cpu);
> 
> @@ -1350,6 +1350,7 @@ long do_fork(unsigned long clone_flags,
>  	struct task_struct *p;
>  	int trace = 0;
>  	long nr;
> +	pid_t *target_pids = NULL;
> 
>  	/*
>  	 * Do some preliminary argument and permissions checking before we
> @@ -1390,7 +1391,7 @@ long do_fork(unsigned long clone_flags,
>  		trace = tracehook_prepare_clone(clone_flags);
> 
>  	p = copy_process(clone_flags, stack_start, regs, stack_size,
> -			 child_tidptr, NULL, trace);
> +			 child_tidptr, NULL, target_pids, trace);
>  	/*
>  	 * Do this prior waking up the new thread - the thread pointer
>  	 * might get invalid after that point, if the thread exits quickly.
> -- 
> 1.5.2.5

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 6/7] [PATCH] Define do_fork_with_pids()
       [not found]     ` <20090527154443.GF3107-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
@ 2009-05-27 18:05       ` Serge E. Hallyn
  2009-05-27 19:40       ` Oren Laadan
  1 sibling, 0 replies; 22+ messages in thread
From: Serge E. Hallyn @ 2009-05-27 18:05 UTC (permalink / raw)
  To: Sukadev Bhattiprolu; +Cc: Containers, David C. Hansen

Quoting Sukadev Bhattiprolu (sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org):
> 
> From: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
> Date: Mon, 4 May 2009 01:17:44 -0700
> Subject: [PATCH 6/7] [PATCH] Define do_fork_with_pids()
> 
> do_fork_with_pids() is same as do_fork(), except that it takes an
> additional, target_pids, parameter. This parameter, currently unused,
> specifies the target_pids of the process in each of its pid namespaces.
> 
> Signed-off-by: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>

Acked-by: Serge Hallyn <serue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>

> ---
>  include/linux/sched.h |    1 +
>  kernel/fork.c         |   17 ++++++++++++++---
>  2 files changed, 15 insertions(+), 3 deletions(-)
> 
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index b4c38bc..2173df1 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -1995,6 +1995,7 @@ extern int disallow_signal(int);
> 
>  extern int do_execve(char *, char __user * __user *, char __user * __user *, struct pt_regs *);
>  extern long do_fork(unsigned long, unsigned long, struct pt_regs *, unsigned long, int __user *, int __user *);
> +extern long do_fork_with_pids(unsigned long, unsigned long, struct pt_regs *, unsigned long, int __user *, int __user *, pid_t *target_pids);
>  struct task_struct *fork_idle(int);
> 
>  extern void set_task_comm(struct task_struct *tsk, char *from);
> diff --git a/kernel/fork.c b/kernel/fork.c
> index 373411e..912d008 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -1340,17 +1340,17 @@ struct task_struct * __cpuinit fork_idle(int cpu)
>   * It copies the process, and if successful kick-starts
>   * it and waits for it to finish using the VM if required.
>   */
> -long do_fork(unsigned long clone_flags,
> +long do_fork_with_pids(unsigned long clone_flags,
>  	      unsigned long stack_start,
>  	      struct pt_regs *regs,
>  	      unsigned long stack_size,
>  	      int __user *parent_tidptr,
> -	      int __user *child_tidptr)
> +	      int __user *child_tidptr,
> +	      pid_t *target_pids)
>  {
>  	struct task_struct *p;
>  	int trace = 0;
>  	long nr;
> -	pid_t *target_pids = NULL;
> 
>  	/*
>  	 * Do some preliminary argument and permissions checking before we
> @@ -1448,6 +1448,17 @@ long do_fork(unsigned long clone_flags,
>  	return nr;
>  }
> 
> +long do_fork(unsigned long clone_flags,
> +	      unsigned long stack_start,
> +	      struct pt_regs *regs,
> +	      unsigned long stack_size,
> +	      int __user *parent_tidptr,
> +	      int __user *child_tidptr)
> +{
> +	return do_fork_with_pids(clone_flags, stack_start, regs, stack_size,
> +			parent_tidptr, child_tidptr, NULL);
> +}
> +
>  #ifndef ARCH_MIN_MMSTRUCT_ALIGN
>  #define ARCH_MIN_MMSTRUCT_ALIGN 0
>  #endif
> -- 
> 1.5.2.5

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 7/7] [PATCH] Define clone_with_pids syscall
       [not found]     ` <20090527154507.GG3107-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
@ 2009-05-27 18:19       ` Serge E. Hallyn
  2009-05-27 19:53       ` Oren Laadan
  1 sibling, 0 replies; 22+ messages in thread
From: Serge E. Hallyn @ 2009-05-27 18:19 UTC (permalink / raw)
  To: Sukadev Bhattiprolu; +Cc: Containers, David C. Hansen

Quoting Sukadev Bhattiprolu (sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org):
> 
> From: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
> Date: Mon, 4 May 2009 01:17:45 -0700
> Subject: [PATCH 7/7] [PATCH] Define clone_with_pids syscall
> 
> clone_with_pids() is same as clone(), except that it takes a 'target_pid_set'
> paramter which lets caller choose a specific pid number for the child process
> in each of the child process's pid namespace. This system call would be needed
> to implement Checkpoint/Restart (i.e after a checkpoint, restart a process with
> its original pids).

I think you should point out here that CAP_SYS_ADMIN is needed
to use the syscall, so unprivileged tasks can't use this to try to
play games with /var/run/*.pid.

...

> Signed-off-by: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>

Acked-by: Serge Hallyn <serue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>

-serge

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 1/7] [PATCH] Factor out code to allocate pidmap page
       [not found] ` <20090527154212.GA3107-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
                     ` (7 preceding siblings ...)
  2009-05-27 17:48   ` Serge E. Hallyn
@ 2009-05-27 19:31   ` Oren Laadan
  8 siblings, 0 replies; 22+ messages in thread
From: Oren Laadan @ 2009-05-27 19:31 UTC (permalink / raw)
  To: Sukadev Bhattiprolu; +Cc: Containers, David C. Hansen



Sukadev Bhattiprolu wrote:
> From: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
> Date: Mon, 4 May 2009 01:17:39 -0700
> Subject: [PATCH 1/7] [PATCH] Factor out code to allocate pidmap page
> 
> Signed-off-by: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
> ---

Reviewed-by: Oren Laadan <orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>

>  kernel/pid.c |   43 ++++++++++++++++++++++++++++---------------
>  1 files changed, 28 insertions(+), 15 deletions(-)
> 
> diff --git a/kernel/pid.c b/kernel/pid.c
> index b2e5f78..c0aaebe 100644
> --- a/kernel/pid.c
> +++ b/kernel/pid.c
> @@ -122,6 +122,31 @@ static void free_pidmap(struct upid *upid)
>  	atomic_inc(&map->nr_free);
>  }
>  
> +static int alloc_pidmap_page(struct pidmap *map)
> +{
> +	void *page;
> +
> +	if (likely(map->page))
> +		return 0;
> +
> +	page = kzalloc(PAGE_SIZE, GFP_KERNEL);
> +
> +	/*
> +	 * Free the page if someone raced with us installing it:
> +	 */
> +	spin_lock_irq(&pidmap_lock);
> +	if (map->page)
> +		kfree(page);
> +	else
> +		map->page = page;
> +	spin_unlock_irq(&pidmap_lock);
> +
> +	if (unlikely(!map->page))
> +		return -1;
> +
> +	return 0;
> +}
> +
>  static int alloc_pidmap(struct pid_namespace *pid_ns)
>  {
>  	int i, offset, max_scan, pid, last = pid_ns->last_pid;
> @@ -134,21 +159,9 @@ static int alloc_pidmap(struct pid_namespace *pid_ns)
>  	map = &pid_ns->pidmap[pid/BITS_PER_PAGE];
>  	max_scan = (pid_max + BITS_PER_PAGE - 1)/BITS_PER_PAGE - !offset;
>  	for (i = 0; i <= max_scan; ++i) {
> -		if (unlikely(!map->page)) {
> -			void *page = kzalloc(PAGE_SIZE, GFP_KERNEL);
> -			/*
> -			 * Free the page if someone raced with us
> -			 * installing it:
> -			 */
> -			spin_lock_irq(&pidmap_lock);
> -			if (map->page)
> -				kfree(page);
> -			else
> -				map->page = page;
> -			spin_unlock_irq(&pidmap_lock);
> -			if (unlikely(!map->page))
> -				break;
> -		}
> +		if (alloc_pidmap_page(map))
> +			break;
> +
>  		if (likely(atomic_read(&map->nr_free))) {
>  			do {
>  				if (!test_and_set_bit(offset, map->page)) {

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 2/7] [PATCH] Have alloc_pidmap() return actual error code
       [not found]     ` <20090527154259.GB3107-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
  2009-05-27 17:27       ` Dave Hansen
  2009-05-27 17:43       ` Serge E. Hallyn
@ 2009-05-27 19:37       ` Oren Laadan
  2 siblings, 0 replies; 22+ messages in thread
From: Oren Laadan @ 2009-05-27 19:37 UTC (permalink / raw)
  To: Sukadev Bhattiprolu; +Cc: Containers, David C. Hansen



Sukadev Bhattiprolu wrote:
> From: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
> Date: Mon, 4 May 2009 01:17:40 -0700
> Subject: [PATCH 2/7] [PATCH] Have alloc_pidmap() return actual error code
> 
> alloc_pidmap() can fail either because all pid numbers are in use or
> we can't allocate memory. With support for setting a specific pid
> number, alloc_pidmap() would also fail if either the given pid
> number is invalid or in use.
> 
> Rather than have caller assume -ENOMEM, have alloc_pidmap() return
> the actual error.
> 
> Signed-off-by: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
> ---

Reviewed-by: Oren Laadan <orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>

>  kernel/fork.c |    5 +++--
>  kernel/pid.c  |    9 ++++++---
>  2 files changed, 9 insertions(+), 5 deletions(-)
> 
> diff --git a/kernel/fork.c b/kernel/fork.c
> index b9e2edd..f8411a8 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -1119,10 +1119,11 @@ static struct task_struct *copy_process(unsigned long clone_flags,
>  		goto bad_fork_cleanup_io;
>  
>  	if (pid != &init_struct_pid) {
> -		retval = -ENOMEM;
>  		pid = alloc_pid(p->nsproxy->pid_ns);
> -		if (!pid)
> +		if (IS_ERR(pid)) {
> +			retval = PTR_ERR(pid);
>  			goto bad_fork_cleanup_io;
> +		}
>  
>  		if (clone_flags & CLONE_NEWPID) {
>  			retval = pid_ns_prepare_proc(p->nsproxy->pid_ns);
> diff --git a/kernel/pid.c b/kernel/pid.c
> index c0aaebe..fd72ad9 100644
> --- a/kernel/pid.c
> +++ b/kernel/pid.c
> @@ -151,6 +151,7 @@ static int alloc_pidmap(struct pid_namespace *pid_ns)
>  {
>  	int i, offset, max_scan, pid, last = pid_ns->last_pid;
>  	struct pidmap *map;
> +	int rc = -EAGAIN;
>  
>  	pid = last + 1;
>  	if (pid >= pid_max)
> @@ -159,8 +160,10 @@ static int alloc_pidmap(struct pid_namespace *pid_ns)
>  	map = &pid_ns->pidmap[pid/BITS_PER_PAGE];
>  	max_scan = (pid_max + BITS_PER_PAGE - 1)/BITS_PER_PAGE - !offset;
>  	for (i = 0; i <= max_scan; ++i) {
> -		if (alloc_pidmap_page(map))
> +		if (alloc_pidmap_page(map)) {
> +			rc = -ENOMEM;
>  			break;
> +		}
>  
>  		if (likely(atomic_read(&map->nr_free))) {
>  			do {
> @@ -192,7 +195,7 @@ static int alloc_pidmap(struct pid_namespace *pid_ns)
>  		}
>  		pid = mk_pid(pid_ns, map, offset);
>  	}
> -	return -1;
> +	return rc;
>  }
>  
>  int next_pidmap(struct pid_namespace *pid_ns, int last)
> @@ -297,7 +300,7 @@ out_free:
>  		free_pidmap(pid->numbers + i);
>  
>  	kmem_cache_free(ns->pid_cachep, pid);
> -	pid = NULL;
> +	pid = ERR_PTR(nr);
>  	goto out;
>  }
>  

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 6/7] [PATCH] Define do_fork_with_pids()
       [not found]     ` <20090527154443.GF3107-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
  2009-05-27 18:05       ` Serge E. Hallyn
@ 2009-05-27 19:40       ` Oren Laadan
  1 sibling, 0 replies; 22+ messages in thread
From: Oren Laadan @ 2009-05-27 19:40 UTC (permalink / raw)
  To: Sukadev Bhattiprolu; +Cc: Containers, David C. Hansen



Sukadev Bhattiprolu wrote:
> From: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
> Date: Mon, 4 May 2009 01:17:44 -0700
> Subject: [PATCH 6/7] [PATCH] Define do_fork_with_pids()
> 
> do_fork_with_pids() is same as do_fork(), except that it takes an
> additional, target_pids, parameter. This parameter, currently unused,
> specifies the target_pids of the process in each of its pid namespaces.
> 
> Signed-off-by: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
> ---

Reviewed-by: Oren Laadan <orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>

>  include/linux/sched.h |    1 +
>  kernel/fork.c         |   17 ++++++++++++++---
>  2 files changed, 15 insertions(+), 3 deletions(-)
> 
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index b4c38bc..2173df1 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -1995,6 +1995,7 @@ extern int disallow_signal(int);
>  
>  extern int do_execve(char *, char __user * __user *, char __user * __user *, struct pt_regs *);
>  extern long do_fork(unsigned long, unsigned long, struct pt_regs *, unsigned long, int __user *, int __user *);
> +extern long do_fork_with_pids(unsigned long, unsigned long, struct pt_regs *, unsigned long, int __user *, int __user *, pid_t *target_pids);
>  struct task_struct *fork_idle(int);
>  
>  extern void set_task_comm(struct task_struct *tsk, char *from);
> diff --git a/kernel/fork.c b/kernel/fork.c
> index 373411e..912d008 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -1340,17 +1340,17 @@ struct task_struct * __cpuinit fork_idle(int cpu)
>   * It copies the process, and if successful kick-starts
>   * it and waits for it to finish using the VM if required.
>   */
> -long do_fork(unsigned long clone_flags,
> +long do_fork_with_pids(unsigned long clone_flags,
>  	      unsigned long stack_start,
>  	      struct pt_regs *regs,
>  	      unsigned long stack_size,
>  	      int __user *parent_tidptr,
> -	      int __user *child_tidptr)
> +	      int __user *child_tidptr,
> +	      pid_t *target_pids)
>  {
>  	struct task_struct *p;
>  	int trace = 0;
>  	long nr;
> -	pid_t *target_pids = NULL;
>  
>  	/*
>  	 * Do some preliminary argument and permissions checking before we
> @@ -1448,6 +1448,17 @@ long do_fork(unsigned long clone_flags,
>  	return nr;
>  }
>  
> +long do_fork(unsigned long clone_flags,
> +	      unsigned long stack_start,
> +	      struct pt_regs *regs,
> +	      unsigned long stack_size,
> +	      int __user *parent_tidptr,
> +	      int __user *child_tidptr)
> +{
> +	return do_fork_with_pids(clone_flags, stack_start, regs, stack_size,
> +			parent_tidptr, child_tidptr, NULL);
> +}
> +
>  #ifndef ARCH_MIN_MMSTRUCT_ALIGN
>  #define ARCH_MIN_MMSTRUCT_ALIGN 0
>  #endif

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 5/7] [PATCH] Add target_pids parameter to copy_process()
       [not found]     ` <20090527154422.GE3107-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
  2009-05-27 18:05       ` Serge E. Hallyn
@ 2009-05-27 19:41       ` Oren Laadan
  1 sibling, 0 replies; 22+ messages in thread
From: Oren Laadan @ 2009-05-27 19:41 UTC (permalink / raw)
  To: Sukadev Bhattiprolu; +Cc: Containers, David C. Hansen



Sukadev Bhattiprolu wrote:
> From: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
> Date: Mon, 4 May 2009 01:17:43 -0700
> Subject: [PATCH 5/7] [PATCH] Add target_pids parameter to copy_process()
> 
> The new parameter will be used in a follow-on patch when clone_with_pids()
> is implemented.
> 
> Signed-off-by: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
> ---

Reviewed-by: Oren Laadan <orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>

>  kernel/fork.c |    7 ++++---
>  1 files changed, 4 insertions(+), 3 deletions(-)
> 
> diff --git a/kernel/fork.c b/kernel/fork.c
> index d2d69d3..373411e 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -949,12 +949,12 @@ static struct task_struct *copy_process(unsigned long clone_flags,
>  					unsigned long stack_size,
>  					int __user *child_tidptr,
>  					struct pid *pid,
> +					pid_t *target_pids,
>  					int trace)
>  {
>  	int retval;
>  	struct task_struct *p;
>  	int cgroup_callbacks_done = 0;
> -	pid_t *target_pids = NULL;
>  
>  	if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
>  		return ERR_PTR(-EINVAL);
> @@ -1327,7 +1327,7 @@ struct task_struct * __cpuinit fork_idle(int cpu)
>  	struct pt_regs regs;
>  
>  	task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL,
> -			    &init_struct_pid, 0);
> +			    &init_struct_pid, NULL, 0);
>  	if (!IS_ERR(task))
>  		init_idle(task, cpu);
>  
> @@ -1350,6 +1350,7 @@ long do_fork(unsigned long clone_flags,
>  	struct task_struct *p;
>  	int trace = 0;
>  	long nr;
> +	pid_t *target_pids = NULL;
>  
>  	/*
>  	 * Do some preliminary argument and permissions checking before we
> @@ -1390,7 +1391,7 @@ long do_fork(unsigned long clone_flags,
>  		trace = tracehook_prepare_clone(clone_flags);
>  
>  	p = copy_process(clone_flags, stack_start, regs, stack_size,
> -			 child_tidptr, NULL, trace);
> +			 child_tidptr, NULL, target_pids, trace);
>  	/*
>  	 * Do this prior waking up the new thread - the thread pointer
>  	 * might get invalid after that point, if the thread exits quickly.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 7/7] [PATCH] Define clone_with_pids syscall
       [not found]     ` <20090527154507.GG3107-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
  2009-05-27 18:19       ` Serge E. Hallyn
@ 2009-05-27 19:53       ` Oren Laadan
       [not found]         ` <4A1D9A47.6030201-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
  1 sibling, 1 reply; 22+ messages in thread
From: Oren Laadan @ 2009-05-27 19:53 UTC (permalink / raw)
  To: Sukadev Bhattiprolu; +Cc: Containers, David C. Hansen



Sukadev Bhattiprolu wrote:
> From: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
> Date: Mon, 4 May 2009 01:17:45 -0700
> Subject: [PATCH 7/7] [PATCH] Define clone_with_pids syscall
> 
> clone_with_pids() is same as clone(), except that it takes a 'target_pid_set'
> paramter which lets caller choose a specific pid number for the child process
> in each of the child process's pid namespace. This system call would be needed
> to implement Checkpoint/Restart (i.e after a checkpoint, restart a process with
> its original pids).
> 
> Call clone_with_pids as follows:
> 
> 	pid_t pids[] = { 0, 77, 99 };
> 	struct target_pid_set pid_set;
> 
> 	pid_set.num_pids = sizeof(pids) / sizeof(int);
> 	pid_set.target_pids = &pids;
> 
> 	syscall(__NR_clone_with_pids, flags, stack, NULL, NULL, NULL, &pid_set);
> 
> If a target-pid is 0, the kernel continues to assign a pid for the process in
> that namespace. In the above example, pids[0] is 0, meaning the kernel will
> assign next available pid to the process in init_pid_ns. But kernel will assign
> pid 77 in the child pid namespace 1 and pid 99 in pid namespace 2. If either
> 77 or 99 are taken, the system call fails with -EBUSY.
> 
> If 'pid_set.num_pids' exceeds the current nesting level of pid namespaces,
> the system call fails with -EINVAL.
> 
> Its mostly an exploratory patch seeking feedback on the interface.
> 
> NOTE:
> 	Compared to clone(), clone_with_pids() needs to pass in two more
> 	pieces of information:
> 
> 		- number of pids in the set
> 		- user buffer containing the list of pids.
> 
> 	But since clone() already takes 5 parameters, use a 'struct
> 	target_pid_set'.
> 
> TODO:
> 	- Gently tested.
> 	- May need additional sanity checks in check_target_pids()
> 	- Allow CLONE_NEWPID() with clone_with_pids() (ensure target-pid in
> 	  the namespace is either 1 or 0).
> 
> Changelog[v1]:
> 	- Fixed some compile errors (had fixed these errors earlier in my
> 	  git tree but had not refreshed patches before emailing them)
> 
> Signed-off-by: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
> ---

Reviewed-by: Oren Laadan <orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>

but ...

[...]

> +static pid_t *copy_target_pids(unsigned long clone_flags, void __user *upid_setp)
> +{
> +	int rc;
> +	int size;
> +	pid_t __user *utarget_pids;
> +	pid_t *target_pids;
> +	struct target_pid_set pid_set;
> +
> +	if (copy_from_user(&pid_set, upid_setp, sizeof(pid_set)))
> +		return ERR_PTR(-EFAULT);
> +
> +	size = pid_set.num_pids * sizeof(pid_t);

...either test "pid_set.num_pids > 0" (and give -EINVAL),
or...

[...]

>  
> +struct target_pid_set {
> +	int num_pids;

... make this 'size_t' ?


> +	pid_t *target_pids;
> +};
> +
>  #endif	/* __KERNEL__ */
>  #endif /*  __ASSEMBLY__ */
>  #endif /* _LINUX_TYPES_H */

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 7/7] [PATCH] Define clone_with_pids syscall
       [not found]         ` <4A1D9A47.6030201-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
@ 2009-05-28  2:58           ` Sukadev Bhattiprolu
  0 siblings, 0 replies; 22+ messages in thread
From: Sukadev Bhattiprolu @ 2009-05-28  2:58 UTC (permalink / raw)
  To: Oren Laadan; +Cc: Containers, David C. Hansen

| > +	if (copy_from_user(&pid_set, upid_setp, sizeof(pid_set)))
| > +		return ERR_PTR(-EFAULT);
| > +
| > +	size = pid_set.num_pids * sizeof(pid_t);
| 
| ...either test "pid_set.num_pids > 0" (and give -EINVAL),
| or...

Good point. I now check for num_pids > 0 and treat num_pids == 0 as
normal clone().

While addressing this I realized I had a lot of arch-independent code
in arch/x86/kernel/process_32.c. I have now moved this common code to
kernel/fork.c. Its non-trivial code move, so need new review/acks from
you and Serge for at least patches 6 and 7.

Sukadev

^ permalink raw reply	[flat|nested] 22+ messages in thread

end of thread, other threads:[~2009-05-28  2:58 UTC | newest]

Thread overview: 22+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-05-27 15:42 [PATCH 1/7] [PATCH] Factor out code to allocate pidmap page Sukadev Bhattiprolu
     [not found] ` <20090527154212.GA3107-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2009-05-27 15:42   ` [PATCH 2/7] [PATCH] Have alloc_pidmap() return actual error code Sukadev Bhattiprolu
     [not found]     ` <20090527154259.GB3107-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2009-05-27 17:27       ` Dave Hansen
2009-05-27 17:43       ` Serge E. Hallyn
2009-05-27 19:37       ` Oren Laadan
2009-05-27 15:43   ` [PATCH 3/7] [PATCH] Add target_pid parameter to alloc_pidmap() Sukadev Bhattiprolu
     [not found]     ` <20090527154333.GC3107-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2009-05-27 17:56       ` Serge E. Hallyn
2009-05-27 15:43   ` [PATCH 4/7] [PATCH] Add target_pids parameter to alloc_pid() Sukadev Bhattiprolu
     [not found]     ` <20090527154355.GD3107-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2009-05-27 18:05       ` Serge E. Hallyn
2009-05-27 15:44   ` [PATCH 5/7] [PATCH] Add target_pids parameter to copy_process() Sukadev Bhattiprolu
     [not found]     ` <20090527154422.GE3107-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2009-05-27 18:05       ` Serge E. Hallyn
2009-05-27 19:41       ` Oren Laadan
2009-05-27 15:44   ` [PATCH 6/7] [PATCH] Define do_fork_with_pids() Sukadev Bhattiprolu
     [not found]     ` <20090527154443.GF3107-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2009-05-27 18:05       ` Serge E. Hallyn
2009-05-27 19:40       ` Oren Laadan
2009-05-27 15:45   ` [PATCH 7/7] [PATCH] Define clone_with_pids syscall Sukadev Bhattiprolu
     [not found]     ` <20090527154507.GG3107-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2009-05-27 18:19       ` Serge E. Hallyn
2009-05-27 19:53       ` Oren Laadan
     [not found]         ` <4A1D9A47.6030201-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
2009-05-28  2:58           ` Sukadev Bhattiprolu
2009-05-27 17:26   ` [PATCH 1/7] [PATCH] Factor out code to allocate pidmap page Dave Hansen
2009-05-27 17:48   ` Serge E. Hallyn
2009-05-27 19:31   ` Oren Laadan

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.