All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC][PATCH 1/7] Factor out code to allocate pidmap page
@ 2009-05-04  8:17 sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8
       [not found] ` <12414250653025-git-send-email-sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
  0 siblings, 1 reply; 16+ messages in thread
From: sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8 @ 2009-05-04  8:17 UTC (permalink / raw)
  To: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA
  Cc: Sukadev Bhattiprolu

From: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>


Signed-off-by: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
---
 kernel/pid.c |   43 ++++++++++++++++++++++++++++---------------
 1 files changed, 28 insertions(+), 15 deletions(-)

diff --git a/kernel/pid.c b/kernel/pid.c
index b2e5f78..c0aaebe 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -122,6 +122,31 @@ static void free_pidmap(struct upid *upid)
 	atomic_inc(&map->nr_free);
 }

+static int alloc_pidmap_page(struct pidmap *map)
+{
+	void *page;
+
+	if (likely(map->page))
+		return 0;
+
+	page = kzalloc(PAGE_SIZE, GFP_KERNEL);
+
+	/*
+	 * Free the page if someone raced with us installing it:
+	 */
+	spin_lock_irq(&pidmap_lock);
+	if (map->page)
+		kfree(page);
+	else
+		map->page = page;
+	spin_unlock_irq(&pidmap_lock);
+
+	if (unlikely(!map->page))
+		return -1;
+
+	return 0;
+}
+
 static int alloc_pidmap(struct pid_namespace *pid_ns)
 {
 	int i, offset, max_scan, pid, last = pid_ns->last_pid;
@@ -134,21 +159,9 @@ static int alloc_pidmap(struct pid_namespace *pid_ns)
 	map = &pid_ns->pidmap[pid/BITS_PER_PAGE];
 	max_scan = (pid_max + BITS_PER_PAGE - 1)/BITS_PER_PAGE - !offset;
 	for (i = 0; i <= max_scan; ++i) {
-		if (unlikely(!map->page)) {
-			void *page = kzalloc(PAGE_SIZE, GFP_KERNEL);
-			/*
-			 * Free the page if someone raced with us
-			 * installing it:
-			 */
-			spin_lock_irq(&pidmap_lock);
-			if (map->page)
-				kfree(page);
-			else
-				map->page = page;
-			spin_unlock_irq(&pidmap_lock);
-			if (unlikely(!map->page))
-				break;
-		}
+		if (alloc_pidmap_page(map))
+			break;
+
 		if (likely(atomic_read(&map->nr_free))) {
 			do {
 				if (!test_and_set_bit(offset, map->page)) {
-- 
1.5.2.5

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [RFC][PATCH 2/7] Have alloc_pidmap() return actual error code
       [not found] ` <12414250653025-git-send-email-sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
@ 2009-05-04  8:17   ` sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8
       [not found]     ` <1241425065670-git-send-email-sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
  2009-05-04  8:17   ` [RFC][PATCH 3/7] Add target_pid parameter to alloc_pidmap() sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8
                     ` (5 subsequent siblings)
  6 siblings, 1 reply; 16+ messages in thread
From: sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8 @ 2009-05-04  8:17 UTC (permalink / raw)
  To: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA
  Cc: Sukadev Bhattiprolu

From: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>

alloc_pidmap() can fail either because all pid numbers are in use or
we can't allocate memory. With support for setting a specific pid
number, alloc_pidmap() would also fail if either the given pid
number is invalid or in use.

Rather than have caller assume -ENOMEM, have alloc_pidmap() return
the actual error.

Signed-off-by: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
---
 kernel/fork.c |    5 +++--
 kernel/pid.c  |    9 ++++++---
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/kernel/fork.c b/kernel/fork.c
index b9e2edd..f8411a8 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1119,10 +1119,11 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 		goto bad_fork_cleanup_io;

 	if (pid != &init_struct_pid) {
-		retval = -ENOMEM;
 		pid = alloc_pid(p->nsproxy->pid_ns);
-		if (!pid)
+		if (IS_ERR(pid)) {
+			retval = PTR_ERR(pid);
 			goto bad_fork_cleanup_io;
+		}

 		if (clone_flags & CLONE_NEWPID) {
 			retval = pid_ns_prepare_proc(p->nsproxy->pid_ns);
diff --git a/kernel/pid.c b/kernel/pid.c
index c0aaebe..fd72ad9 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -151,6 +151,7 @@ static int alloc_pidmap(struct pid_namespace *pid_ns)
 {
 	int i, offset, max_scan, pid, last = pid_ns->last_pid;
 	struct pidmap *map;
+	int rc = -EAGAIN;

 	pid = last + 1;
 	if (pid >= pid_max)
@@ -159,8 +160,10 @@ static int alloc_pidmap(struct pid_namespace *pid_ns)
 	map = &pid_ns->pidmap[pid/BITS_PER_PAGE];
 	max_scan = (pid_max + BITS_PER_PAGE - 1)/BITS_PER_PAGE - !offset;
 	for (i = 0; i <= max_scan; ++i) {
-		if (alloc_pidmap_page(map))
+		if (alloc_pidmap_page(map)) {
+			rc = -ENOMEM;
 			break;
+		}

 		if (likely(atomic_read(&map->nr_free))) {
 			do {
@@ -192,7 +195,7 @@ static int alloc_pidmap(struct pid_namespace *pid_ns)
 		}
 		pid = mk_pid(pid_ns, map, offset);
 	}
-	return -1;
+	return rc;
 }

 int next_pidmap(struct pid_namespace *pid_ns, int last)
@@ -297,7 +300,7 @@ out_free:
 		free_pidmap(pid->numbers + i);

 	kmem_cache_free(ns->pid_cachep, pid);
-	pid = NULL;
+	pid = ERR_PTR(nr);
 	goto out;
 }

-- 
1.5.2.5

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [RFC][PATCH 3/7] Add target_pid parameter to alloc_pidmap()
       [not found] ` <12414250653025-git-send-email-sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
  2009-05-04  8:17   ` [RFC][PATCH 2/7] Have alloc_pidmap() return actual error code sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8
@ 2009-05-04  8:17   ` sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8
       [not found]     ` <12414250651744-git-send-email-sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
  2009-05-04  8:17   ` [RFC][PATCH 4/7] Add target_pids parameter to alloc_pid() sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8
                     ` (4 subsequent siblings)
  6 siblings, 1 reply; 16+ messages in thread
From: sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8 @ 2009-05-04  8:17 UTC (permalink / raw)
  To: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA
  Cc: Sukadev Bhattiprolu

From: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>


Signed-off-by: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
---
 kernel/pid.c |   28 ++++++++++++++++++++++++++--
 1 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/kernel/pid.c b/kernel/pid.c
index fd72ad9..93406c6 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -147,12 +147,36 @@ static int alloc_pidmap_page(struct pidmap *map)
 	return 0;
 }

-static int alloc_pidmap(struct pid_namespace *pid_ns)
+static int set_pidmap(struct pid_namespace *pid_ns, int pid)
+{
+	int offset;
+	struct pidmap *map;
+
+	if (pid >= pid_max)
+		return -EINVAL;
+
+	offset = pid & BITS_PER_PAGE_MASK;
+	map = &pid_ns->pidmap[pid/BITS_PER_PAGE];
+
+	if (alloc_pidmap_page(map))
+		return -ENOMEM;
+
+	if (test_and_set_bit(offset, map->page))
+		return -EBUSY;
+
+	atomic_dec(&map->nr_free);
+	return pid;
+}
+
+static int alloc_pidmap(struct pid_namespace *pid_ns, int target_pid)
 {
 	int i, offset, max_scan, pid, last = pid_ns->last_pid;
 	struct pidmap *map;
 	int rc = -EAGAIN;

+	if (target_pid)
+		return set_pidmap(pid_ns, target_pid);
+
 	pid = last + 1;
 	if (pid >= pid_max)
 		pid = RESERVED_PIDS;
@@ -269,7 +293,7 @@ struct pid *alloc_pid(struct pid_namespace *ns)

 	tmp = ns;
 	for (i = ns->level; i >= 0; i--) {
-		nr = alloc_pidmap(tmp);
+		nr = alloc_pidmap(tmp, 0);
 		if (nr < 0)
 			goto out_free;

-- 
1.5.2.5

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [RFC][PATCH 4/7] Add target_pids parameter to alloc_pid()
       [not found] ` <12414250653025-git-send-email-sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
  2009-05-04  8:17   ` [RFC][PATCH 2/7] Have alloc_pidmap() return actual error code sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8
  2009-05-04  8:17   ` [RFC][PATCH 3/7] Add target_pid parameter to alloc_pidmap() sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8
@ 2009-05-04  8:17   ` sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8
  2009-05-04  8:17   ` [RFC][PATCH 5/7] Add target_pids parameter to copy_process() sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8
                     ` (3 subsequent siblings)
  6 siblings, 0 replies; 16+ messages in thread
From: sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8 @ 2009-05-04  8:17 UTC (permalink / raw)
  To: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA
  Cc: Sukadev Bhattiprolu

From: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>

This parameter is currently NULL, but will be used in a follow-on patch.

Signed-off-by: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
---
 include/linux/pid.h |    2 +-
 kernel/fork.c       |    3 ++-
 kernel/pid.c        |    9 +++++++--
 3 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/include/linux/pid.h b/include/linux/pid.h
index 49f1c2f..914185d 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -119,7 +119,7 @@ extern struct pid *find_get_pid(int nr);
 extern struct pid *find_ge_pid(int nr, struct pid_namespace *);
 int next_pidmap(struct pid_namespace *pid_ns, int last);

-extern struct pid *alloc_pid(struct pid_namespace *ns);
+extern struct pid *alloc_pid(struct pid_namespace *ns, pid_t *target_pids);
 extern void free_pid(struct pid *pid);

 /*
diff --git a/kernel/fork.c b/kernel/fork.c
index f8411a8..d2d69d3 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -954,6 +954,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	int retval;
 	struct task_struct *p;
 	int cgroup_callbacks_done = 0;
+	pid_t *target_pids = NULL;

 	if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
 		return ERR_PTR(-EINVAL);
@@ -1119,7 +1120,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 		goto bad_fork_cleanup_io;

 	if (pid != &init_struct_pid) {
-		pid = alloc_pid(p->nsproxy->pid_ns);
+		pid = alloc_pid(p->nsproxy->pid_ns, target_pids);
 		if (IS_ERR(pid)) {
 			retval = PTR_ERR(pid);
 			goto bad_fork_cleanup_io;
diff --git a/kernel/pid.c b/kernel/pid.c
index 93406c6..4b2373a 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -279,13 +279,14 @@ void free_pid(struct pid *pid)
 	call_rcu(&pid->rcu, delayed_put_pid);
 }

-struct pid *alloc_pid(struct pid_namespace *ns)
+struct pid *alloc_pid(struct pid_namespace *ns, pid_t *target_pids)
 {
 	struct pid *pid;
 	enum pid_type type;
 	int i, nr;
 	struct pid_namespace *tmp;
 	struct upid *upid;
+	int tpid;

 	pid = kmem_cache_alloc(ns->pid_cachep, GFP_KERNEL);
 	if (!pid)
@@ -293,7 +294,11 @@ struct pid *alloc_pid(struct pid_namespace *ns)

 	tmp = ns;
 	for (i = ns->level; i >= 0; i--) {
-		nr = alloc_pidmap(tmp, 0);
+		tpid = 0;
+		if (target_pids)
+			tpid = target_pids[i];
+
+		nr = alloc_pidmap(tmp, tpid);
 		if (nr < 0)
 			goto out_free;

-- 
1.5.2.5

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [RFC][PATCH 5/7] Add target_pids parameter to copy_process()
       [not found] ` <12414250653025-git-send-email-sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
                     ` (2 preceding siblings ...)
  2009-05-04  8:17   ` [RFC][PATCH 4/7] Add target_pids parameter to alloc_pid() sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8
@ 2009-05-04  8:17   ` sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8
  2009-05-04  8:17   ` [RFC][PATCH 6/7] Define do_fork_with_pids() sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8
                     ` (2 subsequent siblings)
  6 siblings, 0 replies; 16+ messages in thread
From: sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8 @ 2009-05-04  8:17 UTC (permalink / raw)
  To: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA
  Cc: Sukadev Bhattiprolu

From: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>

The new parameter will be used in a follow-on patch when clone_with_pids()
is implemented.

Signed-off-by: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
---
 kernel/fork.c |    7 ++++---
 1 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/kernel/fork.c b/kernel/fork.c
index d2d69d3..373411e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -949,12 +949,12 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 					unsigned long stack_size,
 					int __user *child_tidptr,
 					struct pid *pid,
+					pid_t *target_pids,
 					int trace)
 {
 	int retval;
 	struct task_struct *p;
 	int cgroup_callbacks_done = 0;
-	pid_t *target_pids = NULL;

 	if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
 		return ERR_PTR(-EINVAL);
@@ -1327,7 +1327,7 @@ struct task_struct * __cpuinit fork_idle(int cpu)
 	struct pt_regs regs;

 	task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL,
-			    &init_struct_pid, 0);
+			    &init_struct_pid, NULL, 0);
 	if (!IS_ERR(task))
 		init_idle(task, cpu);

@@ -1350,6 +1350,7 @@ long do_fork(unsigned long clone_flags,
 	struct task_struct *p;
 	int trace = 0;
 	long nr;
+	pid_t *target_pids = NULL;

 	/*
 	 * Do some preliminary argument and permissions checking before we
@@ -1390,7 +1391,7 @@ long do_fork(unsigned long clone_flags,
 		trace = tracehook_prepare_clone(clone_flags);

 	p = copy_process(clone_flags, stack_start, regs, stack_size,
-			 child_tidptr, NULL, trace);
+			 child_tidptr, NULL, target_pids, trace);
 	/*
 	 * Do this prior waking up the new thread - the thread pointer
 	 * might get invalid after that point, if the thread exits quickly.
-- 
1.5.2.5

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [RFC][PATCH 6/7] Define do_fork_with_pids()
       [not found] ` <12414250653025-git-send-email-sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
                     ` (3 preceding siblings ...)
  2009-05-04  8:17   ` [RFC][PATCH 5/7] Add target_pids parameter to copy_process() sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8
@ 2009-05-04  8:17   ` sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8
       [not found]     ` <12414250652549-git-send-email-sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
  2009-05-04  8:17   ` [RFC][PATCH 7/7] Define clone_with_pids syscall sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8
  2009-05-04 18:20   ` [RFC][PATCH 1/7] Factor out code to allocate pidmap page Matt Helsley
  6 siblings, 1 reply; 16+ messages in thread
From: sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8 @ 2009-05-04  8:17 UTC (permalink / raw)
  To: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA
  Cc: Sukadev Bhattiprolu

From: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>

do_fork_with_pids() is same as do_fork(), except that it takes an
additional, target_pids, parameter. This parameter, currently unused,
specifies the target_pids of the process in each of its pid namespaces.

Signed-off-by: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
---
 include/linux/sched.h |    1 +
 kernel/fork.c         |   17 ++++++++++++++---
 2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index b4c38bc..2173df1 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1995,6 +1995,7 @@ extern int disallow_signal(int);

 extern int do_execve(char *, char __user * __user *, char __user * __user *, struct pt_regs *);
 extern long do_fork(unsigned long, unsigned long, struct pt_regs *, unsigned long, int __user *, int __user *);
+extern long do_fork_with_pids(unsigned long, unsigned long, struct pt_regs *, unsigned long, int __user *, int __user *, pid_t *target_pids);
 struct task_struct *fork_idle(int);

 extern void set_task_comm(struct task_struct *tsk, char *from);
diff --git a/kernel/fork.c b/kernel/fork.c
index 373411e..912d008 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1340,17 +1340,17 @@ struct task_struct * __cpuinit fork_idle(int cpu)
  * It copies the process, and if successful kick-starts
  * it and waits for it to finish using the VM if required.
  */
-long do_fork(unsigned long clone_flags,
+long do_fork_with_pids(unsigned long clone_flags,
 	      unsigned long stack_start,
 	      struct pt_regs *regs,
 	      unsigned long stack_size,
 	      int __user *parent_tidptr,
-	      int __user *child_tidptr)
+	      int __user *child_tidptr,
+	      pid_t *target_pids)
 {
 	struct task_struct *p;
 	int trace = 0;
 	long nr;
-	pid_t *target_pids = NULL;

 	/*
 	 * Do some preliminary argument and permissions checking before we
@@ -1448,6 +1448,17 @@ long do_fork(unsigned long clone_flags,
 	return nr;
 }

+long do_fork(unsigned long clone_flags,
+	      unsigned long stack_start,
+	      struct pt_regs *regs,
+	      unsigned long stack_size,
+	      int __user *parent_tidptr,
+	      int __user *child_tidptr)
+{
+	return do_fork_with_pids(clone_flags, stack_start, regs, stack_size,
+			parent_tidptr, child_tidptr, NULL);
+}
+
 #ifndef ARCH_MIN_MMSTRUCT_ALIGN
 #define ARCH_MIN_MMSTRUCT_ALIGN 0
 #endif
-- 
1.5.2.5

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [RFC][PATCH 7/7] Define clone_with_pids syscall
       [not found] ` <12414250653025-git-send-email-sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
                     ` (4 preceding siblings ...)
  2009-05-04  8:17   ` [RFC][PATCH 6/7] Define do_fork_with_pids() sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8
@ 2009-05-04  8:17   ` sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8
  2009-05-04 18:20   ` [RFC][PATCH 1/7] Factor out code to allocate pidmap page Matt Helsley
  6 siblings, 0 replies; 16+ messages in thread
From: sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8 @ 2009-05-04  8:17 UTC (permalink / raw)
  To: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA
  Cc: Sukadev Bhattiprolu

From: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>

clone_with_pids() is same as clone(), except that it takes a 'target_pid_set'
paramter which lets caller choose a specific pid number for the child process
in each of the child process's pid namespace. This system call would be needed
to implement Checkpoint/Restart (i.e after a checkpoint, restart a process with
its original pids).

Call clone_with_pids as follows:

	pid_t pids[] = { 0, 77, 99 };
	struct target_pid_set pid_set;

	pid_set.num_pids = sizeof(pids) / sizeof(int);
	pid_set.target_pids = &pids;

	syscall(__NR_clone_with_pids, flags, stack, NULL, NULL, NULL, &pid_set);

If a target-pid is 0, the kernel continues to assign a pid for the process in
that namespace. In the above example, pids[0] is 0, meaning the kernel will
assign next available pid to the process in init_pid_ns. But kernel will assign
pid 77 in the child pid namespace 1 and pid 99 in pid namespace 2. If either
77 or 99 are taken, the system call fails with -EBUSY.

If 'pid_set.num_pids' exceeds the current nesting level of pid namespaces,
the system call fails with -EINVAL.

Its mostly an exploratory patch seeking feedback on the interface.

NOTE:
	Compared to clone(), clone_with_pids() needs to pass in two more
	pieces of information:

		- number of pids in the set
		- user buffer containing the list of pids.

	But since clone() already takes 5 parameters, use a 'struct
	target_pid_set'.

TODO:
	- Gently tested.
	- May need additional sanity checks in check_target_pids()
	- Allow CLONE_NEWPID() with clone_with_pids() (ensure target-pid in
	  the namespace is either 1 or 0).

Signed-off-by: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
---
 arch/x86/include/asm/syscalls.h    |    1 +
 arch/x86/include/asm/unistd_32.h   |    1 +
 arch/x86/kernel/entry_32.S         |    1 +
 arch/x86/kernel/process_32.c       |   91 ++++++++++++++++++++++++++++++++++++
 arch/x86/kernel/syscall_table_32.S |    1 +
 include/linux/types.h              |    5 ++
 6 files changed, 100 insertions(+), 0 deletions(-)

diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h
index 7043408..1fdc149 100644
--- a/arch/x86/include/asm/syscalls.h
+++ b/arch/x86/include/asm/syscalls.h
@@ -31,6 +31,7 @@ asmlinkage int sys_get_thread_area(struct user_desc __user *);
 /* kernel/process_32.c */
 int sys_fork(struct pt_regs *);
 int sys_clone(struct pt_regs *);
+int sys_clone_with_pids(struct pt_regs *);
 int sys_vfork(struct pt_regs *);
 int sys_execve(struct pt_regs *);

diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h
index 6e72d74..90f906f 100644
--- a/arch/x86/include/asm/unistd_32.h
+++ b/arch/x86/include/asm/unistd_32.h
@@ -340,6 +340,7 @@
 #define __NR_inotify_init1	332
 #define __NR_preadv		333
 #define __NR_pwritev		334
+#define __NR_clone_with_pids	335

 #ifdef __KERNEL__

diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index c929add..ee92b0d 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -707,6 +707,7 @@ ptregs_##name: \
 PTREGSCALL(iopl)
 PTREGSCALL(fork)
 PTREGSCALL(clone)
+PTREGSCALL(clone_with_pids)
 PTREGSCALL(vfork)
 PTREGSCALL(execve)
 PTREGSCALL(sigaltstack)
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 76f8f84..66ac6f7 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -445,6 +445,97 @@ int sys_clone(struct pt_regs *regs)
 	return do_fork(clone_flags, newsp, regs, 0, parent_tidptr, child_tidptr);
 }

+static int check_target_pids(unsigned long clone_flags,
+		struct target_pid_set *pid_setp)
+{
+	/*
+	 * CLONE_NEWPID implies pid == 1
+	 *
+	 * TODO: Maybe this should be more fine-grained (i.e would we want
+	 *  	 to have a container-init have a specific pid in ancestor
+	 *  	 namespaces ?)
+	 */
+	if (clone_flags & CLONE_NEWPID)
+		return -EINVAL;
+
+	/* number of pids must match current nesting level of pid ns */
+	if (pid_setp->num_pids > task_pid(current)->level + 1)
+		return -EINVAL;
+
+	/* TODO: More sanity checks ?  */
+
+	return 0;
+}
+
+static pid_t *copy_target_pids(unsigned long clone_flags, void __user *upid_setp)
+{
+	int rc;
+	int size;
+	unsigned long clone_flags;
+	pid_t __user *utarget_pids;
+	pid_t *target_pids;
+	struct target_pid_set pid_set;
+
+	if (copy_from_user(pid_setp, upid_setp, sizeof(*pid_setp)))
+		return ERR_PTR(-EFAULT);
+
+	size = pid_setp->num_pids * sizeof(pid_t);
+	utarget_pids = pid_setp->target_pids;
+
+	target_pids = kzalloc(size, GFP_KERNEL);
+	if (!target_pids)
+		return ERR_PTR(-ENOMEM);
+
+	rc = -EFAULT;
+	if (copy_from_user(target_pids, utarget_pids, size))
+		goto out_free;
+
+	rc = check_target_pids(clone_flags, &pid_set);
+	if (rc)
+		goto out_free;
+
+	printk(KERN_ERR "clone_with_pids() num_pids %d, [ %d, %d ]\n",
+			pid_set.num_pids, target_pids[0], target_pids[1]);
+
+	return target_pids;
+
+out_free:
+	kfree(target_pids);
+	return ERR_PTR(rc);
+}
+
+int sys_clone_with_pids(struct pt_regs *regs)
+{
+	unsigned long clone_flags;
+	unsigned long newsp;
+	int __user *parent_tidptr;
+	int __user *child_tidptr;
+	void __user *upid_setp;
+	int rc;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	clone_flags = regs->bx;
+	newsp = regs->cx;
+	parent_tidptr = (int __user *)regs->dx;
+	child_tidptr = (int __user *)regs->di;
+	upid_setp = (void __user *)regs->bp;
+
+	if (!newsp)
+		newsp = regs->sp;
+
+	target_pids = copy_target_pids(clone_flags, upid_setp)
+	if (IS_ERR(target_pids))
+		return PTR_ERR(target_pids);
+
+	rc = do_fork_with_pids(clone_flags, newsp, regs, 0, parent_tidptr,
+			child_tidptr, target_pids);
+out_free:
+	kfree(target_pids);
+	return rc;
+}
+
 /*
  * sys_execve() executes a new program.
  */
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index ff5c873..94c1a58 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -334,3 +334,4 @@ ENTRY(sys_call_table)
 	.long sys_inotify_init1
 	.long sys_preadv
 	.long sys_pwritev
+	.long ptregs_clone_with_pids	/* 335 */
diff --git a/include/linux/types.h b/include/linux/types.h
index 5abe354..17ec186 100644
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -204,6 +204,11 @@ struct ustat {
 	char			f_fpack[6];
 };

+struct target_pid_set {
+	int num_pids;
+	pid_t *target_pids;
+};
+
 #endif	/* __KERNEL__ */
 #endif /*  __ASSEMBLY__ */
 #endif /* _LINUX_TYPES_H */
-- 
1.5.2.5

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [RFC][PATCH 2/7] Have alloc_pidmap() return actual error code
       [not found]     ` <1241425065670-git-send-email-sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
@ 2009-05-04 18:11       ` Matt Helsley
       [not found]         ` <20090504181111.GM11734-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
  0 siblings, 1 reply; 16+ messages in thread
From: Matt Helsley @ 2009-05-04 18:11 UTC (permalink / raw)
  To: sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8
  Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA

On Mon, May 04, 2009 at 01:17:40AM -0700, sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org wrote:
> From: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
> 
> alloc_pidmap() can fail either because all pid numbers are in use or
> we can't allocate memory. With support for setting a specific pid
> number, alloc_pidmap() would also fail if either the given pid
> number is invalid or in use.
> 
> Rather than have caller assume -ENOMEM, have alloc_pidmap() return
> the actual error.

I think generating actual "errors" should be moved into alloc_pidmap_page().

> 
> Signed-off-by: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
> ---
>  kernel/fork.c |    5 +++--
>  kernel/pid.c  |    9 ++++++---
>  2 files changed, 9 insertions(+), 5 deletions(-)
> 
> diff --git a/kernel/fork.c b/kernel/fork.c
> index b9e2edd..f8411a8 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -1119,10 +1119,11 @@ static struct task_struct *copy_process(unsigned long clone_flags,
>  		goto bad_fork_cleanup_io;
> 
>  	if (pid != &init_struct_pid) {
> -		retval = -ENOMEM;
>  		pid = alloc_pid(p->nsproxy->pid_ns);
> -		if (!pid)
> +		if (IS_ERR(pid)) {
> +			retval = PTR_ERR(pid);
>  			goto bad_fork_cleanup_io;
> +		}
> 
>  		if (clone_flags & CLONE_NEWPID) {
>  			retval = pid_ns_prepare_proc(p->nsproxy->pid_ns);
> diff --git a/kernel/pid.c b/kernel/pid.c
> index c0aaebe..fd72ad9 100644
> --- a/kernel/pid.c
> +++ b/kernel/pid.c
> @@ -151,6 +151,7 @@ static int alloc_pidmap(struct pid_namespace *pid_ns)
>  {
>  	int i, offset, max_scan, pid, last = pid_ns->last_pid;
>  	struct pidmap *map;
> +	int rc = -EAGAIN;
> 
>  	pid = last + 1;
>  	if (pid >= pid_max)
> @@ -159,8 +160,10 @@ static int alloc_pidmap(struct pid_namespace *pid_ns)
>  	map = &pid_ns->pidmap[pid/BITS_PER_PAGE];
>  	max_scan = (pid_max + BITS_PER_PAGE - 1)/BITS_PER_PAGE - !offset;
>  	for (i = 0; i <= max_scan; ++i) {
> -		if (alloc_pidmap_page(map))
> +		if (alloc_pidmap_page(map)) {
> +			rc = -ENOMEM;

You could return -ENOMEM from alloc_pidmap_page(map) and then just:

	rc = alloc_pidmap_page(map);
	if (rc)
		break;

ENOMEM here is not necessarily correct -- you could have alloc'd pages just fine
and failed due to the last race-checking if () in alloc_pidmap_page(). All the
more reason to move generation of the error to alloc_pidmap_page() IMHO.

Cheers,
	-Matt Helsley

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [RFC][PATCH 1/7] Factor out code to allocate pidmap page
       [not found] ` <12414250653025-git-send-email-sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
                     ` (5 preceding siblings ...)
  2009-05-04  8:17   ` [RFC][PATCH 7/7] Define clone_with_pids syscall sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8
@ 2009-05-04 18:20   ` Matt Helsley
       [not found]     ` <20090504182059.GN11734-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
  6 siblings, 1 reply; 16+ messages in thread
From: Matt Helsley @ 2009-05-04 18:20 UTC (permalink / raw)
  To: sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8
  Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA

On Mon, May 04, 2009 at 01:17:39AM -0700, sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org wrote:
> From: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
> 
> 
> Signed-off-by: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
> ---
>  kernel/pid.c |   43 ++++++++++++++++++++++++++++---------------
>  1 files changed, 28 insertions(+), 15 deletions(-)
> 
> diff --git a/kernel/pid.c b/kernel/pid.c
> index b2e5f78..c0aaebe 100644
> --- a/kernel/pid.c
> +++ b/kernel/pid.c
> @@ -122,6 +122,31 @@ static void free_pidmap(struct upid *upid)
>  	atomic_inc(&map->nr_free);
>  }
> 
> +static int alloc_pidmap_page(struct pidmap *map)
> +{
> +	void *page;
> +
> +	if (likely(map->page))
> +		return 0;
> +
> +	page = kzalloc(PAGE_SIZE, GFP_KERNEL);
> +
> +	/*
> +	 * Free the page if someone raced with us installing it:
> +	 */
> +	spin_lock_irq(&pidmap_lock);
> +	if (map->page)
> +		kfree(page);
> +	else
> +		map->page = page;
> +	spin_unlock_irq(&pidmap_lock);
> +
> +	if (unlikely(!map->page))
> +		return -1;
> +
> +	return 0;
> +}
> +
>  static int alloc_pidmap(struct pid_namespace *pid_ns)
>  {
>  	int i, offset, max_scan, pid, last = pid_ns->last_pid;
> @@ -134,21 +159,9 @@ static int alloc_pidmap(struct pid_namespace *pid_ns)
>  	map = &pid_ns->pidmap[pid/BITS_PER_PAGE];
>  	max_scan = (pid_max + BITS_PER_PAGE - 1)/BITS_PER_PAGE - !offset;
>  	for (i = 0; i <= max_scan; ++i) {
> -		if (unlikely(!map->page)) {
> -			void *page = kzalloc(PAGE_SIZE, GFP_KERNEL);
> -			/*
> -			 * Free the page if someone raced with us
> -			 * installing it:
> -			 */
> -			spin_lock_irq(&pidmap_lock);
> -			if (map->page)
> -				kfree(page);
> -			else
> -				map->page = page;
> -			spin_unlock_irq(&pidmap_lock);
> -			if (unlikely(!map->page))
> -				break;

OK, I'm having trouble not with your patch but the original code. This
test of map->page outside the spinlock looks like an incorrect fix to a
race. If map->page can be NULL right after we release the lock then it
can become NULL after this test just as easily.

> -		}
> +		if (alloc_pidmap_page(map))
> +			break;
> +
>  		if (likely(atomic_read(&map->nr_free))) {
>  			do {
>  				if (!test_and_set_bit(offset, map->page)) {

				kaboom??

Perhaps I'm just not familiar enough with the code and I'm just seeing
races where there are none... in which case perhaps a comment hinting at
the answers would be good.

Cheers,
	-Matt Helsley

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [RFC][PATCH 6/7] Define do_fork_with_pids()
       [not found]     ` <12414250652549-git-send-email-sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
@ 2009-05-04 18:26       ` Matt Helsley
       [not found]         ` <20090504182621.GO11734-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
  0 siblings, 1 reply; 16+ messages in thread
From: Matt Helsley @ 2009-05-04 18:26 UTC (permalink / raw)
  To: sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8
  Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA

On Mon, May 04, 2009 at 01:17:44AM -0700, sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org wrote:
> From: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
> 
> do_fork_with_pids() is same as do_fork(), except that it takes an
> additional, target_pids, parameter. This parameter, currently unused,
> specifies the target_pids of the process in each of its pid namespaces.
> 
> Signed-off-by: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
> ---
>  include/linux/sched.h |    1 +
>  kernel/fork.c         |   17 ++++++++++++++---
>  2 files changed, 15 insertions(+), 3 deletions(-)
> 
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index b4c38bc..2173df1 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -1995,6 +1995,7 @@ extern int disallow_signal(int);
> 
>  extern int do_execve(char *, char __user * __user *, char __user * __user *, struct pt_regs *);
>  extern long do_fork(unsigned long, unsigned long, struct pt_regs *, unsigned long, int __user *, int __user *);
> +extern long do_fork_with_pids(unsigned long, unsigned long, struct pt_regs *, unsigned long, int __user *, int __user *, pid_t *target_pids);
>  struct task_struct *fork_idle(int);
> 
>  extern void set_task_comm(struct task_struct *tsk, char *from);
> diff --git a/kernel/fork.c b/kernel/fork.c
> index 373411e..912d008 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -1340,17 +1340,17 @@ struct task_struct * __cpuinit fork_idle(int cpu)
>   * It copies the process, and if successful kick-starts
>   * it and waits for it to finish using the VM if required.
>   */
> -long do_fork(unsigned long clone_flags,
> +long do_fork_with_pids(unsigned long clone_flags,
>  	      unsigned long stack_start,
>  	      struct pt_regs *regs,
>  	      unsigned long stack_size,
>  	      int __user *parent_tidptr,
> -	      int __user *child_tidptr)
> +	      int __user *child_tidptr,
> +	      pid_t *target_pids)
>  {
>  	struct task_struct *p;
>  	int trace = 0;
>  	long nr;
> -	pid_t *target_pids = NULL;
> 
>  	/*
>  	 * Do some preliminary argument and permissions checking before we
> @@ -1448,6 +1448,17 @@ long do_fork(unsigned long clone_flags,
>  	return nr;
>  }
> 
> +long do_fork(unsigned long clone_flags,
> +	      unsigned long stack_start,
> +	      struct pt_regs *regs,
> +	      unsigned long stack_size,
> +	      int __user *parent_tidptr,
> +	      int __user *child_tidptr)
> +{
> +	return do_fork_with_pids(clone_flags, stack_start, regs, stack_size,
> +			parent_tidptr, child_tidptr, NULL);
> +}
> +

Would it be better to add the pid parameters to do_fork() and adjust its callers
instead?

Cheers,
	-Matt Helsley

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [RFC][PATCH 1/7] Factor out code to allocate pidmap page
       [not found]     ` <20090504182059.GN11734-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
@ 2009-05-04 18:27       ` Sukadev Bhattiprolu
  0 siblings, 0 replies; 16+ messages in thread
From: Sukadev Bhattiprolu @ 2009-05-04 18:27 UTC (permalink / raw)
  To: Matt Helsley; +Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA

| OK, I'm having trouble not with your patch but the original code. This
| test of map->page outside the spinlock looks like an incorrect fix to a
| race. If map->page can be NULL right after we release the lock then it
| can become NULL after this test just as easily.

The map->page is not freed once allocated (free_pidmap() does not free
the page). I guess the point is if you exceeded 32K pids (one page worth
of pids) once, you would exceed it again; so no point freeing the page
and reallocating the next time.  

Sukadev

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [RFC][PATCH 2/7] Have alloc_pidmap() return actual error code
       [not found]         ` <20090504181111.GM11734-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
@ 2009-05-04 18:29           ` Sukadev Bhattiprolu
  0 siblings, 0 replies; 16+ messages in thread
From: Sukadev Bhattiprolu @ 2009-05-04 18:29 UTC (permalink / raw)
  To: Matt Helsley; +Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA

Matt Helsley [matthltc-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org] wrote:
| > @@ -159,8 +160,10 @@ static int alloc_pidmap(struct pid_namespace *pid_ns)
| >  	map = &pid_ns->pidmap[pid/BITS_PER_PAGE];
| >  	max_scan = (pid_max + BITS_PER_PAGE - 1)/BITS_PER_PAGE - !offset;
| >  	for (i = 0; i <= max_scan; ++i) {
| > -		if (alloc_pidmap_page(map))
| > +		if (alloc_pidmap_page(map)) {
| > +			rc = -ENOMEM;
| 
| You could return -ENOMEM from alloc_pidmap_page(map) and then just:
| 
| 	rc = alloc_pidmap_page(map);
| 	if (rc)
| 		break;

Sure. alloc_pidmap_page() can only fail for one reason so I let it stay,
but we might as well return -ENOMEM.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [RFC][PATCH 6/7] Define do_fork_with_pids()
       [not found]         ` <20090504182621.GO11734-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
@ 2009-05-04 18:32           ` Sukadev Bhattiprolu
  0 siblings, 0 replies; 16+ messages in thread
From: Sukadev Bhattiprolu @ 2009-05-04 18:32 UTC (permalink / raw)
  To: Matt Helsley; +Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA

| > +long do_fork(unsigned long clone_flags,
| > +	      unsigned long stack_start,
| > +	      struct pt_regs *regs,
| > +	      unsigned long stack_size,
| > +	      int __user *parent_tidptr,
| > +	      int __user *child_tidptr)
| > +{
| > +	return do_fork_with_pids(clone_flags, stack_start, regs, stack_size,
| > +			parent_tidptr, child_tidptr, NULL);
| > +}
| > +
| 
| Would it be better to add the pid parameters to do_fork() and adjust its callers
| instead?

A quick count showed 70 callers of do_fork() :-) hence the short-cut - at
least for the current version of the patchset.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [RFC][PATCH 3/7] Add target_pid parameter to alloc_pidmap()
       [not found]     ` <12414250651744-git-send-email-sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
@ 2009-05-04 20:03       ` Serge E. Hallyn
       [not found]         ` <20090504200318.GA29491-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
  0 siblings, 1 reply; 16+ messages in thread
From: Serge E. Hallyn @ 2009-05-04 20:03 UTC (permalink / raw)
  To: sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8
  Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA

Quoting sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org (sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org):
> From: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
> 
> 
> Signed-off-by: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
> ---
>  kernel/pid.c |   28 ++++++++++++++++++++++++++--
>  1 files changed, 26 insertions(+), 2 deletions(-)
> 
> diff --git a/kernel/pid.c b/kernel/pid.c
> index fd72ad9..93406c6 100644
> --- a/kernel/pid.c
> +++ b/kernel/pid.c
> @@ -147,12 +147,36 @@ static int alloc_pidmap_page(struct pidmap *map)
>  	return 0;
>  }
> 
> -static int alloc_pidmap(struct pid_namespace *pid_ns)
> +static int set_pidmap(struct pid_namespace *pid_ns, int pid)
> +{
> +	int offset;
> +	struct pidmap *map;
> +
> +	if (pid >= pid_max)
> +		return -EINVAL;
> +
> +	offset = pid & BITS_PER_PAGE_MASK;
> +	map = &pid_ns->pidmap[pid/BITS_PER_PAGE];
> +
> +	if (alloc_pidmap_page(map))
> +		return -ENOMEM;
> +
> +	if (test_and_set_bit(offset, map->page))
> +		return -EBUSY;
> +
> +	atomic_dec(&map->nr_free);
> +	return pid;
> +}
> +
> +static int alloc_pidmap(struct pid_namespace *pid_ns, int target_pid)
>  {
>  	int i, offset, max_scan, pid, last = pid_ns->last_pid;
>  	struct pidmap *map;
>  	int rc = -EAGAIN;
> 
> +	if (target_pid)
> +		return set_pidmap(pid_ns, target_pid);

I think this whole patchset is still NACKed until you tag
pid_namespaces with a creator uid, and ensure that
current_uid()==pid_ns->creator_uid() at each level where
the caller is specifying a pid.

I don't see that in this set.

OTOH, your approach of pulling alloc_pidmap_page() out of
alloc_pidmap() and re-using it may be what Eric wanted to
see.

-serge

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [RFC][PATCH 3/7] Add target_pid parameter to alloc_pidmap()
       [not found]         ` <20090504200318.GA29491-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
@ 2009-05-05 22:23           ` Sukadev Bhattiprolu
       [not found]             ` <20090505222342.GB20515-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
  0 siblings, 1 reply; 16+ messages in thread
From: Sukadev Bhattiprolu @ 2009-05-05 22:23 UTC (permalink / raw)
  To: Serge E. Hallyn; +Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA

Serge E. Hallyn [serue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org] wrote:
| Quoting sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org (sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org):
| > From: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
| > 
| > 
| > Signed-off-by: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
| > ---
| >  kernel/pid.c |   28 ++++++++++++++++++++++++++--
| >  1 files changed, 26 insertions(+), 2 deletions(-)
| > 
| > diff --git a/kernel/pid.c b/kernel/pid.c
| > index fd72ad9..93406c6 100644
| > --- a/kernel/pid.c
| > +++ b/kernel/pid.c
| > @@ -147,12 +147,36 @@ static int alloc_pidmap_page(struct pidmap *map)
| >  	return 0;
| >  }
| > 
| > -static int alloc_pidmap(struct pid_namespace *pid_ns)
| > +static int set_pidmap(struct pid_namespace *pid_ns, int pid)
| > +{
| > +	int offset;
| > +	struct pidmap *map;
| > +
| > +	if (pid >= pid_max)
| > +		return -EINVAL;
| > +
| > +	offset = pid & BITS_PER_PAGE_MASK;
| > +	map = &pid_ns->pidmap[pid/BITS_PER_PAGE];
| > +
| > +	if (alloc_pidmap_page(map))
| > +		return -ENOMEM;
| > +
| > +	if (test_and_set_bit(offset, map->page))
| > +		return -EBUSY;
| > +
| > +	atomic_dec(&map->nr_free);
| > +	return pid;
| > +}
| > +
| > +static int alloc_pidmap(struct pid_namespace *pid_ns, int target_pid)
| >  {
| >  	int i, offset, max_scan, pid, last = pid_ns->last_pid;
| >  	struct pidmap *map;
| >  	int rc = -EAGAIN;
| > 
| > +	if (target_pid)
| > +		return set_pidmap(pid_ns, target_pid);
| 
| I think this whole patchset is still NACKed until you tag
| pid_namespaces with a creator uid, and ensure that
| current_uid()==pid_ns->creator_uid() at each level where
| the caller is specifying a pid.

I currently have CAP_SYS_ADMIN check in clone_with_pids() and was
thinking that the tagging of pid namespaces can be done indpendent
of this patchset (as would integrating your patch of making pid_max
a property of pid-namespace).

| 
| I don't see that in this set.
| 
| OTOH, your approach of pulling alloc_pidmap_page() out of
| alloc_pidmap() and re-using it may be what Eric wanted to
| see.

Yes, I think the first few helper patches in the set would be needed/
useful to restart a process with a pid (not just for the clone-with-pids
syscall).

Sukadev

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [RFC][PATCH 3/7] Add target_pid parameter to alloc_pidmap()
       [not found]             ` <20090505222342.GB20515-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
@ 2009-05-05 23:01               ` Serge E. Hallyn
  0 siblings, 0 replies; 16+ messages in thread
From: Serge E. Hallyn @ 2009-05-05 23:01 UTC (permalink / raw)
  To: Sukadev Bhattiprolu
  Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA

Quoting Sukadev Bhattiprolu (sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org):
> Serge E. Hallyn [serue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org] wrote:
> | I think this whole patchset is still NACKed until you tag
> | pid_namespaces with a creator uid, and ensure that
> | current_uid()==pid_ns->creator_uid() at each level where
> | the caller is specifying a pid.
> 
> I currently have CAP_SYS_ADMIN check in clone_with_pids() and was
> thinking that the tagging of pid namespaces can be done indpendent
> of this patchset (as would integrating your patch of making pid_max
> a property of pid-namespace).

Didn't notice that check in patch 7.  Yeah, that's fine.

> | 
> | I don't see that in this set.
> | 
> | OTOH, your approach of pulling alloc_pidmap_page() out of
> | alloc_pidmap() and re-using it may be what Eric wanted to
> | see.
> 
> Yes, I think the first few helper patches in the set would be needed/
> useful to restart a process with a pid (not just for the clone-with-pids
> syscall).

thanks,
-serge

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2009-05-05 23:01 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-05-04  8:17 [RFC][PATCH 1/7] Factor out code to allocate pidmap page sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8
     [not found] ` <12414250653025-git-send-email-sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
2009-05-04  8:17   ` [RFC][PATCH 2/7] Have alloc_pidmap() return actual error code sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8
     [not found]     ` <1241425065670-git-send-email-sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
2009-05-04 18:11       ` Matt Helsley
     [not found]         ` <20090504181111.GM11734-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2009-05-04 18:29           ` Sukadev Bhattiprolu
2009-05-04  8:17   ` [RFC][PATCH 3/7] Add target_pid parameter to alloc_pidmap() sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8
     [not found]     ` <12414250651744-git-send-email-sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
2009-05-04 20:03       ` Serge E. Hallyn
     [not found]         ` <20090504200318.GA29491-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2009-05-05 22:23           ` Sukadev Bhattiprolu
     [not found]             ` <20090505222342.GB20515-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
2009-05-05 23:01               ` Serge E. Hallyn
2009-05-04  8:17   ` [RFC][PATCH 4/7] Add target_pids parameter to alloc_pid() sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8
2009-05-04  8:17   ` [RFC][PATCH 5/7] Add target_pids parameter to copy_process() sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8
2009-05-04  8:17   ` [RFC][PATCH 6/7] Define do_fork_with_pids() sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8
     [not found]     ` <12414250652549-git-send-email-sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
2009-05-04 18:26       ` Matt Helsley
     [not found]         ` <20090504182621.GO11734-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2009-05-04 18:32           ` Sukadev Bhattiprolu
2009-05-04  8:17   ` [RFC][PATCH 7/7] Define clone_with_pids syscall sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8
2009-05-04 18:20   ` [RFC][PATCH 1/7] Factor out code to allocate pidmap page Matt Helsley
     [not found]     ` <20090504182059.GN11734-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2009-05-04 18:27       ` Sukadev Bhattiprolu

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.