From: "Serge E. Hallyn" <serue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
To: Nadia.Derbey-6ktuUTfB/bM@public.gmane.org
Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org,
pavel-+ZI9xUNit7I@public.gmane.org
Subject: Re: [RFC PATCH 3/5] use next syscall data to predefine process ids
Date: Mon, 7 Jul 2008 13:54:24 -0500 [thread overview]
Message-ID: <20080707185424.GA25934@us.ibm.com> (raw)
In-Reply-To: <20080703144225.489624000-6ktuUTfB/bM@public.gmane.org>
Quoting Nadia.Derbey-6ktuUTfB/bM@public.gmane.org (Nadia.Derbey-6ktuUTfB/bM@public.gmane.org):
> [PATCH 03/05]
>
> This patch uses the value written into the next_syscall_data proc file
> as a target upid nr for the next process to be created.
> The following syscalls have a new behavior if next_syscall_data is set:
> . fork()
> . vfork()
> . clone()
>
> In the current version, if the process belongs to nested namespaces, only
> the upper namespace level upid nr is allowed to be predefined, since there
> is not yet a way to take a snapshot of upid nrs at all namespaces levels.
>
> But this can easily be extended in the future.
Good point, we will want to discuss the right way to dump that data. Do
we add a new file under /proc/<pid>, use /proc/pid/status, or find some
other way?
> Signed-off-by: Nadia Derbey <Nadia.Derbey-6ktuUTfB/bM@public.gmane.org>
>
> ---
> include/linux/next_syscall_data.h | 2
> include/linux/pid.h | 2
> kernel/fork.c | 3 -
> kernel/pid.c | 111 ++++++++++++++++++++++++++++++++------
> 4 files changed, 98 insertions(+), 20 deletions(-)
>
> Index: linux-2.6.26-rc5-mm3/kernel/pid.c
> ===================================================================
> --- linux-2.6.26-rc5-mm3.orig/kernel/pid.c 2008-07-01 10:25:46.000000000 +0200
> +++ linux-2.6.26-rc5-mm3/kernel/pid.c 2008-07-01 11:25:38.000000000 +0200
> @@ -122,6 +122,26 @@ static void free_pidmap(struct upid *upi
> atomic_inc(&map->nr_free);
> }
>
> +static inline int alloc_pidmap_page(struct pidmap *map)
> +{
> + if (unlikely(!map->page)) {
> + void *page = kzalloc(PAGE_SIZE, GFP_KERNEL);
> + /*
> + * Free the page if someone raced with us
> + * installing it:
> + */
> + spin_lock_irq(&pidmap_lock);
> + if (map->page)
> + kfree(page);
> + else
> + map->page = page;
> + spin_unlock_irq(&pidmap_lock);
> + if (unlikely(!map->page))
> + return -1;
> + }
> + return 0;
> +}
> +
> static int alloc_pidmap(struct pid_namespace *pid_ns)
> {
> int i, offset, max_scan, pid, last = pid_ns->last_pid;
> @@ -134,21 +154,8 @@ static int alloc_pidmap(struct pid_names
> map = &pid_ns->pidmap[pid/BITS_PER_PAGE];
> max_scan = (pid_max + BITS_PER_PAGE - 1)/BITS_PER_PAGE - !offset;
> for (i = 0; i <= max_scan; ++i) {
> - if (unlikely(!map->page)) {
> - void *page = kzalloc(PAGE_SIZE, GFP_KERNEL);
> - /*
> - * Free the page if someone raced with us
> - * installing it:
> - */
> - spin_lock_irq(&pidmap_lock);
> - if (map->page)
> - kfree(page);
> - else
> - map->page = page;
> - spin_unlock_irq(&pidmap_lock);
> - if (unlikely(!map->page))
> - break;
> - }
> + if (unlikely(alloc_pidmap_page(map)))
> + break;
> if (likely(atomic_read(&map->nr_free))) {
> do {
> if (!test_and_set_bit(offset, map->page)) {
> @@ -182,6 +189,33 @@ static int alloc_pidmap(struct pid_names
> return -1;
> }
>
> +/*
> + * Return 0 if successful (i.e. next_nr could be assigned as a upid nr).
> + * -errno else
> + */
> +static int alloc_fixed_pidmap(struct pid_namespace *pid_ns, int next_nr)
> +{
> + int offset;
> + struct pidmap *map;
> +
> + if (next_nr < RESERVED_PIDS || next_nr >= pid_max)
> + return -EINVAL;
> +
> + map = &pid_ns->pidmap[next_nr / BITS_PER_PAGE];
> +
> + if (unlikely(alloc_pidmap_page(map)))
> + return -ENOMEM;
> +
> + offset = next_nr & BITS_PER_PAGE_MASK;
> + if (test_and_set_bit(offset, map->page))
> + return -EBUSY;
> +
> + atomic_dec(&map->nr_free);
> + pid_ns->last_pid = max(pid_ns->last_pid, next_nr);
> +
> + return 0;
> +}
> +
> int next_pidmap(struct pid_namespace *pid_ns, int last)
> {
> int offset;
> @@ -239,7 +273,25 @@ void free_pid(struct pid *pid)
> call_rcu(&pid->rcu, delayed_put_pid);
> }
>
> -struct pid *alloc_pid(struct pid_namespace *ns)
> +/*
> + * Sets a predefined upid nr for the process' upper namespace level
> + */
> +static int set_predefined_pid(struct pid_namespace *ns, struct pid *pid,
> + int next_nr)
> +{
> + int i = ns->level;
> + int rc;
> +
> + rc = alloc_fixed_pidmap(ns, next_nr);
> + if (rc < 0)
> + return rc;
> +
> + pid->numbers[i].nr = next_nr;
> + pid->numbers[i].ns = ns;
> + return 0;
> +}
> +
> +struct pid *alloc_pid(struct pid_namespace *ns, int *retval)
Is there a reason why you can't return retval using
return ERR_PTR(retval);
instead of using an additional argument? Then at copy_process,
after the call, do
if (IS_ERR(pid))
retval = PTR_ERR(pid);
?
> {
> struct pid *pid;
> enum pid_type type;
> @@ -247,12 +299,37 @@ struct pid *alloc_pid(struct pid_namespa
> struct pid_namespace *tmp;
> struct upid *upid;
>
> + *retval = -ENOMEM;
> pid = kmem_cache_alloc(ns->pid_cachep, GFP_KERNEL);
> if (!pid)
> goto out;
>
> tmp = ns;
> - for (i = ns->level; i >= 0; i--) {
> + i = ns->level;
> + if (next_data_set(current)) {
> + /*
> + * There is a upid nr specified, use it instead of letting
> + * the kernel chose it for us.
> + */
> + int next_nr = get_next_data(current);
> + int rc;
> +
> + rc = set_predefined_pid(tmp, pid, next_nr);
> + if (rc < 0) {
> + *retval = rc;
> + goto out_free;
Again, I'd argue for resetting the syscall data on failure.
> + }
> + /* Go up one level */
> + tmp = tmp->parent;
> + i--;
> + reset_next_syscall_data(current);
> + }
> +
> + /*
> + * Let the lower levels upid nrs be automatically allocated
> + */
> + *retval = -ENOMEM;
> + for ( ; i >= 0; i--) {
> nr = alloc_pidmap(tmp);
> if (nr < 0)
> goto out_free;
> Index: linux-2.6.26-rc5-mm3/include/linux/pid.h
> ===================================================================
> --- linux-2.6.26-rc5-mm3.orig/include/linux/pid.h 2008-07-01 10:25:46.000000000 +0200
> +++ linux-2.6.26-rc5-mm3/include/linux/pid.h 2008-07-01 10:49:07.000000000 +0200
> @@ -121,7 +121,7 @@ extern struct pid *find_get_pid(int nr);
> extern struct pid *find_ge_pid(int nr, struct pid_namespace *);
> int next_pidmap(struct pid_namespace *pid_ns, int last);
>
> -extern struct pid *alloc_pid(struct pid_namespace *ns);
> +extern struct pid *alloc_pid(struct pid_namespace *, int *);
> extern void free_pid(struct pid *pid);
>
> /*
> Index: linux-2.6.26-rc5-mm3/kernel/fork.c
> ===================================================================
> --- linux-2.6.26-rc5-mm3.orig/kernel/fork.c 2008-07-01 10:25:46.000000000 +0200
> +++ linux-2.6.26-rc5-mm3/kernel/fork.c 2008-07-01 10:49:07.000000000 +0200
> @@ -1110,8 +1110,7 @@ static struct task_struct *copy_process(
> goto bad_fork_cleanup_io;
>
> if (pid != &init_struct_pid) {
> - retval = -ENOMEM;
> - pid = alloc_pid(task_active_pid_ns(p));
> + pid = alloc_pid(task_active_pid_ns(p), &retval);
> if (!pid)
> goto bad_fork_cleanup_io;
>
> Index: linux-2.6.26-rc5-mm3/include/linux/next_syscall_data.h
> ===================================================================
> --- linux-2.6.26-rc5-mm3.orig/include/linux/next_syscall_data.h 2008-07-01 10:41:36.000000000 +0200
> +++ linux-2.6.26-rc5-mm3/include/linux/next_syscall_data.h 2008-07-01 11:09:35.000000000 +0200
> @@ -5,6 +5,7 @@
> * following is supported today:
> * . object creation with a predefined id
> * . for a sysv ipc object
> + * . for a process
> *
> */
>
> @@ -19,6 +20,7 @@
> * For example, it can be used to pre-set the id of the object to be created
> * by next syscall. The following syscalls support this feature:
> * . msgget(), semget(), shmget()
> + * . fork(), vfork(), clone()
> */
> struct next_syscall_data {
> int ndata;
>
> --
next prev parent reply other threads:[~2008-07-07 18:54 UTC|newest]
Thread overview: 43+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-07-03 14:40 [RFC PATCH 0/5] Resend - Use procfs to change a syscall behavior Nadia.Derbey-6ktuUTfB/bM
2008-07-03 14:40 ` [RFC PATCH 1/5] adds the procfs facilities Nadia.Derbey-6ktuUTfB/bM
[not found] ` <20080703144224.723883000-6ktuUTfB/bM@public.gmane.org>
2008-07-07 18:30 ` Serge E. Hallyn
[not found] ` <20080707183030.GA22937-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2008-07-08 5:25 ` Nadia Derbey
2008-07-03 14:40 ` [RFC PATCH 2/5] use next syscall data to predefine ipc objects ids Nadia.Derbey-6ktuUTfB/bM
[not found] ` <20080703144224.982195000-6ktuUTfB/bM@public.gmane.org>
2008-07-07 18:35 ` Serge E. Hallyn
[not found] ` <20080707183512.GB22937-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2008-07-08 5:30 ` Nadia Derbey
2008-07-03 14:40 ` [RFC PATCH 3/5] use next syscall data to predefine process ids Nadia.Derbey-6ktuUTfB/bM
[not found] ` <20080703144225.489624000-6ktuUTfB/bM@public.gmane.org>
2008-07-07 18:54 ` Serge E. Hallyn [this message]
[not found] ` <20080707185424.GA25934-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2008-07-08 5:44 ` Nadia Derbey
2008-07-03 14:40 ` [RFC PATCH 4/5] use next syscall data to change the behavior of IPC_SET Nadia.Derbey-6ktuUTfB/bM
2008-07-03 14:40 ` [RFC PATCH 5/5] use next syscall data to predefine the file descriptor value Nadia.Derbey-6ktuUTfB/bM
[not found] ` <20080703144013.737951000-6ktuUTfB/bM@public.gmane.org>
2008-07-04 10:27 ` [RFC PATCH 0/5] Resend - Use procfs to change a syscall behavior Pavel Machek
[not found] ` <20080704102702.GB4531-I/5MKhXcvmPrBKCeMvbIDA@public.gmane.org>
2008-07-04 12:07 ` Nadia Derbey
[not found] ` <486E1276.2080605-6ktuUTfB/bM@public.gmane.org>
2008-07-08 10:51 ` Pavel Machek
[not found] ` <20080708105143.GA15311-I/5MKhXcvmPrBKCeMvbIDA@public.gmane.org>
2008-07-08 21:47 ` Serge E. Hallyn
[not found] ` <20080708214721.GA1972-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2008-07-08 21:53 ` Pavel Machek
[not found] ` <20080708215315.GD17083-I/5MKhXcvmPrBKCeMvbIDA@public.gmane.org>
2008-07-10 6:54 ` Nadia Derbey
[not found] ` <4875B212.5030604-6ktuUTfB/bM@public.gmane.org>
2008-07-10 7:01 ` [Devel] " Paul Menage
[not found] ` <6599ad830807100001j3f3a6cf2y7a19dda9382edb2c-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2008-07-10 9:14 ` Nadia Derbey
[not found] ` <4875D2EA.4010407-6ktuUTfB/bM@public.gmane.org>
2008-07-10 9:30 ` Paul Menage
[not found] ` <6599ad830807100230k2f3f3551sa4b804f4c20b43fe-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2008-07-10 10:11 ` Nadia Derbey
2008-07-10 7:42 ` Nadia Derbey
[not found] ` <4875BD4B.2070402-6ktuUTfB/bM@public.gmane.org>
2008-07-10 8:54 ` Pavel Machek
[not found] ` <20080710085406.GA13258-I/5MKhXcvmPrBKCeMvbIDA@public.gmane.org>
2008-07-10 9:29 ` Nadia Derbey
2008-07-10 17:53 ` Dave Hansen
2008-07-10 18:45 ` Pavel Machek
[not found] ` <20080710184512.GA19428-I/5MKhXcvmPrBKCeMvbIDA@public.gmane.org>
2008-07-10 19:04 ` Dave Hansen
2008-07-10 19:27 ` Serge E. Hallyn
2008-07-07 19:01 ` Serge E. Hallyn
[not found] ` <20080707190119.GB25934-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2008-07-08 10:52 ` Pavel Machek
[not found] ` <20080708105228.GB15311-I/5MKhXcvmPrBKCeMvbIDA@public.gmane.org>
2008-07-08 21:50 ` Serge E. Hallyn
[not found] ` <20080708215034.GB2179-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2008-07-08 21:58 ` Pavel Machek
[not found] ` <20080708215821.GE17083-I/5MKhXcvmPrBKCeMvbIDA@public.gmane.org>
2008-07-09 2:20 ` Serge E. Hallyn
[not found] ` <20080709022035.GA21249-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2008-07-10 7:58 ` Nadia Derbey
[not found] ` <4875C138.5060506-6ktuUTfB/bM@public.gmane.org>
2008-07-10 8:34 ` [Devel] " Paul Menage
[not found] ` <6599ad830807100134l362ab98bt868e078eeb17b838-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2008-07-10 9:38 ` Nadia Derbey
2008-07-17 22:26 ` Oren Laadan
-- strict thread matches above, loose matches on Subject: below --
2008-07-08 11:24 [RFC PATCH 0/5] Resend -v2 " Nadia.Derbey-6ktuUTfB/bM
2008-07-08 11:24 ` [RFC PATCH 3/5] use next syscall data to predefine process ids Nadia.Derbey-6ktuUTfB/bM
[not found] ` <20080708112458.946320000-6ktuUTfB/bM@public.gmane.org>
2008-07-08 19:49 ` Serge E. Hallyn
2008-07-10 0:27 ` Eric W. Biederman
[not found] ` <m1hcayfusi.fsf-B27657KtZYmhTnVgQlOflh2eb7JE58TQ@public.gmane.org>
2008-07-10 8:32 ` Nadia Derbey
[not found] ` <4875C932.2020503-6ktuUTfB/bM@public.gmane.org>
2008-07-10 9:36 ` Eric W. Biederman
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20080707185424.GA25934@us.ibm.com \
--to=serue-r/jw6+rmf7hqt0dzr+alfa@public.gmane.org \
--cc=Nadia.Derbey-6ktuUTfB/bM@public.gmane.org \
--cc=containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org \
--cc=pavel-+ZI9xUNit7I@public.gmane.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox