From: "Serge E. Hallyn" <serue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
To: Nadia.Derbey-6ktuUTfB/bM@public.gmane.org
Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org
Subject: Re: [RFC PATCH 3/5] use next syscall data to predefine process ids
Date: Tue, 8 Jul 2008 14:49:26 -0500 [thread overview]
Message-ID: <20080708194926.GC22904@us.ibm.com> (raw)
In-Reply-To: <20080708112458.946320000-6ktuUTfB/bM@public.gmane.org>
Quoting Nadia.Derbey-6ktuUTfB/bM@public.gmane.org (Nadia.Derbey-6ktuUTfB/bM@public.gmane.org):
> [PATCH 03/05]
>
> This patch uses the value written into the next_syscall_data proc file
> as a target upid nr for the next process to be created.
> The following syscalls have a new behavior if next_syscall_data is set:
> . fork()
> . vfork()
> . clone()
>
> In the current version, if the process belongs to nested namespaces, only
> the upper namespace level upid nr is allowed to be predefined, since there
> is not yet a way to take a snapshot of upid nrs at all namespaces levels.
>
> But this can easily be extended in the future.
>
> Signed-off-by: Nadia Derbey <Nadia.Derbey-6ktuUTfB/bM@public.gmane.org>
Acked-by: Serge Hallyn <serue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
thanks,
-serge
>
> ---
> include/linux/next_syscall_data.h | 2
> kernel/fork.c | 5 -
> kernel/pid.c | 116 +++++++++++++++++++++++++++++++-------
> 3 files changed, 102 insertions(+), 21 deletions(-)
>
> Index: linux-2.6.26-rc8-mm1/kernel/pid.c
> ===================================================================
> --- linux-2.6.26-rc8-mm1.orig/kernel/pid.c 2008-07-08 12:12:39.000000000 +0200
> +++ linux-2.6.26-rc8-mm1/kernel/pid.c 2008-07-08 12:24:04.000000000 +0200
> @@ -122,6 +122,26 @@ static void free_pidmap(struct upid *upi
> atomic_inc(&map->nr_free);
> }
>
> +static inline int alloc_pidmap_page(struct pidmap *map)
> +{
> + if (unlikely(!map->page)) {
> + void *page = kzalloc(PAGE_SIZE, GFP_KERNEL);
> + /*
> + * Free the page if someone raced with us
> + * installing it:
> + */
> + spin_lock_irq(&pidmap_lock);
> + if (map->page)
> + kfree(page);
> + else
> + map->page = page;
> + spin_unlock_irq(&pidmap_lock);
> + if (unlikely(!map->page))
> + return -1;
> + }
> + return 0;
> +}
> +
> static int alloc_pidmap(struct pid_namespace *pid_ns)
> {
> int i, offset, max_scan, pid, last = pid_ns->last_pid;
> @@ -134,21 +154,8 @@ static int alloc_pidmap(struct pid_names
> map = &pid_ns->pidmap[pid/BITS_PER_PAGE];
> max_scan = (pid_max + BITS_PER_PAGE - 1)/BITS_PER_PAGE - !offset;
> for (i = 0; i <= max_scan; ++i) {
> - if (unlikely(!map->page)) {
> - void *page = kzalloc(PAGE_SIZE, GFP_KERNEL);
> - /*
> - * Free the page if someone raced with us
> - * installing it:
> - */
> - spin_lock_irq(&pidmap_lock);
> - if (map->page)
> - kfree(page);
> - else
> - map->page = page;
> - spin_unlock_irq(&pidmap_lock);
> - if (unlikely(!map->page))
> - break;
> - }
> + if (unlikely(alloc_pidmap_page(map)))
> + break;
> if (likely(atomic_read(&map->nr_free))) {
> do {
> if (!test_and_set_bit(offset, map->page)) {
> @@ -182,6 +189,33 @@ static int alloc_pidmap(struct pid_names
> return -1;
> }
>
> +/*
> + * Return 0 if successful (i.e. next_nr could be assigned as a upid nr).
> + * -errno else
> + */
> +static int alloc_fixed_pidmap(struct pid_namespace *pid_ns, int next_nr)
> +{
> + int offset;
> + struct pidmap *map;
> +
> + if (next_nr < RESERVED_PIDS || next_nr >= pid_max)
> + return -EINVAL;
> +
> + map = &pid_ns->pidmap[next_nr / BITS_PER_PAGE];
> +
> + if (unlikely(alloc_pidmap_page(map)))
> + return -ENOMEM;
> +
> + offset = next_nr & BITS_PER_PAGE_MASK;
> + if (test_and_set_bit(offset, map->page))
> + return -EBUSY;
> +
> + atomic_dec(&map->nr_free);
> + pid_ns->last_pid = max(pid_ns->last_pid, next_nr);
> +
> + return 0;
> +}
> +
> int next_pidmap(struct pid_namespace *pid_ns, int last)
> {
> int offset;
> @@ -239,6 +273,24 @@ void free_pid(struct pid *pid)
> call_rcu(&pid->rcu, delayed_put_pid);
> }
>
> +/*
> + * Sets a predefined upid nr for the process' upper namespace level
> + */
> +static int set_predefined_pid(struct pid_namespace *ns, struct pid *pid,
> + int next_nr)
> +{
> + int i = ns->level;
> + int rc;
> +
> + rc = alloc_fixed_pidmap(ns, next_nr);
> + if (rc < 0)
> + return rc;
> +
> + pid->numbers[i].nr = next_nr;
> + pid->numbers[i].ns = ns;
> + return 0;
> +}
> +
> struct pid *alloc_pid(struct pid_namespace *ns)
> {
> struct pid *pid;
> @@ -248,14 +300,41 @@ struct pid *alloc_pid(struct pid_namespa
> struct upid *upid;
>
> pid = kmem_cache_alloc(ns->pid_cachep, GFP_KERNEL);
> - if (!pid)
> + if (!pid) {
> + pid = ERR_PTR(-ENOMEM);
> goto out;
> + }
>
> tmp = ns;
> - for (i = ns->level; i >= 0; i--) {
> + i = ns->level;
> + if (unlikely(next_data_set(current))) {
> + /*
> + * There is a upid nr specified, use it instead of letting
> + * the kernel chose it for us.
> + */
> + int next_nr = get_next_data(current);
> + int rc;
> +
> + reset_next_syscall_data(current);
> + rc = set_predefined_pid(tmp, pid, next_nr);
> + if (rc < 0) {
> + pid = ERR_PTR(rc);
> + goto out_free;
> + }
> + /* Go up one level */
> + tmp = tmp->parent;
> + i--;
> + }
> +
> + /*
> + * Let the lower levels upid nrs be automatically allocated
> + */
> + for ( ; i >= 0; i--) {
> nr = alloc_pidmap(tmp);
> - if (nr < 0)
> + if (nr < 0) {
> + pid = ERR_PTR(-ENOMEM);
> goto out_free;
> + }
>
> pid->numbers[i].nr = nr;
> pid->numbers[i].ns = tmp;
> @@ -284,7 +363,6 @@ out_free:
> free_pidmap(pid->numbers + i);
>
> kmem_cache_free(ns->pid_cachep, pid);
> - pid = NULL;
> goto out;
> }
>
> Index: linux-2.6.26-rc8-mm1/kernel/fork.c
> ===================================================================
> --- linux-2.6.26-rc8-mm1.orig/kernel/fork.c 2008-07-08 12:12:39.000000000 +0200
> +++ linux-2.6.26-rc8-mm1/kernel/fork.c 2008-07-08 12:22:47.000000000 +0200
> @@ -1118,10 +1118,11 @@ static struct task_struct *copy_process(
> goto bad_fork_cleanup_io;
>
> if (pid != &init_struct_pid) {
> - retval = -ENOMEM;
> pid = alloc_pid(task_active_pid_ns(p));
> - if (!pid)
> + if (IS_ERR(pid)) {
> + retval = PTR_ERR(pid);
> goto bad_fork_cleanup_io;
> + }
>
> if (clone_flags & CLONE_NEWPID) {
> retval = pid_ns_prepare_proc(task_active_pid_ns(p));
> Index: linux-2.6.26-rc8-mm1/include/linux/next_syscall_data.h
> ===================================================================
> --- linux-2.6.26-rc8-mm1.orig/include/linux/next_syscall_data.h 2008-07-08 12:12:39.000000000 +0200
> +++ linux-2.6.26-rc8-mm1/include/linux/next_syscall_data.h 2008-07-08 12:22:47.000000000 +0200
> @@ -5,6 +5,7 @@
> * following is supported today:
> * . object creation with a predefined id
> * . for a sysv ipc object
> + * . for a process
> */
>
> #ifndef _LINUX_NEXT_SYSCALL_DATA_H
> @@ -18,6 +19,7 @@
> * For example, it can be used to pre-set the id of the object to be created
> * by next syscall. The following syscalls support this feature:
> * . msgget(), semget(), shmget()
> + * . fork(), vfork(), clone()
> */
> struct next_syscall_data {
> int ndata;
>
> --
next prev parent reply other threads:[~2008-07-08 19:49 UTC|newest]
Thread overview: 34+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-07-08 11:24 [RFC PATCH 0/5] Resend -v2 - Use procfs to change a syscall behavior Nadia.Derbey-6ktuUTfB/bM
2008-07-08 11:24 ` [RFC PATCH 1/5] adds the procfs facilities Nadia.Derbey-6ktuUTfB/bM
[not found] ` <20080708112457.994105000-6ktuUTfB/bM@public.gmane.org>
2008-07-08 19:32 ` Serge E. Hallyn
2008-07-08 11:24 ` [RFC PATCH 2/5] use next syscall data to predefine ipc objects ids Nadia.Derbey-6ktuUTfB/bM
[not found] ` <20080708112458.416998000-6ktuUTfB/bM@public.gmane.org>
2008-07-08 19:38 ` Serge E. Hallyn
2008-07-08 11:24 ` [RFC PATCH 3/5] use next syscall data to predefine process ids Nadia.Derbey-6ktuUTfB/bM
[not found] ` <20080708112458.946320000-6ktuUTfB/bM@public.gmane.org>
2008-07-08 19:49 ` Serge E. Hallyn [this message]
2008-07-10 0:27 ` Eric W. Biederman
[not found] ` <m1hcayfusi.fsf-B27657KtZYmhTnVgQlOflh2eb7JE58TQ@public.gmane.org>
2008-07-10 8:32 ` Nadia Derbey
[not found] ` <4875C932.2020503-6ktuUTfB/bM@public.gmane.org>
2008-07-10 9:36 ` Eric W. Biederman
2008-07-08 11:24 ` [RFC PATCH 4/5] use next syscall data to change the behavior of IPC_SET Nadia.Derbey-6ktuUTfB/bM
[not found] ` <20080708112459.231249000-6ktuUTfB/bM@public.gmane.org>
2008-07-08 19:56 ` Serge E. Hallyn
2008-07-08 11:24 ` [RFC PATCH 5/5] use next syscall data to predefine the file descriptor value Nadia.Derbey-6ktuUTfB/bM
[not found] ` <20080708112459.632357000-6ktuUTfB/bM@public.gmane.org>
2008-07-08 20:14 ` Serge E. Hallyn
[not found] ` <20080708201452.GE22904-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2008-07-09 5:00 ` kathys
[not found] ` <487445E4.6060107-8fk3Idey6ehBDgjK7y7TUQ@public.gmane.org>
2008-07-10 6:12 ` Nadia Derbey
[not found] ` <4875A849.1030206-6ktuUTfB/bM@public.gmane.org>
2008-07-14 4:58 ` kathys
2008-07-10 0:32 ` Eric W. Biederman
[not found] ` <m1tzeyefz9.fsf-B27657KtZYmhTnVgQlOflh2eb7JE58TQ@public.gmane.org>
2008-07-10 6:25 ` Nadia Derbey
[not found] ` <20080708112422.164370000-6ktuUTfB/bM@public.gmane.org>
2008-07-09 22:10 ` [Devel] [RFC PATCH 0/5] Resend -v2 - Use procfs to change a syscall behavior Alexey Dobriyan
[not found] ` <20080709221028.GA4926-QDJVlCTZ4KWTKS93B3g+7KFoa47nwP16@public.gmane.org>
2008-07-10 0:43 ` Eric W. Biederman
[not found] ` <m1tzeyd0x3.fsf-B27657KtZYmhTnVgQlOflh2eb7JE58TQ@public.gmane.org>
2008-07-10 1:39 ` Alexey Dobriyan
[not found] ` <20080710013915.GB8327-QDJVlCTZ4KWTKS93B3g+7KFoa47nwP16@public.gmane.org>
2008-07-10 2:14 ` Eric W. Biederman
2008-07-15 18:18 ` Eric W. Biederman
2008-07-17 22:42 ` Oren Laadan
[not found] ` <487FCAF0.70607-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
2008-07-18 1:09 ` Matt Helsley
[not found] ` <1216343365.4844.308.camel-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
2008-07-18 2:49 ` Eric W. Biederman
2008-07-18 2:40 ` Eric W. Biederman
2008-07-10 16:01 ` Dave Hansen
2008-07-10 0:36 ` Eric W. Biederman
[not found] ` <m1lk0aefs1.fsf-B27657KtZYmhTnVgQlOflh2eb7JE58TQ@public.gmane.org>
2008-07-10 9:54 ` Nadia Derbey
-- strict thread matches above, loose matches on Subject: below --
2008-07-03 14:40 [RFC PATCH 0/5] Resend " Nadia.Derbey-6ktuUTfB/bM
2008-07-03 14:40 ` [RFC PATCH 3/5] use next syscall data to predefine process ids Nadia.Derbey-6ktuUTfB/bM
[not found] ` <20080703144225.489624000-6ktuUTfB/bM@public.gmane.org>
2008-07-07 18:54 ` Serge E. Hallyn
[not found] ` <20080707185424.GA25934-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2008-07-08 5:44 ` Nadia Derbey
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20080708194926.GC22904@us.ibm.com \
--to=serue-r/jw6+rmf7hqt0dzr+alfa@public.gmane.org \
--cc=Nadia.Derbey-6ktuUTfB/bM@public.gmane.org \
--cc=containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.