From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S932358AbXGYAgl (ORCPT ); Tue, 24 Jul 2007 20:36:41 -0400 Received: (majordomo@vger.kernel.org) by vger.kernel.org id S1760256AbXGYAg3 (ORCPT ); Tue, 24 Jul 2007 20:36:29 -0400 Received: from e35.co.us.ibm.com ([32.97.110.153]:38774 "EHLO e35.co.us.ibm.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1760204AbXGYAg1 (ORCPT ); Tue, 24 Jul 2007 20:36:27 -0400 Date: Tue, 24 Jul 2007 17:36:19 -0700 From: sukadev@us.ibm.com To: Pavel Emelianov Cc: Andrew Morton , Serge Hallyn , "Eric W. Biederman" , Linux Containers , Linux Kernel Mailing List , Kirill Korotaev Subject: Re: [PATCH 13/16] Switch to operating with pid_numbers instead of pids Message-ID: <20070725003619.GF3287@us.ibm.com> References: <468DF6F7.1010906@openvz.org> <468DF907.2060809@openvz.org> Mime-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: <468DF907.2060809@openvz.org> User-Agent: Mutt/1.4.2.2i X-Operating-System: Linux 2.0.32 on an i486 Sender: linux-kernel-owner@vger.kernel.org X-Mailing-List: linux-kernel@vger.kernel.org Pavel Emelianov [xemul@openvz.org] wrote: | Make alloc_pid() initialize pid_numbers and hash them | into the hashtable, not the struct pid itself. | | Signed-off-by: Pavel Emelianov | | --- | | pid.c | 47 +++++++++++++++++++++++++++++++++-------------- | 1 files changed, 33 insertions(+), 14 deletions(-) | | --- ./kernel/pid.c.ve12 2007-07-05 11:06:41.000000000 +0400 | +++ ./kernel/pid.c 2007-07-05 11:08:23.000000000 +0400 | @@ -28,8 +28,10 @@ | #include | #include | #include | +#include | | -#define pid_hashfn(nr) hash_long((unsigned long)nr, pidhash_shift) | +#define pid_hashfn(nr, ns) \ | + hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift) | static struct hlist_head *pid_hash; | static int pidhash_shift; | struct pid init_struct_pid = INIT_STRUCT_PID; | @@ -194,7 +198,7 @@ fastcall void put_pid(struct pid *pid) | if (!pid) | return; | | - ns = pid->numbers[0].ns; | + ns = pid->numbers[pid->level].ns; | if ((atomic_read(&pid->count) == 1) || | atomic_dec_and_test(&pid->count)) | kmem_cache_free(ns->pid_cachep, pid); | @@ -210,13 +214,17 @@ static void delayed_put_pid(struct rcu_h | fastcall void free_pid(struct pid *pid) | { | /* We can be called with write_lock_irq(&tasklist_lock) held */ | + int i; | unsigned long flags; | | spin_lock_irqsave(&pidmap_lock, flags); | - hlist_del_rcu(&pid->pid_chain); | + for (i = 0; i <= pid->level; i++) | + hlist_del_rcu(&pid->numbers[i].pid_chain); | spin_unlock_irqrestore(&pidmap_lock, flags); | | - free_pidmap(&init_pid_ns, pid->nr); | + for (i = 0; i <= pid->level; i++) | + free_pidmap(pid->numbers[i].ns, pid->numbers[i].nr); | + | call_rcu(&pid->rcu, delayed_put_pid); | } | | @@ -224,30 +232,43 @@ struct pid *alloc_pid(struct pid_namespa | { | struct pid *pid; | enum pid_type type; | - int nr = -1; | + struct pid_namespace *ns; | + int i, nr; | | - pid = kmem_cache_alloc(init_pid_ns.pid_cachep, GFP_KERNEL); | + pid = kmem_cache_alloc(pid_ns->pid_cachep, GFP_KERNEL); | if (!pid) | goto out; | | - nr = alloc_pidmap(current->nsproxy->pid_ns); | - if (nr < 0) | - goto out_free; | + ns = pid_ns; | + for (i = pid_ns->level; i >= 0; i--) { | + nr = alloc_pidmap(ns); | + if (nr < 0) | + goto out_free; If pid_ns->level is say 3 and alloc_pidmap() succeeds when i=0,1 and fails when i=2, we would try to free_pidmap() even from pid->pid_number[2].pid_ns. This would incorrectly a) drop reference count on that pid namespace, and incorrectly increment pidmap->nr_free. Should we use kmem_cache_zalloc() and check for a non-NULL pid_ns before calling free_pidmap() below ? | | + pid->numbers[i].nr = nr; | + pid->numbers[i].ns = ns; | + ns = ns->parent; | + } | + | + pid->level = pid_ns->level; | atomic_set(&pid->count, 1); | - pid->nr = nr; | for (type = 0; type < PIDTYPE_MAX; ++type) | INIT_HLIST_HEAD(&pid->tasks[type]); | | spin_lock_irq(&pidmap_lock); | - hlist_add_head_rcu(&pid->pid_chain, &pid_hash[pid_hashfn(pid->nr)]); | + for (i = pid->level; i >= 0; i--) | + hlist_add_head_rcu(&pid->numbers[i].pid_chain, | + &pid_hash[pid_hashfn(pid->numbers[i].nr, | + pid->numbers[i].ns)]); | spin_unlock_irq(&pidmap_lock); | - | out: | return pid; | | out_free: | - kmem_cache_free(init_pid_ns.pid_cachep, pid); | + for (i++; i <= pid->level; i++) | + free_pidmap(pid->numbers[i].ns, pid->numbers[i].nr); i.e all pid->numbers[] may not be initialized here right ?