From: Oleg Nesterov <oleg@tv-sign.ru>
To: "Eric W. Biederman" <ebiederm@xmission.com>,
Andrew Morton <akpm@osdl.org>,
linux-kernel@vger.kernel.org
Subject: Re: [PATCH 01/23] tref: Implement task references.
Date: Sat, 04 Mar 2006 20:30:03 +0300 [thread overview]
Message-ID: <4409CE9B.E9117FC5@tv-sign.ru> (raw)
In-Reply-To: 4409888C.71720720@tv-sign.ru
Oleg Nesterov wrote:
>
> "Eric W. Biederman" wrote:
> >
> > Oleg Nesterov <oleg@tv-sign.ru> writes:
> >
> > > fastcall void free_pidmap(int pid)
> > > {
> > > pidmap_t *map = pidmap_array + pid / BITS_PER_PAGE;
> > > int offset = pid & BITS_PER_PAGE_MASK;
> > > struct pid_ref *ref;
> > >
> > > clear_bit(offset, map->page);
> > > atomic_inc(&map->nr_free);
> > >
> > > ref = find_pid_ref(pid);
> > > if (unlikely(ref != NULL)) {
> > > hlist_del_init(&ref->chain);
> > > ref->pid = 0;
> > > }
> > > }
> >
> > Ouch! I believe free_pidmap now needs the tasklist_lock so
> > we can free the pid and kill the pid_ref atomically. Otherwise
> > the pid could potentially get reused before we free the pid reference.
> > I think that means ensuring all of the callers take tasklist_lock.
>
> Yes, you are right. And do_fork() does free_pidmap() lockless in
> the error path. This path is not performance critical, so may be
> it is ok to add wrie_lock(tasklist) here.
Even better: don't use tasklist_lock at all. We can use pidmap_lock
instead, see the patch below.
I have added a simple find_task_by_pid_ref() helper, note that it
doesn't need pidmap_lock, and it doesn't need to check ref->pid != 0.
If the caller does read_lock(tasklist), then this helper can't return
unhashed task_struct, otherwise it is possible anyway.
Oleg.
(for review only)
--- 2.6.16-rc5/include/linux/sched.h~1_REF 2006-03-01 22:00:30.000000000 +0300
+++ 2.6.16-rc5/include/linux/sched.h 2006-03-04 22:56:44.000000000 +0300
@@ -1012,8 +1012,6 @@ extern struct task_struct init_task;
extern struct mm_struct init_mm;
-#define find_task_by_pid(nr) find_task_by_pid_type(PIDTYPE_PID, nr)
-extern struct task_struct *find_task_by_pid_type(int type, int pid);
extern void set_special_pids(pid_t session, pid_t pgrp);
extern void __set_special_pids(pid_t session, pid_t pgrp);
--- 2.6.16-rc5/include/linux/pid.h~1_REF 2006-03-01 22:00:29.000000000 +0300
+++ 2.6.16-rc5/include/linux/pid.h 2006-03-04 23:02:51.000000000 +0300
@@ -35,6 +35,9 @@ extern void FASTCALL(detach_pid(struct t
* held.
*/
extern struct pid *FASTCALL(find_pid(enum pid_type, int));
+extern struct task_struct *find_task_by_pid_type(int type, int pid);
+
+#define find_task_by_pid(nr) find_task_by_pid_type(PIDTYPE_PID, nr)
extern int alloc_pidmap(void);
extern void FASTCALL(free_pidmap(int));
@@ -51,4 +54,23 @@ extern void FASTCALL(free_pidmap(int));
hlist_unhashed(&(task)->pids[type].pid_chain)); \
} \
+struct pid_ref
+{
+ pid_t pid;
+ int count;
+ struct hlist_node chain;
+};
+
+extern struct pid_ref *alloc_pid_ref(pid_t pid);
+extern void put_pid_ref(struct pid_ref *ref);
+
+static inline struct task_struct *find_task_by_pid_ref(struct pid_ref *ref,
+ enum pid_type type)
+{
+ if (!ref)
+ return NULL;
+
+ return find_task_by_pid_type(type, ref->pid);
+}
+
#endif /* _LINUX_PID_H */
--- 2.6.16-rc5/kernel/pid.c~1_REF 2006-03-01 22:03:25.000000000 +0300
+++ 2.6.16-rc5/kernel/pid.c 2006-03-04 22:27:51.000000000 +0300
@@ -28,9 +28,12 @@
#include <linux/hash.h>
#define pid_hashfn(nr) hash_long((unsigned long)nr, pidhash_shift)
-static struct hlist_head *pid_hash[PIDTYPE_MAX];
+static struct hlist_head *pid_hash[PIDTYPE_MAX + 1];
static int pidhash_shift;
+#define ref_hashfn(pid) pid_hashfn(pid)
+#define ref_hash pid_hash[PIDTYPE_MAX]
+
int pid_max = PID_MAX_DEFAULT;
int last_pid;
@@ -62,13 +65,35 @@ static pidmap_t pidmap_array[PIDMAP_ENTR
static __cacheline_aligned_in_smp DEFINE_SPINLOCK(pidmap_lock);
+static struct pid_ref *find_pid_ref(pid_t pid)
+{
+ struct hlist_node *elem;
+ struct pid_ref *ref;
+
+ hlist_for_each_entry(ref, elem, &ref_hash[ref_hashfn(pid)], chain)
+ if (ref->pid == pid)
+ return ref;
+
+ return NULL;
+}
+
fastcall void free_pidmap(int pid)
{
pidmap_t *map = pidmap_array + pid / BITS_PER_PAGE;
int offset = pid & BITS_PER_PAGE_MASK;
+ struct pid_ref *ref;
+ unsigned long flags;
clear_bit(offset, map->page);
atomic_inc(&map->nr_free);
+
+ spin_lock_irqsave(&pidmap_lock, flags);
+ ref = find_pid_ref(pid);
+ if (unlikely(ref != NULL)) {
+ hlist_del_init(&ref->chain);
+ ref->pid = 0;
+ }
+ spin_unlock_irqrestore(&pidmap_lock, flags);
}
int alloc_pidmap(void)
@@ -217,6 +242,48 @@ task_t *find_task_by_pid_type(int type,
EXPORT_SYMBOL(find_task_by_pid_type);
+static inline int pid_inuse(pid_t pid)
+{
+ pidmap_t *map = pidmap_array + pid / BITS_PER_PAGE;
+ int offset = pid & BITS_PER_PAGE_MASK;
+
+ return likely(map->page) && test_bit(offset, map->page);
+}
+
+struct pid_ref *alloc_pid_ref(pid_t pid)
+{
+ struct pid_ref *ref;
+
+ spin_lock_irq(&pidmap_lock);
+ ref = find_pid_ref(pid);
+ if (ref)
+ ref->count++;
+ else if (pid_inuse(pid)) {
+ ref = kmalloc(sizeof(*ref), GFP_ATOMIC);
+ if (ref) {
+ ref->pid = pid;
+ ref->count = 1;
+ hlist_add_head(&ref->chain,
+ &ref_hash[ref_hashfn(pid)]);
+ }
+ }
+ spin_unlock_irq(&pidmap_lock);
+
+ return ref;
+}
+
+void free_pid_ref(struct pid_ref *ref)
+{
+ if (!ref)
+ return;
+
+ spin_lock_irq(&pidmap_lock);
+ if (!--ref->count) {
+ hlist_del_init(&ref->chain);
+ kfree(ref);
+ }
+ spin_lock_irq(&pidmap_lock);
+}
/*
* The pid hash table is scaled according to the amount of memory in the
* machine. From a minimum of 16 slots up to 4096 slots at one gigabyte or
@@ -233,9 +300,9 @@ void __init pidhash_init(void)
printk("PID hash table entries: %d (order: %d, %Zd bytes)\n",
pidhash_size, pidhash_shift,
- PIDTYPE_MAX * pidhash_size * sizeof(struct hlist_head));
+ (PIDTYPE_MAX + 1) * pidhash_size * sizeof(struct hlist_head));
- for (i = 0; i < PIDTYPE_MAX; i++) {
+ for (i = 0; i < PIDTYPE_MAX + 1; i++) {
pid_hash[i] = alloc_bootmem(pidhash_size *
sizeof(*(pid_hash[i])));
if (!pid_hash[i])
next prev parent reply other threads:[~2006-03-04 17:33 UTC|newest]
Thread overview: 49+ messages / expand[flat|nested] mbox.gz Atom feed top
2006-02-23 15:52 [PATCH 00/23] proc cleanup Eric W. Biederman
2006-02-23 15:54 ` [PATCH 01/23] tref: Implement task references Eric W. Biederman
2006-02-23 15:56 ` [PATCH 02/23] proc: Fix the .. inode number on /proc/<pid>/fd Eric W. Biederman
2006-02-23 15:57 ` [PATCH 03/23] proc: Remove useless BKL in proc_pid_readlink Eric W. Biederman
2006-02-23 15:58 ` [PATCH 04/23] proc: Remove unnecessary and misleading assignments from proc_pid_make_inode Eric W. Biederman
2006-02-23 16:00 ` [PATCH 05/23] proc: Simplify the ownership rules for /proc Eric W. Biederman
2006-02-23 16:02 ` Eric W. Biederman
2006-02-23 16:04 ` [PATCH 06/23] proc: Replace proc_inode.type with proc_inode.fd Eric W. Biederman
2006-02-23 16:05 ` [PATCH 07/23] proc: Remove bogus proc_task_permission Eric W. Biederman
2006-02-23 16:06 ` [PATCH 08/23] proc: Kill proc_mem_inode_operations Eric W. Biederman
2006-02-23 16:08 ` [PATCH 09/23] proc: Properly filter out files that are not visible to a process Eric W. Biederman
2006-02-23 16:10 ` [PATCH 10/23] proc: Fix the link count for /proc/<pid>/task Eric W. Biederman
2006-02-23 16:12 ` [PATCH 11/23] proc: Move proc_maps_operations into task_mmu.c Eric W. Biederman
2006-02-23 16:15 ` [PATCH 12/23] proc: Rewrite the proc dentry flush on exit optimization Eric W. Biederman
2006-02-23 16:16 ` [PATCH 13/23] proc: Close the race of a process dying durning lookup Eric W. Biederman
2006-02-23 16:18 ` [PATCH 14/23] proc: Make PROC_NUMBUF the buffer size for holding a integers as strings Eric W. Biederman
2006-02-23 16:20 ` [PATCH 15/23] proc: refactor reading directories of tasks Eric W. Biederman
2006-02-23 16:23 ` [PATCH 16/23] proc: Don't lock task_structs indefinitely Eric W. Biederman
2006-02-23 16:24 ` [PATCH 17/23] proc: Give the root directory a task Eric W. Biederman
2006-02-23 16:25 ` [PATCH 18/23] proc: Reorder the functions in base.c Eric W. Biederman
2006-02-23 16:27 ` [PATCH 19/23] proc: Modify proc_pident_lookup to be completely table driven Eric W. Biederman
2006-02-23 16:28 ` [PATCH 20/23] proc: Make the generation of the self symlink " Eric W. Biederman
2006-02-23 16:30 ` [PATCH 21/23] proc: Factor out an instantiate method from every lookup method Eric W. Biederman
2006-02-23 16:32 ` [PATCH 22/23] proc: Remove the hard coded inode numbers Eric W. Biederman
2006-02-23 16:34 ` [PATCH 23/23] proc: Merge proc_tid_attr and proc_tgid_attr Eric W. Biederman
2006-02-23 16:49 ` [PATCH 01/23] tref: Implement task references Eric W. Biederman
2006-03-02 19:16 ` Oleg Nesterov
2006-03-02 20:37 ` Oleg Nesterov
2006-03-02 22:19 ` Eric W. Biederman
2006-03-03 16:56 ` Oleg Nesterov
2006-03-03 17:48 ` Eric W. Biederman
2006-03-04 11:16 ` Eric W. Biederman
2006-03-04 12:31 ` Oleg Nesterov
2006-03-04 17:30 ` Oleg Nesterov [this message]
2006-03-06 21:06 ` Oleg Nesterov
2006-03-06 22:18 ` Eric W. Biederman
2006-03-07 20:44 ` Oleg Nesterov
2006-03-07 1:39 ` Eric W. Biederman
2006-03-07 20:38 ` Oleg Nesterov
2006-03-07 13:12 ` Eric W. Biederman
2006-03-07 21:02 ` Oleg Nesterov
2006-03-07 23:00 ` Eric W. Biederman
2006-03-03 19:23 ` Oleg Nesterov
2006-03-04 10:51 ` Eric W. Biederman
2006-02-25 12:27 ` [PATCH 00/23] proc cleanup Andrew Morton
2006-02-25 13:34 ` Eric W. Biederman
2006-02-25 15:20 ` Eric W. Biederman
2006-02-27 15:26 ` Serge E. Hallyn
2006-02-27 15:56 ` Eric W. Biederman
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4409CE9B.E9117FC5@tv-sign.ru \
--to=oleg@tv-sign.ru \
--cc=akpm@osdl.org \
--cc=ebiederm@xmission.com \
--cc=linux-kernel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.