From: "Mika Penttilä" <mika.penttila@kolumbus.fi>
To: Manfred Spraul <manfred@colorfullife.com>
Cc: linux-kernel@vger.kernel.org
Subject: Re: [RFC,PATCH] cookie based proc_pid_readdir
Date: Tue, 06 Jan 2004 17:16:23 +0200 [thread overview]
Message-ID: <3FFAD147.7050907@kolumbus.fi> (raw)
In-Reply-To: <3FFABA20.3030304@colorfullife.com>
afaics, clist_head from f_version is leaked if not whole directory read.
--Mika
Manfred Spraul wrote:
> proc_dir_readdir skips over tasks if tasks exit between the individual
> get_tgid_list calls. The only approach that guarantees that no tasks
> are skipped is to add a cookie into the task list and restart from
> that cookie.
>
> Attached is a proof of concept patch that adds such cookies - please
> comment.
>
> --
> Manfred
>
>------------------------------------------------------------------------
>
>// $Header$
>// Kernel Version:
>// VERSION = 2
>// PATCHLEVEL = 6
>// SUBLEVEL = 0
>// EXTRAVERSION =
>--- 2.6/include/linux/list.h 2003-12-20 09:19:13.000000000 +0100
>+++ build-2.6/include/linux/list.h 2004-01-06 12:26:32.000000000 +0100
>@@ -39,6 +39,16 @@
> } while (0)
>
> /*
>+ * special linked lists that can contain invisible
>+ * members. Members with nonzero is_cookie should be
>+ * skipped.
>+ */
>+struct clist_head {
>+ struct list_head l;
>+ int is_cookie;
>+};
>+
>+/*
> * Insert a new entry between two known consecutive entries.
> *
> * This is only for internal list manipulation where we know
>--- 2.6/include/linux/proc_fs.h 2003-10-25 20:42:42.000000000 +0200
>+++ build-2.6/include/linux/proc_fs.h 2004-01-06 12:48:15.000000000 +0100
>@@ -95,6 +95,7 @@
> struct dentry *proc_pid_unhash(struct task_struct *p);
> void proc_pid_flush(struct dentry *proc_dentry);
> int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir);
>+loff_t proc_pid_llseek(struct file *file, loff_t offset, int origin);
>
> extern struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode,
> struct proc_dir_entry *parent);
>--- 2.6/include/linux/sched.h 2003-12-04 19:44:40.000000000 +0100
>+++ build-2.6/include/linux/sched.h 2004-01-06 12:31:57.000000000 +0100
>@@ -352,7 +352,7 @@
> cpumask_t cpus_allowed;
> unsigned int time_slice, first_time_slice;
>
>- struct list_head tasks;
>+ struct clist_head tasks;
> struct list_head ptrace_children;
> struct list_head ptrace_list;
>
>@@ -719,18 +719,31 @@
>
> #define REMOVE_LINKS(p) do { \
> if (thread_group_leader(p)) \
>- list_del_init(&(p)->tasks); \
>+ list_del_init(&(p)->tasks.l); \
> remove_parent(p); \
> } while (0)
>
> #define SET_LINKS(p) do { \
> if (thread_group_leader(p)) \
>- list_add_tail(&(p)->tasks,&init_task.tasks); \
>+ list_add_tail(&(p)->tasks.l,&init_task.tasks.l); \
> add_parent(p, (p)->parent); \
> } while (0)
>
>-#define next_task(p) list_entry((p)->tasks.next, struct task_struct, tasks)
>-#define prev_task(p) list_entry((p)->tasks.prev, struct task_struct, tasks)
>+static inline task_t * next_task(task_t *p)
>+{
>+ do {
>+ p = list_entry((p)->tasks.l.next, struct task_struct, tasks.l);
>+ } while(p->tasks.is_cookie);
>+ return p;
>+}
>+
>+static inline task_t * prev_task(task_t *p)
>+{
>+ do {
>+ p = list_entry((p)->tasks.l.prev, struct task_struct, tasks.l);
>+ } while(p->tasks.is_cookie);
>+ return p;
>+}
>
> #define for_each_process(p) \
> for (p = &init_task ; (p = next_task(p)) != &init_task ; )
>--- 2.6/include/linux/init_task.h 2003-12-04 19:44:40.000000000 +0100
>+++ build-2.6/include/linux/init_task.h 2004-01-06 12:32:51.000000000 +0100
>@@ -75,7 +75,7 @@
> .active_mm = &init_mm, \
> .run_list = LIST_HEAD_INIT(tsk.run_list), \
> .time_slice = HZ, \
>- .tasks = LIST_HEAD_INIT(tsk.tasks), \
>+ .tasks = { LIST_HEAD_INIT(tsk.tasks.l), 0}, \
> .ptrace_children= LIST_HEAD_INIT(tsk.ptrace_children), \
> .ptrace_list = LIST_HEAD_INIT(tsk.ptrace_list), \
> .real_parent = &tsk, \
>--- 2.6/fs/proc/base.c 2003-12-20 09:19:13.000000000 +0100
>+++ build-2.6/fs/proc/base.c 2004-01-06 14:17:20.000000000 +0100
>@@ -1627,33 +1627,6 @@
> #define PROC_MAXPIDS 20
>
> /*
>- * Get a few tgid's to return for filldir - we need to hold the
>- * tasklist lock while doing this, and we must release it before
>- * we actually do the filldir itself, so we use a temp buffer..
>- */
>-static int get_tgid_list(int index, unsigned int *tgids)
>-{
>- struct task_struct *p;
>- int nr_tgids = 0;
>-
>- index--;
>- read_lock(&tasklist_lock);
>- for_each_process(p) {
>- int tgid = p->pid;
>- if (!pid_alive(p))
>- continue;
>- if (--index >= 0)
>- continue;
>- tgids[nr_tgids] = tgid;
>- nr_tgids++;
>- if (nr_tgids >= PROC_MAXPIDS)
>- break;
>- }
>- read_unlock(&tasklist_lock);
>- return nr_tgids;
>-}
>-
>-/*
> * Get a few tid's to return for filldir - we need to hold the
> * tasklist lock while doing this, and we must release it before
> * we actually do the filldir itself, so we use a temp buffer..
>@@ -1685,35 +1658,134 @@
> return nr_tids;
> }
>
>+loff_t proc_pid_llseek(struct file *file, loff_t offset, int origin)
>+{
>+ long long retval;
>+ struct inode *inode = file->f_dentry->d_inode->i_mapping->host;
>+
>+ down(&inode->i_sem);
>+ switch (origin) {
>+ case 2:
>+ offset += inode->i_size;
>+ break;
>+ case 1:
>+ offset += file->f_pos;
>+ }
>+ retval = -EINVAL;
>+ if (offset>=0 && offset<=inode->i_sb->s_maxbytes) {
>+ if (offset != file->f_pos) {
>+ file->f_pos = offset;
>+ if (file->f_version) {
>+ struct clist_head *cl;
>+ cl = (struct clist_head *)file->f_version;
>+ write_lock_irq(&tasklist_lock);
>+ list_del(&cl->l);
>+ write_unlock_irq(&tasklist_lock);
>+ kfree(cl);
>+ file->f_version = 0;
>+ }
>+ }
>+ retval = offset;
>+ }
>+ up(&inode->i_sem);
>+ return retval;
>+}
>+
> /* for the /proc/ directory itself, after non-process stuff has been done */
> int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
> {
>- unsigned int tgid_array[PROC_MAXPIDS];
>- char buf[PROC_NUMBUF];
> unsigned int nr = filp->f_pos - FIRST_PROCESS_ENTRY;
>- unsigned int nr_tgids, i;
>+ struct clist_head fcl; /* fallback if kmalloc fails - -ENOMEM is not permitted */
>+ struct clist_head *pcl;
>
> if (!nr) {
> ino_t ino = fake_ino(0,PROC_TGID_INO);
> if (filldir(dirent, "self", 4, filp->f_pos, ino, DT_LNK) < 0)
> return 0;
> filp->f_pos++;
>- nr++;
>+ } else {
>+ nr--;
> }
>
>- nr_tgids = get_tgid_list(nr, tgid_array);
>+ if (filp->f_version) {
>+ pcl = (struct clist_head *)filp->f_version;
>+ } else {
>+ task_t *p;
>+ pcl = kmalloc(sizeof(struct clist_head), GFP_KERNEL);
>+ if (pcl)
>+ filp->f_version = (unsigned long)pcl;
>+ else
>+ pcl = &fcl;
>+
>+ INIT_LIST_HEAD(&pcl->l);
>+ pcl->is_cookie = 1;
>+
>+ write_lock_irq(&tasklist_lock);
>+ p = &init_task;
>+ while(nr) {
>+ if (pid_alive(p))
>+ nr--;
>+ p = next_task(p);
>+ if (p == &init_task) {
>+ write_unlock_irq(&tasklist_lock);
>+ if (pcl != &fcl) {
>+ filp->f_version = 0;
>+ kfree(pcl);
>+ }
>+ return 0;
>+ }
>+ }
>+ list_add(&pcl->l, &p->tasks.l);
>+ write_unlock_irq(&tasklist_lock);
>+ }
>
>- for (i = 0; i < nr_tgids; i++) {
>- int tgid = tgid_array[i];
>- ino_t ino = fake_ino(tgid,PROC_TGID_INO);
>- unsigned long j = PROC_NUMBUF;
>+ for (;;) {
>+ unsigned int tgid;
>+ int j;
>+ char buf[PROC_NUMBUF];
>+ ino_t ino;
>+ task_t *p;
>+ struct list_head *walk;
>+
>+ write_lock_irq(&tasklist_lock);
>+ walk = &pcl->l;
>+ do {
>+ walk = walk->next;
>+ p = list_entry(walk, struct task_struct, tasks.l);
>+ } while (p->tasks.is_cookie);
>+ if (p == &init_task) {
>+ write_unlock_irq(&tasklist_lock);
>+ break;
>+ }
>+ tgid = p->pid;
>+ list_del(&pcl->l);
>+ list_add(&pcl->l, &p->tasks.l);
>+ write_unlock_irq(&tasklist_lock);
>+
>+ ino = fake_ino(tgid,PROC_TGID_INO);
>
>+ j = PROC_NUMBUF;
> do buf[--j] = '0' + (tgid % 10); while (tgid/=10);
>
>- if (filldir(dirent, buf+j, PROC_NUMBUF-j, filp->f_pos, ino, DT_DIR) < 0)
>+ if (filldir(dirent, buf+j, PROC_NUMBUF-j, filp->f_pos, ino, DT_DIR) < 0) {
>+ write_lock_irq(&tasklist_lock);
>+ walk = &pcl->l;
>+ do {
>+ walk = walk->prev;
>+ p = list_entry(walk, struct task_struct, tasks.l);
>+ } while (p->tasks.is_cookie);
>+ list_del(&pcl->l);
>+ list_add_tail(&pcl->l, &p->tasks.l);
>+ write_unlock_irq(&tasklist_lock);
> break;
>+ }
> filp->f_pos++;
> }
>+ if (pcl == &fcl) {
>+ write_lock_irq(&tasklist_lock);
>+ list_del(&fcl.l);
>+ write_unlock_irq(&tasklist_lock);
>+ }
> return 0;
> }
>
>--- 2.6/fs/proc/root.c 2003-10-25 20:44:17.000000000 +0200
>+++ build-2.6/fs/proc/root.c 2004-01-06 13:19:31.000000000 +0100
>@@ -127,6 +127,7 @@
> static struct file_operations proc_root_operations = {
> .read = generic_read_dir,
> .readdir = proc_root_readdir,
>+ .llseek = proc_pid_llseek,
> };
>
> /*
>--- 2.6/kernel/pid.c 2003-12-04 19:44:40.000000000 +0100
>+++ build-2.6/kernel/pid.c 2004-01-06 12:29:34.000000000 +0100
>@@ -255,7 +255,7 @@
> attach_pid(thread, PIDTYPE_TGID, thread->tgid);
> attach_pid(thread, PIDTYPE_PGID, leader->__pgrp);
> attach_pid(thread, PIDTYPE_SID, thread->session);
>- list_add_tail(&thread->tasks, &init_task.tasks);
>+ list_add_tail(&thread->tasks.l, &init_task.tasks.l);
>
> attach_pid(leader, PIDTYPE_PID, leader->pid);
> attach_pid(leader, PIDTYPE_TGID, leader->tgid);
>
>
prev parent reply other threads:[~2004-01-06 15:12 UTC|newest]
Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top
2004-01-06 13:37 [RFC,PATCH] cookie based proc_pid_readdir Manfred Spraul
2004-01-06 15:16 ` Mika Penttilä [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=3FFAD147.7050907@kolumbus.fi \
--to=mika.penttila@kolumbus.fi \
--cc=linux-kernel@vger.kernel.org \
--cc=manfred@colorfullife.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.