public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: "Mika Penttilä" <mika.penttila@kolumbus.fi>
To: Manfred Spraul <manfred@colorfullife.com>
Cc: linux-kernel@vger.kernel.org
Subject: Re: [RFC,PATCH] cookie based proc_pid_readdir
Date: Tue, 06 Jan 2004 17:16:23 +0200	[thread overview]
Message-ID: <3FFAD147.7050907@kolumbus.fi> (raw)
In-Reply-To: <3FFABA20.3030304@colorfullife.com>

afaics, clist_head from f_version is leaked if not whole directory read.

--Mika



Manfred Spraul wrote:

> proc_dir_readdir skips over tasks if tasks exit between the individual 
> get_tgid_list calls. The only approach that guarantees that no tasks 
> are skipped is to add a cookie into the task list and restart from 
> that cookie.
>
> Attached is a proof of concept patch that adds such cookies - please 
> comment.
>
> -- 
>    Manfred
>
>------------------------------------------------------------------------
>
>// $Header$
>// Kernel Version:
>//  VERSION = 2
>//  PATCHLEVEL = 6
>//  SUBLEVEL = 0
>//  EXTRAVERSION =
>--- 2.6/include/linux/list.h	2003-12-20 09:19:13.000000000 +0100
>+++ build-2.6/include/linux/list.h	2004-01-06 12:26:32.000000000 +0100
>@@ -39,6 +39,16 @@
> } while (0)
> 
> /*
>+ * special linked lists that can contain invisible
>+ * members. Members with nonzero is_cookie should be
>+ * skipped.
>+ */
>+struct clist_head {
>+	struct list_head l;
>+	int is_cookie;
>+};
>+
>+/*
>  * Insert a new entry between two known consecutive entries. 
>  *
>  * This is only for internal list manipulation where we know
>--- 2.6/include/linux/proc_fs.h	2003-10-25 20:42:42.000000000 +0200
>+++ build-2.6/include/linux/proc_fs.h	2004-01-06 12:48:15.000000000 +0100
>@@ -95,6 +95,7 @@
> struct dentry *proc_pid_unhash(struct task_struct *p);
> void proc_pid_flush(struct dentry *proc_dentry);
> int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir);
>+loff_t proc_pid_llseek(struct file *file, loff_t offset, int origin);
> 
> extern struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode,
> 						struct proc_dir_entry *parent);
>--- 2.6/include/linux/sched.h	2003-12-04 19:44:40.000000000 +0100
>+++ build-2.6/include/linux/sched.h	2004-01-06 12:31:57.000000000 +0100
>@@ -352,7 +352,7 @@
> 	cpumask_t cpus_allowed;
> 	unsigned int time_slice, first_time_slice;
> 
>-	struct list_head tasks;
>+	struct clist_head tasks;
> 	struct list_head ptrace_children;
> 	struct list_head ptrace_list;
> 
>@@ -719,18 +719,31 @@
> 
> #define REMOVE_LINKS(p) do {					\
> 	if (thread_group_leader(p))				\
>-		list_del_init(&(p)->tasks);			\
>+		list_del_init(&(p)->tasks.l);			\
> 	remove_parent(p);					\
> 	} while (0)
> 
> #define SET_LINKS(p) do {					\
> 	if (thread_group_leader(p))				\
>-		list_add_tail(&(p)->tasks,&init_task.tasks);	\
>+		list_add_tail(&(p)->tasks.l,&init_task.tasks.l);	\
> 	add_parent(p, (p)->parent);				\
> 	} while (0)
> 
>-#define next_task(p)	list_entry((p)->tasks.next, struct task_struct, tasks)
>-#define prev_task(p)	list_entry((p)->tasks.prev, struct task_struct, tasks)
>+static inline task_t * next_task(task_t *p)
>+{
>+	do {
>+		p = list_entry((p)->tasks.l.next, struct task_struct, tasks.l);
>+	} while(p->tasks.is_cookie);
>+	return p;
>+}
>+
>+static inline task_t * prev_task(task_t *p)
>+{
>+	do {
>+		p = list_entry((p)->tasks.l.prev, struct task_struct, tasks.l);
>+	} while(p->tasks.is_cookie);
>+	return p;
>+}
> 
> #define for_each_process(p) \
> 	for (p = &init_task ; (p = next_task(p)) != &init_task ; )
>--- 2.6/include/linux/init_task.h	2003-12-04 19:44:40.000000000 +0100
>+++ build-2.6/include/linux/init_task.h	2004-01-06 12:32:51.000000000 +0100
>@@ -75,7 +75,7 @@
> 	.active_mm	= &init_mm,					\
> 	.run_list	= LIST_HEAD_INIT(tsk.run_list),			\
> 	.time_slice	= HZ,						\
>-	.tasks		= LIST_HEAD_INIT(tsk.tasks),			\
>+	.tasks		= { LIST_HEAD_INIT(tsk.tasks.l), 0},		\
> 	.ptrace_children= LIST_HEAD_INIT(tsk.ptrace_children),		\
> 	.ptrace_list	= LIST_HEAD_INIT(tsk.ptrace_list),		\
> 	.real_parent	= &tsk,						\
>--- 2.6/fs/proc/base.c	2003-12-20 09:19:13.000000000 +0100
>+++ build-2.6/fs/proc/base.c	2004-01-06 14:17:20.000000000 +0100
>@@ -1627,33 +1627,6 @@
> #define PROC_MAXPIDS 20
> 
> /*
>- * Get a few tgid's to return for filldir - we need to hold the
>- * tasklist lock while doing this, and we must release it before
>- * we actually do the filldir itself, so we use a temp buffer..
>- */
>-static int get_tgid_list(int index, unsigned int *tgids)
>-{
>-	struct task_struct *p;
>-	int nr_tgids = 0;
>-
>-	index--;
>-	read_lock(&tasklist_lock);
>-	for_each_process(p) {
>-		int tgid = p->pid;
>-		if (!pid_alive(p))
>-			continue;
>-		if (--index >= 0)
>-			continue;
>-		tgids[nr_tgids] = tgid;
>-		nr_tgids++;
>-		if (nr_tgids >= PROC_MAXPIDS)
>-			break;
>-	}
>-	read_unlock(&tasklist_lock);
>-	return nr_tgids;
>-}
>-
>-/*
>  * Get a few tid's to return for filldir - we need to hold the
>  * tasklist lock while doing this, and we must release it before
>  * we actually do the filldir itself, so we use a temp buffer..
>@@ -1685,35 +1658,134 @@
> 	return nr_tids;
> }
> 
>+loff_t proc_pid_llseek(struct file *file, loff_t offset, int origin)
>+{
>+	long long retval;
>+	struct inode *inode = file->f_dentry->d_inode->i_mapping->host;
>+
>+	down(&inode->i_sem);
>+	switch (origin) {
>+		case 2:
>+			offset += inode->i_size;
>+			break;
>+		case 1:
>+			offset += file->f_pos;
>+	}
>+	retval = -EINVAL;
>+	if (offset>=0 && offset<=inode->i_sb->s_maxbytes) {
>+		if (offset != file->f_pos) {
>+			file->f_pos = offset;
>+			if (file->f_version) {
>+				struct clist_head *cl;
>+				cl = (struct clist_head *)file->f_version;
>+				write_lock_irq(&tasklist_lock);
>+				list_del(&cl->l);
>+				write_unlock_irq(&tasklist_lock);
>+				kfree(cl);
>+				file->f_version = 0;
>+			}
>+		}
>+		retval = offset;
>+	}
>+	up(&inode->i_sem);
>+	return retval;
>+}
>+
> /* for the /proc/ directory itself, after non-process stuff has been done */
> int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
> {
>-	unsigned int tgid_array[PROC_MAXPIDS];
>-	char buf[PROC_NUMBUF];
> 	unsigned int nr = filp->f_pos - FIRST_PROCESS_ENTRY;
>-	unsigned int nr_tgids, i;
>+	struct clist_head fcl; /* fallback if kmalloc fails - -ENOMEM is not permitted */
>+	struct clist_head *pcl;
> 
> 	if (!nr) {
> 		ino_t ino = fake_ino(0,PROC_TGID_INO);
> 		if (filldir(dirent, "self", 4, filp->f_pos, ino, DT_LNK) < 0)
> 			return 0;
> 		filp->f_pos++;
>-		nr++;
>+	} else {
>+		nr--;
> 	}
> 
>-	nr_tgids = get_tgid_list(nr, tgid_array);
>+	if (filp->f_version) {
>+		pcl = (struct clist_head *)filp->f_version;
>+	} else {
>+		task_t *p;
>+		pcl = kmalloc(sizeof(struct clist_head), GFP_KERNEL);
>+		if (pcl)
>+			filp->f_version = (unsigned long)pcl;
>+		else
>+			pcl = &fcl;
>+
>+		INIT_LIST_HEAD(&pcl->l);
>+		pcl->is_cookie = 1;
>+		
>+		write_lock_irq(&tasklist_lock);
>+		p = &init_task;
>+		while(nr) {
>+			if (pid_alive(p))
>+				nr--;
>+			p = next_task(p);
>+			if (p == &init_task) {
>+				write_unlock_irq(&tasklist_lock);
>+				if (pcl != &fcl) {
>+					filp->f_version = 0;
>+					kfree(pcl);
>+				}
>+				return 0;
>+			}
>+		}
>+		list_add(&pcl->l, &p->tasks.l);
>+		write_unlock_irq(&tasklist_lock);
>+	}
> 
>-	for (i = 0; i < nr_tgids; i++) {
>-		int tgid = tgid_array[i];
>-		ino_t ino = fake_ino(tgid,PROC_TGID_INO);
>-		unsigned long j = PROC_NUMBUF;
>+	for (;;) {
>+		unsigned int tgid;
>+		int j;
>+		char buf[PROC_NUMBUF];
>+		ino_t ino;
>+		task_t *p;
>+		struct list_head *walk;
>+
>+		write_lock_irq(&tasklist_lock);
>+		walk = &pcl->l;
>+		do {
>+			walk = walk->next;
>+			p = list_entry(walk, struct task_struct, tasks.l);
>+		} while (p->tasks.is_cookie);
>+		if (p == &init_task) {
>+			write_unlock_irq(&tasklist_lock);
>+			break;
>+		}
>+		tgid = p->pid;
>+		list_del(&pcl->l);
>+		list_add(&pcl->l, &p->tasks.l);
>+		write_unlock_irq(&tasklist_lock);
>+
>+		ino = fake_ino(tgid,PROC_TGID_INO);
> 
>+		j = PROC_NUMBUF;
> 		do buf[--j] = '0' + (tgid % 10); while (tgid/=10);
> 
>-		if (filldir(dirent, buf+j, PROC_NUMBUF-j, filp->f_pos, ino, DT_DIR) < 0)
>+		if (filldir(dirent, buf+j, PROC_NUMBUF-j, filp->f_pos, ino, DT_DIR) < 0) {
>+			write_lock_irq(&tasklist_lock);
>+			walk = &pcl->l;
>+			do {
>+				walk = walk->prev;
>+				p = list_entry(walk, struct task_struct, tasks.l);
>+			} while (p->tasks.is_cookie);
>+			list_del(&pcl->l);
>+			list_add_tail(&pcl->l, &p->tasks.l);
>+			write_unlock_irq(&tasklist_lock);
> 			break;
>+		}
> 		filp->f_pos++;
> 	}
>+	if (pcl == &fcl) {
>+		write_lock_irq(&tasklist_lock);
>+		list_del(&fcl.l);
>+		write_unlock_irq(&tasklist_lock);
>+	}
> 	return 0;
> }
> 
>--- 2.6/fs/proc/root.c	2003-10-25 20:44:17.000000000 +0200
>+++ build-2.6/fs/proc/root.c	2004-01-06 13:19:31.000000000 +0100
>@@ -127,6 +127,7 @@
> static struct file_operations proc_root_operations = {
> 	.read		 = generic_read_dir,
> 	.readdir	 = proc_root_readdir,
>+	.llseek		 = proc_pid_llseek,
> };
> 
> /*
>--- 2.6/kernel/pid.c	2003-12-04 19:44:40.000000000 +0100
>+++ build-2.6/kernel/pid.c	2004-01-06 12:29:34.000000000 +0100
>@@ -255,7 +255,7 @@
> 	attach_pid(thread, PIDTYPE_TGID, thread->tgid);
> 	attach_pid(thread, PIDTYPE_PGID, leader->__pgrp);
> 	attach_pid(thread, PIDTYPE_SID, thread->session);
>-	list_add_tail(&thread->tasks, &init_task.tasks);
>+	list_add_tail(&thread->tasks.l, &init_task.tasks.l);
> 
> 	attach_pid(leader, PIDTYPE_PID, leader->pid);
> 	attach_pid(leader, PIDTYPE_TGID, leader->tgid);
>  
>


      reply	other threads:[~2004-01-06 15:12 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2004-01-06 13:37 [RFC,PATCH] cookie based proc_pid_readdir Manfred Spraul
2004-01-06 15:16 ` Mika Penttilä [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=3FFAD147.7050907@kolumbus.fi \
    --to=mika.penttila@kolumbus.fi \
    --cc=linux-kernel@vger.kernel.org \
    --cc=manfred@colorfullife.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox