All of lore.kernel.org
 help / color / mirror / Atom feed
From: Kirill Korotaev <dev@sw.ru>
To: Andrew Morton <akpm@osdl.org>
Cc: Steven Rostedt <rostedt@goodmis.org>,
	linux-kernel@vger.kernel.org, mingo@elte.hu
Subject: Re: [PATCH] protect remove_proc_entry
Date: Sat, 31 Dec 2005 11:53:38 +0300	[thread overview]
Message-ID: <43B64712.3000105@sw.ru> (raw)
In-Reply-To: <20051230154647.5a38227e.akpm@osdl.org>

[-- Attachment #1: Type: text/plain, Size: 1407 bytes --]

Hi Andrew,

I have a full patch for this.
I don't remember the details yet, but lock was not god here, we used 
semaphore. I pointed to this problem long ago when fixed error path in 
proc with moduleget.

This patch protects proc_dir_entry tree with a proc_tree_sem semaphore. 
I suppose lock_kernel() can be removed later after checking that no proc 
handlers require it.
Also this patch remakes de refcounters a bit making it more clear and 
more similar to dentry scheme - this is required to make sure that 
everything works correctly.

Patch is against 2.6.15-rcX and was tested for about a week. Also works 
half a year on 2.6.8 :)

Signed-Off-By: Pavel Emelianov <xemul@sw.ru>
Signed-Off-By: Kirill Korotaev <dev@openvz.org>

Kirill

> Steven Rostedt <rostedt@goodmis.org> wrote:
>> +static DEFINE_SPINLOCK(remove_proc_lock);
>>
> 
> I'll take a closer look at this next week.
> 
> The official way of protecting the contents of a directory from concurrent
> lookup or modification is to take its i_sem.  But procfs is totally weird
> and that approach may well not be practical here.  We'd certainly prefer
> not to rely upon lock_kernel().
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
> 

[-- Attachment #2: diff-ms-proc-locks-20051231 --]
[-- Type: text/plain, Size: 7112 bytes --]

--- ./fs/proc/generic.c.proclk	2005-12-26 13:43:14.000000000 +0300
+++ ./fs/proc/generic.c	2005-12-31 11:48:16.000000000 +0300
@@ -27,6 +27,8 @@ static ssize_t proc_file_write(struct fi
 			       size_t count, loff_t *ppos);
 static loff_t proc_file_lseek(struct file *, loff_t, int);
 
+static DECLARE_RWSEM(proc_tree_sem);
+
 int proc_match(int len, const char *name, struct proc_dir_entry *de)
 {
 	if (de->namelen != len)
@@ -381,6 +383,7 @@ struct dentry *proc_lookup(struct inode 
 	lock_kernel();
 	de = PDE(dir);
 	if (de) {
+		down_read(&proc_tree_sem);
 		for (de = de->subdir; de ; de = de->next) {
 			if (de->namelen != dentry->d_name.len)
 				continue;
@@ -392,6 +395,7 @@ struct dentry *proc_lookup(struct inode 
 				break;
 			}
 		}
+		up_read(&proc_tree_sem);
 	}
 	unlock_kernel();
 
@@ -446,12 +450,13 @@ int proc_readdir(struct file * filp,
 			filp->f_pos++;
 			/* fall through */
 		default:
+			down_read(&proc_tree_sem);
 			de = de->subdir;
 			i -= 2;
 			for (;;) {
 				if (!de) {
 					ret = 1;
-					goto out;
+					goto out_up;
 				}
 				if (!i)
 					break;
@@ -462,14 +467,18 @@ int proc_readdir(struct file * filp,
 			do {
 				if (filldir(dirent, de->name, de->namelen, filp->f_pos,
 					    de->low_ino, de->mode >> 12) < 0)
-					goto out;
+					goto out_up;
 				filp->f_pos++;
 				de = de->next;
 			} while (de);
+			up_read(&proc_tree_sem);
 	}
 	ret = 1;
 out:	unlock_kernel();
 	return ret;	
+out_up:
+	up_read(&proc_tree_sem);
+	goto out;
 }
 
 /*
@@ -517,6 +526,7 @@ static int proc_register(struct proc_dir
 		if (dp->proc_iops == NULL)
 			dp->proc_iops = &proc_file_inode_operations;
 	}
+	de_get(dir);
 	return 0;
 }
 
@@ -576,6 +586,7 @@ static struct proc_dir_entry *proc_creat
 
 	memset(ent, 0, sizeof(struct proc_dir_entry));
 	memcpy(((char *) ent) + sizeof(struct proc_dir_entry), fn, len + 1);
+	atomic_set(&ent->count, 1);
 	ent->name = ((char *) ent) + sizeof(*ent);
 	ent->namelen = len;
 	ent->mode = mode;
@@ -589,6 +600,7 @@ struct proc_dir_entry *proc_symlink(cons
 {
 	struct proc_dir_entry *ent;
 
+	down_write(&proc_tree_sem);
 	ent = proc_create(&parent,name,
 			  (S_IFLNK | S_IRUGO | S_IWUGO | S_IXUGO),1);
 
@@ -606,6 +618,7 @@ struct proc_dir_entry *proc_symlink(cons
 			ent = NULL;
 		}
 	}
+	up_write(&proc_tree_sem);
 	return ent;
 }
 
@@ -614,6 +627,7 @@ struct proc_dir_entry *proc_mkdir_mode(c
 {
 	struct proc_dir_entry *ent;
 
+	down_write(&proc_tree_sem);
 	ent = proc_create(&parent, name, S_IFDIR | mode, 2);
 	if (ent) {
 		ent->proc_fops = &proc_dir_operations;
@@ -624,6 +638,7 @@ struct proc_dir_entry *proc_mkdir_mode(c
 			ent = NULL;
 		}
 	}
+	up_write(&proc_tree_sem);
 	return ent;
 }
 
@@ -633,7 +648,7 @@ struct proc_dir_entry *proc_mkdir(const 
 	return proc_mkdir_mode(name, S_IRUGO | S_IXUGO, parent);
 }
 
-struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode,
+static struct proc_dir_entry *__create_proc_entry(const char *name, mode_t mode,
 					 struct proc_dir_entry *parent)
 {
 	struct proc_dir_entry *ent;
@@ -665,6 +680,17 @@ struct proc_dir_entry *create_proc_entry
 	return ent;
 }
 
+struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode,
+					 struct proc_dir_entry *parent)
+{
+	struct proc_dir_entry *ent;
+
+	down_write(&proc_tree_sem);
+	ent = __create_proc_entry(name, mode, parent);
+	up_write(&proc_tree_sem);
+	return ent;
+}
+
 void free_proc_entry(struct proc_dir_entry *de)
 {
 	unsigned int ino = de->low_ino;
@@ -683,15 +709,13 @@ void free_proc_entry(struct proc_dir_ent
  * Remove a /proc entry and free it if it's not currently in use.
  * If it is in use, we set the 'deleted' flag.
  */
-void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
+static void __remove_proc_entry(const char *name, struct proc_dir_entry *parent)
 {
 	struct proc_dir_entry **p;
 	struct proc_dir_entry *de;
 	const char *fn = name;
 	int len;
 
-	if (!parent && xlate_proc_name(name, &parent, &fn) != 0)
-		goto out;
 	len = strlen(fn);
 	for (p = &parent->subdir; *p; p=&(*p)->next ) {
 		if (!proc_match(len, fn, *p))
@@ -699,20 +723,24 @@ void remove_proc_entry(const char *name,
 		de = *p;
 		*p = de->next;
 		de->next = NULL;
+		de_put(parent);
 		if (S_ISDIR(de->mode))
 			parent->nlink--;
 		proc_kill_inodes(de);
 		de->nlink = 0;
 		WARN_ON(de->subdir);
-		if (!atomic_read(&de->count))
-			free_proc_entry(de);
-		else {
-			de->deleted = 1;
-			printk("remove_proc_entry: %s/%s busy, count=%d\n",
-				parent->name, de->name, atomic_read(&de->count));
-		}
+		de->deleted = 1;
+		de_put(de);
 		break;
 	}
-out:
-	return;
+}
+
+void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
+{
+	const char *fn = name;
+
+	down_write(&proc_tree_sem);
+	if (parent || xlate_proc_name(name, &parent, &fn) == 0)
+		__remove_proc_entry(name, parent);
+	up_write(&proc_tree_sem);
 }
--- ./fs/proc/inode.c.proclk	2005-12-26 13:43:14.000000000 +0300
+++ ./fs/proc/inode.c	2005-12-31 11:48:16.000000000 +0300
@@ -21,34 +21,25 @@
 
 extern void free_proc_entry(struct proc_dir_entry *);
 
-static inline struct proc_dir_entry * de_get(struct proc_dir_entry *de)
-{
-	if (de)
-		atomic_inc(&de->count);
-	return de;
-}
-
 /*
  * Decrements the use count and checks for deferred deletion.
  */
-static void de_put(struct proc_dir_entry *de)
+void de_put(struct proc_dir_entry *de)
 {
 	if (de) {	
-		lock_kernel();		
 		if (!atomic_read(&de->count)) {
 			printk("de_put: entry %s already free!\n", de->name);
-			unlock_kernel();
 			return;
 		}
 
 		if (atomic_dec_and_test(&de->count)) {
-			if (de->deleted) {
-				printk("de_put: deferred delete of %s\n",
-					de->name);
-				free_proc_entry(de);
+			if (!de->deleted) {
+				printk("de_put: entry %s is not removed yet\n",
+						de->name);
+				return;
 			}
-		}		
-		unlock_kernel();
+			free_proc_entry(de);
+		}
 	}
 }
 
--- ./include/linux/proc_fs.h.proclk	2005-12-26 13:43:16.000000000 +0300
+++ ./include/linux/proc_fs.h	2005-12-31 11:48:16.000000000 +0300
@@ -69,6 +69,14 @@ struct proc_dir_entry {
 	void *set;
 };
 
+extern void de_put(struct proc_dir_entry *);
+static inline struct proc_dir_entry *de_get(struct proc_dir_entry *de)
+{
+	if (de)
+		atomic_inc(&de->count);
+	return de;
+}
+
 struct kcore_list {
 	struct kcore_list *next;
 	unsigned long addr;
--- ./kernel/sysctl.c.proclk	2005-12-26 13:43:16.000000000 +0300
+++ ./kernel/sysctl.c	2005-12-31 11:48:37.000000000 +0300
@@ -1396,19 +1396,15 @@ static void unregister_proc_table(ctl_ta
 				continue;
 		}
 
-		/*
-		 * In any case, mark the entry as goner; we'll keep it
-		 * around if it's busy, but we'll know to do nothing with
-		 * its fields.  We are under sysctl_lock here.
-		 */
 		de->data = NULL;
-
-		/* Don't unregister proc entries that are still being used.. */
-		if (atomic_read(&de->count))
-			continue;
-
 		table->de = NULL;
+		/*
+		 * sys_sysctl can't find us, since we are removed from list.
+		 * proc won't touch either, since de->data is NULL.
+		 */
+		spin_unlock(&sysctl_lock);
 		remove_proc_entry(table->procname, root);
+		spin_lock(&sysctl_lock);
 	}
 }
 

  parent reply	other threads:[~2005-12-31  8:54 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2005-12-30 20:04 [Question] race condition with remove_proc_entry Steven Rostedt
2005-12-30 21:28 ` [PATCH] protect remove_proc_entry Steven Rostedt
2005-12-30 21:34   ` Daniel Walker
2005-12-30 21:55     ` Steven Rostedt
2005-12-30 21:55   ` Mitchell Blank Jr
2005-12-30 22:09     ` Steven Rostedt
2005-12-30 22:18       ` Steven Rostedt
2006-01-04  9:21         ` Andrew Morton
2006-01-04 12:18           ` Steven Rostedt
2006-01-05  1:48             ` Mitchell Blank Jr
2006-01-07 11:25       ` Andrew Morton
2005-12-30 22:11     ` Steven Rostedt
2005-12-30 23:46   ` Andrew Morton
2005-12-31  6:58     ` Steven Rostedt
2005-12-31  8:34       ` Arjan van de Ven
2005-12-31  8:53     ` Kirill Korotaev [this message]
2006-01-04  9:36       ` Andrew Morton
2006-01-04 11:27         ` Kirill Korotaev
2006-01-02 13:02     ` Steven Rostedt
2006-01-07 11:36   ` Andrew Morton
2006-01-07 12:04     ` Steven Rostedt
2006-01-09 19:16     ` Steven Rostedt
2006-01-10  0:59       ` Steven Rostedt
2006-01-10  1:05         ` Ingo Molnar
2006-01-10 13:26       ` Steven Rostedt

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=43B64712.3000105@sw.ru \
    --to=dev@sw.ru \
    --cc=akpm@osdl.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=rostedt@goodmis.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.