All of lore.kernel.org
 help / color / mirror / Atom feed
From: Alexey Dobriyan <adobriyan@openvz.org>
To: linux-kernel@vger.kernel.org
Cc: devel@openvz.org
Subject: Racy /proc creations interfaces
Date: Wed, 27 Dec 2006 16:42:23 +0300	[thread overview]
Message-ID: <20061227134223.GA6044@localhost.sw.ru> (raw)

1. Not setting ->owner for your favourite /proc file is oopsable.

	$ while true; do cat /proc/fs/xfs/stat 1>/dev/null 2>/dev/null; done
	# while true; do modprobe xfs; rmmod xfs; done

BUG: unable to handle kernel paging request at virtual address fc281049
 printing eip:
fc281049
*pde = 37ca7067
Oops: 0000 [#1]
Modules linked in: af_packet ohci_hcd e1000 ehci_hcd uhci_hcd usbcore
CPU:    0
EIP:    0060:[<fc281049>]    Not tainted VLI
EFLAGS: 00010282   (2.6.19 #2)
EIP is at 0xfc281049
eax: f0705000   ebx: 00000000   ecx: 00000105   edx: f1e5ff68
esi: f0705000   edi: fc281049   ebp: 00000c00   esp: f1e5ff4c
ds: 007b   es: 007b   ss: 0068
Process cat (pid: 32588, ti=f1e5e000 task=f4377030 task.ti=f1e5e000)
Stack: c0177d9c 00000c00 f1e5ff6c 00000000 00001000 08051000 f7ccc240 00000000 
       00000000 edbe82c0 00001000 08051000 f1e5ffa4 c0149f05 f1e5ffa4 c0177be5 
       edbe82c0 fffffff7 08051000 f1e5e000 c014a26d f1e5ffa4 00000105 00000000 
Call Trace:
 [<c0177d9c>] proc_file_read+0x1b7/0x262
 [<c0149f05>] vfs_read+0x85/0xf3
 [<c0177be5>] proc_file_read+0x0/0x262
 [<c014a26d>] sys_read+0x41/0x6a
 [<c0102be1>] sysenter_past_esp+0x56/0x79
 =======================
Code:  Bad EIP value.
EIP: [<fc281049>] 0xfc281049 SS:ESP 0068:f1e5ff4c

2. After adding ->owner for your favourite /proc file it's still looks racy

	pde = create_proc_entry("foo", 0644, NULL);
	/* so what? pde->owner is NULL here */
	if (pde)
		pde->owner = THIS_MODULE;

3. It's more: setting both ->read_proc and ->data after proc entry creation
   should be oopsable too, if ->read_proc operates on data in a non-trivial
   way (or ->write_proc).

sound/oss/trident.c:
  4446				res = create_proc_entry("ALi5451", 0, NULL);
  4447				if (res) {
  4448					res->write_proc = ali_write_proc;
  					/* ->data is NULL here. */
  4449					res->data = card;
  4450				}

   ali_write_proc() does spin_lock_irqsave on private card lock which is
   believed to be valid.

4. Proposed semantics: PDE should be in maximally working state before
   gluing into main /proc tree (proc_register()).

   	struct proc_entry_raw foo_pe_raw = {
		.owner = THIS_MODULE,
		.name = "foo",
		.mode = 0644,
		.read_proc = foo_read_proc,
		.data = foo_data,
		.parent = foo_parent,
	};

	pde = create_proc_entry(&foo_pe_raw);
	if (!pde)
		return -ENOMEM;

   where "struct proc_entry_raw" is cut down version of "struct proc_dir_entry"
   where new create_proc_entry() would do
   
   	proc_create() or kmalloc()
	fill in fields from passed "struct proc_entry_raw"
	proc_register()

Comments?

Below is 0-order approximation to what I mean (not tested at all):

--- a/fs/afs/proc.c
+++ b/fs/afs/proc.c
@@ -146,17 +146,23 @@ int afs_proc_init(void)
 		goto error;
 	proc_afs->owner = THIS_MODULE;
 
-	p = create_proc_entry("cells", 0, proc_afs);
+	p = __create_proc_entry(&(struct proc_entry_raw){
+			.name = "cells",
+			.parent = proc_afs,
+			.proc_fops = &afs_proc_cells_fops,
+			.owner = THIS_MODULE,
+		});
 	if (!p)
 		goto error_proc;
-	p->proc_fops = &afs_proc_cells_fops;
-	p->owner = THIS_MODULE;
 
-	p = create_proc_entry("rootcell", 0, proc_afs);
+	p = __create_proc_entry(&(struct proc_entry_raw){
+			.name = "rootcell",
+			.parent = proc_afs,
+			.proc_fops = &afs_proc_rootcell_fops,
+			.owner = THIS_MODULE,
+		});
 	if (!p)
 		goto error_cells;
-	p->proc_fops = &afs_proc_rootcell_fops;
-	p->owner = THIS_MODULE;
 
 	_leave(" = 0");
 	return 0;
@@ -429,26 +435,35 @@ int afs_proc_cell_setup(struct afs_cell 
 	if (!cell->proc_dir)
 		return -ENOMEM;
 
-	p = create_proc_entry("servers", 0, cell->proc_dir);
+	p = __create_proc_entry(&(struct proc_entry_raw){
+			.name = "servers",
+			.parent = cell->proc_dir,
+			.proc_fops = &afs_proc_cell_servers_fops,
+			.owner = THIS_MODULE,
+			.data = cell,
+		});
 	if (!p)
 		goto error_proc;
-	p->proc_fops = &afs_proc_cell_servers_fops;
-	p->owner = THIS_MODULE;
-	p->data = cell;
 
-	p = create_proc_entry("vlservers", 0, cell->proc_dir);
+	p = __create_proc_entry(&(struct proc_entry_raw){
+			.name = "vlservers",
+			.parent = cell->proc_dir,
+			.proc_fops = &afs_proc_cell_vlservers_fops,
+			.owner = THIS_MODULE,
+			.data = cell,
+		});
 	if (!p)
 		goto error_servers;
-	p->proc_fops = &afs_proc_cell_vlservers_fops;
-	p->owner = THIS_MODULE;
-	p->data = cell;
 
-	p = create_proc_entry("volumes", 0, cell->proc_dir);
+	p = __create_proc_entry(&(struct proc_entry_raw){
+			.name = "volumes",
+			.parent = cell->proc_dir,
+			.proc_fops = &afs_proc_cell_volumes_fops,
+			.owner = THIS_MODULE,
+			.data = cell,
+		});
 	if (!p)
 		goto error_vlservers;
-	p->proc_fops = &afs_proc_cell_volumes_fops;
-	p->owner = THIS_MODULE;
-	p->data = cell;
 
 	_leave(" = 0");
 	return 0;
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index 10fff94..faf0470 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -1975,12 +1975,12 @@ #define JBD_PROC_NAME "sys/fs/jbd-debug"
 
 static void __init create_jbd_proc_entry(void)
 {
-	proc_jbd_debug = create_proc_entry(JBD_PROC_NAME, 0644, NULL);
-	if (proc_jbd_debug) {
-		/* Why is this so hard? */
-		proc_jbd_debug->read_proc = read_jbd_debug;
-		proc_jbd_debug->write_proc = write_jbd_debug;
-	}
+	__create_proc_entry(&(struct proc_entry_raw){
+			.name = JBD_PROC_NAME,
+			.mode = 0644,
+			.read_proc = read_jbd_debug,
+			.write_proc = write_jbd_debug,
+		});
 }
 
 static void __exit remove_jbd_proc_entry(void)
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 44fc32b..32de473 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1986,12 +1986,12 @@ #define JBD_PROC_NAME "sys/fs/jbd2-debug
 
 static void __init create_jbd_proc_entry(void)
 {
-	proc_jbd_debug = create_proc_entry(JBD_PROC_NAME, 0644, NULL);
-	if (proc_jbd_debug) {
-		/* Why is this so hard? */
-		proc_jbd_debug->read_proc = read_jbd_debug;
-		proc_jbd_debug->write_proc = write_jbd_debug;
-	}
+	__create_proc_entry(&(struct proc_entry_raw){
+			.name = JBD_PROC_NAME,
+			.mode = 0644,
+			.read_proc = read_jbd_debug,
+			.write_proc = write_jbd_debug,
+		});
 }
 
 static void __exit jbd2_remove_jbd_proc_entry(void)
diff --git a/fs/jffs/jffs_proc.c b/fs/jffs/jffs_proc.c
index 9bdd99a..ec00942 100644
--- a/fs/jffs/jffs_proc.c
+++ b/fs/jffs/jffs_proc.c
@@ -85,12 +85,12 @@ int jffs_register_jffs_proc_dir(int mtd,
 	if (!part_root)
 		goto out1;
 
-	/* Create entry for 'info' file */
-	part_info = create_proc_entry ("info", 0, part_root);
-	if (!part_info)
+	if (!__create_proc_entry(&(struct proc_entry_raw){
+			.name = "info",
+			.parent = part_root,
+			.read_proc = jffs_proc_info_read,
+			.data = (void *)c,}))
 		goto out2;
-	part_info->read_proc = jffs_proc_info_read;
-	part_info->data = (void *) c;
 
 	/* Create entry for 'layout' file */
 	part_layout = create_proc_entry ("layout", 0, part_root);
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 23ab145..44ca278 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -1406,20 +1406,27 @@ int __init nfs_fs_proc_init(void)
 	proc_fs_nfs->owner = THIS_MODULE;
 
 	/* a file of servers with which we're dealing */
-	p = create_proc_entry("servers", S_IFREG|S_IRUGO, proc_fs_nfs);
+	p = __create_proc_entry(&(struct proc_entry_raw){
+			.name = "servers",
+			.mode = S_IFREG|S_IRUGO,
+			.parent = proc_fs_nfs,
+			.proc_fops = &nfs_server_list_fops,
+			.owner = THIS_MODULE,
+		});
 	if (!p)
 		goto error_1;
 
-	p->proc_fops = &nfs_server_list_fops;
-	p->owner = THIS_MODULE;
-
 	/* a file of volumes that we have mounted */
-	p = create_proc_entry("volumes", S_IFREG|S_IRUGO, proc_fs_nfs);
+	p = __create_proc_entry(&(struct proc_entry_raw){
+			.name = "volumes",
+			.mode = S_IFREG|S_IRUGO,
+			.parent = proc_fs_nfs,
+			.proc_fops = &nfs_volume_list_fops,
+			.owner = THIS_MODULE,
+		});
 	if (!p)
 		goto error_2;
 
-	p->proc_fops = &nfs_volume_list_fops;
-	p->owner = THIS_MODULE;
 	return 0;
 
 error_2:
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 853cb87..0bce166 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -657,6 +657,47 @@ struct proc_dir_entry *proc_mkdir(const 
 	return proc_mkdir_mode(name, S_IRUGO | S_IXUGO, parent);
 }
 
+struct proc_dir_entry *__create_proc_entry(struct proc_entry_raw *raw)
+{
+	struct proc_dir_entry *ent;
+	mode_t mode = raw->mode;
+	nlink_t nlink;
+
+	if (S_ISDIR(mode)) {
+		if ((mode & S_IALLUGO) == 0)
+			mode |= S_IRUGO | S_IXUGO;
+		nlink = 2;
+	} else {
+		if ((mode & S_IFMT) == 0)
+			mode |= S_IFREG;
+		if ((mode & S_IALLUGO) == 0)
+			mode |= S_IRUGO;
+		nlink = 1;
+	}
+
+	ent = proc_create(&raw->parent, raw->name, mode, nlink);
+	if (ent) {
+		if (S_ISDIR(mode)) {
+			ent->proc_fops = &proc_dir_operations;
+			ent->proc_iops = &proc_dir_inode_operations;
+		}
+
+		ent->size = raw->size;
+		if (raw->proc_fops)
+			ent->proc_fops = raw->proc_fops;
+		ent->data = raw->data;
+		ent->read_proc = raw->read_proc;
+		ent->write_proc = raw->write_proc;
+		ent->owner = raw->owner;
+
+		if (proc_register(raw->parent, ent) < 0) {
+			kfree(ent);
+			ent = NULL;
+		}
+	}
+	return ent;
+}
+
 struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode,
 					 struct proc_dir_entry *parent)
 {
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 64d242b..f7d3fda 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -166,6 +166,7 @@ struct proc_dir_entry proc_root = {
 EXPORT_SYMBOL(proc_symlink);
 EXPORT_SYMBOL(proc_mkdir);
 EXPORT_SYMBOL(create_proc_entry);
+EXPORT_SYMBOL(__create_proc_entry);
 EXPORT_SYMBOL(remove_proc_entry);
 EXPORT_SYMBOL(proc_root);
 EXPORT_SYMBOL(proc_root_fs);
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 87dec8f..0523180 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -68,6 +68,18 @@ struct proc_dir_entry {
 	void *set;
 };
 
+struct proc_entry_raw {
+	const char *name;
+	mode_t mode;
+	loff_t size;
+	const struct file_operations *proc_fops;
+	struct module *owner;
+	struct proc_dir_entry *parent;
+	void *data;
+	read_proc_t *read_proc;
+	write_proc_t *write_proc;
+};
+
 struct kcore_list {
 	struct kcore_list *next;
 	unsigned long addr;
@@ -107,6 +119,7 @@ char *task_mem(struct mm_struct *, char 
 
 extern struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode,
 						struct proc_dir_entry *parent);
+struct proc_dir_entry *__create_proc_entry(struct proc_entry_raw *raw);
 extern void remove_proc_entry(const char *name, struct proc_dir_entry *parent);
 
 extern struct vfsmount *proc_mnt;
@@ -214,6 +227,11 @@ static inline void proc_flush_task(struc
 static inline struct proc_dir_entry *create_proc_entry(const char *name,
 	mode_t mode, struct proc_dir_entry *parent) { return NULL; }
 
+static inline struct proc_dir_entry *__create_proc_entry(struct proc_entry_raw *raw)
+{
+	return NULL;
+}
+
 #define remove_proc_entry(name, parent) do {} while (0)
 
 static inline struct proc_dir_entry *proc_symlink(const char *name,
diff --git a/kernel/configs.c b/kernel/configs.c
index 8fa1fb2..1694608 100644
--- a/kernel/configs.c
+++ b/kernel/configs.c
@@ -85,18 +85,16 @@ static const struct file_operations ikco
 
 static int __init ikconfig_init(void)
 {
-	struct proc_dir_entry *entry;
+	struct proc_entry_raw pe_raw = {
+		.name		= "config.gz",
+		.mode		= S_IFREG | S_IRUGO,
+		.parent		= &proc_root,
+		.proc_fops	= &ikconfig_file_ops,
+		.size		= kernel_config_data_size,
+	};
 
 	/* create the current config file */
-	entry = create_proc_entry("config.gz", S_IFREG | S_IRUGO,
-				  &proc_root);
-	if (!entry)
-		return -ENOMEM;
-
-	entry->proc_fops = &ikconfig_file_ops;
-	entry->size = kernel_config_data_size;
-
-	return 0;
+	return __create_proc_entry(&pe_raw) ? 0 : -ENOMEM;
 }
 
 /***************************************************/
diff --git a/kernel/dma.c b/kernel/dma.c
index 937b13c..51726c0 100644
--- a/kernel/dma.c
+++ b/kernel/dma.c
@@ -149,12 +149,11 @@ static const struct file_operations proc
 
 static int __init proc_dma_init(void)
 {
-	struct proc_dir_entry *e;
-
-	e = create_proc_entry("dma", 0, NULL);
-	if (e)
-		e->proc_fops = &proc_dma_operations;
-
+	struct proc_entry_raw pe_raw = {
+		.name	   = "dma",
+		.proc_fops = &proc_dma_operations,
+	};
+	__create_proc_entry(&pe_raw);
 	return 0;
 }
 
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index 61f5c71..aecabf5 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -130,17 +130,17 @@ void register_irq_proc(unsigned int irq)
 
 #ifdef CONFIG_SMP
 	{
-		struct proc_dir_entry *entry;
+		struct proc_entry_raw pe_raw = {
+			.name = "smp_affinity",
+			.mode = 0600,
+			.parent = irq_desc[irq].dir,
+			.data = (void *)(long)irq,
+			.read_proc = irq_affinity_read_proc,
+			.write_proc = irq_affinity_write_proc,
+		};
 
 		/* create /proc/irq/<irq>/smp_affinity */
-		entry = create_proc_entry("smp_affinity", 0600, irq_desc[irq].dir);
-
-		if (entry) {
-			entry->nlink = 1;
-			entry->data = (void *)(long)irq;
-			entry->read_proc = irq_affinity_read_proc;
-			entry->write_proc = irq_affinity_write_proc;
-		}
+		__create_proc_entry(&pe_raw);
 	}
 #endif
 }
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 6f294ff..3f26794 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -442,11 +442,12 @@ static const struct file_operations kall
 
 static int __init kallsyms_init(void)
 {
-	struct proc_dir_entry *entry;
-
-	entry = create_proc_entry("kallsyms", 0444, NULL);
-	if (entry)
-		entry->proc_fops = &kallsyms_operations;
+	struct proc_entry_raw pe_raw = {
+		.name	   = "kallsyms",
+		.mode	   = 0444,
+		.proc_fops = &kallsyms_operations,
+	};
+	__create_proc_entry(&pe_raw);
 	return 0;
 }
 __initcall(kallsyms_init);
diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c
index b554b40..3d297d4 100644
--- a/kernel/lockdep_proc.c
+++ b/kernel/lockdep_proc.c
@@ -328,16 +328,19 @@ static const struct file_operations proc
 
 static int __init lockdep_proc_init(void)
 {
-	struct proc_dir_entry *entry;
-
-	entry = create_proc_entry("lockdep", S_IRUSR, NULL);
-	if (entry)
-		entry->proc_fops = &proc_lockdep_operations;
-
-	entry = create_proc_entry("lockdep_stats", S_IRUSR, NULL);
-	if (entry)
-		entry->proc_fops = &proc_lockdep_stats_operations;
-
+	struct proc_entry_raw pe_raw_lockdep = {
+		.name = "lockdep",
+		.mode = S_IRUSR,
+		.proc_fops = &proc_lockdep_operations,
+	};
+	struct proc_entry_raw pe_raw_lockdep_stats = {
+		.name = "lockdep_stats",
+		.mode = S_IRUSR,
+		.proc_fops = &proc_lockdep_stats_operations,
+	};
+
+	__create_proc_entry(&pe_raw_lockdep);
+	__create_proc_entry(&pe_raw_lockdep_stats);
 	return 0;
 }
 
diff --git a/kernel/profile.c b/kernel/profile.c
index fb5e03d..19b73a0 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -429,15 +429,16 @@ static int prof_cpu_mask_write_proc (str
 
 void create_prof_cpu_mask(struct proc_dir_entry *root_irq_dir)
 {
-	struct proc_dir_entry *entry;
-
+	struct proc_entry_raw pe_raw = {
+		.name = "prof_cpu_mask",
+		.mode = 0600,
+		.parent = root_irq_dir,
+		.data = &prof_cpu_mask,
+		.read_proc = prof_cpu_mask_read_proc,
+		.write_proc = prof_cpu_mask_write_proc,
+	};
 	/* create /proc/irq/prof_cpu_mask */
-	if (!(entry = create_proc_entry("prof_cpu_mask", 0600, root_irq_dir)))
-		return;
-	entry->nlink = 1;
-	entry->data = (void *)&prof_cpu_mask;
-	entry->read_proc = prof_cpu_mask_read_proc;
-	entry->write_proc = prof_cpu_mask_write_proc;
+	__create_proc_entry(&pe_raw);
 }
 
 /*
@@ -561,16 +562,19 @@ #endif
 
 static int __init create_proc_profile(void)
 {
-	struct proc_dir_entry *entry;
+	struct proc_entry_raw pe_raw = {
+		.name	   = "profile",
+		.mode	   = S_IWUSR | S_IRUGO,
+		.proc_fops = &proc_profile_operations,
+		.size	   = (1 + prof_len) * sizeof(atomic_t),
+	};
 
 	if (!prof_on)
 		return 0;
 	if (create_hash_tables())
 		return -1;
-	if (!(entry = create_proc_entry("profile", S_IWUSR | S_IRUGO, NULL)))
+	if (!__create_proc_entry(&pe_raw))
 		return 0;
-	entry->proc_fops = &proc_profile_operations;
-	entry->size = (1+prof_len) * sizeof(atomic_t);
 	hotcpu_notifier(profile_cpu_callback, 0);
 	return 0;
 }
diff --git a/kernel/resource.c b/kernel/resource.c
index 7b9a497..7caf831 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -131,14 +131,17 @@ static const struct file_operations proc
 
 static int __init ioresources_init(void)
 {
-	struct proc_dir_entry *entry;
-
-	entry = create_proc_entry("ioports", 0, NULL);
-	if (entry)
-		entry->proc_fops = &proc_ioports_operations;
-	entry = create_proc_entry("iomem", 0, NULL);
-	if (entry)
-		entry->proc_fops = &proc_iomem_operations;
+	struct proc_entry_raw pe_raw_ioports = {
+		.name = "ioports",
+		.proc_fops = &proc_ioports_operations,
+	};
+	struct proc_entry_raw pe_raw_iomem = {
+		.name = "iomem",
+		.proc_fops = &proc_iomem_operations,
+	};
+
+	__create_proc_entry(&pe_raw_ioports);
+	__create_proc_entry(&pe_raw_iomem);
 	return 0;
 }
 __initcall(ioresources_init);


             reply	other threads:[~2006-12-27 13:35 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2006-12-27 13:42 Alexey Dobriyan [this message]
2006-12-27 13:56 ` Racy /proc creations interfaces Al Viro
2006-12-28  8:21   ` Alexey Dobriyan
2007-01-15 15:39     ` [PATCH] Fix rmmod/read/write races in /proc entries Alexey Dobriyan
2007-01-23 20:58       ` Andrew Morton
2007-01-24 15:22         ` Alexey Dobriyan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20061227134223.GA6044@localhost.sw.ru \
    --to=adobriyan@openvz.org \
    --cc=devel@openvz.org \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.