public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] sysfs: Don't block indefinitely for unmapped files.
@ 2009-02-25  8:16 Eric W. Biederman
  2009-02-25  9:16 ` Tejun Heo
  2009-02-26 21:13 ` Andrew Morton
  0 siblings, 2 replies; 3+ messages in thread
From: Eric W. Biederman @ 2009-02-25  8:16 UTC (permalink / raw)
  To: Greg Kroah-Hartman
  Cc: linux-kernel, Tejun Heo, linux-pci, Jesse Barnes, Andrew Morton


This patch modifies sysfs bin files so that we can remove
the bin file while they are still mapped.  When the kobject
is removed we unmap the bin file and arrange for future
accesses to the mapping to receive SIGBUS.

Implementing this prevents a nasty DOS when pci devices are
hot plugged and unplugged.  Where if any of their resources
were mmaped the kernel could not free up their pci resources
or release their pci data structures.

Signed-off-by: Eric W. Biederman <ebiederm@aristanetworks.com>
---
 fs/sysfs/bin.c   |  183 ++++++++++++++++++++++++++++++++++++++++++++++++++----
 fs/sysfs/dir.c   |    1 +
 fs/sysfs/sysfs.h |    2 +
 3 files changed, 174 insertions(+), 12 deletions(-)

diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c
index f2c478c..f02a3c7 100644
--- a/fs/sysfs/bin.c
+++ b/fs/sysfs/bin.c
@@ -21,15 +21,28 @@
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/mutex.h>
+#include <linux/mm.h>
 
 #include <asm/uaccess.h>
 
 #include "sysfs.h"
 
+/*
+ * There's one bin_buffer for each open file.
+ *
+ * filp->private_data points to bin_buffer and
+ * sysfs_dirent->s_bin_attr.buffers points to a the bin_buffer s
+ * sysfs_dirent->s_bin_attr.buffers is protected by sysfs_bin_lock
+ */
+static DEFINE_MUTEX(sysfs_bin_lock);
+
 struct bin_buffer {
-	struct mutex	mutex;
-	void		*buffer;
-	int		mmapped;
+	struct mutex			mutex;
+	void				*buffer;
+	int				mmapped;
+	struct vm_operations_struct 	*vm_ops;
+	struct file			*file;
+	struct hlist_node		list;
 };
 
 static int
@@ -168,29 +181,148 @@ out_free:
 	return count;
 }
 
+static void bin_vma_open(struct vm_area_struct *vma)
+{
+	struct file *file = vma->vm_file;
+	struct bin_buffer *bb = file->private_data;
+	struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
+
+	if (!bb->vm_ops || !bb->vm_ops->open)
+		return;
+
+	if (!sysfs_get_active_two(attr_sd))
+		return;
+
+	bb->vm_ops->open(vma);
+
+	sysfs_put_active_two(attr_sd);
+}
+
+static void bin_vma_close(struct vm_area_struct *vma)
+{
+	struct file *file = vma->vm_file;
+	struct bin_buffer *bb = file->private_data;
+	struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
+
+	if (!bb->vm_ops || !bb->vm_ops->close)
+		return;
+
+	if (!sysfs_get_active_two(attr_sd))
+		return;
+
+	bb->vm_ops->close(vma);
+
+	sysfs_put_active_two(attr_sd);
+}
+
+static int bin_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct file *file = vma->vm_file;
+	struct bin_buffer *bb = file->private_data;
+	struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
+	int ret;
+
+	if (!bb->vm_ops || !bb->vm_ops->fault)
+		return VM_FAULT_SIGBUS;
+
+	if (!sysfs_get_active_two(attr_sd))
+		return VM_FAULT_SIGBUS;
+
+	ret = bb->vm_ops->fault(vma, vmf);
+
+	sysfs_put_active_two(attr_sd);
+	return ret;
+}
+
+static int bin_page_mkwrite(struct vm_area_struct *vma, struct page *page)
+{
+	struct file *file = vma->vm_file;
+	struct bin_buffer *bb = file->private_data;
+	struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
+	int ret;
+
+	if (!bb->vm_ops || !bb->vm_ops->page_mkwrite)
+		return -EINVAL;
+
+	if (!sysfs_get_active_two(attr_sd))
+		return -EINVAL;
+
+	ret = bb->vm_ops->page_mkwrite(vma, page);
+
+	sysfs_put_active_two(attr_sd);
+	return ret;
+}
+
+static int bin_access(struct vm_area_struct *vma, unsigned long addr,
+		  void *buf, int len, int write)
+{
+	struct file *file = vma->vm_file;
+	struct bin_buffer *bb = file->private_data;
+	struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
+	int ret;
+
+	if (!bb->vm_ops || !bb->vm_ops->access)
+		return -EINVAL;
+
+	if (!sysfs_get_active_two(attr_sd))
+		return -EINVAL;
+
+	ret = bb->vm_ops->access(vma, addr, buf, len, write);
+
+	sysfs_put_active_two(attr_sd);
+	return ret;
+}
+
+static struct vm_operations_struct bin_vm_ops = {
+	.open		= bin_vma_open,
+	.close		= bin_vma_close,
+	.fault		= bin_fault,
+	.page_mkwrite	= bin_page_mkwrite,
+	.access		= bin_access,
+};
+
 static int mmap(struct file *file, struct vm_area_struct *vma)
 {
 	struct bin_buffer *bb = file->private_data;
 	struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
 	struct bin_attribute *attr = attr_sd->s_bin_attr.bin_attr;
 	struct kobject *kobj = attr_sd->s_parent->s_dir.kobj;
+	struct vm_operations_struct *vm_ops;
 	int rc;
 
 	mutex_lock(&bb->mutex);
 
 	/* need attr_sd for attr, its parent for kobj */
+	rc = -ENODEV;
 	if (!sysfs_get_active_two(attr_sd))
-		return -ENODEV;
+		goto out_unlock;
 
 	rc = -EINVAL;
-	if (attr->mmap)
-		rc = attr->mmap(kobj, attr, vma);
+	if (!attr->mmap)
+		goto out_put;
 
-	if (rc == 0 && !bb->mmapped)
-		bb->mmapped = 1;
-	else
-		sysfs_put_active_two(attr_sd);
+	rc = attr->mmap(kobj, attr, vma);
+	vm_ops = vma->vm_ops;
+	vma->vm_ops = &bin_vm_ops;
+	if (rc)
+		goto out_put;
+
+	rc = -EINVAL;
+	if (bb->mmapped && bb->vm_ops != vma->vm_ops)
+		goto out_put;
 
+#ifdef CONFIG_NUMA
+	rc = -EINVAL;
+	if (vm_ops && ((vm_ops->set_policy || vm_ops->get_policy || vm_ops->migrate)))
+		goto out_put;
+#endif
+
+	rc = 0;
+	bb->mmapped = 1;
+	bb->vm_ops = vm_ops;
+out_put:
+	sysfs_put_active_two(attr_sd);
+out_unlock:
 	mutex_unlock(&bb->mutex);
 
 	return rc;
@@ -223,8 +355,13 @@ static int open(struct inode * inode, struct file * file)
 		goto err_out;
 
 	mutex_init(&bb->mutex);
+	bb->file = file;
 	file->private_data = bb;
 
+	mutex_lock(&sysfs_bin_lock);
+	hlist_add_head(&bb->list, &attr_sd->s_bin_attr.buffers);
+	mutex_unlock(&sysfs_bin_lock);
+
 	/* open succeeded, put active references */
 	sysfs_put_active_two(attr_sd);
 	return 0;
@@ -240,8 +377,10 @@ static int release(struct inode * inode, struct file * file)
 	struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
 	struct bin_buffer *bb = file->private_data;
 
-	if (bb->mmapped)
-		sysfs_put_active_two(attr_sd);
+	mutex_lock(&sysfs_bin_lock);
+	hlist_del(&bb->list);
+	mutex_unlock(&sysfs_bin_lock);
+
 	kfree(bb->buffer);
 	kfree(bb);
 	return 0;
@@ -256,6 +395,26 @@ const struct file_operations bin_fops = {
 	.release	= release,
 };
 
+
+void unmap_bin_file(struct sysfs_dirent *attr_sd)
+{
+	struct bin_buffer *bb;
+	struct hlist_node *tmp;
+
+	if (sysfs_type(attr_sd) != SYSFS_KOBJ_BIN_ATTR)
+		return;
+
+	mutex_lock(&sysfs_bin_lock);
+
+	hlist_for_each_entry(bb, tmp, &attr_sd->s_bin_attr.buffers, list) {
+		struct inode *inode = bb->file->f_path.dentry->d_inode;
+
+		unmap_mapping_range(inode->i_mapping, 0, 0, 1);
+	}
+
+	mutex_unlock(&sysfs_bin_lock);
+}
+
 /**
  *	sysfs_create_bin_file - create binary file for object.
  *	@kobj:	object.
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 82d3b79..5dcea0d 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -581,6 +581,7 @@ void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt)
 
 		sysfs_drop_dentry(sd);
 		sysfs_deactivate(sd);
+		unmap_bin_file(sd);
 		sysfs_put(sd);
 	}
 }
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index 93c6d6b..bee5344 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -28,6 +28,7 @@ struct sysfs_elem_attr {
 
 struct sysfs_elem_bin_attr {
 	struct bin_attribute	*bin_attr;
+	struct hlist_head	buffers;
 };
 
 /*
@@ -163,6 +164,7 @@ int sysfs_add_file_mode(struct sysfs_dirent *dir_sd,
  * bin.c
  */
 extern const struct file_operations bin_fops;
+void unmap_bin_file(struct sysfs_dirent *attr_sd);
 
 /*
  * symlink.c
-- 
1.6.1.2.350.g88cc


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH] sysfs: Don't block indefinitely for unmapped files.
  2009-02-25  8:16 [PATCH] sysfs: Don't block indefinitely for unmapped files Eric W. Biederman
@ 2009-02-25  9:16 ` Tejun Heo
  2009-02-26 21:13 ` Andrew Morton
  1 sibling, 0 replies; 3+ messages in thread
From: Tejun Heo @ 2009-02-25  9:16 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Greg Kroah-Hartman, linux-kernel, linux-pci, Jesse Barnes,
	Andrew Morton

Hello, Eric.

Eric W. Biederman wrote:
> This patch modifies sysfs bin files so that we can remove
> the bin file while they are still mapped.  When the kobject
> is removed we unmap the bin file and arrange for future
> accesses to the mapping to receive SIGBUS.
> 
> Implementing this prevents a nasty DOS when pci devices are
> hot plugged and unplugged.  Where if any of their resources
> were mmaped the kernel could not free up their pci resources
> or release their pci data structures.
>
> Signed-off-by: Eric W. Biederman <ebiederm@aristanetworks.com>

Heh... this is so cool.  I've never thought about it.  I would really
like more comments but other than that

Acked-by: Tejun Heo <tj@kernel.org>

-- 
tejun

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] sysfs: Don't block indefinitely for unmapped files.
  2009-02-25  8:16 [PATCH] sysfs: Don't block indefinitely for unmapped files Eric W. Biederman
  2009-02-25  9:16 ` Tejun Heo
@ 2009-02-26 21:13 ` Andrew Morton
  1 sibling, 0 replies; 3+ messages in thread
From: Andrew Morton @ 2009-02-26 21:13 UTC (permalink / raw)
  To: Eric W. Biederman; +Cc: gregkh, linux-kernel, htejun, linux-pci, jbarnes

On Wed, 25 Feb 2009 00:16:20 -0800
ebiederm@xmission.com (Eric W. Biederman) wrote:

> +/*
> + * There's one bin_buffer for each open file.
> + *
> + * filp->private_data points to bin_buffer and
> + * sysfs_dirent->s_bin_attr.buffers points to a the bin_buffer s

what is this trying to say?

> + * sysfs_dirent->s_bin_attr.buffers is protected by sysfs_bin_lock
> + */

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2009-02-26 21:13 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-02-25  8:16 [PATCH] sysfs: Don't block indefinitely for unmapped files Eric W. Biederman
2009-02-25  9:16 ` Tejun Heo
2009-02-26 21:13 ` Andrew Morton

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox