* [PATCH 1/4] staging: uio: Simplify the lifetime logic of struct uio_device.
2014-12-16 19:52 [PATCH 0/4] uio hotplug support Mandeep Sandhu
@ 2014-12-16 19:52 ` Mandeep Sandhu
2015-01-09 23:27 ` Greg KH
2014-12-16 19:52 ` [PATCH 2/4] staging: uio: Remove unused uio_info mmap method Mandeep Sandhu
` (2 subsequent siblings)
3 siblings, 1 reply; 8+ messages in thread
From: Mandeep Sandhu @ 2014-12-16 19:52 UTC (permalink / raw)
To: hjk, gregkh; +Cc: linux-kernel, viro, linux-fsdevel, Mandeep Sandhu
Embed struct device into struct uio_device, and use
the refcounting and the release method of struct device
to control struct uio_device.
This allows device_create and device_destroy to be replaced
with the more standard device_register and device_unregister,
and allows the struct device reference count to act as a
reference count on struct idev as well.
Tested-by: Mandeep Sandhu <mandeep.sandhu@cyaninc.com>
Signed-off-by: Mandeep Sandhu <mandeep.sandhu@cyaninc.com>
---
drivers/uio/uio.c | 41 ++++++++++++++++++++++++++---------------
include/linux/uio_driver.h | 3 ++-
2 files changed, 28 insertions(+), 16 deletions(-)
diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c
index 60fa627..708b093 100644
--- a/drivers/uio/uio.c
+++ b/drivers/uio/uio.c
@@ -16,7 +16,6 @@
#include <linux/module.h>
#include <linux/init.h>
#include <linux/poll.h>
-#include <linux/device.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/idr.h>
@@ -270,7 +269,7 @@ static int uio_dev_add_attributes(struct uio_device *idev)
if (!map_found) {
map_found = 1;
idev->map_dir = kobject_create_and_add("maps",
- &idev->dev->kobj);
+ &idev->device.kobj);
if (!idev->map_dir)
goto err_map;
}
@@ -295,7 +294,7 @@ static int uio_dev_add_attributes(struct uio_device *idev)
if (!portio_found) {
portio_found = 1;
idev->portio_dir = kobject_create_and_add("portio",
- &idev->dev->kobj);
+ &idev->device.kobj);
if (!idev->portio_dir)
goto err_portio;
}
@@ -334,7 +333,7 @@ err_map_kobj:
kobject_put(&map->kobj);
}
kobject_put(idev->map_dir);
- dev_err(idev->dev, "error creating sysfs files (%d)\n", ret);
+ dev_err(&idev->device, "error creating sysfs files (%d)\n", ret);
return ret;
}
@@ -371,7 +370,7 @@ static int uio_get_minor(struct uio_device *idev)
idev->minor = retval;
retval = 0;
} else if (retval == -ENOSPC) {
- dev_err(idev->dev, "too many uio devices\n");
+ dev_err(&idev->device, "too many uio devices\n");
retval = -EINVAL;
}
mutex_unlock(&minor_lock);
@@ -785,6 +784,13 @@ static void release_uio_class(void)
uio_major_cleanup();
}
+static void uio_device_release(struct device *dev)
+{
+ struct uio_device *idev = dev_get_drvdata(dev);
+
+ devm_kfree(dev->parent, idev);
+}
+
/**
* uio_register_device - register a new userspace IO device
* @owner: module that creates the new device
@@ -819,14 +825,19 @@ int __uio_register_device(struct module *owner,
if (ret)
return ret;
- idev->dev = device_create(&uio_class, parent,
- MKDEV(uio_major, idev->minor), idev,
- "uio%d", idev->minor);
- if (IS_ERR(idev->dev)) {
- printk(KERN_ERR "UIO: device register failed\n");
- ret = PTR_ERR(idev->dev);
+ idev->device.devt = MKDEV(uio_major, idev->minor);
+ idev->device.class = &uio_class;
+ idev->device.parent = parent;
+ idev->device.release = uio_device_release;
+ dev_set_drvdata(&idev->device, idev);
+
+ ret = kobject_set_name(&idev->device.kobj, "uio%d", idev->minor);
+ if (ret)
+ goto err_device_create;
+
+ ret = device_register(&idev->device);
+ if (ret)
goto err_device_create;
- }
ret = uio_dev_add_attributes(idev);
if (ret)
@@ -835,7 +846,7 @@ int __uio_register_device(struct module *owner,
info->uio_dev = idev;
if (info->irq && (info->irq != UIO_IRQ_CUSTOM)) {
- ret = devm_request_irq(idev->dev, info->irq, uio_interrupt,
+ ret = devm_request_irq(&idev->device, info->irq, uio_interrupt,
info->irq_flags, info->name, idev);
if (ret)
goto err_request_irq;
@@ -846,7 +857,7 @@ int __uio_register_device(struct module *owner,
err_request_irq:
uio_dev_del_attributes(idev);
err_uio_dev_add_attributes:
- device_destroy(&uio_class, MKDEV(uio_major, idev->minor));
+ device_unregister(&idev->device);
err_device_create:
uio_free_minor(idev);
return ret;
@@ -871,7 +882,7 @@ void uio_unregister_device(struct uio_info *info)
uio_dev_del_attributes(idev);
- device_destroy(&uio_class, MKDEV(uio_major, idev->minor));
+ device_unregister(&idev->device);
return;
}
diff --git a/include/linux/uio_driver.h b/include/linux/uio_driver.h
index baa8171..3ed7fca 100644
--- a/include/linux/uio_driver.h
+++ b/include/linux/uio_driver.h
@@ -14,6 +14,7 @@
#ifndef _UIO_DRIVER_H_
#define _UIO_DRIVER_H_
+#include <linux/device.h>
#include <linux/fs.h>
#include <linux/interrupt.h>
@@ -65,7 +66,7 @@ struct uio_port {
struct uio_device {
struct module *owner;
- struct device *dev;
+ struct device device;
int minor;
atomic_t event;
struct fasync_struct *async_queue;
--
1.9.1
^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [PATCH 1/4] staging: uio: Simplify the lifetime logic of struct uio_device.
2014-12-16 19:52 ` [PATCH 1/4] staging: uio: Simplify the lifetime logic of struct uio_device Mandeep Sandhu
@ 2015-01-09 23:27 ` Greg KH
2015-01-09 23:36 ` Mandeep Sandhu
0 siblings, 1 reply; 8+ messages in thread
From: Greg KH @ 2015-01-09 23:27 UTC (permalink / raw)
To: Mandeep Sandhu; +Cc: hjk, linux-kernel, viro, linux-fsdevel
On Tue, Dec 16, 2014 at 11:52:21AM -0800, Mandeep Sandhu wrote:
> Embed struct device into struct uio_device, and use
> the refcounting and the release method of struct device
> to control struct uio_device.
>
> This allows device_create and device_destroy to be replaced
> with the more standard device_register and device_unregister,
> and allows the struct device reference count to act as a
> reference count on struct idev as well.
>
> Tested-by: Mandeep Sandhu <mandeep.sandhu@cyaninc.com>
> Signed-off-by: Mandeep Sandhu <mandeep.sandhu@cyaninc.com>
> ---
> drivers/uio/uio.c | 41 ++++++++++++++++++++++++++---------------
> include/linux/uio_driver.h | 3 ++-
> 2 files changed, 28 insertions(+), 16 deletions(-)
The subject makes no sense, this has nothing to do with the staging tree
:(
Please fix up and resend this series.
thanks,
greg k-h
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 1/4] staging: uio: Simplify the lifetime logic of struct uio_device.
2015-01-09 23:27 ` Greg KH
@ 2015-01-09 23:36 ` Mandeep Sandhu
2015-01-09 23:45 ` Greg KH
0 siblings, 1 reply; 8+ messages in thread
From: Mandeep Sandhu @ 2015-01-09 23:36 UTC (permalink / raw)
To: Greg KH; +Cc: hjk, linux-kernel, viro, linux-fsdevel
>
> The subject makes no sense, this has nothing to do with the staging tree
> :(
Newbie question. What tree should I base it against?
Thanks,
-mandeep
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 1/4] staging: uio: Simplify the lifetime logic of struct uio_device.
2015-01-09 23:36 ` Mandeep Sandhu
@ 2015-01-09 23:45 ` Greg KH
0 siblings, 0 replies; 8+ messages in thread
From: Greg KH @ 2015-01-09 23:45 UTC (permalink / raw)
To: Mandeep Sandhu; +Cc: hjk, linux-kernel, viro, linux-fsdevel
On Fri, Jan 09, 2015 at 03:36:34PM -0800, Mandeep Sandhu wrote:
> >
> > The subject makes no sense, this has nothing to do with the staging tree
> > :(
>
> Newbie question. What tree should I base it against?
my char-misc.git on kernel.org. Looks like that's not in the
MAINTAINERS file, care to add that as an entry there?
thanks,
greg k-h
^ permalink raw reply [flat|nested] 8+ messages in thread
* [PATCH 2/4] staging: uio: Remove unused uio_info mmap method.
2014-12-16 19:52 [PATCH 0/4] uio hotplug support Mandeep Sandhu
2014-12-16 19:52 ` [PATCH 1/4] staging: uio: Simplify the lifetime logic of struct uio_device Mandeep Sandhu
@ 2014-12-16 19:52 ` Mandeep Sandhu
2014-12-16 19:52 ` [PATCH 3/4] staging: libunload: A library to help remove open files Mandeep Sandhu
2014-12-16 19:52 ` [PATCH 4/4] staging: uio: Implement hotunplug support, using libunload Mandeep Sandhu
3 siblings, 0 replies; 8+ messages in thread
From: Mandeep Sandhu @ 2014-12-16 19:52 UTC (permalink / raw)
To: hjk, gregkh; +Cc: linux-kernel, viro, linux-fsdevel, Mandeep Sandhu
There are no drivers in the kernel that implement the uio_info mmap
method so there is no point in keeping it.
Further keeping the mmap method would necessitate wrapping all of the
methods in vm_operations_struct to successfully implement support for
hot-unpluggable hardware, and I have yet to find a correct way to wrap
the vm_operations_struct close method.
Tested-by: Mandeep Sandhu <mandeep.sandhu@cyaninc.com>
Signed-off-by: Mandeep Sandhu <mandeep.sandhu@cyaninc.com>
---
drivers/uio/uio.c | 6 ------
include/linux/uio_driver.h | 2 --
2 files changed, 8 deletions(-)
diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c
index 708b093..91b9332 100644
--- a/drivers/uio/uio.c
+++ b/drivers/uio/uio.c
@@ -670,7 +670,6 @@ static int uio_mmap(struct file *filep, struct vm_area_struct *vma)
struct uio_device *idev = listener->dev;
int mi;
unsigned long requested_pages, actual_pages;
- int ret = 0;
if (vma->vm_end < vma->vm_start)
return -EINVAL;
@@ -687,11 +686,6 @@ static int uio_mmap(struct file *filep, struct vm_area_struct *vma)
if (requested_pages > actual_pages)
return -EINVAL;
- if (idev->info->mmap) {
- ret = idev->info->mmap(idev->info, vma);
- return ret;
- }
-
switch (idev->info->mem[mi].memtype) {
case UIO_MEM_PHYS:
return uio_mmap_physical(vma);
diff --git a/include/linux/uio_driver.h b/include/linux/uio_driver.h
index 3ed7fca..839c4c2 100644
--- a/include/linux/uio_driver.h
+++ b/include/linux/uio_driver.h
@@ -87,7 +87,6 @@ struct uio_device {
* @irq_flags: flags for request_irq()
* @priv: optional private data
* @handler: the device's irq handler
- * @mmap: mmap operation for this uio device
* @open: open operation for this uio device
* @release: release operation for this uio device
* @irqcontrol: disable/enable irqs when 0/1 is written to /dev/uioX
@@ -102,7 +101,6 @@ struct uio_info {
unsigned long irq_flags;
void *priv;
irqreturn_t (*handler)(int irq, struct uio_info *dev_info);
- int (*mmap)(struct uio_info *info, struct vm_area_struct *vma);
int (*open)(struct uio_info *info, struct inode *inode);
int (*release)(struct uio_info *info, struct inode *inode);
int (*irqcontrol)(struct uio_info *info, s32 irq_on);
--
1.9.1
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH 3/4] staging: libunload: A library to help remove open files
2014-12-16 19:52 [PATCH 0/4] uio hotplug support Mandeep Sandhu
2014-12-16 19:52 ` [PATCH 1/4] staging: uio: Simplify the lifetime logic of struct uio_device Mandeep Sandhu
2014-12-16 19:52 ` [PATCH 2/4] staging: uio: Remove unused uio_info mmap method Mandeep Sandhu
@ 2014-12-16 19:52 ` Mandeep Sandhu
2014-12-16 19:52 ` [PATCH 4/4] staging: uio: Implement hotunplug support, using libunload Mandeep Sandhu
3 siblings, 0 replies; 8+ messages in thread
From: Mandeep Sandhu @ 2014-12-16 19:52 UTC (permalink / raw)
To: hjk, gregkh; +Cc: linux-kernel, viro, linux-fsdevel, Mandeep Sandhu
The problem of how to remove open files due to module unloading or
device hotunplugging keeps coming up. We have multiple implementations
of roughly the same logic in proc, sysctl, sysfs, tun and now I am
working on yet another one for uio. It is time to start working on a
generic implementation.
This library does not aim to allow wrapping any arbitrary set of file
operations and making it safe to unload any module. This library aims
to work in conjunction with the code implementing an object to make it
safe to remove the object while file handles to it are still open.
libunload implements the necessary locking and logic to make it
straightforward to implement file_operations for objects that are
removed at runtime.
It is hard to arrange for the ->close method of vm_operations_struct to
be called when an object is being removed, and this code doesn't even
attempt to help with that. Instead it is assumed that calling ->close
is not needed. Without close support mmap at hotunplug time is simply a
matter of calling unmap_mapping_range() to invalidate the mappings, and
to arrange for vm_fault to return VM_FAULT_SIGBUS when the
unload_trylock fails.
Wait queues and fasync queues can safely be woken up after
unload_barrier making the semantics clean. The fasync entries can be
freed as a list of all of the file descriptors is kept. poll entries
can not be freed so the poll wait queue heads must be kept around. If
someone else's poll method is being wrapped, the wrapped poll wait
queue head could be freed, but it requires that there is a wrapping
wait queue head that is kept around. If there is no other way wrapping
a poll wait queue head seems practical but in general it isn't
particularly useful.
libunload is best understood from the perspective of code that calls
unload_barrier(). Past the unload barrier it is guaranteed that there
is no code in the critical sections protected by the unload lock, and
the unload release lock. Past the unload barrier it is safe to call the
release methods for remaining file descriptors, to ensure some logical
state does not persist.
Tested-by: Mandeep Sandhu <mandeep.sandhu@cyaninc.com>
Signed-off-by: Mandeep Sandhu <mandeep.sandhu@cyaninc.com>
---
fs/Makefile | 2 +-
fs/libunload.c | 169 +++++++++++++++++++++++++++++++++++++++++++++++++
include/linux/unload.h | 35 ++++++++++
3 files changed, 205 insertions(+), 1 deletion(-)
create mode 100644 fs/libunload.c
create mode 100644 include/linux/unload.h
diff --git a/fs/Makefile b/fs/Makefile
index da0bbb4..7ad5341 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -11,7 +11,7 @@ obj-y := open.o read_write.o file_table.o super.o \
attr.o bad_inode.o file.o filesystems.o namespace.o \
seq_file.o xattr.o libfs.o fs-writeback.o \
pnode.o splice.o sync.o utimes.o \
- stack.o fs_struct.o statfs.o fs_pin.o
+ stack.o fs_struct.o statfs.o fs_pin.o libunload.o
ifeq ($(CONFIG_BLOCK),y)
obj-y += buffer.o block_dev.o direct-io.o mpage.o
diff --git a/fs/libunload.c b/fs/libunload.c
new file mode 100644
index 0000000..0a365bb
--- /dev/null
+++ b/fs/libunload.c
@@ -0,0 +1,169 @@
+#include <linux/fs.h>
+#include <linux/mm_types.h>
+#include <linux/mm.h>
+#include <linux/spinlock.h>
+#include <linux/module.h>
+#include <linux/unload.h>
+
+struct unload_barrier {
+ struct completion completion;
+ int releasers;
+};
+
+void unload_init(struct unload *unload)
+{
+ INIT_HLIST_HEAD(&unload->ufiles);
+ spin_lock_init(&unload->lock);
+ unload->active = 1;
+ unload->barrier = NULL;
+}
+EXPORT_SYMBOL_GPL(unload_init);
+
+void unload_file_init(struct unload_file *ufile,
+ struct file *file,
+ struct unload *unload)
+{
+ ufile->file = file;
+ ufile->unload = unload;
+ INIT_HLIST_NODE(&ufile->list);
+}
+EXPORT_SYMBOL_GPL(unload_file_init);
+
+bool unload_trylock(struct unload *unload)
+{
+ bool locked = false;
+
+ spin_lock(&unload->lock);
+ if (likely(!unload->barrier)) {
+ unload->active++;
+ locked = true;
+ }
+ spin_unlock(&unload->lock);
+ return locked;
+}
+EXPORT_SYMBOL_GPL(unload_trylock);
+
+static void __unload_unlock(struct unload *unload)
+{
+ unload->active--;
+ if ((unload->active == 0) && (unload->barrier->releasers == 0))
+ complete(&unload->barrier->completion);
+}
+
+void unload_unlock(struct unload *unload)
+{
+ spin_lock(&unload->lock);
+ __unload_unlock(unload);
+ spin_unlock(&unload->lock);
+}
+EXPORT_SYMBOL_GPL(unload_unlock);
+
+static void __unload_file_attach(struct unload_file *ufile,
+ struct unload *unload)
+{
+ ufile->unload = unload;
+ hlist_add_head(&ufile->list, &unload->ufiles);
+}
+
+void unload_file_attach(struct unload_file *ufile, struct unload *unload)
+{
+ spin_lock(&unload->lock);
+ __unload_file_attach(ufile, unload);
+ spin_unlock(&unload->lock);
+}
+EXPORT_SYMBOL_GPL(unload_file_attach);
+
+static void __unload_file_detach(struct unload_file *ufile)
+{
+ hlist_del_init(&ufile->list);
+}
+
+void unload_file_detach(struct unload_file *ufile)
+{
+ struct unload *unload = ufile->unload;
+
+ spin_lock(&unload->lock);
+ __unload_file_detach(ufile);
+ spin_unlock(&unload->lock);
+}
+EXPORT_SYMBOL_GPL(unload_file_detach);
+
+struct unload_file *find_unload_file(struct unload *unload, struct file *file)
+{
+ struct unload_file *ufile;
+
+ spin_lock(&unload->lock);
+ hlist_for_each_entry(ufile, &unload->ufiles, list) {
+ if (ufile->file == file)
+ goto done;
+ }
+ ufile = NULL;
+done:
+ spin_unlock(&unload->lock);
+ return ufile;
+}
+EXPORT_SYMBOL_GPL(find_unload_file);
+
+bool unload_release_trylock(struct unload_file *ufile)
+{
+ struct unload *unload = ufile->unload;
+ bool locked = false;
+
+ spin_lock(&unload->lock);
+ if (!hlist_unhashed(&ufile->list))
+ locked = true;
+ spin_unlock(&unload->lock);
+ return locked;
+}
+EXPORT_SYMBOL_GPL(unload_release_trylock);
+
+void unload_release_unlock(struct unload_file *ufile)
+{
+ struct unload *unload = ufile->unload;
+ struct unload_barrier *barrier;
+
+ spin_lock(&unload->lock);
+ __unload_file_detach(ufile);
+ barrier = unload->barrier;
+ if (barrier) {
+ barrier->releasers -= 1;
+ if ((barrier->releasers == 0) && (unload->active == 0))
+ complete(&barrier->completion);
+ }
+ spin_unlock(&unload->lock);
+}
+EXPORT_SYMBOL_GPL(unload_release_unlock);
+
+
+void unload_barrier(struct unload *unload)
+{
+ struct unload_barrier barrier;
+ struct unload_file *ufile;
+
+ /* Guarantee that when this function returns I am not
+ * executing any code protected by the unload_lock or
+ * unload_releas_lock, and that I will never again execute
+ * code protected by those locks.
+ *
+ * Also guarantee the file count for every file remaining on
+ * the unload ufiles list has been incremented. The increment
+ * of the file count guarantees __fput will not be called.
+ */
+ init_completion(&barrier.completion);
+ barrier.releasers = 0;
+
+ spin_lock(&unload->lock);
+ unload->barrier = &barrier;
+
+ hlist_for_each_entry(ufile, &unload->ufiles, list)
+ if (!atomic_long_inc_not_zero(&ufile->file->f_count))
+ barrier.releasers++;
+ unload->active--;
+ if (unload->active || barrier.releasers) {
+ spin_unlock(&unload->lock);
+ wait_for_completion(&barrier.completion);
+ spin_lock(&unload->lock);
+ }
+ spin_unlock(&unload->lock);
+}
+EXPORT_SYMBOL_GPL(unload_barrier);
diff --git a/include/linux/unload.h b/include/linux/unload.h
new file mode 100644
index 0000000..83d378f
--- /dev/null
+++ b/include/linux/unload.h
@@ -0,0 +1,35 @@
+#ifndef _LINUX_UNLOAD_H
+#define _LINUX_UNLOAD_H
+
+#include <linux/list.h>
+
+struct file;
+struct vm_operations_struct;
+struct unload_barrier;
+
+struct unload {
+ struct hlist_head ufiles;
+ struct unload_barrier *barrier;
+ spinlock_t lock;
+ int active;
+};
+
+struct unload_file {
+ struct unload *unload;
+ struct hlist_node list;
+ struct file *file;
+};
+
+void unload_init(struct unload *unload);
+void unload_file_init(struct unload_file *ufile,
+ struct file *file,
+ struct unload *unload);
+bool unload_trylock(struct unload *unload);
+void unload_unlock(struct unload *unload);
+bool unload_release_trylock(struct unload_file *ufile);
+void unload_release_unlock(struct unload_file *ufile);
+void unload_file_attach(struct unload_file *ufile, struct unload *unload);
+void unload_file_detach(struct unload_file *ufile);
+struct unload_file *find_unload_file(struct unload *unload, struct file *file);
+void unload_barrier(struct unload *unload);
+#endif /* _LINUX_UNLOAD_H */
--
1.9.1
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH 4/4] staging: uio: Implement hotunplug support, using libunload
2014-12-16 19:52 [PATCH 0/4] uio hotplug support Mandeep Sandhu
` (2 preceding siblings ...)
2014-12-16 19:52 ` [PATCH 3/4] staging: libunload: A library to help remove open files Mandeep Sandhu
@ 2014-12-16 19:52 ` Mandeep Sandhu
3 siblings, 0 replies; 8+ messages in thread
From: Mandeep Sandhu @ 2014-12-16 19:52 UTC (permalink / raw)
To: hjk, gregkh; +Cc: linux-kernel, viro, linux-fsdevel, Mandeep Sandhu
With this change it is possible to remove a module that implements
a uio device, or to remove the underlying hardware device of a uio
device without crashing the kernel, or causing user space more problems
than just an I/O error.
The implicit module parameter that was passed by uio_register_device
to __uio_register_device has been removed as it is now safe to call
uio_unregister_device while uio file handles are open.
In uio all file methods (except release) now must perform their work
inside of the unload_lock. This guarantees that uio_unregister_device
won't return until all currently running uio methods complete, and
it allows uio to return -EIO after the underlying device has been
unregistered.
The fops->release method must perform the bulk of its work under the
unload_release_lock. For release, more needs to be done than just
waiting for the method to finish executing: the release also
needs to handle the info->release method being called early on files that
are not closed when unregister is running.
lock fails if and only if the info->release method has already been
called at the time fops->release runs.
Instead of holding a module reference, the uio files have been modified
to instead hold an extra reference to struct uio_device, ensuring
that the uio file_operations functions will not access freed memory
even after the uio device has been unregistered.
The bulk of the changes are simply modifying the code flow of the
uio functions to run under the appropriate unload lock.
uio_open is modestly special in that it needs to run under the
uio_unload lock (to keep the uio device from unloading), but not have
it's file structure on the uio unload list until it is certain
that the open will succeed (ensuring that release will not be called
on a file that we failed to open). uio_open also carefully grabs
the reference to the uio_device under the idr_mutex to guarantee
that the uio_device reference held by the idr data structure
is valid while we are incrementing the uio_device reference count.
uio_device_unregister does a fair bit more than what it used to. All
of the extra work is essentially handling the early shutdown of file
handles when a device is unregistered while files are still open.
Tested-by: Mandeep Sandhu <mandeep.sandhu@cyaninc.com>
Signed-off-by: Mandeep Sandhu <mandeep.sandhu@cyaninc.com>
---
drivers/uio/uio.c | 258 +++++++++++++++++++++++++++++++++------------
include/linux/uio_driver.h | 11 +-
2 files changed, 194 insertions(+), 75 deletions(-)
diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c
index 91b9332..4f24d94 100644
--- a/drivers/uio/uio.c
+++ b/drivers/uio/uio.c
@@ -23,7 +23,9 @@
#include <linux/string.h>
#include <linux/kobject.h>
#include <linux/cdev.h>
+#include <linux/file.h>
#include <linux/uio_driver.h>
+#include <linux/unload.h>
#define UIO_MAX_DEVICES (1U << MINORBITS)
@@ -415,60 +417,73 @@ static irqreturn_t uio_interrupt(int irq, void *dev_id)
}
struct uio_listener {
- struct uio_device *dev;
+ struct unload_file ufile;
s32 event_count;
};
+#define listener_idev(LISTENER) \
+ container_of((LISTENER)->ufile.unload, struct uio_device, unload)
+
static int uio_open(struct inode *inode, struct file *filep)
{
struct uio_device *idev;
- struct uio_listener *listener;
+ struct uio_listener *listener = NULL;
int ret = 0;
mutex_lock(&minor_lock);
idev = idr_find(&uio_idr, iminor(inode));
+ if (idev)
+ get_device(&idev->device);
mutex_unlock(&minor_lock);
- if (!idev) {
- ret = -ENODEV;
- goto out;
- }
- if (!try_module_get(idev->owner)) {
- ret = -ENODEV;
- goto out;
- }
+ ret = -ENODEV;
+ if (!idev)
+ goto err_out;
listener = kmalloc(sizeof(*listener), GFP_KERNEL);
- if (!listener) {
- ret = -ENOMEM;
- goto err_alloc_listener;
- }
+ ret = -ENOMEM;
+ if (!listener)
+ goto err_out;
- listener->dev = idev;
+ unload_file_init(&listener->ufile, filep, &idev->unload);
listener->event_count = atomic_read(&idev->event);
filep->private_data = listener;
- if (idev->info->open) {
+ /*
+ * Take the users lock before opening the file to ensure the
+ * file is not unregistered while it is being opened.
+ */
+ ret = -EIO;
+ if (!unload_trylock(&idev->unload))
+ goto err_out;
+
+ ret = 0;
+ if (idev->info->open)
ret = idev->info->open(idev->info, inode);
- if (ret)
- goto err_infoopen;
- }
- return 0;
-err_infoopen:
- kfree(listener);
+ /*
+ * Add to the listener list only if the open succeeds.
+ * This ensures that uio_unregister_device won't call
+ * release unless open has succeeded.
+ */
+ if (ret == 0)
+ unload_file_attach(&listener->ufile, &idev->unload);
-err_alloc_listener:
- module_put(idev->owner);
+ unload_unlock(&idev->unload);
-out:
+err_out:
+ if (ret) {
+ if (idev)
+ put_device(&idev->device);
+ kfree(listener);
+ }
return ret;
}
static int uio_fasync(int fd, struct file *filep, int on)
{
struct uio_listener *listener = filep->private_data;
- struct uio_device *idev = listener->dev;
+ struct uio_device *idev = listener_idev(listener);
return fasync_helper(fd, filep, on, &idev->async_queue);
}
@@ -477,44 +492,61 @@ static int uio_release(struct inode *inode, struct file *filep)
{
int ret = 0;
struct uio_listener *listener = filep->private_data;
- struct uio_device *idev = listener->dev;
+ struct uio_device *idev = listener_idev(listener);
- if (idev->info->release)
- ret = idev->info->release(idev->info, inode);
+ if (unload_release_trylock(&listener->ufile)) {
+ if (idev->info->release)
+ ret = idev->info->release(idev->info, inode);
- module_put(idev->owner);
+ unload_release_unlock(&listener->ufile);
+ }
kfree(listener);
+ put_device(&idev->device);
return ret;
}
static unsigned int uio_poll(struct file *filep, poll_table *wait)
{
struct uio_listener *listener = filep->private_data;
- struct uio_device *idev = listener->dev;
+ struct uio_device *idev = listener_idev(listener);
+ unsigned int ret;
+
+ if (!unload_trylock(&idev->unload))
+ return POLLERR;
+ ret = POLLERR;
if (!idev->info->irq)
- return -EIO;
+ goto out;
poll_wait(filep, &idev->wait, wait);
+ ret = 0;
if (listener->event_count != atomic_read(&idev->event))
- return POLLIN | POLLRDNORM;
- return 0;
+ ret = POLLIN | POLLRDNORM;
+
+out:
+ unload_unlock(&idev->unload);
+ return ret;
}
static ssize_t uio_read(struct file *filep, char __user *buf,
size_t count, loff_t *ppos)
{
struct uio_listener *listener = filep->private_data;
- struct uio_device *idev = listener->dev;
+ struct uio_device *idev = listener_idev(listener);
DECLARE_WAITQUEUE(wait, current);
ssize_t retval;
s32 event_count;
- if (!idev->info->irq)
+ if (!unload_trylock(&idev->unload))
return -EIO;
+ retval = -EIO;
+ if (!idev->info->irq)
+ goto out;
+
+ retval = -EINVAL;
if (count != sizeof(s32))
- return -EINVAL;
+ goto out;
add_wait_queue(&idev->wait, &wait);
@@ -541,12 +573,21 @@ static ssize_t uio_read(struct file *filep, char __user *buf,
retval = -ERESTARTSYS;
break;
}
+ unload_unlock(&idev->unload);
+
schedule();
+
+ if (!unload_trylock(&idev->unload)) {
+ retval = -EIO;
+ break;
+ }
} while (1);
__set_current_state(TASK_RUNNING);
remove_wait_queue(&idev->wait, &wait);
+out:
+ unload_unlock(&idev->unload);
return retval;
}
@@ -554,24 +595,33 @@ static ssize_t uio_write(struct file *filep, const char __user *buf,
size_t count, loff_t *ppos)
{
struct uio_listener *listener = filep->private_data;
- struct uio_device *idev = listener->dev;
+ struct uio_device *idev = listener_idev(listener);
ssize_t retval;
s32 irq_on;
- if (!idev->info->irq)
+ if (!unload_trylock(&idev->unload))
return -EIO;
+ retval = -EIO;
+ if (!idev->info->irq)
+ goto out;
+
+ retval = -EINVAL;
if (count != sizeof(s32))
- return -EINVAL;
+ goto out;
+ retval = -ENOSYS;
if (!idev->info->irqcontrol)
- return -ENOSYS;
+ goto out;
+ retval = -EFAULT;
if (copy_from_user(&irq_on, buf, count))
- return -EFAULT;
+ goto out;
retval = idev->info->irqcontrol(idev->info, irq_on);
+out:
+ unload_unlock(&idev->unload);
return retval ? retval : sizeof(s32);
}
@@ -587,17 +637,23 @@ static int uio_find_mem_index(struct vm_area_struct *vma)
return -1;
}
-static int uio_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int uio_logical_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
struct uio_device *idev = vma->vm_private_data;
struct page *page;
unsigned long offset;
void *addr;
+ int ret;
+ int mi;
- int mi = uio_find_mem_index(vma);
- if (mi < 0)
+ if (!unload_trylock(&idev->unload))
return VM_FAULT_SIGBUS;
+ ret = VM_FAULT_SIGBUS;
+ mi = uio_find_mem_index(vma);
+ if (mi < 0)
+ goto out;
+
/*
* We need to subtract mi because userspace uses offset = N*PAGE_SIZE
* to use mem[N].
@@ -611,26 +667,38 @@ static int uio_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
page = vmalloc_to_page(addr);
get_page(page);
vmf->page = page;
- return 0;
+ ret = 0;
+out:
+ unload_unlock(&idev->unload);
+ return ret;
}
static const struct vm_operations_struct uio_logical_vm_ops = {
- .fault = uio_vma_fault,
+ .fault = uio_logical_fault,
};
-static int uio_mmap_logical(struct vm_area_struct *vma)
+static int uio_physical_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
- vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
- vma->vm_ops = &uio_logical_vm_ops;
- return 0;
+ /* The pages should always be mapped, so we should never get
+ * here unless the device has been hotunplugged
+ */
+ return VM_FAULT_SIGBUS;
}
static const struct vm_operations_struct uio_physical_vm_ops = {
+ .fault = uio_physical_fault,
#ifdef CONFIG_HAVE_IOREMAP_PROT
.access = generic_access_phys,
#endif
};
+static int uio_mmap_logical(struct vm_area_struct *vma)
+{
+ vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
+ vma->vm_ops = &uio_logical_vm_ops;
+ return 0;
+}
+
static int uio_mmap_physical(struct vm_area_struct *vma)
{
struct uio_device *idev = vma->vm_private_data;
@@ -647,6 +715,7 @@ static int uio_mmap_physical(struct vm_area_struct *vma)
vma->vm_ops = &uio_physical_vm_ops;
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+ vma->vm_ops = &uio_physical_vm_ops;
/*
* We cannot use the vm_iomap_memory() helper here,
@@ -667,34 +736,48 @@ static int uio_mmap_physical(struct vm_area_struct *vma)
static int uio_mmap(struct file *filep, struct vm_area_struct *vma)
{
struct uio_listener *listener = filep->private_data;
- struct uio_device *idev = listener->dev;
+ struct uio_device *idev = listener_idev(listener);
int mi;
unsigned long requested_pages, actual_pages;
+ int ret;
+
+ if (!unload_trylock(&idev->unload))
+ return -EIO;
+
+ ret = -EINVAL;
if (vma->vm_end < vma->vm_start)
- return -EINVAL;
+ goto out;
vma->vm_private_data = idev;
+ ret = -EINVAL;
mi = uio_find_mem_index(vma);
if (mi < 0)
- return -EINVAL;
+ goto out;
requested_pages = vma_pages(vma);
actual_pages = ((idev->info->mem[mi].addr & ~PAGE_MASK)
+ idev->info->mem[mi].size + PAGE_SIZE -1) >> PAGE_SHIFT;
+ ret = -EINVAL;
if (requested_pages > actual_pages)
- return -EINVAL;
+ goto out;
switch (idev->info->mem[mi].memtype) {
- case UIO_MEM_PHYS:
- return uio_mmap_physical(vma);
- case UIO_MEM_LOGICAL:
- case UIO_MEM_VIRTUAL:
- return uio_mmap_logical(vma);
- default:
- return -EINVAL;
+ case UIO_MEM_PHYS:
+ ret = uio_mmap_physical(vma);
+ break;
+ case UIO_MEM_LOGICAL:
+ case UIO_MEM_VIRTUAL:
+ ret = uio_mmap_logical(vma);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
}
+out:
+ unload_unlock(&idev->unload);
+ return ret;
}
static const struct file_operations uio_fops = {
@@ -793,9 +876,7 @@ static void uio_device_release(struct device *dev)
*
* returns zero on success or a negative error code.
*/
-int __uio_register_device(struct module *owner,
- struct device *parent,
- struct uio_info *info)
+int uio_register_device(struct device *parent, struct uio_info *info)
{
struct uio_device *idev;
int ret = 0;
@@ -810,10 +891,10 @@ int __uio_register_device(struct module *owner,
return -ENOMEM;
}
- idev->owner = owner;
idev->info = info;
init_waitqueue_head(&idev->wait);
atomic_set(&idev->event, 0);
+ unload_init(&idev->unload);
ret = uio_get_minor(idev);
if (ret)
@@ -856,7 +937,7 @@ err_device_create:
uio_free_minor(idev);
return ret;
}
-EXPORT_SYMBOL_GPL(__uio_register_device);
+EXPORT_SYMBOL_GPL(uio_register_device);
/**
* uio_unregister_device - unregister a industrial IO device
@@ -866,16 +947,59 @@ EXPORT_SYMBOL_GPL(__uio_register_device);
void uio_unregister_device(struct uio_info *info)
{
struct uio_device *idev;
+ struct unload_file *ufile;
+ struct hlist_node *n;
if (!info || !info->uio_dev)
return;
idev = info->uio_dev;
+ /*
+ * Stop accepting new callers into the uio functions, and wait
+ * until all existing callers into the uio functions are done.
+ */
+ unload_barrier(&idev->unload);
+
+ /* Notify listeners that the uio devices has gone. */
+ wake_up_interruptible_all(&idev->wait);
+ kill_fasync(&idev->async_queue, SIGIO, POLL_ERR);
+
+ /*
+ * unload_barrier froze the idev->unload.ufiles list and grabbed
+ * a reference to every file on that list. Now cleanup those files
+ * and drop the extra reference.
+ */
+ hlist_for_each_entry_safe(ufile, n, &idev->unload.ufiles, list) {
+ struct file *file = ufile->file;
+ struct inode *inode = file->f_dentry->d_inode;
+
+ /* Cause all mappings to trigger a SIGBUS */
+ unmap_mapping_range(inode->i_mapping, 0, 0, 1);
+
+ /* Remove the file from the fasync list because any
+ * future attempts to add/remove this file from this
+ * list will return -EIO.
+ */
+ fasync_helper(-1, file, 0, &idev->async_queue);
+
+ /* Now that userspace is totally blocked off run the
+ * release code to clean up, the uio devices data
+ * structures.
+ */
+ if (info->release)
+ info->release(info, inode);
+
+ /* Forget about this file */
+ unload_file_detach(ufile);
+ fput(file);
+ }
+
uio_free_minor(idev);
uio_dev_del_attributes(idev);
+ idev->info = NULL;
device_unregister(&idev->device);
return;
diff --git a/include/linux/uio_driver.h b/include/linux/uio_driver.h
index 839c4c2..c8a4deb 100644
--- a/include/linux/uio_driver.h
+++ b/include/linux/uio_driver.h
@@ -17,6 +17,7 @@
#include <linux/device.h>
#include <linux/fs.h>
#include <linux/interrupt.h>
+#include <linux/unload.h>
struct module;
struct uio_map;
@@ -65,7 +66,6 @@ struct uio_port {
#define MAX_UIO_PORT_REGIONS 5
struct uio_device {
- struct module *owner;
struct device device;
int minor;
atomic_t event;
@@ -74,6 +74,7 @@ struct uio_device {
struct uio_info *info;
struct kobject *map_dir;
struct kobject *portio_dir;
+ struct unload unload;
};
/**
@@ -107,13 +108,7 @@ struct uio_info {
};
extern int __must_check
- __uio_register_device(struct module *owner,
- struct device *parent,
- struct uio_info *info);
-
-/* use a define to avoid include chaining to get THIS_MODULE */
-#define uio_register_device(parent, info) \
- __uio_register_device(THIS_MODULE, parent, info)
+ uio_register_device(struct device *parent, struct uio_info *info);
extern void uio_unregister_device(struct uio_info *info);
extern void uio_event_notify(struct uio_info *info);
--
1.9.1
^ permalink raw reply related [flat|nested] 8+ messages in thread