* PATCH (2.4.5): /dev/poll support (3rd time lucky)
@ 2001-06-27 19:33 Zarjazz
0 siblings, 0 replies; only message in thread
From: Zarjazz @ 2001-06-27 19:33 UTC (permalink / raw)
To: linux-kernel; +Cc: kdc, linux-scalability
[-- Attachment #1: Type: text/plain, Size: 803 bytes --]
Not my day it seems ! Hopefully I remembered to attach the file this time :)
--
Hi,
this patch adds Solaris 7/8 like /dev/poll support to the kernel.
I can claim no real credit for this as basically this is a fixed version of
a patch available from http://www.citi.umich.edu/projects/linux-scalability/
to compile correctly with 2.4.5 that only seemed to work with the 2.3.x
devel branch. The reason for this is so I can compile & test an application
on my home linux pc when I'm not around my nice work Solaris boxes :)
Please note, I have not got the knowledge of kernel development to know if
this patch is broken or badly written. It may be bugged and/or worse than
the standard poll() call but my application works so I'll leave profiling
etc. to people more knowledgeable than me.
Vince.
[-- Attachment #2: devpoll-2.4.5.patch --]
[-- Type: application/octet-stream, Size: 31617 bytes --]
diff -rNu linux.orig/drivers/char/Config.in linux/drivers/char/Config.in
--- linux.orig/drivers/char/Config.in Wed Mar 7 03:44:34 2001
+++ linux/drivers/char/Config.in Wed Jun 27 16:41:00 2001
@@ -158,6 +158,7 @@
dep_tristate 'Intel i8x0 Random Number Generator support' CONFIG_INTEL_RNG $CONFIG_PCI
tristate '/dev/nvram support' CONFIG_NVRAM
+tristate '/dev/poll support' CONFIG_DEVPOLL
tristate 'Enhanced Real Time Clock Support' CONFIG_RTC
if [ "$CONFIG_IA64" = "y" ]; then
bool 'EFI Real Time Clock Services' CONFIG_EFI_RTC
diff -rNu linux.orig/drivers/char/Makefile linux/drivers/char/Makefile
--- linux.orig/drivers/char/Makefile Wed May 16 18:27:02 2001
+++ linux/drivers/char/Makefile Wed Jun 27 16:43:07 2001
@@ -170,6 +170,7 @@
obj-$(CONFIG_PC110_PAD) += pc110pad.o
obj-$(CONFIG_RTC) += rtc.o
obj-$(CONFIG_EFI_RTC) += efirtc.o
+obj-$(CONFIG_DEVPOLL) += devpoll.o
ifeq ($(CONFIG_PPC),)
obj-$(CONFIG_NVRAM) += nvram.o
endif
diff -rNu linux.orig/drivers/char/devpoll.c linux/drivers/char/devpoll.c
--- linux.orig/drivers/char/devpoll.c Thu Jan 1 01:00:00 1970
+++ linux/drivers/char/devpoll.c Wed Jun 27 18:55:30 2001
@@ -0,0 +1,756 @@
+/*
+ * /dev/poll
+ * by Niels Provos <provos@citi.umich.edu>
+ *
+ * provides poll() support via /dev/poll as in Solaris.
+ *
+ * Linux 2.3.x port by Michal Ostrowski
+ * Linux 2.4.x patches by Vincent Sweeney <v.sweeney@dexterus.com>
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/file.h>
+#include <linux/signal.h>
+#include <linux/errno.h>
+#include <linux/mm.h>
+#include <linux/malloc.h>
+#include <linux/vmalloc.h>
+#include <linux/poll.h>
+#include <linux/miscdevice.h>
+#include <linux/random.h>
+#include <linux/smp_lock.h>
+#include <linux/wrapper.h>
+
+#include <linux/devpoll.h>
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/io.h>
+
+/*#define DEBUG 1 */
+#ifdef DEBUG
+#define DPRINTK(x) printk x
+#define DNPRINTK(n,x) if (n <= DEBUG) printk x
+#else
+#define DPRINTK(x)
+#define DNPRINTK(n,x)
+#endif
+
+/* Various utility functions */
+
+#define DEFAULT_POLLMASK (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)
+
+/* Do dynamic hashing */
+
+#define INITIAL_BUCKET_BITS 6
+#define MAX_BUCKET_BITS 16
+#define RESIZE_LENGTH 2
+
+static void free_pg_vec(struct devpoll *dp);
+
+/* Initialize the hash table */
+
+/*
+ * Set up the per-open /dev/poll state: zero the statistics counters
+ * and allocate the fd hash table with 1 << INITIAL_BUCKET_BITS
+ * buckets.  Returns 0 on success or -ENOMEM.
+ */
+int
+dp_init(struct devpoll *dp)
+{
+ int i;
+ int num_buckets;
+ DNPRINTK(3, (KERN_INFO "/dev/poll: dp_init\n"));
+
+ dp->dp_lock = RW_LOCK_UNLOCKED;
+ dp->dp_entries = 0;
+ dp->dp_max = 0;
+ dp->dp_avg = dp->dp_count = 0;
+ dp->dp_cached = dp->dp_calls = 0;
+ dp->dp_bucket_bits = INITIAL_BUCKET_BITS;
+ dp->dp_bucket_mask = (1 << INITIAL_BUCKET_BITS) - 1;
+
+ num_buckets = (dp->dp_bucket_mask + 1);
+ dp->dp_tab = kmalloc(num_buckets * sizeof (struct list_head),
+ GFP_KERNEL);
+
+ if (!dp->dp_tab)
+ return -ENOMEM;
+
+ /* Every bucket starts out as an empty list */
+ for (i = 0; i < num_buckets; i++) {
+ INIT_LIST_HEAD(&dp->dp_tab[i]);
+ }
+
+ return (0);
+}
+
+/*
+ * Double the bucket count and rehash every entry.  Invoked from
+ * dp_insert once the average chain length exceeds RESIZE_LENGTH.
+ * Returns 0 on success; on -ENOMEM the old table is left intact.
+ */
+int
+dp_resize(struct devpoll *dp)
+{
+ u_int16_t new_mask, old_mask;
+ int i;
+ struct list_head *new_tab, *old_tab;
+ struct dp_fd *dpfd;
+ unsigned long flags;
+ int num_buckets;
+
+ /* NOTE(review): masks are read before dp_lock is taken --
+ * presumably resizes are serialized by the caller; confirm.
+ */
+ old_mask = dp->dp_bucket_mask;
+ new_mask = (old_mask + 1) * 2 - 1;
+ num_buckets = new_mask + 1;
+
+ DPRINTK((KERN_INFO "/dev/poll: resize %d -> %d\n", old_mask, new_mask));
+
+ /* Allocate the new table outside the lock; kmalloc may sleep */
+ new_tab = kmalloc(num_buckets * sizeof (struct list_head), GFP_KERNEL);
+ if (!new_tab)
+ return -ENOMEM;
+
+ for (i = 0; i < num_buckets; i++) {
+ INIT_LIST_HEAD(&new_tab[i]);
+ }
+
+ old_tab = dp->dp_tab;
+
+ /* Rehash all entries */
+ write_lock_irqsave(&dp->dp_lock, flags);
+ for (i = 0; i <= old_mask; i++) {
+ while (!list_empty(&old_tab[i])) {
+ dpfd = list_entry(old_tab[i].next, struct dp_fd, next);
+ list_del(&dpfd->next);
+ INIT_LIST_HEAD(&dpfd->next);
+ list_add(&dpfd->next,
+ &new_tab[dpfd->pfd.fd & new_mask]);
+ }
+ }
+
+ dp->dp_tab = new_tab;
+ dp->dp_bucket_bits++;
+ dp->dp_bucket_mask = new_mask;
+ write_unlock_irqrestore(&dp->dp_lock, flags);
+
+ kfree(old_tab);
+
+ return (0);
+}
+
+/*
+ * Add a pollfd to the interest hash, mark it dirty so the first
+ * dp_poll really polls it, and register a backmap on the target
+ * file so wakeups can set the hint bit.  Returns 0 or -ENOMEM.
+ */
+int
+dp_insert(struct devpoll *dp, struct pollfd *pfd)
+{
+ struct dp_fd *dpfd;
+ u_int16_t bucket = pfd->fd & dp->dp_bucket_mask;
+ unsigned long flags;
+ struct file *file;
+
+ dpfd = kmalloc(sizeof (struct dp_fd), GFP_KERNEL);
+ if (!dpfd)
+ return -ENOMEM;
+
+ dpfd->flags = 0;
+ set_bit(DPH_DIRTY, &dpfd->flags);
+ dpfd->pfd = *pfd;
+ dpfd->pfd.revents = 0;
+ INIT_LIST_HEAD(&dpfd->next);
+
+ write_lock_irqsave(&dp->dp_lock, flags);
+
+ list_add(&dpfd->next, &dp->dp_tab[bucket]);
+
+ /* NOTE(review): fcheck() is called without files->file_lock,
+ * and poll_backmap() kmallocs with GFP_KERNEL while dp_lock is
+ * held with interrupts off (may sleep) -- confirm both.
+ */
+ file = fcheck(pfd->fd);
+ if (file != NULL) {
+ write_lock(&(file)->f_dplock);
+ poll_backmap(pfd->fd, dpfd, &(file)->f_backmap);
+ write_unlock(&(file)->f_dplock);
+ set_bit(DPH_BACKMAP, &(dpfd)->flags);
+ }
+ write_unlock_irqrestore(&dp->dp_lock, flags);
+
+ /* Entry count is updated outside dp_lock; assumes one writer
+ * per open instance.
+ */
+ dp->dp_entries++;
+ /* Check if we need to resize the hash table */
+ if ((dp->dp_entries >> dp->dp_bucket_bits) > RESIZE_LENGTH &&
+ dp->dp_bucket_bits < MAX_BUCKET_BITS)
+ dp_resize(dp);
+
+ return (0);
+}
+
+/*
+ * Look up the interest-list entry for "fd".  Returns the entry or
+ * NULL.  The read lock is dropped before returning, so the pointer
+ * is only safe while the caller is the sole writer of this table.
+ */
+struct dp_fd *
+dp_find(struct devpoll *dp, int fd)
+{
+ struct dp_fd *dpfd = NULL;
+ struct list_head *lh;
+ u_int16_t bucket = fd & dp->dp_bucket_mask;
+
+ read_lock(&dp->dp_lock);
+ list_for_each(lh, &dp->dp_tab[bucket]) {
+ dpfd = list_entry(lh, struct dp_fd, next);
+ if (dpfd->pfd.fd == fd)
+ break;
+ /* not this one: reset so we return NULL if the list ends */
+ dpfd = NULL;
+ }
+
+ read_unlock(&dp->dp_lock);
+ DNPRINTK(2, (KERN_INFO "dp_find: %d -> %p\n", fd, dpfd));
+
+ return dpfd;
+}
+
+/*
+ * Unlink an entry from the interest hash, drop the backmap
+ * reference it holds on the target file (if any), and free it.
+ */
+void
+dp_delete(struct devpoll *dp, struct dp_fd *dpfd)
+{
+ unsigned long flags;
+ int fd;
+ struct file *filp;
+
+ write_lock_irqsave(&dp->dp_lock, flags);
+ list_del(&dpfd->next);
+
+ INIT_LIST_HEAD(&dpfd->next);
+
+ /* Remove backmaps if necessary */
+ /* current->files can be NULL for an exiting task; skip then */
+ if (current->files) {
+ fd = dpfd->pfd.fd;
+ filp = fcheck(fd);
+
+ if (test_bit(DPH_BACKMAP, &dpfd->flags) &&
+ filp && filp->f_backmap) {
+ write_lock(&filp->f_dplock);
+ poll_remove_backmap(&filp->f_backmap, fd,
+ current->files);
+ write_unlock(&filp->f_dplock);
+ }
+ }
+ write_unlock_irqrestore(&dp->dp_lock, flags);
+
+ kfree(dpfd);
+
+ /* Decremented outside dp_lock, mirroring dp_insert's increment;
+ * assumes a single writer per open instance.
+ */
+ dp->dp_entries--;
+}
+
+/*
+ * Delete every remaining interest-list entry and free the bucket
+ * array.  Called from close_devpoll with no concurrent users left.
+ */
+void
+dp_free(struct devpoll *dp)
+{
+ int i;
+ struct dp_fd *dpfd = NULL;
+
+ lock_kernel();
+ for (i = 0; i <= dp->dp_bucket_mask; i++) {
+ while (!list_empty(&dp->dp_tab[i])) {
+ dpfd =
+ list_entry(dp->dp_tab[i].next, struct dp_fd, next);
+ dp_delete(dp, dpfd);
+ }
+ }
+ unlock_kernel();
+
+ kfree(dp->dp_tab);
+}
+
+/*
+ * poll the fds that we keep in our state, return after we reached
+ * max changed fds or are done.
+ * Copies up to "max" ready pollfds into rfds (the mmap'ed region
+ * when usemmap is set, otherwise a verified user buffer) and
+ * returns the ready count, 0 on timeout, or -EINTR on a signal.
+ * XXX - I do not like how the wait table stuff is done.
+ */
+
+int
+dp_poll(struct devpoll *dp, int max, poll_table * wait,
+ long timeout, struct pollfd *rfds, int usemmap)
+{
+ int count = 0;
+ lock_kernel();
+ read_lock(&dp->dp_lock);
+ for (;;) {
+ unsigned int j = 0;
+ struct dp_fd *dpfd = NULL;
+ struct pollfd *fdpnt, pfd;
+ struct file *file;
+
+ set_current_state(TASK_INTERRUPTIBLE);
+ for (j = 0; (j <= dp->dp_bucket_mask) && count < max; j++) {
+ struct list_head *lh;
+ list_for_each(lh, &dp->dp_tab[j]) {
+
+ int fd;
+ unsigned int mask = 0;
+ dpfd = list_entry(lh, struct dp_fd, next);
+
+ if (count >= max) {
+ break;
+ }
+
+ fdpnt = &dpfd->pfd;
+ fd = fdpnt->fd;
+
+ /* poll_wait increments f_count if needed */
+ file = fcheck(fd);
+ if (file == NULL) {
+ /* Got to move backward first;
+ * dp_delete will remove lh from
+ * the list otherwise
+ */
+ /* NOTE(review): dp_delete()
+ * write-locks dp_lock while we
+ * hold the read lock -- this
+ * self-deadlocks on SMP rwlocks;
+ * confirm UP-only assumption.
+ */
+ lh = lh->prev;
+ dp_delete(dp, dpfd);
+ dpfd = NULL;
+ continue;
+ }
+
+ /* Re-poll only dirty fds (or when wiring up
+ * the wait table); otherwise reuse revents.
+ */
+ mask = fdpnt->revents;
+ if (test_and_clear_bit(DPH_DIRTY,
+ &dpfd->flags) ||
+ wait != NULL || (mask & fdpnt->events)) {
+
+ mask = DEFAULT_POLLMASK;
+ if (file->f_op && file->f_op->poll)
+ mask =
+ file->f_op->poll(file,
+ wait);
+ /* if POLLHINT not supported by file
+ * then set bit to dirty ---
+ * must poll this file every time,
+ * otherwise bit will be set by
+ * calls to dp_add_hint
+ */
+ if (!(mask & POLLHINT))
+ set_bit(DPH_DIRTY,
+ &dpfd->flags);
+ fdpnt->revents = mask;
+ } else
+ dp->dp_cached++;
+
+ dp->dp_calls++;
+
+ mask &= fdpnt->events | POLLERR | POLLHUP;
+ if (mask) {
+ /* Something is ready: stop adding to
+ * the wait table, report this fd.
+ */
+ wait = NULL;
+ count++;
+
+ if (usemmap) {
+ *rfds = *fdpnt;
+ rfds->revents = mask;
+ } else {
+ pfd = *fdpnt;
+ pfd.revents = mask;
+ __copy_to_user(rfds, &pfd,
+ sizeof (struct
+ pollfd));
+ }
+
+ rfds++;
+ }
+ }
+ }
+
+ wait = NULL;
+ if (count || !timeout || signal_pending(current))
+ break;
+ read_unlock(&dp->dp_lock);
+ timeout = schedule_timeout(timeout);
+ read_lock(&dp->dp_lock);
+ }
+ set_current_state(TASK_RUNNING);
+ read_unlock(&dp->dp_lock);
+ unlock_kernel();
+
+ if (!count && signal_pending(current))
+ return -EINTR;
+
+ return count;
+}
+
+/*
+ * close a /dev/poll: print debug statistics, release the mmap page
+ * vector (if any), the interest hash, and the per-open state.
+ */
+
+static int
+close_devpoll(struct inode *inode, struct file *file)
+{
+ struct devpoll *dp = file->private_data;
+
+ /* dp_count is zero when DP_POLL was never issued; guard the
+ * average so the debug build does not divide by zero.
+ */
+ DNPRINTK(1,
+ (KERN_INFO "close /dev/poll, max: %d, avg: %d(%d/%d) %d/%d\n",
+ dp->dp_max, dp->dp_count ? dp->dp_avg / dp->dp_count : 0,
+ dp->dp_avg,
+ dp->dp_count, dp->dp_cached, dp->dp_calls));
+
+ /* free allocated memory */
+ if (dp->dp_memvec)
+ free_pg_vec(dp);
+
+ /* Free the hash table */
+ dp_free(dp);
+
+ kfree(dp);
+
+ MOD_DEC_USE_COUNT;
+ return 0;
+}
+
+/*
+ * open a /dev/poll: allocate and initialize the per-open state
+ * kept in file->private_data.  Returns 0, -ENOMEM on allocation
+ * failure.
+ */
+
+static int
+open_devpoll(struct inode *inode, struct file *file)
+{
+ struct devpoll *dp;
+ int r;
+
+ /* allocated state */
+ dp = kmalloc(sizeof (struct devpoll), GFP_KERNEL);
+ if (dp == NULL)
+ return -ENOMEM;
+
+ memset(dp, 0, sizeof (struct devpoll));
+ if ((r = dp_init(dp))) {
+ kfree(dp);
+ return r;
+ }
+
+ file->private_data = dp;
+
+ MOD_INC_USE_COUNT;
+
+ DNPRINTK(3, (KERN_INFO "open /dev/poll\n"));
+
+ return 0;
+}
+
+/*
+ * write to /dev/poll:
+ * a user writes struct pollfds and we add them to our list, or remove
+ * them if (events & POLLREMOVE) is true.  Returns the number of
+ * bytes consumed, or -EINVAL / -EFAULT on bad input.
+ */
+
+static int
+write_devpoll(struct file *file, const char *buffer, size_t count,
+ loff_t * ppos)
+{
+ int r, rcount;
+ struct devpoll *dp = file->private_data;
+ struct pollfd pfd;
+ struct dp_fd *dpfd;
+#ifdef DEBUG
+ int add = 0, delete = 0, change = 0;
+#endif
+
+ /* NOTE(review): "%i" with a size_t argument is a format
+ * mismatch; only compiled when DEBUG is set.
+ */
+ DNPRINTK(3, (KERN_INFO "write /dev/poll %i\n", count));
+
+ /* Input must be a whole number of pollfd records */
+ if (count % sizeof (struct pollfd))
+ return -EINVAL;
+
+ if ((r = verify_area(VERIFY_READ, buffer, count)))
+ return r;
+
+ rcount = count;
+
+ lock_kernel();
+
+ while (count > 0) {
+ /* Range already checked by verify_area above */
+ __copy_from_user(&pfd, buffer, sizeof (pfd)); /* no check */
+
+ dpfd = dp_find(dp, pfd.fd);
+
+ /* NOTE(review): current->files->fd[] is read without
+ * files->file_lock; presumably safe under the BKL taken
+ * above -- confirm.
+ */
+ if (pfd.fd >= current->files->max_fds ||
+ current->files->fd[pfd.fd] == NULL) {
+ /* Be tolerant, maybe the close happened already */
+ pfd.events = POLLREMOVE;
+ }
+ /* See if we need to remove the file descriptor. If it
+ * already exists OR the event fields, otherwise insert
+ */
+ if (pfd.events & POLLREMOVE) {
+ if (dpfd)
+ dp_delete(dp, dpfd);
+#ifdef DEBUG
+ delete++;
+#endif
+ } else if (dpfd) {
+ /* XXX dpfd->pfd.events |= pfd.events; */
+ dpfd->pfd.events = pfd.events;
+#ifdef DEBUG
+ change++;
+#endif
+ } else {
+ dp_insert(dp, &pfd);
+#ifdef DEBUG
+ add++;
+#endif
+ }
+
+ buffer += sizeof (pfd);
+ count -= sizeof (pfd);
+ }
+
+ unlock_kernel();
+
+ /* Track the high-water mark of the interest list (statistics) */
+ if (dp->dp_max < dp->dp_entries) {
+ dp->dp_max = dp->dp_entries;
+ DNPRINTK(2, (KERN_INFO "/dev/poll: new max %d\n", dp->dp_max));
+ }
+
+ DNPRINTK(3, (KERN_INFO "write /dev/poll: %d entries (%d/%d/%d)\n",
+ dp->dp_entries, add, delete, change));
+
+ return (rcount);
+}
+
+/*
+ * ioctl handler for /dev/poll:
+ *  DP_ALLOC    - allocate pages backing the mmap'ed result area
+ *  DP_FREE     - release those pages (refused while mmapped)
+ *  DP_ISPOLLED - report whether a given fd is being polled
+ *  DP_POLL     - poll all registered fds; results go to the
+ *                mmap'ed area or to a user-supplied buffer
+ */
+static int
+ioctl_devpoll(struct inode *inode, struct file *file,
+ unsigned int cmd, unsigned long arg)
+{
+ struct devpoll *dp = file->private_data;
+ unsigned mapsize = 0;
+ unsigned num_pages = 0;
+ int i = 0;
+ switch (cmd) {
+ case DP_ALLOC:
+ if (arg > current->rlim[RLIMIT_NOFILE].rlim_cur)
+ return -EINVAL;
+ /* Refuse a second DP_ALLOC; it would leak the old pages */
+ if (dp->dp_mmap || dp->dp_memvec)
+ return -EPERM;
+
+ mapsize = DP_MMAP_SIZE(arg);
+ num_pages = (PAGE_ALIGN(mapsize) >> PAGE_SHIFT);
+
+ dp->dp_memvec = kmalloc(num_pages * sizeof (unsigned long *),
+ GFP_KERNEL);
+
+ /* was -EINVAL; allocation failure is -ENOMEM */
+ if (dp->dp_memvec == NULL)
+ return -ENOMEM;
+
+ memset(dp->dp_memvec, 0, num_pages * sizeof (unsigned long *));
+
+ for (i = 0; i < num_pages; ++i) {
+ struct page *page, *page_end;
+
+ dp->dp_memvec[i] =
+ (u_char *) __get_free_pages(GFP_KERNEL, 0);
+ if (!dp->dp_memvec[i]) {
+ free_pg_vec(dp);
+ return -ENOMEM;
+ }
+
+ /* Reserve the pages so the VM leaves them alone
+ * while user space has them mapped.
+ */
+ page_end =
+ virt_to_page(dp->dp_memvec[i] + PAGE_SIZE - 1);
+ for (page = virt_to_page(dp->dp_memvec[i]);
+ page <= page_end; page++)
+ set_bit(PG_reserved, &page->flags);
+
+ ++dp->dp_numvec;
+ }
+
+ dp->dp_nfds = arg;
+
+ DPRINTK((KERN_INFO "allocated %d pollfds\n", dp->dp_nfds));
+
+ return 0;
+ case DP_FREE:
+ if (atomic_read(&dp->dp_mmapped))
+ return -EBUSY;
+
+ /* Test the vector pointer, not element [i]: dp_memvec is
+ * NULL when DP_ALLOC was never issued, and indexing it
+ * here would oops.
+ */
+ if (dp->dp_memvec) {
+ free_pg_vec(dp);
+ }
+
+ DPRINTK((KERN_INFO "freed %d pollfds\n", dp->dp_nfds));
+ dp->dp_nfds = 0;
+
+ return 0;
+ case DP_ISPOLLED:{
+ struct pollfd pfd;
+ struct dp_fd *dpfd;
+
+ if (copy_from_user(&pfd, (void *) arg, sizeof (pfd)))
+ return -EFAULT;
+ dpfd = dp_find(dp, pfd.fd);
+ if (dpfd == NULL)
+ return (0);
+
+ /* We poll this fd, return the events we poll on */
+ pfd.events = dpfd->pfd.events;
+ pfd.revents = 0;
+
+ if (copy_to_user((void *) arg, &pfd, sizeof (pfd)))
+ return -EFAULT;
+ return (1);
+ }
+ case DP_POLL:{
+ struct dvpoll dopoll;
+ int nfds, usemmap = 0;
+ unsigned long timeout;
+ poll_table wait;
+ struct pollfd *rpfds = NULL;
+
+ if (copy_from_user
+ (&dopoll, (void *) arg, sizeof (dopoll)))
+ return -EFAULT;
+
+ /* We do not need to check this value, it's user space */
+ nfds = dopoll.dp_nfds;
+ if (nfds <= 0)
+ return -EINVAL;
+
+ if (dopoll.dp_fds == NULL) {
+ if (dp->dp_mmap == NULL)
+ return -EINVAL;
+ rpfds = (struct pollfd *) dp->dp_mmap;
+ usemmap = 1;
+ } else {
+ rpfds = dopoll.dp_fds;
+ if (verify_area(VERIFY_WRITE, rpfds,
+ nfds * sizeof (struct pollfd)))
+ return -EFAULT;
+ usemmap = 0;
+ }
+
+ /* Convert the millisecond timeout to jiffies */
+ timeout = dopoll.dp_timeout;
+ if (timeout) {
+ /* Careful about overflow in the intermediate values */
+ if ((unsigned long) timeout <
+ MAX_SCHEDULE_TIMEOUT / HZ)
+ timeout =
+ (timeout * HZ + 999) / 1000 + 1;
+ else /* Negative or overflow */
+ timeout = MAX_SCHEDULE_TIMEOUT;
+ }
+
+ /* Initialize wait table */
+ poll_initwait(&wait);
+
+ nfds =
+ dp_poll(dp, nfds, &wait, timeout, rpfds, usemmap);
+
+ DNPRINTK(2,
+ (KERN_INFO "poll time %ld -> %d\n", timeout,
+ nfds));
+
+ poll_freewait(&wait);
+
+ dp->dp_avg += dp->dp_entries;
+ dp->dp_count++;
+
+ return nfds;
+ }
+ default:
+ DPRINTK((KERN_INFO "ioctl(%x) /dev/poll\n", cmd));
+ break;
+ }
+
+ return -EINVAL;
+}
+
+/*
+ * Release the pages allocated by DP_ALLOC: clear each page's
+ * reserved bit, free the pages, then free the page-vector itself.
+ * dp_memvec is cleared so that a later DP_FREE or close cannot
+ * kfree it a second time.
+ */
+static void
+free_pg_vec(struct devpoll *dp)
+{
+ int i;
+
+ for (i = 0; i < dp->dp_numvec; i++) {
+ if (dp->dp_memvec[i]) {
+ struct page *page, *page_end;
+
+ page_end =
+ virt_to_page(dp->dp_memvec[i] + PAGE_SIZE - 1);
+ for (page = virt_to_page(dp->dp_memvec[i]);
+ page <= page_end; page++)
+ clear_bit(PG_reserved, &page->flags);
+
+ free_pages((unsigned) dp->dp_memvec[i], 0);
+ }
+ }
+ kfree(dp->dp_memvec);
+ /* was left dangling: close_devpoll tests dp_memvec and would
+ * kfree it again after a DP_FREE (double free)
+ */
+ dp->dp_memvec = NULL;
+ dp->dp_numvec = 0;
+}
+
+/* VMA open: count another mapping of the /dev/poll result area */
+static void
+devpoll_mm_open(struct vm_area_struct *vma)
+{
+ struct file *file = vma->vm_file;
+ struct devpoll *dp = file->private_data;
+ if (dp)
+ atomic_inc(&dp->dp_mmapped);
+}
+
+/* VMA close: drop one mapping reference; DP_FREE is allowed again
+ * once the count reaches zero.
+ */
+static void
+devpoll_mm_close(struct vm_area_struct *vma)
+{
+ struct file *file = vma->vm_file;
+ struct devpoll *dp = file->private_data;
+ if (dp)
+ atomic_dec(&dp->dp_mmapped);
+}
+
+static struct vm_operations_struct devpoll_mmap_ops = {
+ open:devpoll_mm_open,
+ close:devpoll_mm_close,
+};
+
+/*
+ * mmap shared memory. the first half is an array of struct pollfd,
+ * followed by an array of ints to indicate which file descriptors
+ * changed status.
+ */
+
+static int
+mmap_devpoll(struct file *file, struct vm_area_struct *vma)
+{
+ struct devpoll *dp = file->private_data;
+ unsigned long start; /* Evil type to remap_page_range */
+ int i = 0;
+ int num_pages = 0;
+ size_t size, mapsize;
+
+ DPRINTK((KERN_INFO "mmap /dev/poll: %lx %lx\n",
+ vma->vm_start, vma->vm_pgoff << PAGE_SHIFT));
+
+ /* Only offset-0 mappings are supported */
+ if ((vma->vm_pgoff << PAGE_SHIFT) != 0)
+ return -EINVAL;
+
+ /* Calculate how much memory we can map */
+ /* NOTE(review): "size" is computed but never used; presumably
+ * it was meant to bound mapsize -- confirm.
+ */
+ size = PAGE_ALIGN(DP_MMAP_SIZE(dp->dp_nfds));
+ mapsize = PAGE_ALIGN(vma->vm_end - vma->vm_start);
+ num_pages = mapsize >> PAGE_SHIFT;
+
+ /* Check if the requested size is within our size */
+ if (mapsize > dp->dp_numvec << PAGE_SHIFT)
+ return -EINVAL;
+
+ start = vma->vm_start;
+ atomic_set(&dp->dp_mmapped, 1);
+ for (i = 0; i < num_pages; ++i) {
+ if (remap_page_range(start, __pa(dp->dp_memvec[i]),
+ PAGE_SIZE, vma->vm_page_prot))
+ return -EINVAL;
+ start += PAGE_SIZE;
+ }
+ dp->dp_mmap = (u_char *) vma->vm_start;
+ vma->vm_ops = &devpoll_mmap_ops;
+
+ /* The old DPRINTK referenced the undefined identifier "page"
+ * and broke the build whenever DEBUG was enabled.
+ */
+ DPRINTK((KERN_INFO "mmap /dev/poll: %lx %lx\n",
+ vma->vm_start, (unsigned long) mapsize));
+ return 0;
+}
+
+struct file_operations devpoll_fops = {
+ write:write_devpoll,
+ ioctl:ioctl_devpoll,
+ mmap:mmap_devpoll,
+ open:open_devpoll,
+ release:close_devpoll
+};
+
+static struct miscdevice devpoll = {
+ DEVPOLL_MINOR, "devpoll", &devpoll_fops
+};
+
+/*
+ * Register the /dev/poll misc device at boot / module load.
+ * Propagate misc_register() failure instead of ignoring it, so the
+ * module does not claim success without a device node.
+ */
+int __init
+devpoll_init(void)
+{
+ int r;
+
+ r = misc_register(&devpoll);
+ if (r) {
+ printk(KERN_ERR "/dev/poll: misc_register failed (%d)\n", r);
+ return r;
+ }
+
+ printk(KERN_INFO "/dev/poll driver installed.\n");
+
+ return 0;
+}
+
+module_init(devpoll_init);
+#ifdef MODULE
+
+/* Module unload: remove the /dev/poll misc device */
+void
+cleanup_module(void)
+{
+ misc_deregister(&devpoll);
+}
+#endif
diff -rNu linux.orig/fs/file_table.c linux/fs/file_table.c
--- linux.orig/fs/file_table.c Wed Apr 18 19:49:12 2001
+++ linux/fs/file_table.c Wed Jun 27 16:49:49 2001
@@ -11,6 +11,7 @@
#include <linux/init.h>
#include <linux/module.h>
#include <linux/smp_lock.h>
+#include <linux/spinlock.h>
/* sysctl tunables... */
struct files_stat_struct files_stat = {0, 0, NR_FILE};
@@ -45,6 +46,7 @@
f->f_version = ++event;
f->f_uid = current->fsuid;
f->f_gid = current->fsgid;
+ rwlock_init(&f->f_dplock);
list_add(&f->f_list, &anon_list);
file_list_unlock();
return f;
diff -rNu linux.orig/fs/open.c linux/fs/open.c
--- linux.orig/fs/open.c Fri Feb 9 19:29:44 2001
+++ linux/fs/open.c Wed Jun 27 18:01:15 2001
@@ -14,6 +14,8 @@
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/tty.h>
+#include <linux/poll.h>
+#include <linux/devpoll.h>
#include <asm/uaccess.h>
@@ -802,6 +805,14 @@
retval = filp->f_op->flush(filp);
unlock_kernel();
}
+
+ if (filp->f_backmap) {
+ unsigned long flags;
+ write_lock_irqsave(&filp->f_dplock,flags);
+ poll_clean_backmap(&filp->f_backmap);
+ write_unlock_irqrestore(&filp->f_dplock,flags);
+ }
+
fcntl_dirnotify(0, filp, 0);
locks_remove_posix(filp, id);
fput(filp);
@@ -828,6 +839,14 @@
FD_CLR(fd, files->close_on_exec);
__put_unused_fd(files, fd);
write_unlock(&files->file_lock);
+
+ if (filp->f_backmap) {
+ unsigned long flags;
+ write_lock_irqsave(&filp->f_dplock,flags);
+ poll_remove_backmap(&filp->f_backmap,fd, files);
+ write_unlock_irqrestore(&filp->f_dplock,flags);
+ }
+
return filp_close(filp, files);
out_unlock:
diff -rNu linux.orig/include/asm-i386/poll.h linux/include/asm-i386/poll.h
--- linux.orig/include/asm-i386/poll.h Thu Jan 23 19:01:28 1997
+++ linux/include/asm-i386/poll.h Wed Jun 27 17:16:57 2001
@@ -15,6 +15,8 @@
#define POLLWRNORM 0x0100
#define POLLWRBAND 0x0200
#define POLLMSG 0x0400
+#define POLLREMOVE 0x1000
+#define POLLHINT 0x2000
struct pollfd {
int fd;
diff -rNu linux.orig/include/linux/devpoll.h linux/include/linux/devpoll.h
--- linux.orig/include/linux/devpoll.h Thu Jan 1 01:00:00 1970
+++ linux/include/linux/devpoll.h Wed Jun 27 19:58:52 2001
@@ -0,0 +1,85 @@
+/*
+ * /dev/poll
+ * by Niels Provos <provos@citi.umich.edu>
+ *
+ * provides poll() support via /dev/poll as in Solaris.
+ *
+ * Linux 2.3.x port by Michal Ostrowski
+ * Linux 2.4.x patches by Vincent Sweeney <v.sweeney@dexterus.com>
+ */
+
+#ifndef _LINUX_DEVPOLL_H
+#define _LINUX_DEVPOLL_H
+
+#include <asm/bitops.h>
+#include <linux/list.h>
+#include <asm/atomic.h>
+
+#define DPH_DIRTY 0 /* entry is dirty - bit */
+#define DPH_BACKMAP 1 /* file has an fd back map - bit */
+#ifdef __KERNEL__
+struct dp_fd {
+ struct list_head next;
+ struct pollfd pfd;
+ int flags; /* for hinting */
+};
+
+struct devpoll {
+ struct list_head *dp_tab;
+ int dp_entries; /* Entries in hash table */
+ int dp_max; /* statistics */
+ int dp_avg; /* more */
+ int dp_count;
+ int dp_cached;
+ int dp_calls;
+ int dp_bucket_bits;
+ int dp_bucket_mask;
+ int dp_nfds; /* Number of poll fds */
+ u_char *dp_mmap; /* vaddr of mapped region */
+ atomic_t dp_mmapped; /* Are we mmapped */
+ rwlock_t dp_lock;
+ u_char **dp_memvec; /* Pointer to pages allocated for mmap */
+ int dp_numvec; /* Size of above array */
+};
+#endif
+/* Match solaris */
+
+struct dvpoll {
+ struct pollfd *dp_fds; /* Leave this ZERO for mmap */
+ int dp_nfds;
+ int dp_timeout;
+};
+
+#define DEVPOLL_MINOR 125 /* Minor device # for /dev/poll */
+
+#define DP_MMAP_SIZE(x) ((x) * sizeof(struct pollfd))
+
+#define DP_ALLOC _IOR('P', 1, int)
+#define DP_POLL _IOWR('P', 2, struct dvpoll)
+#define DP_FREE _IO('P', 3)
+#define DP_ISPOLLED _IOWR('P', 4, struct pollfd)
+
+#ifdef __KERNEL__
+extern rwlock_t devpoll_lock;
+/* Function Prototypes */
+
+/*
+ * Wakeup-side hint: walk a file's backmap and mark every /dev/poll
+ * entry watching it dirty, so the next dp_poll re-polls the fd.
+ * Runs under the file's f_dplock read lock; set_bit is atomic.
+ */
+extern inline void
+dp_add_hint (struct poll_backmap **map, rwlock_t * lock)
+{
+ struct poll_backmap *entry;
+ struct dp_fd *dpfd;
+ if (!map)
+ return;
+
+ read_lock (lock);
+ entry = *map;
+ while (entry) {
+ dpfd = entry->arg;
+ set_bit (DPH_DIRTY, &dpfd->flags); /* atomic */
+ entry = entry->next;
+ }
+ read_unlock (lock);
+}
+#endif /* __KERNEL__ */
+
+#endif
diff -rNu linux.orig/include/linux/fs.h linux/include/linux/fs.h
--- linux.orig/include/linux/fs.h Sat May 26 02:01:28 2001
+++ linux/include/linux/fs.h Wed Jun 27 19:20:36 2001
@@ -502,6 +502,10 @@
int f_error;
unsigned long f_version;
+
+ /* used by /dev/poll hinting */
+ struct poll_backmap *f_backmap;
+ rwlock_t f_dplock;
/* needed for tty driver, and maybe others */
void *private_data;
diff -rNu linux.orig/include/linux/poll.h linux/include/linux/poll.h
--- linux.orig/include/linux/poll.h Sat May 26 02:01:43 2001
+++ linux/include/linux/poll.h Wed Jun 27 19:21:05 2001
@@ -8,10 +8,18 @@
#include <linux/wait.h>
#include <linux/string.h>
#include <linux/mm.h>
+#include <linux/malloc.h>
#include <asm/uaccess.h>
struct poll_table_page;
+struct poll_backmap {
+ struct poll_backmap *next;
+ void *arg; /* pointer to devpoll */
+ struct files_struct *files; /* files which has this file as */
+ int fd; /* file descriptor number fd */
+};
+
typedef struct poll_table_struct {
int error;
struct poll_table_page * table;
@@ -83,7 +91,88 @@
memset(fdset, 0, FDS_BYTES(nr));
}
+/*
+ * Record that (current->files, fd) is watched by the /dev/poll
+ * entry "arg": prepend a node to the file's backmap list unless an
+ * identical one already exists.  Allocation failure is silently
+ * tolerated -- the fd is then simply re-polled every time.
+ */
+extern inline void
+poll_backmap(int fd, void *arg, struct poll_backmap ** entry)
+{
+ struct poll_backmap *tmp;
+
+ if (!entry)
+ return;
+
+ /*
+ * See if we have an entry in the backmap already, in general
+ * we expect this linked list to be very short.
+ */
+ tmp = *entry;
+ while (tmp != NULL) {
+ if (tmp->files == current->files && tmp->fd == fd &&
+ arg==tmp->arg)
+ return;
+ tmp = tmp->next;
+ }
+
+ /* Allocate a full node: the old sizeof(*entry) was only the
+ * size of a pointer and under-allocated the structure.
+ */
+ tmp = (struct poll_backmap *) kmalloc(sizeof(*tmp), GFP_KERNEL);
+ if (tmp == NULL)
+ return;
+
+ tmp->arg = arg;
+ tmp->files = current->files;
+ tmp->fd = fd;
+ tmp->next = *entry;
+
+ *entry = tmp;
+}
+
+/*
+ * Unlink and free every backmap node matching (files, fd).
+ * Called with the file's f_dplock held for writing.
+ */
+extern inline void poll_remove_backmap(struct poll_backmap **map, int fd,
+ struct files_struct *files)
+{
+ struct poll_backmap *tmp = *map, *old = NULL;
+
+ while (tmp != NULL) {
+ if (tmp->files == files && tmp->fd == fd) {
+ struct poll_backmap *next = tmp->next;
+ if( old==NULL )
+ *map = next;
+ else
+ old->next = next;
+ kfree(tmp);
+ tmp = next;
+ } else {
+ old = tmp;
+ tmp = tmp->next;
+ }
+ }
+ /* tmp is always NULL here; the old trailing unlink/kfree block
+ * was dead code and has been removed.
+ */
+}
+
+/*
+ * Free the whole backmap list of a file and reset the head.
+ * Called from filp_close with the file's f_dplock held for writing.
+ * (The old per-call printk debug spam has been removed; it fired on
+ * every close of a backmapped file.)
+ */
+extern inline void poll_clean_backmap(struct poll_backmap **map)
+{
+ struct poll_backmap *tmp = *map, *old;
+
+ while (tmp) {
+ old = tmp;
+ tmp = tmp->next;
+ kfree (old);
+ }
+
+ *map = NULL;
+}
+
extern int do_select(int n, fd_set_bits *fds, long *timeout);
+extern void poll_freewait(poll_table *p);
#endif /* KERNEL */
diff -rNu linux.orig/include/net/sock.h linux/include/net/sock.h
--- linux.orig/include/net/sock.h Sat May 26 02:03:05 2001
+++ linux/include/net/sock.h Wed Jun 27 19:21:05 2001
@@ -666,6 +666,10 @@
/* Identd and reporting IO signals */
struct socket *socket;
+ /* For Poll hinting */
+ void *backmap;
+ void *dplock;
+
/* RPC layer private data */
void *user_data;
diff -rNu linux.orig/net/core/datagram.c linux/net/core/datagram.c
--- linux.orig/net/core/datagram.c Thu Apr 12 20:11:39 2001
+++ linux/net/core/datagram.c Wed Jun 27 17:28:29 2001
@@ -402,8 +402,6 @@
return -EFAULT;
}
-
-
/*
* Datagram poll: Again totally generic. This also handles
* sequenced packet sockets providing the socket receive queue
@@ -420,7 +418,10 @@
unsigned int mask;
poll_wait(file, sk->sleep, wait);
- mask = 0;
+ mask = POLLHINT;
+
+ sk->backmap = &file->f_backmap;
+ sk->dplock = &file->f_dplock;
/* exceptional events? */
if (sk->err || !skb_queue_empty(&sk->error_queue))
diff -rNu linux.orig/net/core/sock.c linux/net/core/sock.c
--- linux.orig/net/core/sock.c Wed Apr 25 22:57:39 2001
+++ linux/net/core/sock.c Wed Jun 27 18:04:44 2001
@@ -108,6 +108,7 @@
#include <linux/interrupt.h>
#include <linux/poll.h>
#include <linux/init.h>
+#include <linux/devpoll.h>
#include <asm/uaccess.h>
#include <asm/system.h>
@@ -1100,16 +1101,20 @@
void sock_def_wakeup(struct sock *sk)
{
read_lock(&sk->callback_lock);
- if (sk->sleep && waitqueue_active(sk->sleep))
+ if (sk->sleep && waitqueue_active(sk->sleep)) {
+ dp_add_hint(sk->backmap, sk->dplock);
wake_up_interruptible_all(sk->sleep);
+ }
read_unlock(&sk->callback_lock);
}
void sock_def_error_report(struct sock *sk)
{
read_lock(&sk->callback_lock);
- if (sk->sleep && waitqueue_active(sk->sleep))
+ if (sk->sleep && waitqueue_active(sk->sleep)) {
+ dp_add_hint(sk->backmap, sk->dplock);
wake_up_interruptible(sk->sleep);
+ }
sk_wake_async(sk,0,POLL_ERR);
read_unlock(&sk->callback_lock);
}
@@ -1117,8 +1122,10 @@
void sock_def_readable(struct sock *sk, int len)
{
read_lock(&sk->callback_lock);
- if (sk->sleep && waitqueue_active(sk->sleep))
+ if (sk->sleep && waitqueue_active(sk->sleep)) {
+ dp_add_hint(sk->backmap, sk->dplock);
wake_up_interruptible(sk->sleep);
+ }
sk_wake_async(sk,1,POLL_IN);
read_unlock(&sk->callback_lock);
}
@@ -1131,8 +1138,10 @@
* progress. --DaveM
*/
if((atomic_read(&sk->wmem_alloc) << 1) <= sk->sndbuf) {
- if (sk->sleep && waitqueue_active(sk->sleep))
+ if (sk->sleep && waitqueue_active(sk->sleep)) {
+ dp_add_hint(sk->backmap, sk->dplock);
wake_up_interruptible(sk->sleep);
+ }
/* Should agree with poll, otherwise some programs break */
if (sock_writeable(sk))
@@ -1163,6 +1172,9 @@
sk->zapped = 1;
sk->socket = sock;
+ sk->backmap = NULL;
+ sk->dplock = NULL;
+
if(sock)
{
sk->type = sock->type;
diff -rNu linux.orig/net/ipv4/af_inet.c linux/net/ipv4/af_inet.c
--- linux.orig/net/ipv4/af_inet.c Wed May 2 04:59:24 2001
+++ linux/net/ipv4/af_inet.c Wed Jun 27 18:06:43 2001
@@ -444,6 +444,7 @@
if (sk->linger && !(current->flags & PF_EXITING))
timeout = sk->lingertime;
sock->sk = NULL;
+ sk->backmap = NULL;
sk->prot->close(sk, timeout);
}
return(0);
diff -rNu linux.orig/net/ipv4/tcp.c linux/net/ipv4/tcp.c
--- linux.orig/net/ipv4/tcp.c Wed May 16 18:31:27 2001
+++ linux/net/ipv4/tcp.c Wed Jun 27 17:37:22 2001
@@ -249,6 +249,7 @@
#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/poll.h>
+#include <linux/devpoll.h>
#include <linux/init.h>
#include <linux/smp_lock.h>
@@ -380,8 +381,12 @@
struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
poll_wait(file, sk->sleep, wait);
+
+ sk->backmap = &file->f_backmap;
+ sk->dplock = &file->f_dplock;
+
if (sk->state == TCP_LISTEN)
- return tcp_listen_poll(sk, wait);
+ return tcp_listen_poll(sk, wait) | POLLHINT;
/* Socket is not locked. We are protected from async events
by poll logic and correct handling of state changes
@@ -454,7 +459,7 @@
if (tp->urg_data & TCP_URG_VALID)
mask |= POLLPRI;
}
- return mask;
+ return mask | POLLHINT;
}
/*
@@ -467,8 +472,10 @@
if (tcp_wspace(sk) >= tcp_min_write_space(sk) && sock) {
clear_bit(SOCK_NOSPACE, &sock->flags);
- if (sk->sleep && waitqueue_active(sk->sleep))
+ if (sk->sleep && waitqueue_active(sk->sleep)) {
+ dp_add_hint(sk->backmap, sk->dplock);
wake_up_interruptible(sk->sleep);
+ }
if (sock->fasync_list && !(sk->shutdown&SEND_SHUTDOWN))
sock_wake_async(sock, 2, POLL_OUT);
diff -rNu linux.orig/net/unix/af_unix.c linux/net/unix/af_unix.c
--- linux.orig/net/unix/af_unix.c Thu Apr 12 20:11:39 2001
+++ linux/net/unix/af_unix.c Wed Jun 27 17:39:17 2001
@@ -107,6 +107,7 @@
#include <net/scm.h>
#include <linux/init.h>
#include <linux/poll.h>
+#include <linux/devpoll.h>
#include <linux/smp_lock.h>
#include <asm/checksum.h>
@@ -299,8 +300,10 @@
{
read_lock(&sk->callback_lock);
if (unix_writable(sk)) {
- if (sk->sleep && waitqueue_active(sk->sleep))
+ if (sk->sleep && waitqueue_active(sk->sleep)) {
+ dp_add_hint(sk->backmap,sk->dplock);
wake_up_interruptible(sk->sleep);
+ }
sk_wake_async(sk, 2, POLL_OUT);
}
read_unlock(&sk->callback_lock);
@@ -1698,7 +1701,10 @@
unsigned int mask;
poll_wait(file, sk->sleep, wait);
- mask = 0;
+ mask = POLLHINT;
+
+ sk->backmap = &file->f_backmap;
+ sk->dplock = &file->f_dplock;
/* exceptional events? */
if (sk->err)
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2001-06-27 19:34 UTC | newest]
Thread overview: (only message) (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2001-06-27 19:33 PATCH (2.4.5): /dev/poll support (3rd time lucky) Zarjazz
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox