All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Zarjazz" <zarjazz@barrysworld.com>
To: <linux-kernel@vger.kernel.org>
Cc: <kdc@nh.ultranet.com>, <linux-scalability@citi.umich.edu>
Subject: PATCH (2.4.5): /dev/poll support (3rd time lucky)
Date: Wed, 27 Jun 2001 20:33:21 +0100	[thread overview]
Message-ID: <00bb01c0ff40$065816c0$470c0a0a@DEVPC01> (raw)

[-- Attachment #1: Type: text/plain, Size: 803 bytes --]

Not my day it seems ! Hopefully I remembered to attach the file this time :)

--

Hi,
    this patch adds Solaris 7/8 like /dev/poll support to the kernel.

I can claim no real credit for this: it is basically the patch available from
http://www.citi.umich.edu/projects/linux-scalability/, which only seemed to
work with the 2.3.x devel branch, fixed up to compile correctly with 2.4.5.
I did this so I can compile & test an application on my home Linux PC when
I'm not around my nice work Solaris boxes :)

Please note, I do not know enough about kernel development to tell whether
this patch is broken or badly written. It may be buggy and/or worse than
the standard poll() call, but my application works, so I'll leave profiling
etc. to people more knowledgeable than me.

Vince.



[-- Attachment #2: devpoll-2.4.5.patch --]
[-- Type: application/octet-stream, Size: 31617 bytes --]

diff -rNu linux.orig/drivers/char/Config.in linux/drivers/char/Config.in
--- linux.orig/drivers/char/Config.in	Wed Mar  7 03:44:34 2001
+++ linux/drivers/char/Config.in	Wed Jun 27 16:41:00 2001
@@ -158,6 +158,7 @@
 
 dep_tristate 'Intel i8x0 Random Number Generator support' CONFIG_INTEL_RNG $CONFIG_PCI
 tristate '/dev/nvram support' CONFIG_NVRAM
+tristate '/dev/poll support' CONFIG_DEVPOLL
 tristate 'Enhanced Real Time Clock Support' CONFIG_RTC
 if [ "$CONFIG_IA64" = "y" ]; then
    bool 'EFI Real Time Clock Services' CONFIG_EFI_RTC
diff -rNu linux.orig/drivers/char/Makefile linux/drivers/char/Makefile
--- linux.orig/drivers/char/Makefile	Wed May 16 18:27:02 2001
+++ linux/drivers/char/Makefile	Wed Jun 27 16:43:07 2001
@@ -170,6 +170,7 @@
 obj-$(CONFIG_PC110_PAD) += pc110pad.o
 obj-$(CONFIG_RTC) += rtc.o
 obj-$(CONFIG_EFI_RTC) += efirtc.o
+obj-$(CONFIG_DEVPOLL) += devpoll.o
 ifeq ($(CONFIG_PPC),)
   obj-$(CONFIG_NVRAM) += nvram.o
 endif
diff -rNu linux.orig/drivers/char/devpoll.c linux/drivers/char/devpoll.c
--- linux.orig/drivers/char/devpoll.c	Thu Jan  1 01:00:00 1970
+++ linux/drivers/char/devpoll.c	Wed Jun 27 18:55:30 2001
@@ -0,0 +1,756 @@
+/*
+ * /dev/poll
+ * by Niels Provos <provos@citi.umich.edu>
+ *
+ * provides poll() support via /dev/poll as in Solaris.
+ *
+ * Linux 2.3.x port by Michal Ostrowski
+ * Linux 2.4.x patches by Vincent Sweeney <v.sweeney@dexterus.com>
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/file.h>
+#include <linux/signal.h>
+#include <linux/errno.h>
+#include <linux/mm.h>
+#include <linux/malloc.h>
+#include <linux/vmalloc.h>
+#include <linux/poll.h>
+#include <linux/miscdevice.h>
+#include <linux/random.h>
+#include <linux/smp_lock.h>
+#include <linux/wrapper.h>
+
+#include <linux/devpoll.h>
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/io.h>
+
+/*#define DEBUG 1 */
+#ifdef DEBUG
+#define DPRINTK(x)	printk x
+#define DNPRINTK(n,x)	if (n <= DEBUG) printk x
+#else
+#define DPRINTK(x)
+#define DNPRINTK(n,x)
+#endif
+
+/* Various utility functions */
+
+#define DEFAULT_POLLMASK (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)
+
+/* Do dynamic hashing */
+
+#define INITIAL_BUCKET_BITS 6
+#define MAX_BUCKET_BITS 16
+#define RESIZE_LENGTH	2
+
+static void free_pg_vec(struct devpoll *dp);
+
+/* Initialize the hash table */
+
+int
+dp_init(struct devpoll *dp)
+{
+	int i;
+	int num_buckets;
+	DNPRINTK(3, (KERN_INFO "/dev/poll: dp_init\n"));
+
+	dp->dp_lock = RW_LOCK_UNLOCKED;
+	dp->dp_entries = 0;
+	dp->dp_max = 0;
+	dp->dp_avg = dp->dp_count = 0;
+	dp->dp_cached = dp->dp_calls = 0;
+	dp->dp_bucket_bits = INITIAL_BUCKET_BITS;
+	dp->dp_bucket_mask = (1 << INITIAL_BUCKET_BITS) - 1;
+
+	num_buckets = (dp->dp_bucket_mask + 1);
+	dp->dp_tab = kmalloc(num_buckets * sizeof (struct list_head),
+			     GFP_KERNEL);
+
+	if (!dp->dp_tab)
+		return -ENOMEM;
+
+	for (i = 0; i < num_buckets; i++) {
+		INIT_LIST_HEAD(&dp->dp_tab[i]);
+	}
+
+	return (0);
+}
+
+int
+dp_resize(struct devpoll *dp)
+{
+	u_int16_t new_mask, old_mask;
+	int i;
+	struct list_head *new_tab, *old_tab;
+	struct dp_fd *dpfd;
+	unsigned long flags;
+	int num_buckets;
+
+	old_mask = dp->dp_bucket_mask;
+	new_mask = (old_mask + 1) * 2 - 1;
+	num_buckets = new_mask + 1;
+
+	DPRINTK((KERN_INFO "/dev/poll: resize %d -> %d\n", old_mask, new_mask));
+
+	new_tab = kmalloc(num_buckets * sizeof (struct list_head), GFP_KERNEL);
+	if (!new_tab)
+		return -ENOMEM;
+
+	for (i = 0; i < num_buckets; i++) {
+		INIT_LIST_HEAD(&new_tab[i]);
+	}
+
+	old_tab = dp->dp_tab;
+
+	/* Rehash all entries */
+	write_lock_irqsave(&dp->dp_lock, flags);
+	for (i = 0; i <= old_mask; i++) {
+		while (!list_empty(&old_tab[i])) {
+			dpfd = list_entry(old_tab[i].next, struct dp_fd, next);
+			list_del(&dpfd->next);
+			INIT_LIST_HEAD(&dpfd->next);
+			list_add(&dpfd->next,
+				 &new_tab[dpfd->pfd.fd & new_mask]);
+		}
+	}
+
+	dp->dp_tab = new_tab;
+	dp->dp_bucket_bits++;
+	dp->dp_bucket_mask = new_mask;
+	write_unlock_irqrestore(&dp->dp_lock, flags);
+
+	kfree(old_tab);
+
+	return (0);
+}
+
+int
+dp_insert(struct devpoll *dp, struct pollfd *pfd)
+{
+	struct dp_fd *dpfd;
+	u_int16_t bucket = pfd->fd & dp->dp_bucket_mask;
+	unsigned long flags;
+	struct file *file;
+
+	dpfd = kmalloc(sizeof (struct dp_fd), GFP_KERNEL);
+	if (!dpfd)
+		return -ENOMEM;
+
+	dpfd->flags = 0;
+	set_bit(DPH_DIRTY, &dpfd->flags);
+	dpfd->pfd = *pfd;
+	dpfd->pfd.revents = 0;
+	INIT_LIST_HEAD(&dpfd->next);
+
+	write_lock_irqsave(&dp->dp_lock, flags);
+
+	list_add(&dpfd->next, &dp->dp_tab[bucket]);
+
+	file = fcheck(pfd->fd);
+	if (file != NULL) {
+		write_lock(&(file)->f_dplock);
+		poll_backmap(pfd->fd, dpfd, &(file)->f_backmap);
+		write_unlock(&(file)->f_dplock);
+		set_bit(DPH_BACKMAP, &(dpfd)->flags);
+	}
+	write_unlock_irqrestore(&dp->dp_lock, flags);
+
+	dp->dp_entries++;
+	/* Check if we need to resize the hash table */
+	if ((dp->dp_entries >> dp->dp_bucket_bits) > RESIZE_LENGTH &&
+	    dp->dp_bucket_bits < MAX_BUCKET_BITS)
+		dp_resize(dp);
+
+	return (0);
+}
+
+struct dp_fd *
+dp_find(struct devpoll *dp, int fd)
+{
+	struct dp_fd *dpfd = NULL;
+	struct list_head *lh;
+	u_int16_t bucket = fd & dp->dp_bucket_mask;
+
+	read_lock(&dp->dp_lock);
+	list_for_each(lh, &dp->dp_tab[bucket]) {
+		dpfd = list_entry(lh, struct dp_fd, next);
+		if (dpfd->pfd.fd == fd)
+			break;
+		dpfd = NULL;
+	}
+
+	read_unlock(&dp->dp_lock);
+	DNPRINTK(2, (KERN_INFO "dp_find: %d -> %p\n", fd, dpfd));
+
+	return dpfd;
+}
+
+void
+dp_delete(struct devpoll *dp, struct dp_fd *dpfd)
+{
+	unsigned long flags;
+	int fd;
+	struct file *filp;
+
+	write_lock_irqsave(&dp->dp_lock, flags);
+	list_del(&dpfd->next);
+
+	INIT_LIST_HEAD(&dpfd->next);
+
+	/* Remove backmaps if necessary */
+	if (current->files) {
+		fd = dpfd->pfd.fd;
+		filp = fcheck(fd);
+
+		if (test_bit(DPH_BACKMAP, &dpfd->flags) &&
+		    filp && filp->f_backmap) {
+			write_lock(&filp->f_dplock);
+			poll_remove_backmap(&filp->f_backmap, fd,
+					    current->files);
+			write_unlock(&filp->f_dplock);
+		}
+	}
+	write_unlock_irqrestore(&dp->dp_lock, flags);
+
+	kfree(dpfd);
+
+	dp->dp_entries--;
+}
+
+void
+dp_free(struct devpoll *dp)
+{
+	int i;
+	struct dp_fd *dpfd = NULL;
+
+	lock_kernel();
+	for (i = 0; i <= dp->dp_bucket_mask; i++) {
+		while (!list_empty(&dp->dp_tab[i])) {
+			dpfd =
+			    list_entry(dp->dp_tab[i].next, struct dp_fd, next);
+			dp_delete(dp, dpfd);
+		}
+	}
+	unlock_kernel();
+
+	kfree(dp->dp_tab);
+}
+
+/*
+ * poll the fds that we keep in our state, return after we reached
+ * max changed fds or are done.
+ * XXX - I do not like how the wait table stuff is done.
+ */
+
+int
+dp_poll(struct devpoll *dp, int max, poll_table * wait,
+	long timeout, struct pollfd *rfds, int usemmap)
+{
+	int count = 0;
+	lock_kernel();
+	read_lock(&dp->dp_lock);
+	for (;;) {
+		unsigned int j = 0;
+		struct dp_fd *dpfd = NULL;
+		struct pollfd *fdpnt, pfd;
+		struct file *file;
+
+		set_current_state(TASK_INTERRUPTIBLE);
+		for (j = 0; (j <= dp->dp_bucket_mask) && count < max; j++) {
+			struct list_head *lh;
+			list_for_each(lh, &dp->dp_tab[j]) {
+
+				int fd;
+				unsigned int mask = 0;
+				dpfd = list_entry(lh, struct dp_fd, next);
+
+				if (count >= max) {
+					break;
+				}
+
+				fdpnt = &dpfd->pfd;
+				fd = fdpnt->fd;
+
+				/* poll_wait increments f_count if needed */
+				file = fcheck(fd);
+				if (file == NULL) {
+					/* Got to move backward first;
+					 * dp_delete will remove lh from
+					 * the list otherwise
+					 */
+					lh = lh->prev;
+					dp_delete(dp, dpfd);
+					dpfd = NULL;
+					continue;
+				}
+
+				mask = fdpnt->revents;
+				if (test_and_clear_bit(DPH_DIRTY,
+						       &dpfd->flags) ||
+				    wait != NULL || (mask & fdpnt->events)) {
+
+					mask = DEFAULT_POLLMASK;
+					if (file->f_op && file->f_op->poll)
+						mask =
+						    file->f_op->poll(file,
+								     wait);
+					/* if POLLHINT not supported by file
+					 * then set bit to dirty ---
+					 * must poll this file every time,
+					 * otherwise bit will be set by
+					 * calls to dp_add_hint
+					 */
+					if (!(mask & POLLHINT))
+						set_bit(DPH_DIRTY,
+							&dpfd->flags);
+					fdpnt->revents = mask;
+				} else
+					dp->dp_cached++;
+
+				dp->dp_calls++;
+
+				mask &= fdpnt->events | POLLERR | POLLHUP;
+				if (mask) {
+					wait = NULL;
+					count++;
+
+					if (usemmap) {
+						*rfds = *fdpnt;
+						rfds->revents = mask;
+					} else {
+						pfd = *fdpnt;
+						pfd.revents = mask;
+						__copy_to_user(rfds, &pfd,
+							       sizeof (struct
+								       pollfd));
+					}
+
+					rfds++;
+				}
+			}
+		}
+
+		wait = NULL;
+		if (count || !timeout || signal_pending(current))
+			break;
+		read_unlock(&dp->dp_lock);
+		timeout = schedule_timeout(timeout);
+		read_lock(&dp->dp_lock);
+	}
+	set_current_state(TASK_RUNNING);
+	read_unlock(&dp->dp_lock);
+	unlock_kernel();
+
+	if (!count && signal_pending(current))
+		return -EINTR;
+
+	return count;
+}
+
+/*
+ * close a /dev/poll: release the mmap pages, the hash table and dp itself
+ */
+
+static int
+close_devpoll(struct inode *inode, struct file *file)
+{
+	struct devpoll *dp = file->private_data;
+
+	/* dp_count is 0 if DP_POLL never ran; do not divide by it then */
+	DNPRINTK(1,
+		 (KERN_INFO "close /dev/poll, max: %d, avg: %d(%d/%d) %d/%d\n",
+		  dp->dp_max, dp->dp_count ? dp->dp_avg / dp->dp_count : 0,
+		  dp->dp_avg, dp->dp_count, dp->dp_cached, dp->dp_calls));
+
+	/* free allocated memory */
+	if (dp->dp_memvec)
+		free_pg_vec(dp);
+
+	/* Free the hash table */
+	dp_free(dp);
+	kfree(dp);
+
+	MOD_DEC_USE_COUNT;
+	return 0;
+}
+
+/*
+ * open a /dev/poll
+ */
+
+static int
+open_devpoll(struct inode *inode, struct file *file)
+{
+	struct devpoll *dp;
+	int r;
+
+	/* allocated state */
+	dp = kmalloc(sizeof (struct devpoll), GFP_KERNEL);
+	if (dp == NULL)
+		return -ENOMEM;
+
+	memset(dp, 0, sizeof (struct devpoll));
+	if ((r = dp_init(dp))) {
+		kfree(dp);
+		return r;
+	}
+
+	file->private_data = dp;
+
+	MOD_INC_USE_COUNT;
+
+	DNPRINTK(3, (KERN_INFO "open /dev/poll\n"));
+
+	return 0;
+}
+
+/*
+ * write to /dev/poll:
+ * the buffer is an array of struct pollfd.  Each entry is added to the
+ * set, has its events replaced if already present, or is removed when
+ * (events & POLLREMOVE) is set.  Returns count, or a negative errno.
+ */
+
+static int
+write_devpoll(struct file *file, const char *buffer, size_t count,
+	      loff_t * ppos)
+{
+	int r, rcount;
+	struct devpoll *dp = file->private_data;
+	struct pollfd pfd;
+	struct dp_fd *dpfd;
+#ifdef DEBUG
+	int add = 0, delete = 0, change = 0;
+#endif
+
+	DNPRINTK(3, (KERN_INFO "write /dev/poll %i\n", count));
+
+	if (count % sizeof (struct pollfd))
+		return -EINVAL;
+
+	if ((r = verify_area(VERIFY_READ, buffer, count)))
+		return r;
+
+	rcount = count;
+
+	lock_kernel();
+
+	while (count > 0) {
+		__copy_from_user(&pfd, buffer, sizeof (pfd));	/* no check */
+
+		dpfd = dp_find(dp, pfd.fd);
+
+		if (pfd.fd < 0 || pfd.fd >= current->files->max_fds ||
+		    current->files->fd[pfd.fd] == NULL) {
+			/* Bad or already-closed fd: be tolerant, just remove */
+			pfd.events = POLLREMOVE;
+		}
+		/* Remove the descriptor if asked to; if it is already
+		 * registered replace its event mask, otherwise insert it */
+		if (pfd.events & POLLREMOVE) {
+			if (dpfd)
+				dp_delete(dp, dpfd);
+#ifdef DEBUG
+			delete++;
+#endif
+		} else if (dpfd) {
+			/* XXX dpfd->pfd.events |= pfd.events; */
+			dpfd->pfd.events = pfd.events;
+#ifdef DEBUG
+			change++;
+#endif
+		} else {
+			dp_insert(dp, &pfd);
+#ifdef DEBUG
+			add++;
+#endif
+		}
+
+		buffer += sizeof (pfd);
+		count -= sizeof (pfd);
+	}
+
+	unlock_kernel();
+
+	if (dp->dp_max < dp->dp_entries) {
+		dp->dp_max = dp->dp_entries;
+		DNPRINTK(2, (KERN_INFO "/dev/poll: new max %d\n", dp->dp_max));
+	}
+
+	DNPRINTK(3, (KERN_INFO "write /dev/poll: %d entries (%d/%d/%d)\n",
+		     dp->dp_entries, add, delete, change));
+
+	return (rcount);
+}
+
+/*
+ * ioctl handler for /dev/poll.
+ *
+ * DP_ALLOC:    allocate the pages that back an mmap() of arg pollfds
+ * DP_FREE:     release those pages; -EBUSY while they are mapped
+ * DP_ISPOLLED: returns 1 and fills in events if the fd is registered
+ * DP_POLL:     poll the registered fds, returns how many are ready
+ *
+ * Any other command returns -EINVAL.
+ */
+static int
+ioctl_devpoll(struct inode *inode, struct file *file,
+	      unsigned int cmd, unsigned long arg)
+{
+	struct devpoll *dp = file->private_data;
+	unsigned mapsize = 0;
+	unsigned num_pages = 0;
+	int i = 0;
+	switch (cmd) {
+	case DP_ALLOC:
+		if (arg > current->rlim[RLIMIT_NOFILE].rlim_cur)
+			return -EINVAL;
+		/* Refuse a second DP_ALLOC: it would leak the old vector */
+		if (dp->dp_mmap || dp->dp_memvec)
+			return -EPERM;
+
+		mapsize = DP_MMAP_SIZE(arg);
+		num_pages = (PAGE_ALIGN(mapsize) >> PAGE_SHIFT);
+
+		dp->dp_memvec = kmalloc(num_pages * sizeof (unsigned long *),
+					GFP_KERNEL);
+		if (dp->dp_memvec == NULL)
+			return -ENOMEM;
+
+		memset(dp->dp_memvec, 0, num_pages * sizeof (unsigned long *));
+
+		for (i = 0; i < num_pages; ++i) {
+			dp->dp_memvec[i] =
+			    (u_char *) __get_free_pages(GFP_KERNEL, 0);
+			if (!dp->dp_memvec[i]) {
+				free_pg_vec(dp);
+				dp->dp_memvec = NULL;	/* already freed */
+				return -ENOMEM;
+			}
+			/* Order-0 allocation: one page to mark reserved */
+			set_bit(PG_reserved,
+				&virt_to_page(dp->dp_memvec[i])->flags);
+			++dp->dp_numvec;
+		}
+
+		dp->dp_nfds = arg;
+
+		DPRINTK((KERN_INFO "allocated %d pollfds\n", dp->dp_nfds));
+
+		return 0;
+	case DP_FREE:
+		if (atomic_read(&dp->dp_mmapped))
+			return -EBUSY;
+
+		/* Test the vector itself; it is NULL before any DP_ALLOC */
+		if (dp->dp_memvec)
+			free_pg_vec(dp);
+		dp->dp_memvec = NULL;
+
+		DPRINTK((KERN_INFO "freed %d pollfds\n", dp->dp_nfds));
+		dp->dp_nfds = 0;
+
+		return 0;
+	case DP_ISPOLLED:{
+			struct pollfd pfd;
+			struct dp_fd *dpfd;
+
+			if (copy_from_user(&pfd, (void *) arg, sizeof (pfd)))
+				return -EFAULT;
+			dpfd = dp_find(dp, pfd.fd);
+			if (dpfd == NULL)
+				return (0);
+
+			/* We poll this fd, return the events we poll on */
+			pfd.events = dpfd->pfd.events;
+			pfd.revents = 0;
+
+			if (copy_to_user((void *) arg, &pfd, sizeof (pfd)))
+				return -EFAULT;
+			return (1);
+		}
+	case DP_POLL:{
+			struct dvpoll dopoll;
+			int nfds, usemmap = 0;
+			unsigned long timeout;
+			poll_table wait;
+			struct pollfd *rpfds = NULL;
+
+			if (copy_from_user
+			    (&dopoll, (void *) arg, sizeof (dopoll)))
+				return -EFAULT;
+
+			nfds = dopoll.dp_nfds;
+			if (nfds <= 0)
+				return -EINVAL;
+
+			if (dopoll.dp_fds == NULL) {
+				if (dp->dp_mmap == NULL)
+					return -EINVAL;
+				rpfds = (struct pollfd *) dp->dp_mmap;
+				usemmap = 1;
+			} else {
+				rpfds = dopoll.dp_fds;
+				if (verify_area(VERIFY_WRITE, rpfds,
+						nfds * sizeof (struct pollfd)))
+					return -EFAULT;
+			}
+
+			timeout = dopoll.dp_timeout;
+			if (timeout) {
+				/* Careful about overflow in the intermediate values */
+				if (timeout < MAX_SCHEDULE_TIMEOUT / HZ)
+					timeout =
+					    (timeout * HZ + 999) / 1000 + 1;
+				else	/* Negative or overflow */
+					timeout = MAX_SCHEDULE_TIMEOUT;
+			}
+
+			/* Initialize wait table */
+			poll_initwait(&wait);
+			nfds =
+			    dp_poll(dp, nfds, &wait, timeout, rpfds, usemmap);
+			DNPRINTK(2,
+				 (KERN_INFO "poll time %ld -> %d\n", timeout,
+				  nfds));
+			poll_freewait(&wait);
+
+			dp->dp_avg += dp->dp_entries;
+			dp->dp_count++;
+
+			return nfds;
+		}
+	default:
+		DPRINTK((KERN_INFO "ioctl(%x) /dev/poll\n", cmd));
+		break;
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * Free each page in dp->dp_memvec, then the vector.  dp_memvec is set
+ * to NULL afterwards so DP_FREE + close cannot kfree() it twice.
+ */
+static void
+free_pg_vec(struct devpoll *dp)
+{
+	int i;
+
+	for (i = 0; i < dp->dp_numvec; i++) {
+		if (!dp->dp_memvec[i])
+			continue;
+		/* Order-0 allocation: exactly one page to unreserve */
+		clear_bit(PG_reserved, &virt_to_page(dp->dp_memvec[i])->flags);
+		/* free_pages() takes the address as an unsigned long */
+		free_pages((unsigned long) dp->dp_memvec[i], 0);
+	}
+	kfree(dp->dp_memvec);
+	dp->dp_memvec = NULL;
+	dp->dp_numvec = 0;
+}
+
+static void
+devpoll_mm_open(struct vm_area_struct *vma)
+{
+	struct file *file = vma->vm_file;
+	struct devpoll *dp = file->private_data;
+	if (dp)
+		atomic_inc(&dp->dp_mmapped);
+}
+
+static void
+devpoll_mm_close(struct vm_area_struct *vma)
+{
+	struct file *file = vma->vm_file;
+	struct devpoll *dp = file->private_data;
+	if (dp)
+		atomic_dec(&dp->dp_mmapped);
+}
+
+static struct vm_operations_struct devpoll_mmap_ops = {
+	open:devpoll_mm_open,
+	close:devpoll_mm_close,
+};
+
+/*
+ * mmap shared memory.  the first half is an array  of struct pollfd,
+ * followed by an array of ints to indicate which file descriptors
+ * changed status.
+ */
+
+static int
+mmap_devpoll(struct file *file, struct vm_area_struct *vma)
+{
+	struct devpoll *dp = file->private_data;
+	unsigned long start;	/* Evil type to remap_page_range */
+	int i = 0;
+	int num_pages = 0;
+	size_t mapsize;
+
+	DPRINTK((KERN_INFO "mmap /dev/poll: %lx %lx\n",
+		 vma->vm_start, vma->vm_pgoff << PAGE_SHIFT));
+
+	if ((vma->vm_pgoff << PAGE_SHIFT) != 0)
+		return -EINVAL;
+
+	/* Round the requested length up to whole pages */
+	mapsize = PAGE_ALIGN(vma->vm_end - vma->vm_start);
+	num_pages = mapsize >> PAGE_SHIFT;
+
+	/* Check if the requested size is within our size */
+	if (mapsize > dp->dp_numvec << PAGE_SHIFT)
+		return -EINVAL;
+
+	start = vma->vm_start;
+	/* NOTE(review): dp_mmapped stays set if a remap below fails */
+	atomic_set(&dp->dp_mmapped, 1);
+	for (i = 0; i < num_pages; ++i) {
+		if (remap_page_range(start, __pa(dp->dp_memvec[i]),
+				     PAGE_SIZE, vma->vm_page_prot))
+			return -EINVAL;
+		start += PAGE_SIZE;
+	}
+	dp->dp_mmap = (u_char *) vma->vm_start;
+	vma->vm_ops = &devpoll_mmap_ops;
+	DPRINTK((KERN_INFO "mmap /dev/poll: %lx %lx\n",
+		 vma->vm_start, (unsigned long) mapsize));
+	return 0;
+}
+
+struct file_operations devpoll_fops = {
+	write:write_devpoll,
+	ioctl:ioctl_devpoll,
+	mmap:mmap_devpoll,
+	open:open_devpoll,
+	release:close_devpoll
+};
+
+static struct miscdevice devpoll = {
+	DEVPOLL_MINOR, "devpoll", &devpoll_fops
+};
+
+int __init
+devpoll_init(void)
+{
+	int err = misc_register(&devpoll);	/* 0 or negative errno */
+	if (!err)
+		printk(KERN_INFO "/dev/poll driver installed.\n");
+	return err;	/* do not report success if registration failed */
+}
+
+module_init(devpoll_init);
+#ifdef MODULE
+
+void
+cleanup_module(void)
+{
+	misc_deregister(&devpoll);
+}
+#endif
diff -rNu linux.orig/fs/file_table.c linux/fs/file_table.c
--- linux.orig/fs/file_table.c	Wed Apr 18 19:49:12 2001
+++ linux/fs/file_table.c	Wed Jun 27 16:49:49 2001
@@ -11,6 +11,7 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/smp_lock.h>
+#include <linux/spinlock.h>
 
 /* sysctl tunables... */
 struct files_stat_struct files_stat = {0, 0, NR_FILE};
@@ -45,6 +46,7 @@
 		f->f_version = ++event;
 		f->f_uid = current->fsuid;
 		f->f_gid = current->fsgid;
+		rwlock_init(&f->f_dplock);
 		list_add(&f->f_list, &anon_list);
 		file_list_unlock();
 		return f;
diff -rNu linux.orig/fs/open.c linux/fs/open.c
--- linux.orig/fs/open.c	Fri Feb  9 19:29:44 2001
+++ linux/fs/open.c	Wed Jun 27 18:01:15 2001
@@ -14,6 +14,8 @@
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/tty.h>
+#include <linux/poll.h>
+#include <linux/devpoll.h>
 
 #include <asm/uaccess.h>
 
@@ -802,6 +805,14 @@
 		retval = filp->f_op->flush(filp);
 		unlock_kernel();
 	}
+
+	if (filp->f_backmap) {
+		unsigned long flags;
+		write_lock_irqsave(&filp->f_dplock,flags);
+		poll_clean_backmap(&filp->f_backmap);
+		write_unlock_irqrestore(&filp->f_dplock,flags);
+	}
+
 	fcntl_dirnotify(0, filp, 0);
 	locks_remove_posix(filp, id);
 	fput(filp);
@@ -828,6 +839,14 @@
 	FD_CLR(fd, files->close_on_exec);
 	__put_unused_fd(files, fd);
 	write_unlock(&files->file_lock);
+
+	if (filp->f_backmap) {
+		unsigned long flags;
+		write_lock_irqsave(&filp->f_dplock,flags);
+		poll_remove_backmap(&filp->f_backmap,fd, files);
+		write_unlock_irqrestore(&filp->f_dplock,flags);
+	}
+	
 	return filp_close(filp, files);
 
 out_unlock:
diff -rNu linux.orig/include/asm-i386/poll.h linux/include/asm-i386/poll.h
--- linux.orig/include/asm-i386/poll.h	Thu Jan 23 19:01:28 1997
+++ linux/include/asm-i386/poll.h	Wed Jun 27 17:16:57 2001
@@ -15,6 +15,8 @@
 #define POLLWRNORM	0x0100
 #define POLLWRBAND	0x0200
 #define POLLMSG		0x0400
+#define POLLREMOVE	0x1000
+#define POLLHINT	0x2000
 
 struct pollfd {
 	int fd;
diff -rNu linux.orig/include/linux/devpoll.h linux/include/linux/devpoll.h
--- linux.orig/include/linux/devpoll.h	Thu Jan  1 01:00:00 1970
+++ linux/include/linux/devpoll.h	Wed Jun 27 19:58:52 2001
@@ -0,0 +1,85 @@
+/*
+ * /dev/poll
+ * by Niels Provos <provos@citi.umich.edu>
+ *
+ * provides poll() support via /dev/poll as in Solaris.
+ *
+ * Linux 2.3.x port by Michal Ostrowski
+ * Linux 2.4.x patches by Vincent Sweeney <v.sweeney@dexterus.com>
+ */
+
+#ifndef _LINUX_DEVPOLL_H
+#define _LINUX_DEVPOLL_H
+
+#include <asm/bitops.h>
+#include <linux/list.h>
+#include <asm/atomic.h>
+
+#define DPH_DIRTY	0	/* entry is dirty - bit */
+#define DPH_BACKMAP	1	/* file has an fd back map - bit */
+#ifdef __KERNEL__
+struct dp_fd {
+	struct list_head next;
+	struct pollfd pfd;
+	int flags;		/* for hinting */
+};
+
+struct devpoll {
+	struct list_head *dp_tab;
+	int dp_entries;		/* Entries in hash table */
+	int dp_max;		/* statistics */
+	int dp_avg;		/* more */
+	int dp_count;
+	int dp_cached;
+	int dp_calls;
+	int dp_bucket_bits;
+	int dp_bucket_mask;
+	int dp_nfds;		/* Number of poll fds */
+	u_char *dp_mmap;	/* vaddr of mapped region */
+	atomic_t dp_mmapped;	/* Are we mmapped */
+	rwlock_t dp_lock;
+	u_char **dp_memvec;	/* Pointer to pages allocated for mmap */
+	int dp_numvec;		/* Size of above array */
+};
+#endif
+/* Match solaris */
+
+struct dvpoll {
+	struct pollfd *dp_fds;	/* Leave this ZERO for mmap */
+	int dp_nfds;
+	int dp_timeout;
+};
+
+#define DEVPOLL_MINOR       125	/* Minor device # for /dev/poll */
+
+#define DP_MMAP_SIZE(x)	((x) * sizeof(struct pollfd))
+
+#define DP_ALLOC	_IOR('P', 1, int)
+#define DP_POLL		_IOWR('P', 2, struct dvpoll)
+#define DP_FREE		_IO('P', 3)
+#define DP_ISPOLLED	_IOWR('P', 4, struct pollfd)
+
+#ifdef __KERNEL__
+extern rwlock_t devpoll_lock;
+/* Function Prototypes */
+
+extern inline void
+dp_add_hint (struct poll_backmap **map, rwlock_t * lock)
+{
+	struct poll_backmap *entry;
+	struct dp_fd *dpfd;
+	if (!map)
+		return;
+
+	read_lock (lock);
+	entry = *map;
+	while (entry) {
+		dpfd = entry->arg;
+		set_bit (DPH_DIRTY, &dpfd->flags);	/* atomic */
+		entry = entry->next;
+	}
+	read_unlock (lock);
+}
+#endif				/* __KERNEL__ */
+
+#endif
diff -rNu linux.orig/include/linux/fs.h linux/include/linux/fs.h
--- linux.orig/include/linux/fs.h	Sat May 26 02:01:28 2001
+++ linux/include/linux/fs.h	Wed Jun 27 19:20:36 2001
@@ -502,6 +502,10 @@
 	int			f_error;
 
 	unsigned long		f_version;
+	
+	/* used by /dev/poll hinting */
+	struct poll_backmap	*f_backmap;
+	rwlock_t		f_dplock;
 
 	/* needed for tty driver, and maybe others */
 	void			*private_data;
diff -rNu linux.orig/include/linux/poll.h linux/include/linux/poll.h
--- linux.orig/include/linux/poll.h	Sat May 26 02:01:43 2001
+++ linux/include/linux/poll.h	Wed Jun 27 19:21:05 2001
@@ -8,10 +8,18 @@
 #include <linux/wait.h>
 #include <linux/string.h>
 #include <linux/mm.h>
+#include <linux/malloc.h>
 #include <asm/uaccess.h>
 
 struct poll_table_page;
 
+struct poll_backmap {
+	struct poll_backmap *next;
+	void *arg;			/* pointer to devpoll */
+	struct files_struct *files;	/* files which has this file as */
+	int fd;				/* file descriptor number fd */
+};
+
 typedef struct poll_table_struct {
 	int error;
 	struct poll_table_page * table;
@@ -83,7 +91,88 @@
 	memset(fdset, 0, FDS_BYTES(nr));
 }
 
+/* Record (current->files, fd, arg) on the backmap list at *entry, once. */
+extern inline void
+poll_backmap(int fd, void *arg, struct poll_backmap ** entry)
+{
+	struct poll_backmap *tmp;
+
+	if (!entry)
+		return;
+
+	/*
+	 * See if we have an entry in the backmap already, in general
+	 * we expect this linked list to be very short.
+	 */
+	for (tmp = *entry; tmp != NULL; tmp = tmp->next) {
+		if (tmp->files == current->files && tmp->fd == fd &&
+		    arg == tmp->arg)
+			return;
+	}
+
+	/* A whole node, not a pointer; atomic as dp_insert holds locks */
+	tmp = kmalloc(sizeof(*tmp), GFP_ATOMIC);
+	if (tmp == NULL)
+		return;		/* best effort: nothing is recorded */
+
+	tmp->arg = arg;
+	tmp->files = current->files;
+	tmp->fd = fd;
+	tmp->next = *entry;
+
+	*entry = tmp;
+}
+
+/*
+ * Unlink and free every backmap entry on the list at *map that was
+ * registered for descriptor fd of the descriptor table files.
+ * Entries belonging to other (files, fd) pairs are left in place.
+ * The list is modified in place; the callers in this patch take the
+ * owning file's f_dplock for writing around the call.
+ */
+extern inline void poll_remove_backmap(struct poll_backmap **map, int fd,
+				       struct files_struct *files)
+{
+	struct poll_backmap *tmp = *map, *old = NULL;
+
+	while (tmp != NULL) {
+		struct poll_backmap *next = tmp->next;
+
+		if (tmp->files == files && tmp->fd == fd) {
+			/* old == NULL means tmp is still the list head */
+			if (old == NULL)
+				*map = next;
+			else
+				old->next = next;
+			kfree(tmp);
+		} else {
+			old = tmp;
+		}
+		tmp = next;
+	}
+	/* tmp is always NULL once the loop ends, so there is nothing */
+	/* left to unlink after it. */
+}
+
+/*
+ * Free every entry on the backmap list at *map and reset the list
+ * head to NULL.  Deliberately silent: the fs/open.c hunk of this
+ * patch calls it whenever a file being closed has a backmap.
+ */
+extern inline void poll_clean_backmap(struct poll_backmap **map)
+{
+	struct poll_backmap *tmp = *map, *old;
+
+	while (tmp) {
+		old = tmp;
+		tmp = tmp->next;
+		kfree (old);
+	}
+	*map = NULL;
+}
+
 extern int do_select(int n, fd_set_bits *fds, long *timeout);
+extern void poll_freewait(poll_table *p);
 
 #endif /* KERNEL */
 
diff -rNu linux.orig/include/net/sock.h linux/include/net/sock.h
--- linux.orig/include/net/sock.h	Sat May 26 02:03:05 2001
+++ linux/include/net/sock.h	Wed Jun 27 19:21:05 2001
@@ -666,6 +666,10 @@
 	/* Identd and reporting IO signals */
 	struct socket		*socket;
 
+	/* For Poll hinting */
+	void			*backmap;
+	void			*dplock;
+
 	/* RPC layer private data */
 	void			*user_data;
   
diff -rNu linux.orig/net/core/datagram.c linux/net/core/datagram.c
--- linux.orig/net/core/datagram.c	Thu Apr 12 20:11:39 2001
+++ linux/net/core/datagram.c	Wed Jun 27 17:28:29 2001
@@ -402,8 +402,6 @@
 	return -EFAULT;
 }
 
-
-
 /*
  *	Datagram poll: Again totally generic. This also handles
  *	sequenced packet sockets providing the socket receive queue
@@ -420,7 +418,10 @@
 	unsigned int mask;
 
 	poll_wait(file, sk->sleep, wait);
-	mask = 0;
+	mask = POLLHINT;
+	
+	sk->backmap = &file->f_backmap;
+	sk->dplock  = &file->f_dplock;
 
 	/* exceptional events? */
 	if (sk->err || !skb_queue_empty(&sk->error_queue))
diff -rNu linux.orig/net/core/sock.c linux/net/core/sock.c
--- linux.orig/net/core/sock.c	Wed Apr 25 22:57:39 2001
+++ linux/net/core/sock.c	Wed Jun 27 18:04:44 2001
@@ -108,6 +108,7 @@
 #include <linux/interrupt.h>
 #include <linux/poll.h>
 #include <linux/init.h>
+#include <linux/devpoll.h>
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
@@ -1100,16 +1101,20 @@
 void sock_def_wakeup(struct sock *sk)
 {
 	read_lock(&sk->callback_lock);
-	if (sk->sleep && waitqueue_active(sk->sleep))
+	if (sk->sleep && waitqueue_active(sk->sleep)) {
+		dp_add_hint(sk->backmap, sk->dplock);
 		wake_up_interruptible_all(sk->sleep);
+	}
 	read_unlock(&sk->callback_lock);
 }
 
 void sock_def_error_report(struct sock *sk)
 {
 	read_lock(&sk->callback_lock);
-	if (sk->sleep && waitqueue_active(sk->sleep))
+	if (sk->sleep && waitqueue_active(sk->sleep)) {
+		dp_add_hint(sk->backmap, sk->dplock);
 		wake_up_interruptible(sk->sleep);
+	}
 	sk_wake_async(sk,0,POLL_ERR); 
 	read_unlock(&sk->callback_lock);
 }
@@ -1117,8 +1122,10 @@
 void sock_def_readable(struct sock *sk, int len)
 {
 	read_lock(&sk->callback_lock);
-	if (sk->sleep && waitqueue_active(sk->sleep))
+	if (sk->sleep && waitqueue_active(sk->sleep)) {
+		dp_add_hint(sk->backmap, sk->dplock);
 		wake_up_interruptible(sk->sleep);
+	}
 	sk_wake_async(sk,1,POLL_IN);
 	read_unlock(&sk->callback_lock);
 }
@@ -1131,8 +1138,10 @@
 	 * progress.  --DaveM
 	 */
 	if((atomic_read(&sk->wmem_alloc) << 1) <= sk->sndbuf) {
-		if (sk->sleep && waitqueue_active(sk->sleep))
+		if (sk->sleep && waitqueue_active(sk->sleep)) {
+			dp_add_hint(sk->backmap, sk->dplock);		
 			wake_up_interruptible(sk->sleep);
+		}
 
 		/* Should agree with poll, otherwise some programs break */
 		if (sock_writeable(sk))
@@ -1163,6 +1172,9 @@
 	sk->zapped	=	1;
 	sk->socket	=	sock;
 
+	sk->backmap	=	NULL;
+	sk->dplock	=	NULL;
+	
 	if(sock)
 	{
 		sk->type	=	sock->type;
diff -rNu linux.orig/net/ipv4/af_inet.c linux/net/ipv4/af_inet.c
--- linux.orig/net/ipv4/af_inet.c	Wed May  2 04:59:24 2001
+++ linux/net/ipv4/af_inet.c	Wed Jun 27 18:06:43 2001
@@ -444,6 +444,7 @@
 		if (sk->linger && !(current->flags & PF_EXITING))
 			timeout = sk->lingertime;
 		sock->sk = NULL;
+		sk->backmap = NULL;
 		sk->prot->close(sk, timeout);
 	}
 	return(0);
diff -rNu linux.orig/net/ipv4/tcp.c linux/net/ipv4/tcp.c
--- linux.orig/net/ipv4/tcp.c	Wed May 16 18:31:27 2001
+++ linux/net/ipv4/tcp.c	Wed Jun 27 17:37:22 2001
@@ -249,6 +249,7 @@
 #include <linux/types.h>
 #include <linux/fcntl.h>
 #include <linux/poll.h>
+#include <linux/devpoll.h>
 #include <linux/init.h>
 #include <linux/smp_lock.h>
 
@@ -380,8 +381,12 @@
 	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
 
 	poll_wait(file, sk->sleep, wait);
+
+	sk->backmap = &file->f_backmap;
+	sk->dplock  = &file->f_dplock;
+
 	if (sk->state == TCP_LISTEN)
-		return tcp_listen_poll(sk, wait);
+		return tcp_listen_poll(sk, wait) | POLLHINT;
 
 	/* Socket is not locked. We are protected from async events
 	   by poll logic and correct handling of state changes
@@ -454,7 +459,7 @@
 		if (tp->urg_data & TCP_URG_VALID)
 			mask |= POLLPRI;
 	}
-	return mask;
+	return mask | POLLHINT;
 }
 
 /*
@@ -467,8 +472,10 @@
 	if (tcp_wspace(sk) >= tcp_min_write_space(sk) && sock) {
 		clear_bit(SOCK_NOSPACE, &sock->flags);
 
-		if (sk->sleep && waitqueue_active(sk->sleep))
+		if (sk->sleep && waitqueue_active(sk->sleep)) {
+			dp_add_hint(sk->backmap, sk->dplock);
 			wake_up_interruptible(sk->sleep);
+		}
 
 		if (sock->fasync_list && !(sk->shutdown&SEND_SHUTDOWN))
 			sock_wake_async(sock, 2, POLL_OUT);
diff -rNu linux.orig/net/unix/af_unix.c linux/net/unix/af_unix.c
--- linux.orig/net/unix/af_unix.c	Thu Apr 12 20:11:39 2001
+++ linux/net/unix/af_unix.c	Wed Jun 27 17:39:17 2001
@@ -107,6 +107,7 @@
 #include <net/scm.h>
 #include <linux/init.h>
 #include <linux/poll.h>
+#include <linux/devpoll.h>
 #include <linux/smp_lock.h>
 
 #include <asm/checksum.h>
@@ -299,8 +300,10 @@
 {
 	read_lock(&sk->callback_lock);
 	if (unix_writable(sk)) {
-		if (sk->sleep && waitqueue_active(sk->sleep))
+		if (sk->sleep && waitqueue_active(sk->sleep)) {
+			dp_add_hint(sk->backmap,sk->dplock);
 			wake_up_interruptible(sk->sleep);
+		}
 		sk_wake_async(sk, 2, POLL_OUT);
 	}
 	read_unlock(&sk->callback_lock);
@@ -1698,7 +1701,10 @@
 	unsigned int mask;
 
 	poll_wait(file, sk->sleep, wait);
-	mask = 0;
+	mask = POLLHINT;
+
+	sk->backmap = &file->f_backmap;
+	sk->dplock  = &file->f_dplock;
 
 	/* exceptional events? */
 	if (sk->err)

                 reply	other threads:[~2001-06-27 19:34 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to='00bb01c0ff40$065816c0$470c0a0a@DEVPC01' \
    --to=zarjazz@barrysworld.com \
    --cc=kdc@nh.ultranet.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-scalability@citi.umich.edu \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.