linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] async poll for 2.5
@ 2002-10-14 22:36 Shailabh Nagar
  2002-10-14 22:54 ` John Myers
  2002-10-15 15:05 ` Benjamin LaHaise
  0 siblings, 2 replies; 138+ messages in thread
From: Shailabh Nagar @ 2002-10-14 22:36 UTC (permalink / raw)
  To: linux-kernel, linux-aio
  Cc: Andrew Morton, Ben LaHaise, David Miller, Linus Torvalds,
	Stephen Tweedie

[-- Attachment #1: Type: text/plain, Size: 1109 bytes --]

As of today, there is no scalable alternative to poll/select in the 2.5
kernel even though the topic has been discussed a number of times
before. The case for a scalable poll has been made often so I won't
get into that.

Attached is a port of the 2.4 async poll code to 2.5.41, written by
David Stevens with assistance from Jay Vosburgh and Mingming Cao (a
port for 2.5.42 is in progress and will be posted shortly). The
patch is a clean port of the 2.4 design and eliminates the use of
worktodos, just as Ben had done through the do_hack() function. The
patch has been tested on 2.5.41 using simple poll tests. A performance
evaluation and further testing are underway.

Even though Ben has indicated, on linux-aio and at OLS, that the 2.4
design doesn't scale well enough, it is a lot better than normal poll.
Given the absence of alternatives and the impending feature freeze,
this patch would be one way to ensure that users have at least one
alternative to regular poll.

Ben, are you working on a different async poll implementation that is
likely to be ready by the feature freeze?

Regards,
Shailabh


[-- Attachment #2: aiopoll-2.5.41-5.patch --]
[-- Type: text/plain, Size: 8694 bytes --]

diff -ruN linux-2.5.41/fs/aio.c linux-2.5.41AIO/fs/aio.c
--- linux-2.5.41/fs/aio.c	Mon Oct  7 11:24:13 2002
+++ linux-2.5.41AIO/fs/aio.c	Mon Oct 14 12:27:05 2002
@@ -59,6 +59,8 @@
 static spinlock_t	fput_lock = SPIN_LOCK_UNLOCKED;
 LIST_HEAD(fput_head);
 
+int async_poll(struct kiocb *iocb, int events);
+
 /* aio_setup
  *	Creates the slab caches used by the aio routines, panic on
  *	failure as this is done early during the boot sequence.
@@ -893,6 +895,19 @@
 	return -EINVAL;
 }
 
+ssize_t generic_aio_poll(struct file *file, struct kiocb *req, struct iocb *iocb)
+{
+	unsigned events = iocb->aio_buf;
+
+	/* Did the user set any bits they weren't supposed to? (The 
+	 * above is actually a cast.
+	 */
+	if (unlikely(events != iocb->aio_buf))
+		return -EINVAL;
+	
+	return async_poll(req, events);
+}
+
 static int FASTCALL(io_submit_one(struct kioctx *ctx, struct iocb *user_iocb,
 				  struct iocb *iocb));
 static int io_submit_one(struct kioctx *ctx, struct iocb *user_iocb,
@@ -978,12 +993,15 @@
 		if (file->f_op->aio_fsync)
 			ret = file->f_op->aio_fsync(req, 0);
 		break;
+	case IOCB_CMD_POLL:
+		ret = generic_aio_poll(file, req, iocb);
+		break;
 	default:
 		dprintk("EINVAL: io_submit: no operation provided\n");
 		ret = -EINVAL;
 	}
 
-	if (likely(EIOCBQUEUED == ret))
+	if (likely(-EIOCBQUEUED == ret))
 		return 0;
 	if (ret >= 0) {
 		aio_complete(req, ret, 0);
diff -ruN linux-2.5.41/fs/select.c linux-2.5.41AIO/fs/select.c
--- linux-2.5.41/fs/select.c	Mon Oct  7 11:23:21 2002
+++ linux-2.5.41AIO/fs/select.c	Mon Oct 14 13:39:58 2002
@@ -20,6 +20,8 @@
 #include <linux/personality.h> /* for STICKY_TIMEOUTS */
 #include <linux/file.h>
 #include <linux/fs.h>
+#include <linux/aio.h>
+#include <linux/init.h>
 
 #include <asm/uaccess.h>
 
@@ -27,19 +29,34 @@
 #define DEFAULT_POLLMASK (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)
 
 struct poll_table_entry {
-	struct file * filp;
 	wait_queue_t wait;
 	wait_queue_head_t * wait_address;
+	struct file * filp;
+	poll_table *p;
 };
 
 struct poll_table_page {
+	unsigned long size;
 	struct poll_table_page * next;
 	struct poll_table_entry * entry;
 	struct poll_table_entry entries[0];
 };
 
 #define POLL_TABLE_FULL(table) \
-	((unsigned long)((table)->entry+1) > PAGE_SIZE + (unsigned long)(table))
+	((unsigned long)((table)->entry+1) > \
+	 (table)->size + (unsigned long)(table))
+
+/* async poll uses only one entry per poll table as it is linked to an iocb */
+typedef struct async_poll_table_struct {
+	poll_table		pt;		
+	int			events;		/* event mask for async poll */
+	int			wake;
+	long			sync;
+	struct poll_table_page	pt_page;	/* one poll table page hdr */
+	struct poll_table_entry entries[1];	/* space for a single entry */
+} async_poll_table;
+
+static kmem_cache_t *async_poll_table_cache;
 
 /*
  * Ok, Peter made a complicated, but straightforward multiple_wait() function.
@@ -53,8 +70,7 @@
  * as all select/poll functions have to call it to add an entry to the
  * poll table.
  */
-
-void poll_freewait(poll_table* pt)
+void __poll_freewait(poll_table* pt, wait_queue_t *wait)
 {
 	struct poll_table_page * p = pt->table;
 	while (p) {
@@ -62,15 +78,141 @@
 		struct poll_table_page *old;
 
 		entry = p->entry;
+		if (entry == p->entries) /* may happen with async poll */
+			break;
 		do {
 			entry--;
-			remove_wait_queue(entry->wait_address,&entry->wait);
+			if (wait != &entry->wait)
+				remove_wait_queue(entry->wait_address,&entry->wait);
+			else
+				__remove_wait_queue(entry->wait_address,&entry->wait);
 			fput(entry->filp);
 		} while (entry > p->entries);
 		old = p;
 		p = p->next;
-		free_page((unsigned long) old);
+		if (old->size == PAGE_SIZE)
+			free_page((unsigned long) old);
 	}
+	if (pt->iocb)
+		kmem_cache_free(async_poll_table_cache, pt);
+}
+
+void poll_freewait(poll_table* pt)
+{
+	__poll_freewait(pt, NULL);
+}
+
+void async_poll_complete(void *data)
+{
+	async_poll_table *pasync = data;
+	poll_table *p = data;
+	struct kiocb	*iocb = p->iocb;
+	unsigned int	mask;
+
+	pasync->wake = 0;
+	wmb();
+	do {
+		mask = iocb->ki_filp->f_op->poll(iocb->ki_filp, p);
+		mask &= pasync->events | POLLERR | POLLHUP;
+		if (mask) {
+			poll_table *p2 = xchg(&iocb->ki_data, NULL);
+			if (p2) {
+				poll_freewait(p2); 
+				aio_complete(iocb, mask, 0);
+			}
+			return;
+		}
+		pasync->sync = 0;
+		wmb();
+	} while (pasync->wake);
+}
+
+static int async_poll_waiter(wait_queue_t *wait, unsigned mode, int sync)
+{
+	struct poll_table_entry *entry = (struct poll_table_entry *)wait;
+	async_poll_table *pasync = (async_poll_table *)(entry->p);
+	struct kiocb	*iocb;
+	unsigned int	mask;
+
+	iocb = pasync->pt.iocb;
+	mask = iocb->ki_filp->f_op->poll(iocb->ki_filp, NULL);
+	mask &= pasync->events | POLLERR | POLLHUP;
+	if (mask) {
+		poll_table *p2 = xchg(&iocb->ki_data, NULL);
+		if (p2) {
+			__poll_freewait(p2, wait); 
+			aio_complete(iocb, mask, 0);
+			return 1;
+		}
+	}
+	return 0;
+}
+
+int async_poll_cancel(struct kiocb *iocb, struct io_event *res)
+{
+	poll_table *p;
+
+	/* FIXME: almost right */
+	p = xchg(&iocb->ki_data, NULL);
+	if (p) {
+		poll_freewait(p); 
+		aio_complete(iocb, 0, 0);
+		aio_put_req(iocb);
+		return 0;
+	}
+	aio_put_req(iocb);
+	return -EAGAIN;
+}
+
+int async_poll(struct kiocb *iocb, int events)
+{
+	unsigned int mask;
+	async_poll_table *pasync;
+	poll_table *p;
+
+	/* Fast path */
+	if (iocb->ki_filp->f_op && iocb->ki_filp->f_op->poll) {
+		mask = iocb->ki_filp->f_op->poll(iocb->ki_filp, NULL);
+		mask &= events | POLLERR | POLLHUP;
+		if (mask & events)
+			return events;
+	}
+
+	pasync = kmem_cache_alloc(async_poll_table_cache, SLAB_KERNEL);
+	if (!pasync)
+		return -ENOMEM;
+
+	p = (poll_table *)pasync;
+	poll_initwait(p);
+	p->iocb = iocb;
+	pasync->wake = 0;
+	pasync->sync = 0;
+	pasync->events = events;
+	pasync->pt_page.entry = pasync->pt_page.entries;
+	pasync->pt_page.size = sizeof(pasync->pt_page) + sizeof(pasync->entries);
+	pasync->pt_page.next = 0;
+	p->table = &pasync->pt_page;
+
+	iocb->ki_data = p;
+	wmb();
+	iocb->ki_cancel = async_poll_cancel;
+
+	mask = DEFAULT_POLLMASK;
+#warning broken
+	iocb->ki_users ++;
+	if (iocb->ki_filp->f_op && iocb->ki_filp->f_op->poll)
+		mask = iocb->ki_filp->f_op->poll(iocb->ki_filp, p);
+	mask &= events | POLLERR | POLLHUP;
+	if (mask && !test_and_set_bit(0, &pasync->sync))
+		aio_complete(iocb, mask, 0);
+
+	if (aio_put_req(iocb))
+		/* Must be freed after aio_complete to synchronise with 
+		 * cancellation of the request.
+		 */
+		poll_freewait(p);
+
+	return -EIOCBQUEUED;
 }
 
 void __pollwait(struct file * filp, wait_queue_head_t * wait_address, poll_table *p)
@@ -86,6 +228,7 @@
 			__set_current_state(TASK_RUNNING);
 			return;
 		}
+		new_table->size = PAGE_SIZE;
 		new_table->entry = new_table->entries;
 		new_table->next = table;
 		p->table = new_table;
@@ -99,7 +242,11 @@
 	 	get_file(filp);
 	 	entry->filp = filp;
 		entry->wait_address = wait_address;
-		init_waitqueue_entry(&entry->wait, current);
+		entry->p = p;
+		if (p->iocb) /* async poll */
+			init_waitqueue_func_entry(&entry->wait, async_poll_waiter);
+		else
+			init_waitqueue_entry(&entry->wait, current);
 		add_wait_queue(wait_address,&entry->wait);
 	}
 }
@@ -495,3 +642,14 @@
 	poll_freewait(&table);
 	return err;
 }
+
+static int __init async_poll_init(void)
+{
+	async_poll_table_cache = kmem_cache_create("async poll table",
+                        sizeof(async_poll_table), 0, 0, NULL, NULL);
+	if (!async_poll_table_cache)
+		panic("unable to alloc poll_table_cache");
+	return 0;
+}
+
+module_init(async_poll_init);
diff -ruN linux-2.5.41/include/linux/aio_abi.h linux-2.5.41AIO/include/linux/aio_abi.h
--- linux-2.5.41/include/linux/aio_abi.h	Mon Oct  7 11:23:28 2002
+++ linux-2.5.41AIO/include/linux/aio_abi.h	Mon Oct  7 16:33:36 2002
@@ -40,6 +40,7 @@
 	 * IOCB_CMD_PREADX = 4,
 	 * IOCB_CMD_POLL = 5,
 	 */
+	IOCB_CMD_POLL = 5,
 	IOCB_CMD_NOOP = 6,
 };
 
diff -ruN linux-2.5.41/include/linux/poll.h linux-2.5.41AIO/include/linux/poll.h
--- linux-2.5.41/include/linux/poll.h	Mon Oct  7 11:24:12 2002
+++ linux-2.5.41AIO/include/linux/poll.h	Tue Oct  8 12:06:44 2002
@@ -9,12 +9,14 @@
 #include <linux/string.h>
 #include <linux/mm.h>
 #include <asm/uaccess.h>
+#include <linux/workqueue.h>
 
 struct poll_table_page;
 
 typedef struct poll_table_struct {
-	int error;
-	struct poll_table_page * table;
+	int			error;
+	struct poll_table_page	*table;
+	struct kiocb		*iocb;		/* iocb for async poll */
 } poll_table;
 
 extern void __pollwait(struct file * filp, wait_queue_head_t * wait_address, poll_table *p);
@@ -29,6 +31,7 @@
 {
 	pt->error = 0;
 	pt->table = NULL;
+	pt->iocb = NULL;
 }
 extern void poll_freewait(poll_table* pt);
 

^ permalink raw reply	[flat|nested] 138+ messages in thread

end of thread, other threads:[~2002-10-24 14:59 UTC | newest]

Thread overview: 138+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2002-10-14 22:36 [PATCH] async poll for 2.5 Shailabh Nagar
2002-10-14 22:54 ` John Myers
2002-10-15 15:05 ` Benjamin LaHaise
2002-10-15 17:06   ` Dan Kegel
2002-10-15 17:03     ` Benjamin LaHaise
2002-10-15 17:18       ` Dan Kegel
2002-10-16  2:11         ` Lincoln Dale
2002-10-15 18:09     ` Shailabh Nagar
2002-10-15 18:53       ` Dan Kegel
2002-10-15 18:57         ` Benjamin LaHaise
2002-10-15 20:25           ` John Gardiner Myers
2002-10-15 21:09             ` Dan Kegel
2002-10-15 21:50               ` John Myers
2002-10-15 22:33                 ` Davide Libenzi
2002-10-15 22:56                   ` John Gardiner Myers
2002-10-15 23:23                     ` Davide Libenzi
2002-10-16 19:16                       ` John Myers
2002-10-15 21:11             ` Davide Libenzi
2002-10-15 22:01               ` John Gardiner Myers
2002-10-15 22:27                 ` Davide Libenzi
2002-10-15 22:36                   ` John Gardiner Myers
2002-10-15 22:41                     ` Benjamin LaHaise
2002-10-15 23:26                       ` John Gardiner Myers
2002-10-15 23:05                     ` Davide Libenzi
2002-10-15 23:33                       ` John Gardiner Myers
2002-10-16  0:05                         ` Davide Libenzi
2002-10-16  0:15                           ` John Myers
2002-10-16 14:25                             ` Davide Libenzi
2002-10-16 18:15                               ` John Gardiner Myers
2002-10-16 19:20                                 ` Davide Libenzi
2002-10-16 23:31                                   ` epoll (was Re: [PATCH] async poll for 2.5) John Gardiner Myers
2002-10-16 23:51                                     ` Davide Libenzi
2002-10-17 18:06                                       ` John Gardiner Myers
2002-10-17 18:33                                         ` Davide Libenzi
2002-10-18 19:02                                           ` John Gardiner Myers
2002-10-18 19:52                                             ` Davide Libenzi
2002-10-19  0:55                                               ` John Myers
2002-10-19  5:40                                                 ` Davide Libenzi
2002-10-19  6:59                                                 ` Mark Mielke
2002-10-19 17:26                                                   ` Davide Libenzi
2002-10-19 17:48                                                   ` Dan Kegel
2002-10-19 18:52                                                     ` Charles 'Buck' Krasic
2002-10-19 20:18                                                       ` Charles 'Buck' Krasic
2002-10-19 21:08                                                         ` Dan Kegel
2002-10-22 19:35                                                     ` John Gardiner Myers
2002-10-22 20:06                                                       ` Davide Libenzi
2002-10-22 21:54                                                         ` Erich Nahum
2002-10-22 22:17                                                           ` Dan Kegel
2002-10-22 22:25                                                           ` Davide Libenzi
2002-10-18 21:01                                             ` Charles 'Buck' Krasic
2002-10-18 21:33                                               ` Davide Libenzi
2002-10-19  1:05                                               ` John Myers
2002-10-19  1:27                                                 ` Tervel Atanassov
2002-10-19 18:52                                                   ` John G. Myers
2002-10-19  4:07                                                 ` Charles 'Buck' Krasic
2002-10-16 20:06                                 ` [PATCH] async poll for 2.5 Mark Mielke
2002-10-16 23:48                                   ` epoll (was Re: [PATCH] async poll for 2.5) John Gardiner Myers
2002-10-17  0:23                                     ` Davide Libenzi
2002-10-17 17:45                                       ` John Myers
2002-10-16  2:45                         ` [PATCH] async poll for 2.5 Charles 'Buck' Krasic
2002-10-16 14:28                           ` Davide Libenzi
2002-10-17 18:47                             ` Charles 'Buck' Krasic
2002-10-17 19:20                               ` Davide Libenzi
2002-10-18  3:30                               ` Dan Kegel
2002-10-16 18:29                           ` John Gardiner Myers
2002-10-16 20:39                             ` Charles 'Buck' Krasic
2002-10-17 17:59                               ` epoll (was Re: [PATCH] async poll for 2.5) John Gardiner Myers
2002-10-21 16:58                             ` [PATCH] async poll for 2.5 Alan Cox
2002-10-21 16:50                               ` Benjamin LaHaise
2002-10-16 19:59                     ` Dan Kegel
2002-10-16 20:03                 ` Dan Kegel
2002-10-17 17:43                   ` epoll (was Re: [PATCH] async poll for 2.5) John Myers
2002-10-18 17:00                     ` Mark Mielke
2002-10-18 17:28                       ` Dan Kegel
2002-10-18 17:41                         ` Davide Libenzi
2002-10-18 18:55                           ` Mark Mielke
2002-10-18 19:16                             ` Davide Libenzi
2002-10-19  6:56                               ` Mark Mielke
2002-10-19 16:10                                 ` Charles 'Buck' Krasic
2002-10-22 17:22                                   ` Mark Mielke
2002-10-22 17:46                                     ` Dan Kegel
2002-10-22 17:47                                     ` Davide Libenzi
2002-10-22 18:13                                       ` Alan Cox
2002-10-22 18:18                                         ` Davide Libenzi
2002-10-22 18:37                                           ` Benjamin LaHaise
2002-10-22 19:22                                             ` John Gardiner Myers
2002-10-22 19:28                                               ` Benjamin LaHaise
2002-10-22 19:50                                                 ` John Gardiner Myers
2002-10-22 20:00                                                   ` Benjamin LaHaise
2002-10-22 20:23                                                     ` async poll John Myers
2002-10-23 11:10                                                     ` Latest aio code (was Re: [PATCH] async poll for 2.5) Suparna Bhattacharya
2002-10-22 19:49                                             ` epoll " Davide Libenzi
2002-10-22 18:42                                     ` Charles 'Buck' Krasic
2002-10-22 19:35                                       ` Davide Libenzi
2002-10-23 16:49                                         ` Dan Kegel
2002-10-23 17:39                                           ` Benjamin LaHaise
2002-10-23 18:47                                             ` Davide Libenzi
2002-10-23 21:18                                               ` Benjamin LaHaise
2002-10-23 21:35                                                 ` Davide Libenzi
2002-10-23 21:39                                                   ` John Gardiner Myers
2002-10-23 21:54                                                     ` Davide Libenzi
2002-10-23 17:49                                           ` Charles 'Buck' Krasic
2002-10-23 18:14                                             ` Davide Libenzi
2002-10-23 18:32                                               ` Charles 'Buck' Krasic
2002-10-23 20:36                                               ` async poll John Myers
2002-10-23 20:57                                                 ` Dan Kegel
2002-10-23 21:23                                                   ` John Gardiner Myers
2002-10-23 21:51                                                     ` Davide Libenzi
2002-10-23 21:51                                                       ` bert hubert
2002-10-23 22:10                                                         ` Davide Libenzi
2002-10-23 21:54                                                       ` John Gardiner Myers
2002-10-23 22:22                                                         ` Davide Libenzi
2002-10-23 22:29                                                           ` John Gardiner Myers
2002-10-23 22:50                                                             ` Davide Libenzi
2002-10-24  7:32                                                               ` Eduardo Pérez
2002-10-24 15:05                                                                 ` Charles 'Buck' Krasic
2002-10-23 22:24                                                     ` Dan Kegel
2002-10-23 22:30                                                       ` Davide Libenzi
2002-10-23 22:53                                                         ` Davide Libenzi
2002-10-23 21:13                                                 ` Charles 'Buck' Krasic
2002-10-19 17:19                                 ` epoll (was Re: [PATCH] async poll for 2.5) Davide Libenzi
2002-10-18 18:55                       ` Chris Friesen
2002-10-18 19:00                         ` Mark Mielke
2002-10-15 17:38   ` [PATCH] async poll for 2.5 Shailabh Nagar
2002-10-15 17:50     ` Benjamin LaHaise
2002-10-15 18:16       ` Davide Libenzi
2002-10-15 18:18         ` Shailabh Nagar
2002-10-15 19:00           ` Davide Libenzi
2002-10-15 19:02             ` Benjamin LaHaise
2002-10-15 18:59               ` Shailabh Nagar
2002-10-15 19:16               ` Davide Libenzi
2002-10-15 19:12                 ` Benjamin LaHaise
2002-10-15 19:31                   ` Davide Libenzi
2002-10-15 19:38                     ` Dan Kegel
2002-10-15 19:55                       ` Davide Libenzi
2002-10-15 20:36                   ` John Gardiner Myers
2002-10-15 20:39                     ` Benjamin LaHaise
2002-10-15 19:02           ` Davide Libenzi

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).