From: Manfred Spraul <manfred@colorfullife.com>
To: linux-kernel@vger.kernel.org
Subject: [RFC] 6 sys_poll/sys_select performance patches
Date: Thu, 21 Nov 2002 00:00:08 +0100 [thread overview]
Message-ID: <3DDC13F8.2030805@colorfullife.com> (raw)
[-- Attachment #1: Type: text/plain, Size: 659 bytes --]
Attached are 6 patches that try to improve the performance of sys_poll
and sys_select:
- avoid dynamic memory allocations: stack storage is sufficient for most
callers and is faster.
- use the wakeup callbacks and use that info to speed up the 2nd scan
for new events.
What do you think? Are there any apps/tests/benchmarks that stress
sys_poll or sys_select?
The first 3 patches replace dynamic memory allocations with stack storage.
The 4th and 5th patches use wait queue callbacks for a more efficient 2nd
scan.
The 6th patch merges common code.
The patches are against 2.5.48 - they do boot on my laptop, but that's all
I can guarantee.
--
Manfred
[-- Attachment #2: patch-poll-1-wqalloc --]
[-- Type: text/plain, Size: 4279 bytes --]
// $Header$
// Kernel Version:
// VERSION = 2
// PATCHLEVEL = 5
// SUBLEVEL = 48
// EXTRAVERSION =
--- 2.5/include/linux/poll.h 2002-11-19 23:07:35.000000000 +0100
+++ build-2.5/include/linux/poll.h 2002-11-19 23:12:11.000000000 +0100
@@ -35,10 +35,20 @@
/*
* Structures and helpers for sys_poll/sys_poll
*/
+struct poll_table_entry {
+ struct file *filp;
+ wait_queue_t wait;
+ wait_queue_head_t *wait_address;
+};
+
+#define POLL_TABLE_INTERNAL 6
+
struct poll_wqueues {
poll_table pt;
- struct poll_table_page * table;
int error;
+ int nr;
+ struct poll_table_entry internal[POLL_TABLE_INTERNAL];
+ struct poll_table_page *table;
};
extern void poll_initwait(struct poll_wqueues *pwq);
--- 2.5/fs/select.c 2002-11-19 23:07:35.000000000 +0100
+++ build-2.5/fs/select.c 2002-11-19 23:10:50.000000000 +0100
@@ -26,12 +26,6 @@
#define ROUND_UP(x,y) (((x)+(y)-1)/(y))
#define DEFAULT_POLLMASK (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)
-struct poll_table_entry {
- struct file * filp;
- wait_queue_t wait;
- wait_queue_head_t * wait_address;
-};
-
struct poll_table_page {
struct poll_table_page * next;
struct poll_table_entry * entry;
@@ -53,18 +47,36 @@
* as all select/poll functions have to call it to add an entry to the
* poll table.
*/
-void __pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p);
+static void __pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p);
void poll_initwait(struct poll_wqueues *pwq)
{
init_poll_funcptr(&pwq->pt, __pollwait);
pwq->error = 0;
+ pwq->nr = 0;
pwq->table = NULL;
}
+/*
+ * Dynamic memory allocation is expensive, avoid it by
+ * saving a few bytes on the stack:
+ * - the poll table contains 6 wait queue entries. This means that no dynamic
+ * memory allocation is necessary for the wait queues if one of the first
+ * 6 file descriptors has new data.
+ * <manfred@colorfullife.com>
+ */
void poll_freewait(struct poll_wqueues *pwq)
{
struct poll_table_page * p = pwq->table;
+ struct poll_table_entry *entry;
+
+ entry = pwq->internal + pwq->nr;
+ while(pwq->nr > 0) {
+ pwq->nr--;
+ entry--;
+ remove_wait_queue(entry->wait_address,&entry->wait);
+ fput(entry->filp);
+ }
while (p) {
struct poll_table_entry * entry;
struct poll_table_page *old;
@@ -77,43 +89,46 @@
} while (entry > p->entries);
old = p;
p = p->next;
- free_page((unsigned long) old);
+ kfree(old);
}
}
-void __pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *_p)
+static void __pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *_p)
{
struct poll_wqueues *p = container_of(_p, struct poll_wqueues, pt);
- struct poll_table_page *table = p->table;
-
- if (!table || POLL_TABLE_FULL(table)) {
- struct poll_table_page *new_table;
-
- new_table = (struct poll_table_page *) __get_free_page(GFP_KERNEL);
- if (!new_table) {
- p->error = -ENOMEM;
- __set_current_state(TASK_RUNNING);
- return;
+ struct poll_table_entry *entry;
+
+ if(p->nr < POLL_TABLE_INTERNAL) {
+ entry = p->internal+p->nr++;
+ } else {
+ struct poll_table_page *table = p->table;
+
+ if (!table || POLL_TABLE_FULL(table)) {
+ struct poll_table_page *new_table;
+
+ new_table = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!new_table) {
+ p->error = -ENOMEM;
+ __set_current_state(TASK_RUNNING);
+ return;
+ }
+ new_table->entry = new_table->entries;
+ new_table->next = table;
+ p->table = new_table;
+ table = new_table;
}
- new_table->entry = new_table->entries;
- new_table->next = table;
- p->table = new_table;
- table = new_table;
+ entry = table->entry;
+ table->entry = entry+1;
}
/* Add a new entry */
- {
- struct poll_table_entry * entry = table->entry;
- table->entry = entry+1;
- get_file(filp);
- entry->filp = filp;
- entry->wait_address = wait_address;
- init_waitqueue_entry(&entry->wait, current);
- add_wait_queue(wait_address,&entry->wait);
- }
+ get_file(filp);
+ entry->filp = filp;
+ entry->wait_address = wait_address;
+ init_waitqueue_entry(&entry->wait, current);
+ add_wait_queue(wait_address,&entry->wait);
}
-
#define __IN(fds, n) (fds->in + n)
#define __OUT(fds, n) (fds->out + n)
#define __EX(fds, n) (fds->ex + n)
[-- Attachment #3: patch-poll-2-selectalloc --]
[-- Type: text/plain, Size: 1604 bytes --]
// $Header$
// Kernel Version:
// VERSION = 2
// PATCHLEVEL = 5
// SUBLEVEL = 48
// EXTRAVERSION =
--- 2.5/fs/select.c 2002-11-20 19:08:58.000000000 +0100
+++ build-2.5/fs/select.c 2002-11-20 19:00:11.000000000 +0100
@@ -62,6 +62,7 @@
* - the poll table contains 6 wait queue entries. This means that no dynamic
* memory allocation is necessary for the wait queues if one of the first
* 6 file descriptors has new data.
+ * - sys_select saves 192 bytes on the stack, enough for 256 file descriptors.
* <manfred@colorfullife.com>
*/
@@ -261,14 +262,18 @@
return retval;
}
-static void *select_bits_alloc(int size)
+#define SELECT_INLINE_BYTES 32
+static inline void *select_bits_alloc(int size, void *internal)
{
+ if(size <= SELECT_INLINE_BYTES)
+ return internal;
return kmalloc(6 * size, GFP_KERNEL);
}
-static void select_bits_free(void *bits, int size)
+static inline void select_bits_free(void *bits, void *internal)
{
- kfree(bits);
+ if(bits != internal)
+ kfree(bits);
}
/*
@@ -286,6 +291,7 @@
sys_select(int n, fd_set *inp, fd_set *outp, fd_set *exp, struct timeval *tvp)
{
fd_set_bits fds;
+ char ibuf[6*SELECT_INLINE_BYTES];
char *bits;
long timeout;
int ret, size, max_fdset;
@@ -325,7 +331,7 @@
*/
ret = -ENOMEM;
size = FDS_BYTES(n);
- bits = select_bits_alloc(size);
+ bits = select_bits_alloc(size, ibuf);
if (!bits)
goto out_nofds;
fds.in = (unsigned long *) bits;
@@ -370,7 +376,7 @@
set_fd_set(n, exp, fds.res_ex);
out:
- select_bits_free(bits, size);
+ select_bits_free(bits, ibuf);
out_nofds:
return ret;
}
[-- Attachment #4: patch-poll-3-alloc --]
[-- Type: text/plain, Size: 5204 bytes --]
// $Header$
// Kernel Version:
// VERSION = 2
// PATCHLEVEL = 5
// SUBLEVEL = 48
// EXTRAVERSION =
--- 2.5/fs/select.c 2002-11-20 00:32:46.000000000 +0100
+++ build-2.5/fs/select.c 2002-11-20 00:33:46.000000000 +0100
@@ -63,6 +63,7 @@
* memory allocation is necessary for the wait queues if one of the first
* 6 file descriptors has new data.
* - sys_select saves 192 bytes on the stack, enough for 256 file descriptors.
+ * - sys_poll saves 190 byte of user space buffers on the stack.
* <manfred@colorfullife.com>
*/
@@ -381,7 +382,13 @@
return ret;
}
-#define POLLFD_PER_PAGE ((PAGE_SIZE) / sizeof(struct pollfd))
+struct poll_list {
+ struct poll_list *next;
+ int len;
+ struct pollfd entries[0];
+};
+
+#define POLLFD_PER_PAGE ((PAGE_SIZE-sizeof(struct poll_list)) / sizeof(struct pollfd))
static void do_pollfd(unsigned int num, struct pollfd * fdpage,
poll_table ** pwait, int *count)
@@ -415,21 +422,23 @@
}
}
-static int do_poll(unsigned int nfds, unsigned int nchunks, unsigned int nleft,
- struct pollfd *fds[], struct poll_wqueues *wait, long timeout)
+static int do_poll(unsigned int nfds, struct poll_list *list,
+ struct poll_wqueues *wait, long timeout)
{
- int count;
+ int count = 0;
poll_table* pt = &wait->pt;
+ if (!timeout)
+ pt = NULL;
+
for (;;) {
- unsigned int i;
-
+ struct poll_list *walk;
set_current_state(TASK_INTERRUPTIBLE);
- count = 0;
- for (i=0; i < nchunks; i++)
- do_pollfd(POLLFD_PER_PAGE, fds[i], &pt, &count);
- if (nleft)
- do_pollfd(nleft, fds[nchunks], &pt, &count);
+ walk = list;
+ while(walk != NULL) {
+ do_pollfd( walk->len, walk->entries, &pt, &count);
+ walk = walk->next;
+ }
pt = NULL;
if (count || !timeout || signal_pending(current))
break;
@@ -442,12 +451,17 @@
return count;
}
+#define INLINE_POLL_COUNT ((190+sizeof(struct pollfd))/sizeof(struct pollfd))
asmlinkage long sys_poll(struct pollfd * ufds, unsigned int nfds, long timeout)
{
- int i, j, fdcount, err;
- struct pollfd **fds;
- struct poll_wqueues table, *wait;
- int nchunks, nleft;
+ struct poll_wqueues table;
+ int fdcount, err;
+ unsigned int i;
+ struct {
+ struct poll_list head;
+ struct pollfd entries[INLINE_POLL_COUNT];
+ } polldata;
+ struct poll_list *pollwalk;
/* Do a sanity check on nfds ... */
if (nfds > NR_OPEN)
@@ -462,68 +476,60 @@
}
poll_initwait(&table);
- wait = &table;
- if (!timeout)
- wait = NULL;
+ polldata.head.next = NULL;
+ polldata.head.len = INLINE_POLL_COUNT;
+ if(nfds <= INLINE_POLL_COUNT)
+ polldata.head.len = nfds;
+ pollwalk = &polldata.head;
+ i = nfds;
err = -ENOMEM;
- fds = NULL;
- if (nfds != 0) {
- fds = (struct pollfd **)kmalloc(
- (1 + (nfds - 1) / POLLFD_PER_PAGE) * sizeof(struct pollfd *),
- GFP_KERNEL);
- if (fds == NULL)
- goto out;
- }
-
- nchunks = 0;
- nleft = nfds;
- while (nleft > POLLFD_PER_PAGE) { /* allocate complete PAGE_SIZE chunks */
- fds[nchunks] = (struct pollfd *)__get_free_page(GFP_KERNEL);
- if (fds[nchunks] == NULL)
+ goto start;
+ while(i!=0) {
+ struct poll_list *pp;
+ pp = kmalloc(sizeof(struct poll_list)+
+ sizeof(struct pollfd)*
+ (i>POLLFD_PER_PAGE?POLLFD_PER_PAGE:i),
+ GFP_KERNEL);
+ if(pp==NULL)
goto out_fds;
- nchunks++;
- nleft -= POLLFD_PER_PAGE;
- }
- if (nleft) { /* allocate last PAGE_SIZE chunk, only nleft elements used */
- fds[nchunks] = (struct pollfd *)__get_free_page(GFP_KERNEL);
- if (fds[nchunks] == NULL)
+ pp->next=NULL;
+ pp->len = (i>POLLFD_PER_PAGE?POLLFD_PER_PAGE:i);
+ pollwalk->next = pp;
+ pollwalk = pp;
+start:
+ if (copy_from_user(pollwalk+1, ufds + nfds-i,
+ sizeof(struct pollfd)*pollwalk->len)) {
+ err = -EFAULT;
goto out_fds;
+ }
+ i -= pollwalk->len;
}
-
- err = -EFAULT;
- for (i=0; i < nchunks; i++)
- if (copy_from_user(fds[i], ufds + i*POLLFD_PER_PAGE, PAGE_SIZE))
- goto out_fds1;
- if (nleft) {
- if (copy_from_user(fds[nchunks], ufds + nchunks*POLLFD_PER_PAGE,
- nleft * sizeof(struct pollfd)))
- goto out_fds1;
- }
-
- fdcount = do_poll(nfds, nchunks, nleft, fds, wait, timeout);
+ fdcount = do_poll(nfds, &polldata.head, &table, timeout);
/* OK, now copy the revents fields back to user space. */
- for(i=0; i < nchunks; i++)
- for (j=0; j < POLLFD_PER_PAGE; j++, ufds++)
- __put_user((fds[i] + j)->revents, &ufds->revents);
- if (nleft)
- for (j=0; j < nleft; j++, ufds++)
- __put_user((fds[nchunks] + j)->revents, &ufds->revents);
-
+ pollwalk = &polldata.head;
+ err = -EFAULT;
+ while(pollwalk != NULL) {
+ struct pollfd *fds = pollwalk->entries;
+ int j;
+
+ for (j=0; j < pollwalk->len; j++, ufds++) {
+ if(__put_user(fds[j].revents, &ufds->revents))
+ goto out_fds;
+ }
+ pollwalk = pollwalk->next;
+ }
err = fdcount;
if (!fdcount && signal_pending(current))
err = -EINTR;
-
-out_fds1:
- if (nleft)
- free_page((unsigned long)(fds[nchunks]));
out_fds:
- for (i=0; i < nchunks; i++)
- free_page((unsigned long)(fds[i]));
- if (nfds != 0)
- kfree(fds);
-out:
+ pollwalk = polldata.head.next;
+ while(pollwalk!=NULL) {
+ struct poll_list *pp = pollwalk->next;
+ kfree(pollwalk);
+ pollwalk = pp;
+ }
poll_freewait(&table);
return err;
}
[-- Attachment #5: patch-poll-4-fast-select --]
[-- Type: text/plain, Size: 5783 bytes --]
// $Header$
// Kernel Version:
// VERSION = 2
// PATCHLEVEL = 5
// SUBLEVEL = 48
// EXTRAVERSION =
--- 2.5/include/linux/poll.h 2002-11-20 22:58:18.000000000 +0100
+++ build-2.5/include/linux/poll.h 2002-11-20 22:51:15.000000000 +0100
@@ -37,6 +37,8 @@
*/
struct poll_table_entry {
struct file *filp;
+ int woken;
+ void *handle;
wait_queue_t wait;
wait_queue_head_t *wait_address;
};
@@ -47,8 +49,10 @@
poll_table pt;
int error;
int nr;
+ void *handle;
struct poll_table_entry internal[POLL_TABLE_INTERNAL];
struct poll_table_page *table;
+ struct poll_table_page *last;
};
extern void poll_initwait(struct poll_wqueues *pwq);
--- 2.5/fs/select.c 2002-11-20 22:58:18.000000000 +0100
+++ build-2.5/fs/select.c 2002-11-20 22:58:01.000000000 +0100
@@ -55,6 +55,7 @@
pwq->error = 0;
pwq->nr = 0;
pwq->table = NULL;
+ pwq->last = NULL;
}
/*
* Dynamic memory allocation is expensive, avoid it by
@@ -95,6 +96,15 @@
}
}
+static int poll_wake_func(wait_queue_t *wait, unsigned mode, int sync)
+{
+ struct poll_table_entry *entry = container_of(wait, struct poll_table_entry, wait);
+ entry->woken = 1;
+ mb();
+ wake_up_process(wait->task);
+ return 0;
+}
+
static void __pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *_p)
{
struct poll_wqueues *p = container_of(_p, struct poll_wqueues, pt);
@@ -103,7 +113,7 @@
if(p->nr < POLL_TABLE_INTERNAL) {
entry = p->internal+p->nr++;
} else {
- struct poll_table_page *table = p->table;
+ struct poll_table_page *table = p->last;
if (!table || POLL_TABLE_FULL(table)) {
struct poll_table_page *new_table;
@@ -115,8 +125,12 @@
return;
}
new_table->entry = new_table->entries;
- new_table->next = table;
- p->table = new_table;
+ new_table->next = NULL;
+ if (table)
+ table->next = new_table;
+ else
+ p->table = new_table;
+ p->last = new_table;
table = new_table;
}
entry = table->entry;
@@ -127,7 +141,10 @@
get_file(filp);
entry->filp = filp;
entry->wait_address = wait_address;
- init_waitqueue_entry(&entry->wait, current);
+ entry->woken = 0;
+ entry->handle = p->handle;
+ init_waitqueue_func_entry(&entry->wait, poll_wake_func);
+ entry->wait.task = current;
add_wait_queue(wait_address,&entry->wait);
}
@@ -189,6 +206,51 @@
#define POLLOUT_SET (POLLWRBAND | POLLWRNORM | POLLOUT | POLLERR)
#define POLLEX_SET (POLLPRI)
+static int parse_mask(fd_set_bits *fds, unsigned long mask, unsigned long off, unsigned long bit)
+{
+ int retval = 0;
+
+ if ((mask & POLLIN_SET) && ISSET(bit, __IN(fds,off))) {
+ SET(bit, __RES_IN(fds,off));
+ retval++;
+ }
+ if ((mask & POLLOUT_SET) && ISSET(bit, __OUT(fds,off))) {
+ SET(bit, __RES_OUT(fds,off));
+ retval++;
+ }
+ if ((mask & POLLEX_SET) && ISSET(bit, __EX(fds,off))) {
+ SET(bit, __RES_EX(fds,off));
+ retval++;
+ }
+ return retval;
+}
+
+static int scan_entries(fd_set_bits *fds, struct poll_table_entry *entries, int size, int *prev)
+{
+ int i;
+ int retval = 0;
+ for (i=0;i<size;i++) {
+ unsigned long mask;
+ int fd;
+ if (!entries[i].woken)
+ continue;
+ entries[i].woken = 0;
+ mb();
+ /* drivers are permitted to use multiple waitqueues, but we must
+ * not double-account that in the return value
+ */
+ fd = (int)entries[i].handle;
+ if (fd == *prev)
+ continue;
+ *prev = fd;
+ mask = DEFAULT_POLLMASK;
+ if (entries[i].filp->f_op && entries[i].filp->f_op->poll)
+ mask = entries[i].filp->f_op->poll(entries[i].filp, NULL);
+ retval += parse_mask(fds, mask, fd/__NFDBITS, BIT(fd));
+ }
+ return retval;
+}
+
int do_select(int n, fd_set_bits *fds, long *timeout)
{
struct poll_wqueues table;
@@ -209,41 +271,35 @@
if (!__timeout)
wait = NULL;
retval = 0;
- for (;;) {
- set_current_state(TASK_INTERRUPTIBLE);
- for (i = 0 ; i < n; i++) {
- unsigned long bit = BIT(i);
- unsigned long mask;
- struct file *file;
-
- off = i / __NFDBITS;
- if (!(bit & BITS(fds, off)))
- continue;
- file = fget(i);
- mask = POLLNVAL;
- if (file) {
- mask = DEFAULT_POLLMASK;
- if (file->f_op && file->f_op->poll)
- mask = file->f_op->poll(file, wait);
- fput(file);
- }
- if ((mask & POLLIN_SET) && ISSET(bit, __IN(fds,off))) {
- SET(bit, __RES_IN(fds,off));
- retval++;
- wait = NULL;
- }
- if ((mask & POLLOUT_SET) && ISSET(bit, __OUT(fds,off))) {
- SET(bit, __RES_OUT(fds,off));
- retval++;
- wait = NULL;
- }
- if ((mask & POLLEX_SET) && ISSET(bit, __EX(fds,off))) {
- SET(bit, __RES_EX(fds,off));
- retval++;
- wait = NULL;
- }
+ /* step one: build the wait table */
+ set_current_state(TASK_INTERRUPTIBLE);
+ for (i = 0 ; i < n; i++) {
+ unsigned long bit = BIT(i);
+ unsigned long mask;
+ struct file *file;
+
+ off = i / __NFDBITS;
+ if (!(bit & BITS(fds, off)))
+ continue;
+ file = fget(i);
+ mask = POLLNVAL;
+ if (file) {
+ mask = DEFAULT_POLLMASK;
+ table.handle = (void*)i;
+ if (file->f_op && file->f_op->poll)
+ mask = file->f_op->poll(file, wait);
+ fput(file);
}
- wait = NULL;
+ retval += parse_mask(fds, mask, off, bit);
+ if (retval)
+ wait = NULL;
+ }
+ wait = NULL;
+ /* step two: now scan through the wait queues, that's faster
+ * than the bit lookup */
+ for (;;) {
+ struct poll_table_page *pg;
+ int prev;
if (retval || !__timeout || signal_pending(current))
break;
if(table.error) {
@@ -251,6 +307,14 @@
break;
}
__timeout = schedule_timeout(__timeout);
+ set_current_state(TASK_INTERRUPTIBLE);
+ prev = -1;
+ retval += scan_entries(fds, table.internal, table.nr, &prev);
+ pg = table.table;
+ while (pg) {
+ retval += scan_entries(fds, pg->entries, pg->entry-pg->entries, &prev);
+ pg = pg->next;
+ }
}
current->state = TASK_RUNNING;
[-- Attachment #6: patch-poll-5-fast-poll --]
[-- Type: text/plain, Size: 3757 bytes --]
// $Header$
// Kernel Version:
// VERSION = 2
// PATCHLEVEL = 5
// SUBLEVEL = 48
// EXTRAVERSION =
--- 2.5/fs/select.c 2002-11-20 22:59:34.000000000 +0100
+++ build-2.5/fs/select.c 2002-11-20 22:59:25.000000000 +0100
@@ -225,7 +225,7 @@
return retval;
}
-static int scan_entries(fd_set_bits *fds, struct poll_table_entry *entries, int size, int *prev)
+static int scan_select_entries(fd_set_bits *fds, struct poll_table_entry *entries, int size, int *prev)
{
int i;
int retval = 0;
@@ -294,7 +294,6 @@
if (retval)
wait = NULL;
}
- wait = NULL;
/* step two: now scan through the wait queues, that's faster
* than the bit lookup */
for (;;) {
@@ -309,10 +308,10 @@
__timeout = schedule_timeout(__timeout);
set_current_state(TASK_INTERRUPTIBLE);
prev = -1;
- retval += scan_entries(fds, table.internal, table.nr, &prev);
+ retval = scan_select_entries(fds, table.internal, table.nr, &prev);
pg = table.table;
while (pg) {
- retval += scan_entries(fds, pg->entries, pg->entry-pg->entries, &prev);
+ retval += scan_select_entries(fds, pg->entries, pg->entry-pg->entries, &prev);
pg = pg->next;
}
}
@@ -455,7 +454,7 @@
#define POLLFD_PER_PAGE ((PAGE_SIZE-sizeof(struct poll_list)) / sizeof(struct pollfd))
static void do_pollfd(unsigned int num, struct pollfd * fdpage,
- poll_table ** pwait, int *count)
+ struct poll_wqueues **pwait, int *count)
{
int i;
@@ -471,9 +470,11 @@
struct file * file = fget(fd);
mask = POLLNVAL;
if (file != NULL) {
+ if (*pwait)
+ (*pwait)->handle = fdp;
mask = DEFAULT_POLLMASK;
if (file->f_op && file->f_op->poll)
- mask = file->f_op->poll(file, *pwait);
+ mask = file->f_op->poll(file, &(*pwait)->pt);
mask &= fdp->events | POLLERR | POLLHUP;
fput(file);
}
@@ -486,30 +487,74 @@
}
}
+static int scan_poll_entries(struct poll_table_entry *entries, int size, void **prev)
+{
+ int retval = 0;
+ int i;
+
+ for (i=0;i<size;i++) {
+ unsigned long mask;
+ struct pollfd *fdp;
+
+ if (!entries[i].woken)
+ continue;
+ entries[i].woken = 0;
+ mb();
+ /* drivers are permitted to use multiple waitqueues, but we must
+ * not double-account that in the return value
+ */
+ fdp = entries[i].handle;
+ if (fdp == *prev)
+ continue;
+ *prev = fdp;
+ mask = DEFAULT_POLLMASK;
+ if (entries[i].filp->f_op && entries[i].filp->f_op->poll)
+ mask = entries[i].filp->f_op->poll(entries[i].filp, NULL);
+ mask &= fdp->events | POLLERR | POLLHUP;
+ if (mask) {
+ retval++;
+ fdp->revents = mask;
+ }
+ }
+ return retval;
+}
+
static int do_poll(unsigned int nfds, struct poll_list *list,
struct poll_wqueues *wait, long timeout)
{
+ struct poll_list *walk;
int count = 0;
- poll_table* pt = &wait->pt;
+ struct poll_wqueues *pt = wait;
if (!timeout)
pt = NULL;
-
+
+ set_current_state(TASK_INTERRUPTIBLE);
+ walk = list;
+ while(walk != NULL) {
+ do_pollfd( walk->len, walk->entries, &pt, &count);
+ walk = walk->next;
+ }
+ pt = NULL;
for (;;) {
- struct poll_list *walk;
- set_current_state(TASK_INTERRUPTIBLE);
- walk = list;
- while(walk != NULL) {
- do_pollfd( walk->len, walk->entries, &pt, &count);
- walk = walk->next;
- }
- pt = NULL;
+ struct poll_table_page *pg;
+ void *prev;
+
if (count || !timeout || signal_pending(current))
break;
count = wait->error;
if (count)
break;
timeout = schedule_timeout(timeout);
+
+ set_current_state(TASK_INTERRUPTIBLE);
+ prev = NULL;
+ count = scan_poll_entries(wait->internal, wait->nr, &prev);
+ pg = wait->table;
+ while (pg) {
+ count += scan_poll_entries(pg->entries, pg->entry-pg->entries, &prev);
+ pg = pg->next;
+ }
}
current->state = TASK_RUNNING;
return count;
[-- Attachment #7: patch-poll-6-merge --]
[-- Type: text/plain, Size: 4823 bytes --]
// $Header$
// Kernel Version:
// VERSION = 2
// PATCHLEVEL = 5
// SUBLEVEL = 48
// EXTRAVERSION =
--- 2.5/fs/select.c 2002-11-20 23:25:46.000000000 +0100
+++ build-2.5/fs/select.c 2002-11-20 23:21:51.000000000 +0100
@@ -225,13 +225,16 @@
return retval;
}
-static int scan_select_entries(fd_set_bits *fds, struct poll_table_entry *entries, int size, int *prev)
+static int scan_entries(struct poll_table_entry *entries, int size, void **prev,
+ int (*actor)(void *priv, void *handle, unsigned long mask), void *priv)
{
- int i;
int retval = 0;
+ int i;
+
for (i=0;i<size;i++) {
unsigned long mask;
- int fd;
+ void *handle;
+
if (!entries[i].woken)
continue;
entries[i].woken = 0;
@@ -239,18 +242,51 @@
/* drivers are permitted to use multiple waitqueues, but we must
* not double-account that in the return value
*/
- fd = (int)entries[i].handle;
- if (fd == *prev)
+ handle = entries[i].handle;
+ if (handle == *prev)
continue;
- *prev = fd;
+ *prev = handle;
mask = DEFAULT_POLLMASK;
if (entries[i].filp->f_op && entries[i].filp->f_op->poll)
mask = entries[i].filp->f_op->poll(entries[i].filp, NULL);
- retval += parse_mask(fds, mask, fd/__NFDBITS, BIT(fd));
+ retval += actor(priv, handle, mask);
+
}
return retval;
}
+static int wait_for_events(struct poll_wqueues *wait, long *timeout,
+ int (*actor)(void *priv, void *handle, unsigned long mask), void *priv)
+{
+ int count = 0;
+ for (;;) {
+ struct poll_table_page *pg;
+ void *prev;
+ if (count || !(*timeout) || signal_pending(current))
+ break;
+ count = wait->error;
+ if (count)
+ break;
+ *timeout = schedule_timeout(*timeout);
+
+ set_current_state(TASK_INTERRUPTIBLE);
+ prev = NULL;
+ count = scan_entries(wait->internal, wait->nr, &prev, actor, priv);
+ pg = wait->table;
+ while (pg) {
+ count += scan_entries(pg->entries, pg->entry-pg->entries, &prev, actor, priv);
+ pg = pg->next;
+ }
+ }
+ return count;
+}
+
+static int select_actor(void *priv, void *handle, unsigned long mask)
+{
+ int fd = (int)handle;
+ return parse_mask(priv, mask, fd/__NFDBITS, BIT(fd));
+}
+
int do_select(int n, fd_set_bits *fds, long *timeout)
{
struct poll_wqueues table;
@@ -294,27 +330,8 @@
if (retval)
wait = NULL;
}
- /* step two: now scan through the wait queues, that's faster
- * than the bit lookup */
- for (;;) {
- struct poll_table_page *pg;
- int prev;
- if (retval || !__timeout || signal_pending(current))
- break;
- if(table.error) {
- retval = table.error;
- break;
- }
- __timeout = schedule_timeout(__timeout);
- set_current_state(TASK_INTERRUPTIBLE);
- prev = -1;
- retval = scan_select_entries(fds, table.internal, table.nr, &prev);
- pg = table.table;
- while (pg) {
- retval += scan_select_entries(fds, pg->entries, pg->entry-pg->entries, &prev);
- pg = pg->next;
- }
- }
+ if (!retval && __timeout)
+ retval = wait_for_events(&table, &__timeout, select_actor, fds);
current->state = TASK_RUNNING;
poll_freewait(&table);
@@ -487,36 +504,16 @@
}
}
-static int scan_poll_entries(struct poll_table_entry *entries, int size, void **prev)
+int poll_actor(void *priv, void *handle, unsigned long mask)
{
- int retval = 0;
- int i;
-
- for (i=0;i<size;i++) {
- unsigned long mask;
- struct pollfd *fdp;
+ struct pollfd *fdp = handle;
- if (!entries[i].woken)
- continue;
- entries[i].woken = 0;
- mb();
- /* drivers are permitted to use multiple waitqueues, but we must
- * not double-account that in the return value
- */
- fdp = entries[i].handle;
- if (fdp == *prev)
- continue;
- *prev = fdp;
- mask = DEFAULT_POLLMASK;
- if (entries[i].filp->f_op && entries[i].filp->f_op->poll)
- mask = entries[i].filp->f_op->poll(entries[i].filp, NULL);
- mask &= fdp->events | POLLERR | POLLHUP;
- if (mask) {
- retval++;
- fdp->revents = mask;
- }
+ mask &= fdp->events | POLLERR | POLLHUP;
+ if (mask) {
+ fdp->revents = mask;
+ return 1;
}
- return retval;
+ return 0;
}
static int do_poll(unsigned int nfds, struct poll_list *list,
@@ -535,27 +532,8 @@
do_pollfd( walk->len, walk->entries, &pt, &count);
walk = walk->next;
}
- pt = NULL;
- for (;;) {
- struct poll_table_page *pg;
- void *prev;
-
- if (count || !timeout || signal_pending(current))
- break;
- count = wait->error;
- if (count)
- break;
- timeout = schedule_timeout(timeout);
-
- set_current_state(TASK_INTERRUPTIBLE);
- prev = NULL;
- count = scan_poll_entries(wait->internal, wait->nr, &prev);
- pg = wait->table;
- while (pg) {
- count += scan_poll_entries(pg->entries, pg->entry-pg->entries, &prev);
- pg = pg->next;
- }
- }
+ if (!count && timeout)
+ count = wait_for_events(wait, &timeout, poll_actor, NULL);
current->state = TASK_RUNNING;
return count;
}
next reply other threads:[~2002-11-20 22:53 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2002-11-20 23:00 Manfred Spraul [this message]
2002-11-21 0:29 ` [RFC] 6 sys_poll/sys_select performance patches Davide Libenzi
2002-11-21 2:21 ` Hanna Linder
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=3DDC13F8.2030805@colorfullife.com \
--to=manfred@colorfullife.com \
--cc=linux-kernel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.