* [PATCH] [RFC] Optimize select/poll by putting small data sets on the stack
@ 2006-01-03 20:58 Andi Kleen
2006-01-03 22:14 ` Eric Dumazet
` (2 more replies)
0 siblings, 3 replies; 11+ messages in thread
From: Andi Kleen @ 2006-01-03 20:58 UTC (permalink / raw)
To: linux-kernel
This is a RFC for now. I would be interested in testing
feedback. Patch is for 2.6.15.
Optimize select and poll by using stack space for small fd sets
This brings back an old optimization from Linux 2.0. Using
the stack is faster than kmalloc. On an Intel P4 system
it speeds up a select of a single pty fd by about 13%
(~4000 cycles -> ~3500)
It also saves memory because a daemon hanging in select or poll
will usually use one or two fewer pages. This can add up -
e.g. if you have 10 daemons blocking in poll/select you
save 40KB of memory.
I did a patch for this long ago, but it was never applied.
This version is a reimplementation of the old patch that
tries to be less intrusive. I only did the minimal changes
needed for the stack allocation.
The cut off point before external memory is allocated
is currently at 832 bytes. The system calls always allocate
this much memory on the stack.
These 832 bytes are divided into 256 bytes of frontend data (for the select
bitmaps or the pollfds) and the rest of the space for the wait queues used
by the low level drivers. There are some extreme
cases where this won't work out for select and it falls back
to allocating memory too early - especially with very sparse
large select bitmaps - but the majority of processes that only have a small
number of file descriptors should be ok.
[TBD: 832/256 might not be the best split for select or poll]
I suspect more optimizations might be possible, but they would
be more complicated. One way would be to cache the select/poll
context over multiple system calls because typically the
input values should be similar. Problem is when to flush
the file descriptors out though.
Signed-off-by: Andi Kleen <ak@suse.de>
Index: linux-2.6.15rc7-work/fs/select.c
===================================================================
--- linux-2.6.15rc7-work.orig/fs/select.c
+++ linux-2.6.15rc7-work/fs/select.c
@@ -29,12 +29,6 @@
#define ROUND_UP(x,y) (((x)+(y)-1)/(y))
#define DEFAULT_POLLMASK (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)
-struct poll_table_entry {
- struct file * filp;
- wait_queue_t wait;
- wait_queue_head_t * wait_address;
-};
-
struct poll_table_page {
struct poll_table_page * next;
struct poll_table_entry * entry;
@@ -64,13 +58,23 @@ void poll_initwait(struct poll_wqueues *
init_poll_funcptr(&pwq->pt, __pollwait);
pwq->error = 0;
pwq->table = NULL;
+ pwq->inline_index = 0;
}
EXPORT_SYMBOL(poll_initwait);
+static void free_poll_entry(struct poll_table_entry *entry)
+{
+ remove_wait_queue(entry->wait_address,&entry->wait);
+ fput(entry->filp);
+}
+
void poll_freewait(struct poll_wqueues *pwq)
{
struct poll_table_page * p = pwq->table;
+ int i;
+ for (i = 0; i < pwq->inline_index; i++)
+ free_poll_entry(pwq->inline_entries + i);
while (p) {
struct poll_table_entry * entry;
struct poll_table_page *old;
@@ -78,8 +82,7 @@ void poll_freewait(struct poll_wqueues *
entry = p->entry;
do {
entry--;
- remove_wait_queue(entry->wait_address,&entry->wait);
- fput(entry->filp);
+ free_poll_entry(entry);
} while (entry > p->entries);
old = p;
p = p->next;
@@ -89,12 +92,14 @@ void poll_freewait(struct poll_wqueues *
EXPORT_SYMBOL(poll_freewait);
-static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
- poll_table *_p)
-{
+static struct poll_table_entry *poll_get_entry(poll_table *_p)
+{
struct poll_wqueues *p = container_of(_p, struct poll_wqueues, pt);
struct poll_table_page *table = p->table;
+ if (p->inline_index < N_INLINE_POLL_ENTRIES)
+ return p->inline_entries + p->inline_index++;
+
if (!table || POLL_TABLE_FULL(table)) {
struct poll_table_page *new_table;
@@ -102,7 +107,7 @@ static void __pollwait(struct file *filp
if (!new_table) {
p->error = -ENOMEM;
__set_current_state(TASK_RUNNING);
- return;
+ return NULL;
}
new_table->entry = new_table->entries;
new_table->next = table;
@@ -110,16 +115,21 @@ static void __pollwait(struct file *filp
table = new_table;
}
- /* Add a new entry */
- {
- struct poll_table_entry * entry = table->entry;
- table->entry = entry+1;
- get_file(filp);
- entry->filp = filp;
- entry->wait_address = wait_address;
- init_waitqueue_entry(&entry->wait, current);
- add_wait_queue(wait_address,&entry->wait);
- }
+ return table->entry++;
+}
+
+/* Add a new entry */
+static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
+ poll_table *p)
+{
+ struct poll_table_entry *entry = poll_get_entry(p);
+ if (!entry)
+ return;
+ get_file(filp);
+ entry->filp = filp;
+ entry->wait_address = wait_address;
+ init_waitqueue_entry(&entry->wait, current);
+ add_wait_queue(wait_address,&entry->wait);
}
#define FDS_IN(fds, n) (fds->in + n)
@@ -274,16 +284,6 @@ int do_select(int n, fd_set_bits *fds, l
return retval;
}
-static void *select_bits_alloc(int size)
-{
- return kmalloc(6 * size, GFP_KERNEL);
-}
-
-static void select_bits_free(void *bits, int size)
-{
- kfree(bits);
-}
-
/*
* We can actually return ERESTARTSYS instead of EINTR, but I'd
* like to be certain this leads to no problems. So I return
@@ -303,6 +303,8 @@ sys_select(int n, fd_set __user *inp, fd
long timeout;
int ret, size, max_fdset;
struct fdtable *fdt;
+ /* Allocate small arguments on the stack to save memory and be faster */
+ char stack_fds[SELECT_STACK_ALLOC];
timeout = MAX_SCHEDULE_TIMEOUT;
if (tvp) {
@@ -344,7 +346,10 @@ sys_select(int n, fd_set __user *inp, fd
*/
ret = -ENOMEM;
size = FDS_BYTES(n);
- bits = select_bits_alloc(size);
+ if (6*size < SELECT_STACK_ALLOC)
+ bits = stack_fds;
+ else
+ bits = kmalloc(6 * size, GFP_KERNEL);
if (!bits)
goto out_nofds;
fds.in = (unsigned long *) bits;
@@ -390,7 +395,8 @@ sys_select(int n, fd_set __user *inp, fd
ret = -EFAULT;
out:
- select_bits_free(bits, size);
+ if (bits != stack_fds)
+ kfree(bits);
out_nofds:
return ret;
}
@@ -464,6 +470,8 @@ static int do_poll(unsigned int nfds, s
return count;
}
+#define N_STACK_PPS ((sizeof(stack_pps) - sizeof(struct poll_list)) / sizeof(struct pollfd))
+
asmlinkage long sys_poll(struct pollfd __user * ufds, unsigned int nfds, long timeout)
{
struct poll_wqueues table;
@@ -473,6 +481,9 @@ asmlinkage long sys_poll(struct pollfd _
struct poll_list *walk;
struct fdtable *fdt;
int max_fdset;
+ /* Allocate small arguments on the stack to save memory and be faster */
+ char stack_pps[POLL_STACK_ALLOC];
+ struct poll_list *stack_pp = NULL;
/* Do a sanity check on nfds ... */
rcu_read_lock();
@@ -498,14 +509,23 @@ asmlinkage long sys_poll(struct pollfd _
err = -ENOMEM;
while(i!=0) {
struct poll_list *pp;
- pp = kmalloc(sizeof(struct poll_list)+
- sizeof(struct pollfd)*
- (i>POLLFD_PER_PAGE?POLLFD_PER_PAGE:i),
- GFP_KERNEL);
- if(pp==NULL)
- goto out_fds;
+ int num, size;
+ if (stack_pp == NULL)
+ num = N_STACK_PPS;
+ else
+ num = POLLFD_PER_PAGE;
+ if (num > i)
+ num = i;
+ size = sizeof(struct poll_list) + sizeof(struct pollfd)*num;
+ if (!stack_pp)
+ stack_pp = pp = (struct poll_list *)stack_pps;
+ else {
+ pp = kmalloc(size, GFP_KERNEL);
+ if (!pp)
+ goto out_fds;
+ }
pp->next=NULL;
- pp->len = (i>POLLFD_PER_PAGE?POLLFD_PER_PAGE:i);
+ pp->len = num;
if (head == NULL)
head = pp;
else
@@ -513,7 +533,7 @@ asmlinkage long sys_poll(struct pollfd _
walk = pp;
if (copy_from_user(pp->entries, ufds + nfds-i,
- sizeof(struct pollfd)*pp->len)) {
+ sizeof(struct pollfd)*num)) {
err = -EFAULT;
goto out_fds;
}
@@ -541,7 +561,8 @@ out_fds:
walk = head;
while(walk!=NULL) {
struct poll_list *pp = walk->next;
- kfree(walk);
+ if (walk != stack_pp)
+ kfree(walk);
walk = pp;
}
poll_freewait(&table);
Index: linux-2.6.15rc7-work/include/linux/poll.h
===================================================================
--- linux-2.6.15rc7-work.orig/include/linux/poll.h
+++ linux-2.6.15rc7-work/include/linux/poll.h
@@ -11,6 +11,15 @@
#include <linux/mm.h>
#include <asm/uaccess.h>
+/* ~832 bytes of stack space used max in sys_select/sys_poll before allocating
+ additional memory. */
+#define MAX_STACK_ALLOC 832
+#define FRONTEND_STACK_ALLOC 256
+#define SELECT_STACK_ALLOC FRONTEND_STACK_ALLOC
+#define POLL_STACK_ALLOC FRONTEND_STACK_ALLOC
+#define WQUEUES_STACK_ALLOC (MAX_STACK_ALLOC - FRONTEND_STACK_ALLOC)
+#define N_INLINE_POLL_ENTRIES (WQUEUES_STACK_ALLOC / sizeof(struct poll_table_entry))
+
struct poll_table_struct;
/*
@@ -33,13 +42,21 @@ static inline void init_poll_funcptr(pol
pt->qproc = qproc;
}
+struct poll_table_entry {
+ struct file * filp;
+ wait_queue_t wait;
+ wait_queue_head_t * wait_address;
+};
+
/*
* Structures and helpers for sys_poll/sys_poll
*/
-struct poll_wqueues {
- poll_table pt;
+struct poll_wqueues {
+ poll_table pt;
struct poll_table_page * table;
int error;
+ int inline_index;
+ struct poll_table_entry inline_entries[N_INLINE_POLL_ENTRIES];
};
extern void poll_initwait(struct poll_wqueues *pwq);
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] [RFC] Optimize select/poll by putting small data sets on the stack
2006-01-03 20:58 [PATCH] [RFC] Optimize select/poll by putting small data sets on the stack Andi Kleen
@ 2006-01-03 22:14 ` Eric Dumazet
2006-01-03 23:05 ` Andi Kleen
2006-01-03 23:07 ` Eric Dumazet
2006-01-03 22:22 ` Jesper Juhl
2006-01-04 0:28 ` Arnd Bergmann
2 siblings, 2 replies; 11+ messages in thread
From: Eric Dumazet @ 2006-01-03 22:14 UTC (permalink / raw)
To: Andi Kleen; +Cc: linux-kernel
Andi Kleen a écrit :
> This is a RFC for now. I would be interested in testing
> feedback. Patch is for 2.6.15.
>
> Optimize select and poll by a using stack space for small fd sets
>
> This brings back an old optimization from Linux 2.0. Using
> the stack is faster than kmalloc. On a Intel P4 system
> it speeds up a select of a single pty fd by about 13%
> (~4000 cycles -> ~3500)
Was this result on UP or SMP kernel ? Preempt or not ?
I think we might play in do_pollfd() and use fget_light()/fput_light() instead
of fget()/fput() that are somewhat expensive because of atomic inc/dec on SMP.
(I believe that select()/poll() based daemons are mostly non multi-threaded,
since high performance multi-threaded programs should be using epoll...)
Eric
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] [RFC] Optimize select/poll by putting small data sets on the stack
2006-01-03 22:14 ` Eric Dumazet
@ 2006-01-03 23:05 ` Andi Kleen
2006-01-03 23:07 ` Eric Dumazet
1 sibling, 0 replies; 11+ messages in thread
From: Andi Kleen @ 2006-01-03 23:05 UTC (permalink / raw)
To: Eric Dumazet; +Cc: linux-kernel
On Tuesday 03 January 2006 23:14, Eric Dumazet wrote:
> Andi Kleen a écrit :
> > This is a RFC for now. I would be interested in testing
> > feedback. Patch is for 2.6.15.
> >
> > Optimize select and poll by a using stack space for small fd sets
> >
> > This brings back an old optimization from Linux 2.0. Using
> > the stack is faster than kmalloc. On a Intel P4 system
> > it speeds up a select of a single pty fd by about 13%
> > (~4000 cycles -> ~3500)
>
> Was this result on UP or SMP kernel ? Preempt or not ?
SMP kernel, non preempt, on a uniprocessor hyperthreaded CPU.
>
> I think we might play in do_pollfd() and use fget_light()/fput_light()
> instead of fget()/fput() that are somewhat expensive because of atomic
> inc/dec on SMP.
One idea was to just cache the file references over multiple syscalls and only
free them using a timer or when some thread calls close().
This would also avoid taking the spinlocks to set up the wait queues.
Then a new select or poll would just check if the input set matches
the caches and only fix up what changed.
But I didn't implement this because it would be quite a bit more complicated
than this simple patch.
>
> (I believe that select()/poll() based daemons are mostly non
> multi-threaded, since high performance multi-threaded programs should be
> using epoll...)
Yes, epoll should have a similar effect.
-Andi
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] [RFC] Optimize select/poll by putting small data sets on the stack
2006-01-03 22:14 ` Eric Dumazet
2006-01-03 23:05 ` Andi Kleen
@ 2006-01-03 23:07 ` Eric Dumazet
2006-01-03 23:13 ` Andi Kleen
1 sibling, 1 reply; 11+ messages in thread
From: Eric Dumazet @ 2006-01-03 23:07 UTC (permalink / raw)
To: Andi Kleen; +Cc: linux-kernel
[-- Attachment #1: Type: text/plain, Size: 711 bytes --]
Eric Dumazet a écrit :
> Andi Kleen a écrit :
>> This is a RFC for now. I would be interested in testing
>> feedback. Patch is for 2.6.15.
>>
>> Optimize select and poll by a using stack space for small fd sets
>>
>> This brings back an old optimization from Linux 2.0. Using
>> the stack is faster than kmalloc. On a Intel P4 system
>> it speeds up a select of a single pty fd by about 13%
>> (~4000 cycles -> ~3500)
>
> Was this result on UP or SMP kernel ? Preempt or not ?
>
> I think we might play in do_pollfd() and use fget_light()/fput_light()
> instead of fget()/fput() that are somewhat expensive because of atomic
> inc/dec on SMP.
>
Just for completeness I include this patch against 2.6.15
[-- Attachment #2: select_fget_light.patch --]
[-- Type: text/plain, Size: 1165 bytes --]
--- linux-2.6.15/fs/select.c 2006-01-03 04:21:10.000000000 +0100
+++ linux-2.6.15-ed/fs/select.c 2006-01-04 00:04:10.000000000 +0100
@@ -221,17 +221,18 @@
}
for (j = 0; j < __NFDBITS; ++j, ++i, bit <<= 1) {
+ int fput_needed;
if (i >= n)
break;
if (!(bit & all_bits))
continue;
- file = fget(i);
+ file = fget_light(i, &fput_needed);
if (file) {
f_op = file->f_op;
mask = DEFAULT_POLLMASK;
if (f_op && f_op->poll)
mask = (*f_op->poll)(file, retval ? NULL : wait);
- fput(file);
+ fput_light(file, fput_needed);
if ((mask & POLLIN_SET) && (in & bit)) {
res_in |= bit;
retval++;
@@ -417,14 +418,15 @@
fdp = fdpage+i;
fd = fdp->fd;
if (fd >= 0) {
- struct file * file = fget(fd);
+ int fput_needed;
+ struct file * file = fget_light(fd, &fput_needed);
mask = POLLNVAL;
if (file != NULL) {
mask = DEFAULT_POLLMASK;
if (file->f_op && file->f_op->poll)
mask = file->f_op->poll(file, *pwait);
mask &= fdp->events | POLLERR | POLLHUP;
- fput(file);
+ fput_light(file, fput_needed);
}
if (mask) {
*pwait = NULL;
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] [RFC] Optimize select/poll by putting small data sets on the stack
2006-01-03 23:07 ` Eric Dumazet
@ 2006-01-03 23:13 ` Andi Kleen
0 siblings, 0 replies; 11+ messages in thread
From: Andi Kleen @ 2006-01-03 23:13 UTC (permalink / raw)
To: Eric Dumazet; +Cc: linux-kernel
On Wednesday 04 January 2006 00:07, Eric Dumazet wrote:
> Eric Dumazet a écrit :
> > Andi Kleen a écrit :
> >> This is a RFC for now. I would be interested in testing
> >> feedback. Patch is for 2.6.15.
> >>
> >> Optimize select and poll by a using stack space for small fd sets
> >>
> >> This brings back an old optimization from Linux 2.0. Using
> >> the stack is faster than kmalloc. On a Intel P4 system
> >> it speeds up a select of a single pty fd by about 13%
> >> (~4000 cycles -> ~3500)
> >
> > Was this result on UP or SMP kernel ? Preempt or not ?
> >
> > I think we might play in do_pollfd() and use fget_light()/fput_light()
> > instead of fget()/fput() that are somewhat expensive because of atomic
> > inc/dec on SMP.
>
> Just for completeness I include this patch against 2.6.15
Looks like a good idea.
-Andi
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] [RFC] Optimize select/poll by putting small data sets on the stack
2006-01-03 20:58 [PATCH] [RFC] Optimize select/poll by putting small data sets on the stack Andi Kleen
2006-01-03 22:14 ` Eric Dumazet
@ 2006-01-03 22:22 ` Jesper Juhl
2006-01-03 23:07 ` Andi Kleen
2006-01-04 0:28 ` Arnd Bergmann
2 siblings, 1 reply; 11+ messages in thread
From: Jesper Juhl @ 2006-01-03 22:22 UTC (permalink / raw)
To: Andi Kleen; +Cc: linux-kernel
On 1/3/06, Andi Kleen <ak@suse.de> wrote:
> This is a RFC for now. I would be interested in testing
> feedback. Patch is for 2.6.15.
>
> Optimize select and poll by a using stack space for small fd sets
>
> This brings back an old optimization from Linux 2.0. Using
> the stack is faster than kmalloc. On a Intel P4 system
[snip]
Got an easy way to benchmark this?
I'd like to test it on my box and provide some feedback, but I'd need
a way to benchmark, and if you have an easy way to do that already
figured out it would save me having to write my own :)
--
Jesper Juhl <jesper.juhl@gmail.com>
Don't top-post http://www.catb.org/~esr/jargon/html/T/top-post.html
Plain text mails only, please http://www.expita.com/nomime.html
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] [RFC] Optimize select/poll by putting small data sets on the stack
2006-01-03 22:22 ` Jesper Juhl
@ 2006-01-03 23:07 ` Andi Kleen
0 siblings, 0 replies; 11+ messages in thread
From: Andi Kleen @ 2006-01-03 23:07 UTC (permalink / raw)
To: Jesper Juhl; +Cc: linux-kernel
On Tuesday 03 January 2006 23:22, Jesper Juhl wrote:
> Got an easy way to benchmark this?
> I'd like to test it on my box and provide some feedback, but I'd need
> a way to benchmark, and if you have an easy way to do that already
> figured out it would save me having to write my own :)
I have a simple micro benchmark, but I don't think it makes
sense to repeat these measurements (they are not very interesting). I'm mostly
interested in functional testing - e.g. does it break anything for you?
-Andi
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] [RFC] Optimize select/poll by putting small data sets on the stack
2006-01-03 20:58 [PATCH] [RFC] Optimize select/poll by putting small data sets on the stack Andi Kleen
2006-01-03 22:14 ` Eric Dumazet
2006-01-03 22:22 ` Jesper Juhl
@ 2006-01-04 0:28 ` Arnd Bergmann
2006-01-04 0:33 ` Andi Kleen
2006-01-04 0:43 ` Eric Dumazet
2 siblings, 2 replies; 11+ messages in thread
From: Arnd Bergmann @ 2006-01-04 0:28 UTC (permalink / raw)
To: Andi Kleen; +Cc: linux-kernel
On Tuesday 03 January 2006 20:58, Andi Kleen wrote:
>
> This is a RFC for now. I would be interested in testing
> feedback. Patch is for 2.6.15.
>
> Optimize select and poll by a using stack space for small fd sets
>
> This brings back an old optimization from Linux 2.0. Using
> the stack is faster than kmalloc. On a Intel P4 system
> it speeds up a select of a single pty fd by about 13%
> (~4000 cycles -> ~3500)
Hmm, can you include the same change for compat_sys_select()?
When that was introduced, sys_select and compat_sys_select were
basically identical in their code, which makes it a lot easier
to verify that the compat_ version is correct.
Interestingly, doing a diff between sys_select and compat_sys_select
in the current kernel seems to suggest that they are both buggy
in that they miss checks for failing __put_user, but in /different/
places.
Arnd <><
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] [RFC] Optimize select/poll by putting small data sets on the stack
2006-01-04 0:28 ` Arnd Bergmann
@ 2006-01-04 0:33 ` Andi Kleen
2006-01-04 0:43 ` Eric Dumazet
1 sibling, 0 replies; 11+ messages in thread
From: Andi Kleen @ 2006-01-04 0:33 UTC (permalink / raw)
To: Arnd Bergmann; +Cc: linux-kernel
On Wednesday 04 January 2006 01:28, Arnd Bergmann wrote:
> Hmm, can you include the same change for compat_sys_select()?
> When that was introduced, sys_select and compat_sys_select were
> basically identical in their code, which makes it a lot easier
> to verify that the compat_ version is correct.
Ah good point. I forgot about those. Will fix them up for the next version
of the patch.
> Interestingly, doing a diff between sys_select and compat_sys_select
> in the current kernel seems to suggest that they are both buggy
> in that they miss checks for failing __put_user, but in /different/
> places.
Ok will do a sync first.
-Andi
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] [RFC] Optimize select/poll by putting small data sets on the stack
2006-01-04 0:28 ` Arnd Bergmann
2006-01-04 0:33 ` Andi Kleen
@ 2006-01-04 0:43 ` Eric Dumazet
2006-01-04 0:48 ` Andi Kleen
1 sibling, 1 reply; 11+ messages in thread
From: Eric Dumazet @ 2006-01-04 0:43 UTC (permalink / raw)
To: Arnd Bergmann; +Cc: Andi Kleen, linux-kernel
Arnd Bergmann a écrit :
> Interestingly, doing a diff between sys_select and compat_sys_select
> in the current kernel seems to suggest that they are both buggy
> in that they miss checks for failing __put_user, but in /different/
> places.
And I would like to point out that set_fd_set() implementation seems *buggy* :
It should not use __copy_to_user() but the real one (copy_to_user()) because
the calling thread could have slept in do_select() and another thread played
mm games during this sleep.
Eric
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] [RFC] Optimize select/poll by putting small data sets on the stack
2006-01-04 0:43 ` Eric Dumazet
@ 2006-01-04 0:48 ` Andi Kleen
0 siblings, 0 replies; 11+ messages in thread
From: Andi Kleen @ 2006-01-04 0:48 UTC (permalink / raw)
To: Eric Dumazet; +Cc: Arnd Bergmann, linux-kernel
On Wednesday 04 January 2006 01:43, Eric Dumazet wrote:
> And I would like to pinpoint that set_fd_set() implementation seems *buggy*
> :
>
> It should not use __copy_to_user() but the real one (copy_to_user())
> because the calling thread could have slept in do_select() and another
> thread played mm games during this sleep.
__ only skips the access_ok which checks the kernel/user boundary, and the
kernel/user boundary doesn't change even while sleeping.
On very early 386s it did something more to work around a CPU bug, but that is
racy on multithreaded processes in any case. Not really worth caring about.
Also I doubt any such machines are left in working condition. That old
workaround code could probably be safely removed by now.
-Andi
^ permalink raw reply [flat|nested] 11+ messages in thread
end of thread, other threads:[~2006-01-04 0:48 UTC | newest]
Thread overview: 11+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2006-01-03 20:58 [PATCH] [RFC] Optimize select/poll by putting small data sets on the stack Andi Kleen
2006-01-03 22:14 ` Eric Dumazet
2006-01-03 23:05 ` Andi Kleen
2006-01-03 23:07 ` Eric Dumazet
2006-01-03 23:13 ` Andi Kleen
2006-01-03 22:22 ` Jesper Juhl
2006-01-03 23:07 ` Andi Kleen
2006-01-04 0:28 ` Arnd Bergmann
2006-01-04 0:33 ` Andi Kleen
2006-01-04 0:43 ` Eric Dumazet
2006-01-04 0:48 ` Andi Kleen
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox