* epoll_pwait
@ 2006-10-03 7:27 Andrew Morton
2006-10-03 7:57 ` epoll_pwait David Woodhouse
2006-10-03 17:31 ` epoll_pwait Davide Libenzi
0 siblings, 2 replies; 7+ messages in thread
From: Andrew Morton @ 2006-10-03 7:27 UTC (permalink / raw)
To: linux-arch
Cc: Davide Libenzi, David Woodhouse, Michael Kerrisk, Ulrich Drepper,
Roland McGrath
I'm about to send this Linuswards. Architectures which implement
TIF_RESTORE_SIGMASK can wire it up.
David, do you have a test app which people can use?
Thanks.
From: Davide Libenzi <davidel@xmailserver.org>
Implement the epoll_pwait system call, which extends the event wait mechanism
with the same logic that ppoll and pselect use. The definition of epoll_pwait
is:
int epoll_pwait(int epfd, struct epoll_event *events, int maxevents,
int timeout, const sigset_t *sigmask, size_t sigsetsize);
The difference between the vanilla epoll_wait and epoll_pwait is that the
latter allows the caller to specify a signal mask to be set while waiting
for events. Hence epoll_pwait will wait until either a monitored event
or an unmasked signal occurs. If sigmask is NULL, the epoll_pwait system
call will act exactly like epoll_wait. For the POSIX definition of
pselect, information is available here:
http://www.opengroup.org/onlinepubs/009695399/functions/select.html
Signed-off-by: Davide Libenzi <davidel@xmailserver.org>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Andi Kleen <ak@muc.de>
Cc: Michael Kerrisk <mtk-manpages@gmx.net>
Cc: Ulrich Drepper <drepper@redhat.com>
Cc: Roland McGrath <roland@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
---
arch/i386/kernel/syscall_table.S | 1
fs/eventpoll.c | 55 +++++++++++++++++++++++++++--
include/asm-i386/unistd.h | 3 +
include/linux/syscalls.h | 3 +
4 files changed, 58 insertions(+), 4 deletions(-)
diff -puN arch/i386/kernel/syscall_table.S~epoll_pwait arch/i386/kernel/syscall_table.S
--- a/arch/i386/kernel/syscall_table.S~epoll_pwait
+++ a/arch/i386/kernel/syscall_table.S
@@ -318,3 +318,4 @@ ENTRY(sys_call_table)
.long sys_vmsplice
.long sys_move_pages
.long sys_getcpu
+ .long sys_epoll_pwait
diff -puN fs/eventpoll.c~epoll_pwait fs/eventpoll.c
--- a/fs/eventpoll.c~epoll_pwait
+++ a/fs/eventpoll.c
@@ -105,6 +105,8 @@
/* Maximum msec timeout value storeable in a long int */
#define EP_MAX_MSTIMEO min(1000ULL * MAX_SCHEDULE_TIMEOUT / HZ, (LONG_MAX - 999ULL) / HZ)
+#define EP_MAX_EVENTS (INT_MAX / sizeof(struct epoll_event))
+
struct epoll_filefd {
struct file *file;
@@ -497,7 +499,7 @@ void eventpoll_release_file(struct file
*/
asmlinkage long sys_epoll_create(int size)
{
- int error, fd;
+ int error, fd = -1;
struct eventpoll *ep;
struct inode *inode;
struct file *file;
@@ -640,7 +642,6 @@ eexit_1:
return error;
}
-#define MAX_EVENTS (INT_MAX / sizeof(struct epoll_event))
/*
* Implement the event wait interface for the eventpoll file. It is the kernel
@@ -657,7 +658,7 @@ asmlinkage long sys_epoll_wait(int epfd,
current, epfd, events, maxevents, timeout));
/* The maximum number of event must be greater than zero */
- if (maxevents <= 0 || maxevents > MAX_EVENTS)
+ if (maxevents <= 0 || maxevents > EP_MAX_EVENTS)
return -EINVAL;
/* Verify that the area passed by the user is writeable */
@@ -699,6 +700,54 @@ eexit_1:
}
+#ifdef TIF_RESTORE_SIGMASK
+
+/*
+ * Implement the event wait interface for the eventpoll file. It is the kernel
+ * part of the user space epoll_pwait(2).
+ */
+asmlinkage long sys_epoll_pwait(int epfd, struct epoll_event __user *events,
+ int maxevents, int timeout, const sigset_t __user *sigmask,
+ size_t sigsetsize)
+{
+ int error;
+ sigset_t ksigmask, sigsaved;
+
+ /*
+ * If the caller wants a certain signal mask to be set during the wait,
+ * we apply it here.
+ */
+ if (sigmask) {
+ if (sigsetsize != sizeof(sigset_t))
+ return -EINVAL;
+ if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask)))
+ return -EFAULT;
+ sigdelsetmask(&ksigmask, sigmask(SIGKILL) | sigmask(SIGSTOP));
+ sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
+ }
+
+ error = sys_epoll_wait(epfd, events, maxevents, timeout);
+
+ /*
+ * If we changed the signal mask, we need to restore the original one.
+ * In case we've got a signal while waiting, we do not restore the signal
+ * mask yet, and we allow do_signal() to deliver the signal on the way back
+ * to userspace, before the signal mask is restored.
+ */
+ if (sigmask) {
+ if (error == -EINTR) {
+ memcpy(&current->saved_sigmask, &sigsaved, sizeof(sigsaved));
+ set_thread_flag(TIF_RESTORE_SIGMASK);
+ } else
+ sigprocmask(SIG_SETMASK, &sigsaved, NULL);
+ }
+
+ return error;
+}
+
+#endif /* #ifdef TIF_RESTORE_SIGMASK */
+
+
/*
* Creates the file descriptor to be used by the epoll interface.
*/
diff -puN include/asm-i386/unistd.h~epoll_pwait include/asm-i386/unistd.h
--- a/include/asm-i386/unistd.h~epoll_pwait
+++ a/include/asm-i386/unistd.h
@@ -324,10 +324,11 @@
#define __NR_vmsplice 316
#define __NR_move_pages 317
#define __NR_getcpu 318
+#define __NR_epoll_pwait 319
#ifdef __KERNEL__
-#define NR_syscalls 319
+#define NR_syscalls 320
#include <linux/err.h>
/*
diff -puN include/linux/syscalls.h~epoll_pwait include/linux/syscalls.h
--- a/include/linux/syscalls.h~epoll_pwait
+++ a/include/linux/syscalls.h
@@ -431,6 +431,9 @@ asmlinkage long sys_epoll_ctl(int epfd,
struct epoll_event __user *event);
asmlinkage long sys_epoll_wait(int epfd, struct epoll_event __user *events,
int maxevents, int timeout);
+asmlinkage long sys_epoll_pwait(int epfd, struct epoll_event __user *events,
+ int maxevents, int timeout, const sigset_t __user *sigmask,
+ size_t sigsetsize);
asmlinkage long sys_gethostname(char __user *name, int len);
asmlinkage long sys_sethostname(char __user *name, int len);
asmlinkage long sys_setdomainname(char __user *name, int len);
_
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: epoll_pwait
2006-10-03 7:27 epoll_pwait Andrew Morton
@ 2006-10-03 7:57 ` David Woodhouse
2006-10-03 8:42 ` epoll_pwait David Woodhouse
2006-10-03 17:31 ` epoll_pwait Davide Libenzi
1 sibling, 1 reply; 7+ messages in thread
From: David Woodhouse @ 2006-10-03 7:57 UTC (permalink / raw)
To: Andrew Morton
Cc: linux-arch, Davide Libenzi, Michael Kerrisk, Ulrich Drepper,
Roland McGrath
On Tue, 2006-10-03 at 00:27 -0700, Andrew Morton wrote:
>
> I'm about to send this Linuswards. Architectures which implement
> TIF_RESTORE_SIGMASK can wire it up.
It needs compat_sys_epoll_wait() for the signal stuff though.
Given the horridness in the definition of 'struct epoll_event', I'm not
finding it easy to convince myself that we don't need compat versions of
other epoll syscalls, but I have a vague recollection that we looked at
this before and it's OK -- i386 is the only 32-bit architecture which
lets the u64 be non-64-bit-aligned? Could do with a better comment
though.
--
dwmw2
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: epoll_pwait
2006-10-03 7:57 ` epoll_pwait David Woodhouse
@ 2006-10-03 8:42 ` David Woodhouse
2006-10-03 15:21 ` epoll_pwait Matthew Wilcox
` (2 more replies)
0 siblings, 3 replies; 7+ messages in thread
From: David Woodhouse @ 2006-10-03 8:42 UTC (permalink / raw)
To: Andrew Morton
Cc: linux-arch, Davide Libenzi, Michael Kerrisk, Ulrich Drepper,
Roland McGrath, paulus
On Tue, 2006-10-03 at 08:57 +0100, David Woodhouse wrote:
> On Tue, 2006-10-03 at 00:27 -0700, Andrew Morton wrote:
> >
> > I'm about to send this Linuswards. Architectures which implement
> > TIF_RESTORE_SIGMASK can wire it up.
>
> It needs compat_sys_epoll_wait() for the signal stuff though.
Something like the patch below ought to suffice, although it's not
tested further than just building it.
----
[PATCH] Provide compat_sys_epoll_pwait(), wire up on PowerPC.
Also add a comment trying to make sense of the definition of
struct epoll_event.
Signed-off-by: David Woodhouse <dwmw2@infradead.org>
diff -u a/fs/eventpoll.c b/fs/eventpoll.c
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -35,6 +35,7 @@
#include <linux/mount.h>
#include <linux/bitops.h>
#include <linux/mutex.h>
+#include <linux/compat.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <asm/io.h>
@@ -745,6 +746,53 @@
return error;
}
+#ifdef CONFIG_COMPAT
+extern void sigset_from_compat(sigset_t *set, compat_sigset_t *compat);
+
+asmlinkage long compat_sys_epoll_pwait(int epfd, struct epoll_event __user *events,
+ int maxevents, int timeout,
+ const compat_sigset_t __user *sigmask,
+ compat_size_t sigsetsize)
+{
+ int error;
+ compat_sigset_t ss32;
+ sigset_t ksigmask, sigsaved;
+
+ /*
+ * If the caller wants a certain signal mask to be set during the wait,
+ * we apply it here.
+ */
+ if (sigmask) {
+ if (sigsetsize != sizeof(compat_sigset_t))
+ return -EINVAL;
+ if (copy_from_user(&ss32, sigmask, sizeof(ss32)))
+ return -EFAULT;
+ sigset_from_compat(&ksigmask, &ss32);
+
+ sigdelsetmask(&ksigmask, sigmask(SIGKILL) | sigmask(SIGSTOP));
+ sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
+ }
+
+ error = sys_epoll_wait(epfd, events, maxevents, timeout);
+
+ /*
+ * If we changed the signal mask, we need to restore the original one.
+ * In case we've got a signal while waiting, we do not restore the signal
+ * mask yet, and we allow do_signal() to deliver the signal on the way back
+ * to userspace, before the signal mask is restored.
+ */
+ if (sigmask) {
+ if (error == -EINTR) {
+ memcpy(&current->saved_sigmask, &sigsaved, sizeof(sigsaved));
+ set_thread_flag(TIF_RESTORE_SIGMASK);
+ } else
+ sigprocmask(SIG_SETMASK, &sigsaved, NULL);
+ }
+
+ return error;
+}
+#endif /* CONFIG_COMPAT */
+
#endif /* #ifdef TIF_RESTORE_SIGMASK */
only in patch2:
unchanged:
--- a/include/asm-powerpc/systbl.h
+++ b/include/asm-powerpc/systbl.h
@@ -304,3 +304,4 @@ SYSCALL_SPU(fchmodat)
SYSCALL_SPU(faccessat)
COMPAT_SYS_SPU(get_robust_list)
COMPAT_SYS_SPU(set_robust_list)
+COMPAT_SYS(epoll_pwait)
only in patch2:
unchanged:
--- a/include/asm-powerpc/unistd.h
+++ b/include/asm-powerpc/unistd.h
@@ -323,10 +323,11 @@ #define __NR_fchmodat 297
#define __NR_faccessat 298
#define __NR_get_robust_list 299
#define __NR_set_robust_list 300
+#define __NR_epoll_pwait 301
#ifdef __KERNEL__
-#define __NR_syscalls 301
+#define __NR_syscalls 302
#define __NR__exit __NR_exit
#define NR_syscalls __NR_syscalls
only in patch2:
unchanged:
--- a/include/linux/eventpoll.h
+++ b/include/linux/eventpoll.h
@@ -29,8 +29,11 @@ #define EPOLLONESHOT (1 << 30)
#define EPOLLET (1 << 31)
/*
- * On x86-64 make the 64bit structure have the same alignment as the
- * 32bit structure. This makes 32bit emulation easier.
+ * On i386, the u64 won't be aligned to 64-bits. So on x86_64 we pack
+ * the structure to be compatible. It seems that other 32-bit architectures
+ * will align the u64 naturally anyway, so we don't have to worry there.
+ * For example, both ppc and ppc64 put an extra 32 bits of padding in
+ * between the fields.
*/
#ifdef __x86_64__
#define EPOLL_PACKED __attribute__((packed))
--
dwmw2
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: epoll_pwait
2006-10-03 8:42 ` epoll_pwait David Woodhouse
@ 2006-10-03 15:21 ` Matthew Wilcox
2006-10-13 0:03 ` epoll_pwait Davide Libenzi
2006-12-01 14:53 ` epoll_pwait Heiko Carstens
2 siblings, 0 replies; 7+ messages in thread
From: Matthew Wilcox @ 2006-10-03 15:21 UTC (permalink / raw)
To: David Woodhouse
Cc: Andrew Morton, linux-arch, Davide Libenzi, Michael Kerrisk,
Ulrich Drepper, Roland McGrath, paulus
On Tue, Oct 03, 2006 at 09:42:29AM +0100, David Woodhouse wrote:
> --- a/include/linux/eventpoll.h
> +++ b/include/linux/eventpoll.h
> @@ -29,8 +29,11 @@ #define EPOLLONESHOT (1 << 30)
> #define EPOLLET (1 << 31)
>
> /*
> - * On x86-64 make the 64bit structure have the same alignment as the
> - * 32bit structure. This makes 32bit emulation easier.
> + * On i386, the u64 won't be aligned to 64-bits. So on x86_64 we pack
> + * the structure to be compatible. It seems that other 32-bit architectures
> + * will align the u64 naturally anyway, so we don't have to worry there.
> + * For example, both ppc and ppc64 put an extra 32 bits of padding in
> + * between the fields.
> */
> #ifdef __x86_64__
> #define EPOLL_PACKED __attribute__((packed))
Obviously, this should be
#if (defined(__x86_64__) || defined(__ia64))
since they both COMPAT i386.
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: epoll_pwait
2006-10-03 7:27 epoll_pwait Andrew Morton
2006-10-03 7:57 ` epoll_pwait David Woodhouse
@ 2006-10-03 17:31 ` Davide Libenzi
1 sibling, 0 replies; 7+ messages in thread
From: Davide Libenzi @ 2006-10-03 17:31 UTC (permalink / raw)
To: Andrew Morton
Cc: linux-arch, David Woodhouse, Michael Kerrisk, Ulrich Drepper,
Roland McGrath
On Tue, 3 Oct 2006, Andrew Morton wrote:
>
>
> I'm about to send this Linuswards. Architectures which implement
> TIF_RESTORE_SIGMASK can wire it up.
>
> David, do you have a test app which people can use?
The good&old one :)
http://www.xmailserver.org/epoll_pwait_test.c
- Davide
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: epoll_pwait
2006-10-03 8:42 ` epoll_pwait David Woodhouse
2006-10-03 15:21 ` epoll_pwait Matthew Wilcox
@ 2006-10-13 0:03 ` Davide Libenzi
2006-12-01 14:53 ` epoll_pwait Heiko Carstens
2 siblings, 0 replies; 7+ messages in thread
From: Davide Libenzi @ 2006-10-13 0:03 UTC (permalink / raw)
To: David Woodhouse
Cc: Andrew Morton, linux-arch, Michael Kerrisk, Ulrich Drepper,
Roland McGrath, paulus
On Thu, 12 Oct 2006, David Woodhouse wrote:
> On Tue, 2006-10-03 at 08:57 +0100, David Woodhouse wrote:
> > On Tue, 2006-10-03 at 00:27 -0700, Andrew Morton wrote:
> > >
> > > I'm about to send this Linuswards. Architectures which implement
> > > TIF_RESTORE_SIGMASK can wire it up.
> >
> > It needs compat_sys_epoll_wait() for the signal stuff though.
>
> Something like the patch below ought to suffice, although it's not
> tested further than just building it.
I was thinking about adding epoll compat functions to compat.c, but in the
end the only user would have been IA64. Also, epoll_pwait has two
structures that need to be filtered: sigset_t (for every 64-bit arch)
and epoll_event (only for IA64 ATM). This would require two
compat_epoll_pwait variants. In the end, I'm ok with David's patch.
Acked-by: Davide Libenzi <davidel@xmailserver.org>
- Davide
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: epoll_pwait
2006-10-03 8:42 ` epoll_pwait David Woodhouse
2006-10-03 15:21 ` epoll_pwait Matthew Wilcox
2006-10-13 0:03 ` epoll_pwait Davide Libenzi
@ 2006-12-01 14:53 ` Heiko Carstens
2 siblings, 0 replies; 7+ messages in thread
From: Heiko Carstens @ 2006-12-01 14:53 UTC (permalink / raw)
To: David Woodhouse
Cc: Andrew Morton, linux-arch, Davide Libenzi, Michael Kerrisk,
Ulrich Drepper, Roland McGrath, paulus
On Tue, Oct 03, 2006 at 09:42:29AM +0100, David Woodhouse wrote:
> On Tue, 2006-10-03 at 08:57 +0100, David Woodhouse wrote:
> > On Tue, 2006-10-03 at 00:27 -0700, Andrew Morton wrote:
> > >
> > > I'm about to send this Linuswards. Architectures which implement
> > > TIF_RESTORE_SIGMASK can wire it up.
> >
> > It needs compat_sys_epoll_wait() for the signal stuff though.
>
> Something like the patch below ought to suffice, although it's not
> tested further than just building it.
>
> ----
> [PATCH] Provide compat_sys_epoll_pwait(), wire up on PowerPC.
>
> Also add a comment trying to make sense of the definition of
> struct epoll_event.
>
> Signed-off-by: David Woodhouse <dwmw2@infradead.org>
> [...]
> +#ifdef CONFIG_COMPAT
> +extern void sigset_from_compat(sigset_t *set, compat_sigset_t *compat);
> +
> +asmlinkage long compat_sys_epoll_pwait(int epfd, struct epoll_event __user *events,
> + int maxevents, int timeout,
> + const compat_sigset_t __user *sigmask,
> + compat_size_t sigsetsize)
> [...]
What about this one? It's not in 2.6.19 nor in -mm. But sys_epoll_pwait is.
And even more strange is that mips uses sys_epoll_pwait for compat syscalls?!
Did I miss something?
^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2006-12-01 14:55 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2006-10-03 7:27 epoll_pwait Andrew Morton
2006-10-03 7:57 ` epoll_pwait David Woodhouse
2006-10-03 8:42 ` epoll_pwait David Woodhouse
2006-10-03 15:21 ` epoll_pwait Matthew Wilcox
2006-10-13 0:03 ` epoll_pwait Davide Libenzi
2006-12-01 14:53 ` epoll_pwait Heiko Carstens
2006-10-03 17:31 ` epoll_pwait Davide Libenzi
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).