linux-arch.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* epoll_pwait
@ 2006-10-03  7:27 Andrew Morton
  2006-10-03  7:57 ` epoll_pwait David Woodhouse
  2006-10-03 17:31 ` epoll_pwait Davide Libenzi
  0 siblings, 2 replies; 7+ messages in thread
From: Andrew Morton @ 2006-10-03  7:27 UTC (permalink / raw)
  To: linux-arch
  Cc: Davide Libenzi, David Woodhouse, Michael Kerrisk, Ulrich Drepper,
	Roland McGrath



I'm about to send this Linuswards.  Architectures which implement
TIF_RESTORE_SIGMASK can wire it up.

David, do you have a test app which people can use?

Thanks.


From: Davide Libenzi <davidel@xmailserver.org>

Implement the epoll_pwait system call, which extends the event wait mechanism
with the same logic that ppoll and pselect use.  The definition of epoll_pwait
is:

int epoll_pwait(int epfd, struct epoll_event *events, int maxevents,
                 int timeout, const sigset_t *sigmask, size_t sigsetsize);

The difference between the vanilla epoll_wait and epoll_pwait is that the
latter allows the caller to specify a signal mask to be set while waiting
for events.  Hence epoll_pwait will wait until either a monitored event
or an unmasked signal occurs.  If sigmask is NULL, the epoll_pwait system
call will act exactly like epoll_wait.  For the POSIX definition of
pselect, information is available here:

http://www.opengroup.org/onlinepubs/009695399/functions/select.html

Signed-off-by: Davide Libenzi <davidel@xmailserver.org>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Andi Kleen <ak@muc.de>
Cc: Michael Kerrisk <mtk-manpages@gmx.net>
Cc: Ulrich Drepper <drepper@redhat.com>
Cc: Roland McGrath <roland@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
---

 arch/i386/kernel/syscall_table.S |    1 
 fs/eventpoll.c                   |   55 +++++++++++++++++++++++++++--
 include/asm-i386/unistd.h        |    3 +
 include/linux/syscalls.h         |    3 +
 4 files changed, 58 insertions(+), 4 deletions(-)

diff -puN arch/i386/kernel/syscall_table.S~epoll_pwait arch/i386/kernel/syscall_table.S
--- a/arch/i386/kernel/syscall_table.S~epoll_pwait
+++ a/arch/i386/kernel/syscall_table.S
@@ -318,3 +318,4 @@ ENTRY(sys_call_table)
 	.long sys_vmsplice
 	.long sys_move_pages
 	.long sys_getcpu
+	.long sys_epoll_pwait
diff -puN fs/eventpoll.c~epoll_pwait fs/eventpoll.c
--- a/fs/eventpoll.c~epoll_pwait
+++ a/fs/eventpoll.c
@@ -105,6 +105,8 @@
 /* Maximum msec timeout value storeable in a long int */
 #define EP_MAX_MSTIMEO min(1000ULL * MAX_SCHEDULE_TIMEOUT / HZ, (LONG_MAX - 999ULL) / HZ)
 
+#define EP_MAX_EVENTS (INT_MAX / sizeof(struct epoll_event))
+
 
 struct epoll_filefd {
 	struct file *file;
@@ -497,7 +499,7 @@ void eventpoll_release_file(struct file 
  */
 asmlinkage long sys_epoll_create(int size)
 {
-	int error, fd;
+	int error, fd = -1;
 	struct eventpoll *ep;
 	struct inode *inode;
 	struct file *file;
@@ -640,7 +642,6 @@ eexit_1:
 	return error;
 }
 
-#define MAX_EVENTS (INT_MAX / sizeof(struct epoll_event))
 
 /*
  * Implement the event wait interface for the eventpoll file. It is the kernel
@@ -657,7 +658,7 @@ asmlinkage long sys_epoll_wait(int epfd,
 		     current, epfd, events, maxevents, timeout));
 
 	/* The maximum number of event must be greater than zero */
-	if (maxevents <= 0 || maxevents > MAX_EVENTS)
+	if (maxevents <= 0 || maxevents > EP_MAX_EVENTS)
 		return -EINVAL;
 
 	/* Verify that the area passed by the user is writeable */
@@ -699,6 +700,54 @@ eexit_1:
 }
 
 
+#ifdef TIF_RESTORE_SIGMASK
+
+/*
+ * Implement the event wait interface for the eventpoll file. It is the kernel
+ * part of the user space epoll_pwait(2).
+ */
+asmlinkage long sys_epoll_pwait(int epfd, struct epoll_event __user *events,
+				int maxevents, int timeout, const sigset_t __user *sigmask,
+				size_t sigsetsize)
+{
+	int error;
+	sigset_t ksigmask, sigsaved;
+
+	/*
+	 * If the caller wants a certain signal mask to be set during the wait,
+	 * we apply it here.
+	 */
+	if (sigmask) {
+		if (sigsetsize != sizeof(sigset_t))
+			return -EINVAL;
+		if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask)))
+			return -EFAULT;
+		sigdelsetmask(&ksigmask, sigmask(SIGKILL) | sigmask(SIGSTOP));
+		sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
+	}
+
+	error = sys_epoll_wait(epfd, events, maxevents, timeout);
+
+	/*
+	 * If we changed the signal mask, we need to restore the original one.
+	 * In case we've got a signal while waiting, we do not restore the signal
+	 * mask yet, and we allow do_signal() to deliver the signal on the way back
+	 * to userspace, before the signal mask is restored.
+	 */
+	if (sigmask) {
+		if (error == -EINTR) {
+			memcpy(&current->saved_sigmask, &sigsaved, sizeof(sigsaved));
+			set_thread_flag(TIF_RESTORE_SIGMASK);
+		} else
+			sigprocmask(SIG_SETMASK, &sigsaved, NULL);
+	}
+
+	return error;
+}
+
+#endif /* #ifdef TIF_RESTORE_SIGMASK */
+
+
 /*
  * Creates the file descriptor to be used by the epoll interface.
  */
diff -puN include/asm-i386/unistd.h~epoll_pwait include/asm-i386/unistd.h
--- a/include/asm-i386/unistd.h~epoll_pwait
+++ a/include/asm-i386/unistd.h
@@ -324,10 +324,11 @@
 #define __NR_vmsplice		316
 #define __NR_move_pages		317
 #define __NR_getcpu		318
+#define __NR_epoll_pwait	319
 
 #ifdef __KERNEL__
 
-#define NR_syscalls 319
+#define NR_syscalls 320
 #include <linux/err.h>
 
 /*
diff -puN include/linux/syscalls.h~epoll_pwait include/linux/syscalls.h
--- a/include/linux/syscalls.h~epoll_pwait
+++ a/include/linux/syscalls.h
@@ -431,6 +431,9 @@ asmlinkage long sys_epoll_ctl(int epfd, 
 				struct epoll_event __user *event);
 asmlinkage long sys_epoll_wait(int epfd, struct epoll_event __user *events,
 				int maxevents, int timeout);
+asmlinkage long sys_epoll_pwait(int epfd, struct epoll_event __user *events,
+				int maxevents, int timeout, const sigset_t __user *sigmask,
+				size_t sigsetsize);
 asmlinkage long sys_gethostname(char __user *name, int len);
 asmlinkage long sys_sethostname(char __user *name, int len);
 asmlinkage long sys_setdomainname(char __user *name, int len);
_


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: epoll_pwait
  2006-10-03  7:27 epoll_pwait Andrew Morton
@ 2006-10-03  7:57 ` David Woodhouse
  2006-10-03  8:42   ` epoll_pwait David Woodhouse
  2006-10-03 17:31 ` epoll_pwait Davide Libenzi
  1 sibling, 1 reply; 7+ messages in thread
From: David Woodhouse @ 2006-10-03  7:57 UTC (permalink / raw)
  To: Andrew Morton
  Cc: linux-arch, Davide Libenzi, Michael Kerrisk, Ulrich Drepper,
	Roland McGrath

On Tue, 2006-10-03 at 00:27 -0700, Andrew Morton wrote:
> 
> I'm about to send this Linuswards.  Architectures which implement
> TIF_RESTORE_SIGMASK can wire it up.

It needs compat_sys_epoll_pwait() for the signal stuff though.

Given the horridness in the definition of 'struct epoll_event', I'm not
finding it easy to convince myself that we don't need compat versions of
other epoll syscalls, but I have a vague recollection that we looked at
this before and it's OK -- i386 is the only 32-bit architecture which
lets the u64 be non-64-bit-aligned? Could do with a better comment
though.

-- 
dwmw2


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: epoll_pwait
  2006-10-03  7:57 ` epoll_pwait David Woodhouse
@ 2006-10-03  8:42   ` David Woodhouse
  2006-10-03 15:21     ` epoll_pwait Matthew Wilcox
                       ` (2 more replies)
  0 siblings, 3 replies; 7+ messages in thread
From: David Woodhouse @ 2006-10-03  8:42 UTC (permalink / raw)
  To: Andrew Morton
  Cc: linux-arch, Davide Libenzi, Michael Kerrisk, Ulrich Drepper,
	Roland McGrath, paulus

On Tue, 2006-10-03 at 08:57 +0100, David Woodhouse wrote:
> On Tue, 2006-10-03 at 00:27 -0700, Andrew Morton wrote:
> > 
> > I'm about to send this Linuswards.  Architectures which implement
> > TIF_RESTORE_SIGMASK can wire it up.
> 
> It needs compat_sys_epoll_wait() for the signal stuff though.

Something like the patch below ought to suffice, although it's not
tested further than just building it.

----
[PATCH] Provide compat_sys_epoll_pwait(), wire up on PowerPC.

Also add a comment trying to make sense of the definition of
struct epoll_event.

Signed-off-by: David Woodhouse <dwmw2@infradead.org>

diff -u a/fs/eventpoll.c b/fs/eventpoll.c
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -35,6 +35,7 @@
 #include <linux/mount.h>
 #include <linux/bitops.h>
 #include <linux/mutex.h>
+#include <linux/compat.h>
 #include <asm/uaccess.h>
 #include <asm/system.h>
 #include <asm/io.h>
@@ -745,6 +746,53 @@
 	return error;
 }
 
+#ifdef CONFIG_COMPAT
+extern void sigset_from_compat(sigset_t *set, compat_sigset_t *compat);
+
+asmlinkage long compat_sys_epoll_pwait(int epfd, struct epoll_event __user *events,
+				       int maxevents, int timeout,
+				       const compat_sigset_t __user *sigmask,
+				       compat_size_t sigsetsize)
+{
+	int error;
+	compat_sigset_t ss32;
+	sigset_t ksigmask, sigsaved;
+
+	/*
+	 * If the caller wants a certain signal mask to be set during the wait,
+	 * we apply it here.
+	 */
+	if (sigmask) {
+		if (sigsetsize != sizeof(compat_sigset_t))
+			return -EINVAL;
+		if (copy_from_user(&ss32, sigmask, sizeof(ss32)))
+			return -EFAULT;
+		sigset_from_compat(&ksigmask, &ss32);
+
+		sigdelsetmask(&ksigmask, sigmask(SIGKILL) | sigmask(SIGSTOP));
+		sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
+	}
+
+	error = sys_epoll_wait(epfd, events, maxevents, timeout);
+
+	/*
+	 * If we changed the signal mask, we need to restore the original one.
+	 * In case we've got a signal while waiting, we do not restore the signal
+	 * mask yet, and we allow do_signal() to deliver the signal on the way back
+	 * to userspace, before the signal mask is restored.
+	 */
+	if (sigmask) {
+		if (error == -EINTR) {
+			memcpy(&current->saved_sigmask, &sigsaved, sizeof(sigsaved));
+			set_thread_flag(TIF_RESTORE_SIGMASK);
+		} else
+			sigprocmask(SIG_SETMASK, &sigsaved, NULL);
+	}
+
+	return error;
+}
+#endif /* CONFIG_COMPAT */
+
 #endif /* #ifdef TIF_RESTORE_SIGMASK */
 
 
only in patch2:
unchanged:
--- a/include/asm-powerpc/systbl.h
+++ b/include/asm-powerpc/systbl.h
@@ -304,3 +304,4 @@ SYSCALL_SPU(fchmodat)
 SYSCALL_SPU(faccessat)
 COMPAT_SYS_SPU(get_robust_list)
 COMPAT_SYS_SPU(set_robust_list)
+COMPAT_SYS(epoll_pwait)
only in patch2:
unchanged:
--- a/include/asm-powerpc/unistd.h
+++ b/include/asm-powerpc/unistd.h
@@ -323,10 +323,11 @@ #define __NR_fchmodat		297
 #define __NR_faccessat		298
 #define __NR_get_robust_list	299
 #define __NR_set_robust_list	300
+#define __NR_epoll_pwait	301
 
 #ifdef __KERNEL__
 
-#define __NR_syscalls		301
+#define __NR_syscalls		302
 
 #define __NR__exit __NR_exit
 #define NR_syscalls	__NR_syscalls
only in patch2:
unchanged:
--- a/include/linux/eventpoll.h
+++ b/include/linux/eventpoll.h
@@ -29,8 +29,11 @@ #define EPOLLONESHOT (1 << 30)
 #define EPOLLET (1 << 31)
 
 /* 
- * On x86-64 make the 64bit structure have the same alignment as the
- * 32bit structure. This makes 32bit emulation easier.
+ * On i386, the u64 won't be aligned to 64-bits. So on x86_64 we pack
+ * the structure to be compatible. It seems that other 32-bit architectures
+ * will align the u64 naturally anyway, so we don't have to worry there.
+ * For example, both ppc and ppc64 put an extra 32 bits of padding in 
+ * between the fields.
  */
 #ifdef __x86_64__
 #define EPOLL_PACKED __attribute__((packed))


-- 
dwmw2


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: epoll_pwait
  2006-10-03  8:42   ` epoll_pwait David Woodhouse
@ 2006-10-03 15:21     ` Matthew Wilcox
  2006-10-13  0:03     ` epoll_pwait Davide Libenzi
  2006-12-01 14:53     ` epoll_pwait Heiko Carstens
  2 siblings, 0 replies; 7+ messages in thread
From: Matthew Wilcox @ 2006-10-03 15:21 UTC (permalink / raw)
  To: David Woodhouse
  Cc: Andrew Morton, linux-arch, Davide Libenzi, Michael Kerrisk,
	Ulrich Drepper, Roland McGrath, paulus

On Tue, Oct 03, 2006 at 09:42:29AM +0100, David Woodhouse wrote:
> --- a/include/linux/eventpoll.h
> +++ b/include/linux/eventpoll.h
> @@ -29,8 +29,11 @@ #define EPOLLONESHOT (1 << 30)
>  #define EPOLLET (1 << 31)
>  
>  /* 
> - * On x86-64 make the 64bit structure have the same alignment as the
> - * 32bit structure. This makes 32bit emulation easier.
> + * On i386, the u64 won't be aligned to 64-bits. So on x86_64 we pack
> + * the structure to be compatible. It seems that other 32-bit architectures
> + * will align the u64 naturally anyway, so we don't have to worry there.
> + * For example, both ppc and ppc64 put an extra 32 bits of padding in 
> + * between the fields.
>   */
>  #ifdef __x86_64__
>  #define EPOLL_PACKED __attribute__((packed))

Obviously, this should be

#if (defined(__x86_64__) || defined(__ia64))

since they both provide i386 compat support.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: epoll_pwait
  2006-10-03  7:27 epoll_pwait Andrew Morton
  2006-10-03  7:57 ` epoll_pwait David Woodhouse
@ 2006-10-03 17:31 ` Davide Libenzi
  1 sibling, 0 replies; 7+ messages in thread
From: Davide Libenzi @ 2006-10-03 17:31 UTC (permalink / raw)
  To: Andrew Morton
  Cc: linux-arch, David Woodhouse, Michael Kerrisk, Ulrich Drepper,
	Roland McGrath

On Tue, 3 Oct 2006, Andrew Morton wrote:

> 
> 
> I'm about to send this Linuswards.  Architectures which implement
> TIF_RESTORE_SIGMASK can wire it up.
> 
> David, do you have a test app which people can use?

The good&old one :)

http://www.xmailserver.org/epoll_pwait_test.c



- Davide



^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: epoll_pwait
  2006-10-03  8:42   ` epoll_pwait David Woodhouse
  2006-10-03 15:21     ` epoll_pwait Matthew Wilcox
@ 2006-10-13  0:03     ` Davide Libenzi
  2006-12-01 14:53     ` epoll_pwait Heiko Carstens
  2 siblings, 0 replies; 7+ messages in thread
From: Davide Libenzi @ 2006-10-13  0:03 UTC (permalink / raw)
  To: David Woodhouse
  Cc: Andrew Morton, linux-arch, Michael Kerrisk, Ulrich Drepper,
	Roland McGrath, paulus

On Thu, 12 Oct 2006, David Woodhouse wrote:

> On Tue, 2006-10-03 at 08:57 +0100, David Woodhouse wrote:
> > On Tue, 2006-10-03 at 00:27 -0700, Andrew Morton wrote:
> > > 
> > > I'm about to send this Linuswards.  Architectures which implement
> > > TIF_RESTORE_SIGMASK can wire it up.
> > 
> > It needs compat_sys_epoll_wait() for the signal stuff though.
> 
> Something like the patch below ought to suffice, although it's not
> tested further than just building it.

I was thinking about adding epoll compat functions to compat.c, but in the 
end the only user would have been IA64. Also, epoll_pwait has two 
structures that need to be converted: sigset_t (for every 64-bit arch) 
and epoll_event (only for IA64 ATM). This would require two 
compat_epoll_pwait variants. In the end, I'm OK with David's patch.


Acked-by: Davide Libenzi <davidel@xmailserver.org>



- Davide



^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: epoll_pwait
  2006-10-03  8:42   ` epoll_pwait David Woodhouse
  2006-10-03 15:21     ` epoll_pwait Matthew Wilcox
  2006-10-13  0:03     ` epoll_pwait Davide Libenzi
@ 2006-12-01 14:53     ` Heiko Carstens
  2 siblings, 0 replies; 7+ messages in thread
From: Heiko Carstens @ 2006-12-01 14:53 UTC (permalink / raw)
  To: David Woodhouse
  Cc: Andrew Morton, linux-arch, Davide Libenzi, Michael Kerrisk,
	Ulrich Drepper, Roland McGrath, paulus

On Tue, Oct 03, 2006 at 09:42:29AM +0100, David Woodhouse wrote:
> On Tue, 2006-10-03 at 08:57 +0100, David Woodhouse wrote:
> > On Tue, 2006-10-03 at 00:27 -0700, Andrew Morton wrote:
> > > 
> > > I'm about to send this Linuswards.  Architectures which implement
> > > TIF_RESTORE_SIGMASK can wire it up.
> > 
> > It needs compat_sys_epoll_wait() for the signal stuff though.
> 
> Something like the patch below ought to suffice, although it's not
> tested further than just building it.
> 
> ----
> [PATCH] Provide compat_sys_epoll_pwait(), wire up on PowerPC.
> 
> Also add a comment trying to make sense of the definition of
> struct epoll_event.
> 
> Signed-off-by: David Woodhouse <dwmw2@infradead.org>
> [...]
> +#ifdef CONFIG_COMPAT
> +extern void sigset_from_compat(sigset_t *set, compat_sigset_t *compat);
> +
> +asmlinkage long compat_sys_epoll_pwait(int epfd, struct epoll_event __user *events,
> +				       int maxevents, int timeout,
> +				       const compat_sigset_t __user *sigmask,
> +				       compat_size_t sigsetsize)
> [...]

What about this one? It's in neither 2.6.19 nor -mm, but sys_epoll_pwait is.
Even stranger, mips uses sys_epoll_pwait for its compat syscalls?!
Did I miss something?

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2006-12-01 14:55 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2006-10-03  7:27 epoll_pwait Andrew Morton
2006-10-03  7:57 ` epoll_pwait David Woodhouse
2006-10-03  8:42   ` epoll_pwait David Woodhouse
2006-10-03 15:21     ` epoll_pwait Matthew Wilcox
2006-10-13  0:03     ` epoll_pwait Davide Libenzi
2006-12-01 14:53     ` epoll_pwait Heiko Carstens
2006-10-03 17:31 ` epoll_pwait Davide Libenzi

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).