All of lore.kernel.org
 help / color / mirror / Atom feed
* [patch 1/4] signalfd v1 - signalfd core ...
@ 2007-03-07  1:36 Davide Libenzi
  2007-03-07  1:43 ` Davide Libenzi
  2007-03-07  4:55 ` Stephen Rothwell
  0 siblings, 2 replies; 20+ messages in thread
From: Davide Libenzi @ 2007-03-07  1:36 UTC (permalink / raw)
  To: Linux Kernel Mailing List; +Cc: Andrew Morton, Linus Torvalds


This patch series implements a new signalfd() system call. I took part of 
the original Linus code (and you know how badly it can be broken :), and I 
added even more breakage ;)
The patch had to be almost completely changed. This patch allows multiple 
signalfd to listen for signals on the same sighand, w/out raing with 
dequeue_signal. Plus other changes that I don't remember (see here for the 
original patch http://tinyurl.com/3yuna5 ).
This seems to be working fine on my Dual Opteron machine. I made a quick 
test program for it:

http://www.xmailserver.org/signafd-test.c

The signalfd() system call implements signal delivery into a file 
descriptor receiver. The signalfd file descriptor if created with the 
following API:

int signalfd(int ufd, const sigset_t *mask, size_t masksize);

The "ufd" parameter allows to change an existing signalfd sigmask, w/out 
going to close/create cycle (Linus idea). Use "ufd" == -1 if you want a 
brand new signalfd file.
The "mask" allows to specify the signal mask of signals that we are 
interested in. The "masksize" parameter is the size of "mask".
Note that signalfd delivery and standard signal delivery can go in 
parallel. So you can receive signals on the signalfd file, and on the 
signal handlers. This makes the system more flexible IMO. If you don't 
want to see standard delivery, just pass the same "mask" to 
sigprocmask(SIG_BLOCK).
The signalfd fd supports poll(2) and read(2). The poll(2) will return 
POLLIN when signals are available to be dequeued.
The read(2) call will read u32 signal numbers that landed over the 
signalfd. It returns the size of the data copied, or zero if the sighand 
we are attached to, has been detached.



Signed-off-by: Davide Libenzi <davidel@xmailserver.org>



- Davide



fs/Makefile               |    2 
fs/exec.c                 |   12 +
fs/signalfd.c             |  293 ++++++++++++++++++++++++++++++++++++++++++++++
include/linux/init_task.h |    1 
include/linux/sched.h     |    1 
include/linux/signalfd.h  |   26 ++++
include/linux/syscalls.h  |    1 
kernel/exit.c             |    5 
kernel/fork.c             |    4 
kernel/signal.c           |   11 +
10 files changed, 352 insertions(+), 4 deletions(-)



Index: linux-2.6.20.ep2/fs/signalfd.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.20.ep2/fs/signalfd.c	2007-03-06 15:47:55.000000000 -0800
@@ -0,0 +1,293 @@
+/*
+ *  fs/signalfd.c
+ *
+ *  Copyright (C) 2003  Linus Torvalds
+ *
+ *  Mon Mar 5, 2007: Davide Libenzi <davidel@xmailserver.org>
+ *      Changed signal delivery and de-queueing.
+ *      Now using anonymous inode source.
+ */
+
+#include <linux/file.h>
+#include <linux/poll.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/mount.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/signal.h>
+#include <linux/list.h>
+#include <linux/anon_inodes.h>
+#include <linux/signalfd.h>
+
+#include <asm/uaccess.h>
+
+
+
+struct signalfd_ctx {
+	struct list_head lnk;
+	wait_queue_head_t wqh;
+	sigset_t sigmask;
+	sigset_t pending;
+	struct task_struct *tsk;
+	struct sighand_struct *sighand;
+};
+
+
+static void signalfd_cleanup(struct signalfd_ctx *ctx);
+static int signalfd_close(struct inode *inode, struct file *file);
+static unsigned int signalfd_poll(struct file *filp, poll_table *wait);
+static unsigned int signalfd_dequeue(struct signalfd_ctx *ctx);
+static ssize_t signalfd_read(struct file *filp, char *buf, size_t count, loff_t *ppos);
+
+
+
+static const struct file_operations signalfd_fops = {
+	.release	= signalfd_close,
+	.poll		= signalfd_poll,
+	.read		= signalfd_read,
+};
+static struct kmem_cache *signalfd_ctx_cachep;
+
+
+/*
+ * This must be called with the sighand lock held.
+ */
+int signalfd_deliver(struct sighand_struct *sighand, int sig)
+{
+	int nsig = 0;
+	struct list_head *pos;
+	struct signalfd_ctx *ctx;
+
+	list_for_each(pos, &sighand->sfdlist) {
+		ctx = list_entry(pos, struct signalfd_ctx, lnk);
+		/*
+		 * We use a negative signal value as a way to broadcast that the
+		 * sighand has been orphaned, so that we can notify all the
+		 * listeners about this.
+		 */
+		if (sig < 0)
+			__wake_up_locked(&ctx->wqh, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE);
+		else if (sigismember(&ctx->sigmask, sig)) {
+			sigaddset(&ctx->pending, sig);
+			__wake_up_locked(&ctx->wqh, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE);
+			nsig++;
+		}
+	}
+
+	return nsig;
+}
+
+
+/*
+ * Create a file descriptor that is associated with our signal
+ * state. We can pass it around to others if we want to, but
+ * it will always be _our_ signal state.
+ */
+asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemask)
+{
+	int error;
+	sigset_t sigmask;
+	struct signalfd_ctx *ctx;
+	struct file *file;
+	struct inode *inode;
+
+	error = -EINVAL;
+	if (sizemask != sizeof(sigset_t) ||
+	    copy_from_user(&sigmask, user_mask, sizeof(sigmask)))
+		goto err_exit;
+	sigdelsetmask(&sigmask, sigmask(SIGKILL) | sigmask(SIGSTOP));
+
+	if (ufd == -1) {
+		error = -ENOMEM;
+		ctx = kmem_cache_alloc(signalfd_ctx_cachep, GFP_KERNEL);
+		if (!ctx)
+			goto err_exit;
+
+		init_waitqueue_head(&ctx->wqh);
+		memset(&ctx->pending, 0, sizeof(ctx->pending));
+		ctx->sigmask = sigmask;
+		ctx->tsk = current;
+		get_task_struct(current);
+
+		/*
+		 * We also increment the sighand count to make sure
+		 * it doesn't go away from us in poll() when the task
+		 * exits (which can happen if the fd is passed to
+		 * another process with unix domain sockets.
+		 *
+		 * This also guarantees that an execve() will reallocate
+		 * the signal state, and thus avoids security concerns
+		 * with a untrusted process that passes off the signal
+		 * queue fd to another, and then does a suid execve.
+		 */
+		ctx->sighand = current->sighand;
+		atomic_inc(&ctx->sighand->count);
+
+		/*
+		 * Add this fd to the list of signal listeners.
+		 */
+		spin_lock_irq(&ctx->sighand->siglock);
+		list_add_tail(&ctx->lnk, &ctx->sighand->sfdlist);
+		spin_unlock_irq(&ctx->sighand->siglock);
+
+		/*
+		 * When we call this, the initialization must be complete, since
+		 * aino_getfd() will install the fd.
+		 */
+		error = aino_getfd(&ufd, &inode, &file, "[signalfd]",
+				   &signalfd_fops, ctx);
+		if (error)
+			goto err_fdalloc;
+	} else {
+		error = -EBADF;
+		file = fget(ufd);
+		if (!file)
+			goto err_exit;
+		ctx = file->private_data;
+		error = -EINVAL;
+		if (file->f_op != &signalfd_fops) {
+			fput(file);
+			goto err_exit;
+		}
+		spin_lock_irq(&ctx->sighand->siglock);
+		ctx->sigmask = sigmask;
+		spin_unlock_irq(&ctx->sighand->siglock);
+		wake_up(&ctx->wqh);
+		fput(file);
+	}
+
+	return ufd;
+
+err_fdalloc:
+	signalfd_cleanup(ctx);
+err_exit:
+	return error;
+}
+
+
+static void signalfd_cleanup(struct signalfd_ctx *ctx)
+{
+	spin_lock_irq(&ctx->sighand->siglock);
+	list_del(&ctx->lnk);
+	spin_unlock_irq(&ctx->sighand->siglock);
+	__cleanup_sighand(ctx->sighand);
+	put_task_struct(ctx->tsk);
+	kmem_cache_free(signalfd_ctx_cachep, ctx);
+}
+
+
+static int signalfd_close(struct inode *inode, struct file *file)
+{
+	signalfd_cleanup(file->private_data);
+	return 0;
+}
+
+
+static unsigned int signalfd_poll(struct file *filp, poll_table *wait)
+{
+	struct signalfd_ctx *ctx = filp->private_data;
+	int i;
+	unsigned long const *pending, *mask;
+
+	poll_wait(filp, &ctx->wqh, wait);
+
+	/*
+	 * Let the caller get a POLLIN in this case, ala socket recv() when
+	 * the peer disconnect.
+	 */
+	if (ctx->sighand != ctx->tsk->sighand)
+		return POLLIN;
+
+	pending = ctx->pending.sig;
+	mask = ctx->sigmask.sig;
+	for (i = 0; i < _NSIG_WORDS; i++, pending++, mask++)
+		if (*pending & *mask)
+			break;
+
+	return i == _NSIG_WORDS ? 0: POLLIN;
+}
+
+
+static unsigned int signalfd_dequeue(struct signalfd_ctx *ctx)
+{
+	int i, bsig;
+	unsigned long isig;
+	unsigned long *pending, *mask;
+
+	pending = ctx->pending.sig;
+	mask = ctx->sigmask.sig;
+	for (i = 0; i < _NSIG_WORDS; i++, pending++, mask++) {
+		if ((isig = *pending & *mask) != 0) {
+			bsig = ffz(~isig);
+			*pending &= ~(1UL << bsig);
+			return bsig + i * _NSIG_BPW + 1;
+		}
+	}
+	return 0;
+}
+
+
+static ssize_t signalfd_read(struct file *filp, char *buf, size_t count, loff_t *ppos)
+{
+	struct signalfd_ctx *ctx = filp->private_data;
+	struct sighand_struct *sighand = ctx->sighand;
+	int res;
+	u32 signr;
+	DECLARE_WAITQUEUE(wait, current);
+
+	if (count < sizeof(signr))
+		return -EINVAL;
+	count = sizeof(signr);
+	spin_lock_irq(&sighand->siglock);
+	__add_wait_queue(&ctx->wqh, &wait);
+	for (res = 0;;) {
+		set_current_state(TASK_INTERRUPTIBLE);
+		if ((signr = (u32) signalfd_dequeue(ctx)) != 0)
+			break;
+		if (sighand != ctx->tsk->sighand) {
+			/*
+			 * Let the caller read zero byte, ala socket recv() when
+			 * the peer disconnect.
+			 */
+			count = 0;
+			break;
+		}
+		if (signal_pending(current)) {
+			res = -EINTR;
+			break;
+		}
+		spin_unlock_irq(&sighand->siglock);
+		schedule();
+		spin_lock_irq(&sighand->siglock);
+	}
+	__remove_wait_queue(&ctx->wqh, &wait);
+	set_current_state(TASK_RUNNING);
+	spin_unlock_irq(&sighand->siglock);
+	if (res)
+		return res;
+	if (count && put_user(signr, buf))
+		count = -EFAULT;
+	return count;
+}
+
+
+static int __init signalfd_init(void)
+{
+	signalfd_ctx_cachep = kmem_cache_create("signalfd_ctx_cache",
+						sizeof(struct signalfd_ctx),
+						0, SLAB_PANIC, NULL, NULL);
+	return 0;
+}
+
+
+static void __exit signalfd_exit(void)
+{
+	kmem_cache_destroy(signalfd_ctx_cachep);
+}
+
+module_init(signalfd_init);
+module_exit(signalfd_exit);
+
+MODULE_LICENSE("GPL");
Index: linux-2.6.20.ep2/include/linux/init_task.h
===================================================================
--- linux-2.6.20.ep2.orig/include/linux/init_task.h	2007-03-06 11:09:00.000000000 -0800
+++ linux-2.6.20.ep2/include/linux/init_task.h	2007-03-06 11:12:34.000000000 -0800
@@ -84,6 +84,7 @@
 	.count		= ATOMIC_INIT(1), 				\
 	.action		= { { { .sa_handler = NULL, } }, },		\
 	.siglock	= __SPIN_LOCK_UNLOCKED(sighand.siglock),	\
+	.sfdlist	= LIST_HEAD_INIT(sighand.sfdlist),		\
 }
 
 extern struct group_info init_groups;
Index: linux-2.6.20.ep2/include/linux/sched.h
===================================================================
--- linux-2.6.20.ep2.orig/include/linux/sched.h	2007-03-06 11:09:00.000000000 -0800
+++ linux-2.6.20.ep2/include/linux/sched.h	2007-03-06 11:12:34.000000000 -0800
@@ -379,6 +379,7 @@
 	atomic_t		count;
 	struct k_sigaction	action[_NSIG];
 	spinlock_t		siglock;
+	struct list_head        sfdlist;
 };
 
 struct pacct_struct {
Index: linux-2.6.20.ep2/kernel/fork.c
===================================================================
--- linux-2.6.20.ep2.orig/kernel/fork.c	2007-03-06 11:09:00.000000000 -0800
+++ linux-2.6.20.ep2/kernel/fork.c	2007-03-06 11:12:34.000000000 -0800
@@ -1422,8 +1422,10 @@
 	struct sighand_struct *sighand = data;
 
 	if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) ==
-					SLAB_CTOR_CONSTRUCTOR)
+					SLAB_CTOR_CONSTRUCTOR) {
 		spin_lock_init(&sighand->siglock);
+		INIT_LIST_HEAD(&sighand->sfdlist);
+	}
 }
 
 void __init proc_caches_init(void)
Index: linux-2.6.20.ep2/include/linux/syscalls.h
===================================================================
--- linux-2.6.20.ep2.orig/include/linux/syscalls.h	2007-03-06 11:09:00.000000000 -0800
+++ linux-2.6.20.ep2/include/linux/syscalls.h	2007-03-06 12:08:47.000000000 -0800
@@ -602,6 +602,7 @@
 asmlinkage long sys_set_robust_list(struct robust_list_head __user *head,
 				    size_t len);
 asmlinkage long sys_getcpu(unsigned __user *cpu, unsigned __user *node, struct getcpu_cache __user *cache);
+asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemask);
 
 int kernel_execve(const char *filename, char *const argv[], char *const envp[]);
 
Index: linux-2.6.20.ep2/fs/Makefile
===================================================================
--- linux-2.6.20.ep2.orig/fs/Makefile	2007-03-06 11:12:23.000000000 -0800
+++ linux-2.6.20.ep2/fs/Makefile	2007-03-06 11:12:34.000000000 -0800
@@ -11,7 +11,7 @@
 		attr.o bad_inode.o file.o filesystems.o namespace.o aio.o \
 		seq_file.o xattr.o libfs.o fs-writeback.o \
 		pnode.o drop_caches.o splice.o sync.o utimes.o \
-		stack.o anon_inodes.o
+		stack.o anon_inodes.o signalfd.o
 
 ifeq ($(CONFIG_BLOCK),y)
 obj-y +=	buffer.o bio.o block_dev.o direct-io.o mpage.o ioprio.o
Index: linux-2.6.20.ep2/include/linux/signalfd.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.20.ep2/include/linux/signalfd.h	2007-03-06 11:12:34.000000000 -0800
@@ -0,0 +1,26 @@
+/*
+ *  include/linux/signalfd.h
+ *
+ *  Copyright (C) 2007  Davide Libenzi <davidel@xmailserver.org>
+ *
+ */
+
+#ifndef _LINUX_SIGNALFD_H
+#define _LINUX_SIGNALFD_H
+
+
+int signalfd_deliver(struct sighand_struct *sighand, int sig);
+
+/*
+ * No need to fall inside signalfd_deliver() and get/release a spinlock,
+ * if no signal listeners are available.
+ */
+static inline int signalfd_notify(struct sighand_struct *sighand, int sig)
+{
+	if (likely(list_empty(&sighand->sfdlist)))
+		return 0;
+	return signalfd_deliver(sighand, sig);
+}
+
+#endif /* _LINUX_SIGNALFD_H */
+
Index: linux-2.6.20.ep2/kernel/signal.c
===================================================================
--- linux-2.6.20.ep2.orig/kernel/signal.c	2007-03-06 11:09:00.000000000 -0800
+++ linux-2.6.20.ep2/kernel/signal.c	2007-03-06 11:12:34.000000000 -0800
@@ -26,6 +26,7 @@
 #include <linux/freezer.h>
 #include <linux/pid_namespace.h>
 #include <linux/nsproxy.h>
+#include <linux/signalfd.h>
 
 #include <asm/param.h>
 #include <asm/uaccess.h>
@@ -780,6 +781,11 @@
 	BUG_ON(!irqs_disabled());
 	assert_spin_locked(&t->sighand->siglock);
 
+	/*
+	 * Deliver the signal to listening signalfds ...
+	 */
+	signalfd_notify(t->sighand, sig);
+
 	/* Short-circuit ignored signals.  */
 	if (sig_ignored(t, sig))
 		goto out;
@@ -968,6 +974,11 @@
 	assert_spin_locked(&p->sighand->siglock);
 	handle_stop_signal(sig, p);
 
+	/*
+	 * Deliver the signal to listening signalfds ...
+	 */
+	signalfd_notify(p->sighand, sig);
+
 	/* Short-circuit ignored signals.  */
 	if (sig_ignored(p, sig))
 		return ret;
Index: linux-2.6.20.ep2/fs/exec.c
===================================================================
--- linux-2.6.20.ep2.orig/fs/exec.c	2007-03-06 11:09:00.000000000 -0800
+++ linux-2.6.20.ep2/fs/exec.c	2007-03-06 13:35:11.000000000 -0800
@@ -50,6 +50,7 @@
 #include <linux/tsacct_kern.h>
 #include <linux/cn_proc.h>
 #include <linux/audit.h>
+#include <linux/signalfd.h>
 
 #include <asm/uaccess.h>
 #include <asm/mmu_context.h>
@@ -755,11 +756,18 @@
 		recalc_sigpending();
 
 		spin_unlock(&newsighand->siglock);
+
+		/*
+		 * Tell all the sighand listeners that this sighand has
+		 * been detached. Needs to be called with the sighand lock
+		 * held.
+		 */
+		signalfd_notify(oldsighand, -1);
+
 		spin_unlock(&oldsighand->siglock);
 		write_unlock_irq(&tasklist_lock);
 
-		if (atomic_dec_and_test(&oldsighand->count))
-			kmem_cache_free(sighand_cachep, oldsighand);
+		__cleanup_sighand(oldsighand);
 	}
 
 	BUG_ON(!thread_group_leader(tsk));
Index: linux-2.6.20.ep2/kernel/exit.c
===================================================================
--- linux-2.6.20.ep2.orig/kernel/exit.c	2007-03-06 12:56:22.000000000 -0800
+++ linux-2.6.20.ep2/kernel/exit.c	2007-03-06 13:34:42.000000000 -0800
@@ -42,6 +42,7 @@
 #include <linux/audit.h> /* for audit_free() */
 #include <linux/resource.h>
 #include <linux/blkdev.h>
+#include <linux/signalfd.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -120,6 +121,10 @@
 
 	tsk->signal = NULL;
 	tsk->sighand = NULL;
+	/*
+	 * Notify that this sighand has been detached.
+	 */
+	signalfd_notify(sighand, -1);
 	spin_unlock(&sighand->siglock);
 	rcu_read_unlock();
 

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [patch 1/4] signalfd v1 - signalfd core ...
  2007-03-07  1:36 [patch 1/4] signalfd v1 - signalfd core Davide Libenzi
@ 2007-03-07  1:43 ` Davide Libenzi
  2007-03-07  4:55 ` Stephen Rothwell
  1 sibling, 0 replies; 20+ messages in thread
From: Davide Libenzi @ 2007-03-07  1:43 UTC (permalink / raw)
  To: Linux Kernel Mailing List; +Cc: Andrew Morton, Linus Torvalds

On Tue, 6 Mar 2007, Davide Libenzi wrote:

> This patch series implements a new signalfd() system call. I took part of 
> the original Linus code (and you know how badly it can be broken :), and I 
> added even more breakage ;)

I forgot to add that this requires the anonymous inode source contained in 
the previously posted epoll patch set.



- Davide



^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [patch 1/4] signalfd v1 - signalfd core ...
  2007-03-07  1:36 [patch 1/4] signalfd v1 - signalfd core Davide Libenzi
  2007-03-07  1:43 ` Davide Libenzi
@ 2007-03-07  4:55 ` Stephen Rothwell
  2007-03-07  7:11   ` Davide Libenzi
  1 sibling, 1 reply; 20+ messages in thread
From: Stephen Rothwell @ 2007-03-07  4:55 UTC (permalink / raw)
  To: Davide Libenzi; +Cc: Linux Kernel Mailing List, Andrew Morton, Linus Torvalds

[-- Attachment #1: Type: text/plain, Size: 458 bytes --]

On Tue, 6 Mar 2007 17:36:56 -0800 (PST) Davide Libenzi <davidel@xmailserver.org> wrote:
>
> The read(2) call will read u32 signal numbers that landed over the
> signalfd. It returns the size of the data copied, or zero if the sighand
> we are attached to, has been detached.

So what about signals that the user asked for a siginfo_t to be returned
with?

--
Cheers,
Stephen Rothwell                    sfr@canb.auug.org.au
http://www.canb.auug.org.au/~sfr/

[-- Attachment #2: Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [patch 1/4] signalfd v1 - signalfd core ...
  2007-03-07  4:55 ` Stephen Rothwell
@ 2007-03-07  7:11   ` Davide Libenzi
  2007-03-07 12:38     ` Stephen Rothwell
  0 siblings, 1 reply; 20+ messages in thread
From: Davide Libenzi @ 2007-03-07  7:11 UTC (permalink / raw)
  To: Stephen Rothwell; +Cc: Linux Kernel Mailing List, Andrew Morton, Linus Torvalds

On Wed, 7 Mar 2007, Stephen Rothwell wrote:

> On Tue, 6 Mar 2007 17:36:56 -0800 (PST) Davide Libenzi <davidel@xmailserver.org> wrote:
> >
> > The read(2) call will read u32 signal numbers that landed over the
> > signalfd. It returns the size of the data copied, or zero if the sighand
> > we are attached to, has been detached.
> 
> So what about signals that the user asked for a siginfo_t to be returned
> with?

O-Ren:   "You didn't think it was gonna be that easy, did you?"
B-Kiddo: "You know, for a second there, yeah, I kinda did."

:)

I could do that, since where I placed the signalfd_notify() I have the 
siginfo. But that is going to make code a little more complex, since the 
simple bitmaks needs to become a queue.



- Davide



^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [patch 1/4] signalfd v1 - signalfd core ...
  2007-03-07  7:11   ` Davide Libenzi
@ 2007-03-07 12:38     ` Stephen Rothwell
  2007-03-07 16:57       ` Linus Torvalds
  2007-03-07 17:42       ` Davide Libenzi
  0 siblings, 2 replies; 20+ messages in thread
From: Stephen Rothwell @ 2007-03-07 12:38 UTC (permalink / raw)
  To: Davide Libenzi; +Cc: Linux Kernel Mailing List, Andrew Morton, Linus Torvalds

[-- Attachment #1: Type: text/plain, Size: 1345 bytes --]

On Tue, 6 Mar 2007 23:11:49 -0800 (PST) Davide Libenzi <davidel@xmailserver.org> wrote:
>
> On Wed, 7 Mar 2007, Stephen Rothwell wrote:
>
> > On Tue, 6 Mar 2007 17:36:56 -0800 (PST) Davide Libenzi <davidel@xmailserver.org> wrote:
> > >
> > > The read(2) call will read u32 signal numbers that landed over the
> > > signalfd. It returns the size of the data copied, or zero if the sighand
> > > we are attached to, has been detached.
> >
> > So what about signals that the user asked for a siginfo_t to be returned
> > with?
>
> O-Ren:   "You didn't think it was gonna be that easy, did you?"
> B-Kiddo: "You know, for a second there, yeah, I kinda did."
>
> :)
>
> I could do that, since where I placed the signalfd_notify() I have the
> siginfo. But that is going to make code a little more complex, since the
> simple bitmaks needs to become a queue.

Then, of course, you have to think about how to get the siginfo_t out to
the user process.  Do you just return it from the read after the read
that returns the signal number?  If so, you need to know if the process
did a compat read syscall read or a normal one ...

There may be a better solution.

You probably need the queue anyway because the real time signals are
supposed to queue.

--
Cheers,
Stephen Rothwell                    sfr@canb.auug.org.au
http://www.canb.auug.org.au/~sfr/

[-- Attachment #2: Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [patch 1/4] signalfd v1 - signalfd core ...
  2007-03-07 12:38     ` Stephen Rothwell
@ 2007-03-07 16:57       ` Linus Torvalds
  2007-03-07 17:50         ` Davide Libenzi
  2007-03-07 17:42       ` Davide Libenzi
  1 sibling, 1 reply; 20+ messages in thread
From: Linus Torvalds @ 2007-03-07 16:57 UTC (permalink / raw)
  To: Stephen Rothwell; +Cc: Davide Libenzi, Linux Kernel Mailing List, Andrew Morton



On Wed, 7 Mar 2007, Stephen Rothwell wrote:
> 
> You probably need the queue anyway because the real time signals are
> supposed to queue.

Davide - the *real* problem is (I think) that you try to allow signals to 
be returned *both* by signalfd() and as a real signal.

That's wrong, wrong, wrong.

My original patch used "dequeue_signal()" to dequeue signals. Trust me, I 
did that for a reason. Your re-design to think that you can get the signal 
without using the real signal dequeueing is simply broken.

The signalfd must be an "either or" thing. Either you get it as a real 
signal, or you get it off the signalfd(). Not both. Not neither.

		Linus

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [patch 1/4] signalfd v1 - signalfd core ...
  2007-03-07 12:38     ` Stephen Rothwell
  2007-03-07 16:57       ` Linus Torvalds
@ 2007-03-07 17:42       ` Davide Libenzi
  1 sibling, 0 replies; 20+ messages in thread
From: Davide Libenzi @ 2007-03-07 17:42 UTC (permalink / raw)
  To: Stephen Rothwell; +Cc: Linux Kernel Mailing List, Andrew Morton, Linus Torvalds

On Wed, 7 Mar 2007, Stephen Rothwell wrote:

> Then, of course, you have to think about how to get the siginfo_t out to
> the user process.  Do you just return it from the read after the read
> that returns the signal number?  If so, you need to know if the process
> did a compat read syscall read or a normal one ...

It can be a read(2) of a structure that does not need compat. It must be 
that way, because compat-on-read are out of question.



- Davide



^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [patch 1/4] signalfd v1 - signalfd core ...
  2007-03-07 16:57       ` Linus Torvalds
@ 2007-03-07 17:50         ` Davide Libenzi
  2007-03-07 20:30           ` Jeremy Fitzhardinge
  0 siblings, 1 reply; 20+ messages in thread
From: Davide Libenzi @ 2007-03-07 17:50 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Stephen Rothwell, Linux Kernel Mailing List, Andrew Morton

On Wed, 7 Mar 2007, Linus Torvalds wrote:

> On Wed, 7 Mar 2007, Stephen Rothwell wrote:
> > 
> > You probably need the queue anyway because the real time signals are
> > supposed to queue.
> 
> Davide - the *real* problem is (I think) that you try to allow signals to 
> be returned *both* by signalfd() and as a real signal.
> 
> That's wrong, wrong, wrong.
> 
> My original patch used "dequeue_signal()" to dequeue signals. Trust me, I 
> did that for a reason. Your re-design to think that you can get the signal 
> without using the real signal dequeueing is simply broken.

I think having the ability to have both can be usefull, so the idea of 
having multiple listeners fd (that both would not work with the single 
queue since they'd racing over dequeue_signal).
The two place where I plugges signalfd_notify() has the siginfo already 
compiled, so I just need to:

1) Pass the siginfo pointer to signalfd_notify too

2) Switch from having a bitmap, to a struct list_head queue

3) Dequeue and return the signal and siginfo inside a compat-free 
   structure

The #2 makes code even easier actually. The poll would just do a 
list_empty(), and dequeueing is just get-head and list_del().



- Davide



^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [patch 1/4] signalfd v1 - signalfd core ...
  2007-03-07 17:50         ` Davide Libenzi
@ 2007-03-07 20:30           ` Jeremy Fitzhardinge
  2007-03-07 20:56             ` Davide Libenzi
  0 siblings, 1 reply; 20+ messages in thread
From: Jeremy Fitzhardinge @ 2007-03-07 20:30 UTC (permalink / raw)
  To: Davide Libenzi
  Cc: Linus Torvalds, Stephen Rothwell, Linux Kernel Mailing List,
	Andrew Morton

Davide Libenzi wrote:
> On Wed, 7 Mar 2007, Linus Torvalds wrote:
>
>   
>> On Wed, 7 Mar 2007, Stephen Rothwell wrote:
>>     
>>> You probably need the queue anyway because the real time signals are
>>> supposed to queue.
>>>       
>> Davide - the *real* problem is (I think) that you try to allow signals to 
>> be returned *both* by signalfd() and as a real signal.
>>
>> That's wrong, wrong, wrong.
>>
>> My original patch used "dequeue_signal()" to dequeue signals. Trust me, I 
>> did that for a reason. Your re-design to think that you can get the signal 
>> without using the real signal dequeueing is simply broken.
>>     
>
> I think having the ability to have both can be usefull, so the idea of 
> having multiple listeners fd (that both would not work with the single 
> queue since they'd racing over dequeue_signal).
>   

Do you have a specific use-case in mind?  Because I don't think this is
very useful at all; in fact it may be the opposite of useful.

Generally signals are "owned" by one particular piece of code, and if
you want to distribute the event further then you do that in the app, as
you have to do with a normal signal handler.  This code is the fd-based
analog of sigtimedwait, and so it should behave basically the same way. 
If multiple threads call sigtimedwait on the same signal set, then one
and only one will receive each signal.  This is good, because it means
you can have a thread pool processing signals (and other events) knowing
that there won't be duplicate processing.

I haven't looked at your code in detail, but it isn't clear from your
description whether you can have multiple signalfds for different
(distinct) sets of signals.  That would be useful as a first level of
demultiplexing between multiple user-mode signal consumers.  If you
allow that, you need to decide whether a read/poll on a signalfd blocks
until the head queued signal is part of the signal set, or if any queued
signal is in the set.

    J

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [patch 1/4] signalfd v1 - signalfd core ...
  2007-03-07 20:30           ` Jeremy Fitzhardinge
@ 2007-03-07 20:56             ` Davide Libenzi
  2007-03-07 21:26               ` Jeremy Fitzhardinge
  0 siblings, 1 reply; 20+ messages in thread
From: Davide Libenzi @ 2007-03-07 20:56 UTC (permalink / raw)
  To: Jeremy Fitzhardinge
  Cc: Linus Torvalds, Stephen Rothwell, Linux Kernel Mailing List,
	Andrew Morton

On Wed, 7 Mar 2007, Jeremy Fitzhardinge wrote:

> Davide Libenzi wrote:
> > On Wed, 7 Mar 2007, Linus Torvalds wrote:
> >
> >   
> >> On Wed, 7 Mar 2007, Stephen Rothwell wrote:
> >>     
> >>> You probably need the queue anyway because the real time signals are
> >>> supposed to queue.
> >>>       
> >> Davide - the *real* problem is (I think) that you try to allow signals to 
> >> be returned *both* by signalfd() and as a real signal.
> >>
> >> That's wrong, wrong, wrong.
> >>
> >> My original patch used "dequeue_signal()" to dequeue signals. Trust me, I 
> >> did that for a reason. Your re-design to think that you can get the signal 
> >> without using the real signal dequeueing is simply broken.
> >>     
> >
> > I think having the ability to have both can be usefull, so the idea of 
> > having multiple listeners fd (that both would not work with the single 
> > queue since they'd racing over dequeue_signal).
> >   
> 
> Do you have a specific use-case in mind?  Because I don't think this is
> very useful at all; in fact it may be the opposite of useful.
> 
> Generally signals are "owned" by one particular piece of code, and if
> you want to distribute the event further then you do that in the app, as
> you have to do with a normal signal handler.  This code is the fd-based
> analog of sigtimedwait, and so it should behave basically the same way. 
> If multiple threads call sigtimedwait on the same signal set, then one
> and only one will receive each signal.  This is good, because it means
> you can have a thread pool processing signals (and other events) knowing
> that there won't be duplicate processing.
> 
> I haven't looked at your code in detail, but it isn't clear from your
> description whether you can have multiple signalfds for different
> (distinct) sets of signals.  That would be useful as a first level of
> demultiplexing between multiple user-mode signal consumers.  If you
> allow that, you need to decide whether a read/poll on a signalfd blocks
> until the head queued signal is part of the signal set, or if any queued
> signal is in the set.

If you think a signal as a generic event source, than you see how more 
instances can attach to it and receive them.
You can have multiple signalfd with whatever sigmasks, even intersecting. 
You can pass the fd around, w/out the fear that a standard signal delivery 
would race with you on dequeue_signal (making you block after you got a 
POLLIN). You can have both standard and file based devliery, or you can 
not. Up to you. If you don't want to, you block them, otherwise you 
don't. It's flexible, and the code is like 20 lines more, and race-free.
Since the siginfo needed to be delivered too, at that point doing it over 
a read(2) would have messed up things [1], so I added a new syscall:

int signalfd_dequeue(int fd, siginfo_t *info, long timeo);

And the compat_ counter-part.



[1] I thought about having a compat-free siginfo to be pulled from 
    read(2), but that resulted to be messy, while we already have code to 
    ship siginfos to userspace.


- Davide



^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [patch 1/4] signalfd v1 - signalfd core ...
  2007-03-07 20:56             ` Davide Libenzi
@ 2007-03-07 21:26               ` Jeremy Fitzhardinge
  2007-03-07 21:35                 ` Davide Libenzi
  0 siblings, 1 reply; 20+ messages in thread
From: Jeremy Fitzhardinge @ 2007-03-07 21:26 UTC (permalink / raw)
  To: Davide Libenzi
  Cc: Linus Torvalds, Stephen Rothwell, Linux Kernel Mailing List,
	Andrew Morton

Davide Libenzi wrote:
> If you think a signal as a generic event source, than you see how more 
> instances can attach to it and receive them.
> You can have multiple signalfd with whatever sigmasks, even intersecting. 
> You can pass the fd around, w/out the fear that a standard signal delivery 
> would race with you on dequeue_signal (making you block after you got a 
> POLLIN).
Well, I think using both this mechanism and normal signal delivery would
be pretty much insane.  But if you do, then the only sane way to use the
interface is if each signal is delivered once - and only once - by
whatever mechanism (otherwise how would you be able to distinguish
between the same signal duplicated vs two signals which look very
similar?).  If you're really worried about losing a signal between poll
and read you can always make the fd nonblocking.

>  You can have both standard and file based devliery, or you can 
> not. Up to you. If you don't want to, you block them, otherwise you 
> don't. It's flexible, and the code is like 20 lines more, and race-free.
>   

OK, but if you allow the kernel to duplicate along different delivery
paths, usermode pretty much has to go to the effort of making sure
there's only one delivery path in order to keep track of how many
signals really appeared.

The only way to make duplication sane is to guarantee that if a signal
path *can* deliver a signal, it *will* deliver the signal, so that
usermode has some chance of correlating queued signals along each path.

But if you do that, then you end up with bad results.  If you have a
signal blocked, then it means you're obligated to keep the signal queued
forever in case the signal gets unblocked, even if usermode already got
it via a signalfd and has no intention of ever unblocking the signal. 
Similarly with a signalfd that never gets read.  And if you fill the
pending signal queue, do you start dropping signals?

> Since the siginfo needed to be delivered too, at that point doing it over 
> a read(2) would have messed up things [1], so I added a new syscall:
>
> int signalfd_dequeue(int fd, siginfo_t *info, long timeo);
>
> And the compat_ counter-part.
>
>
>
> [1] I thought about having a compat-free siginfo to be pulled from 
>     read(2), but that resulted to be messy, while we already have code to 
>     ship siginfos to userspace.
>   

Hm.  Yeah, that's tough.  If you have a 32-bit and 64-bit process
reading from the same signalfd source, would you expect them to return
the same format or different?


    J

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [patch 1/4] signalfd v1 - signalfd core ...
  2007-03-07 21:26               ` Jeremy Fitzhardinge
@ 2007-03-07 21:35                 ` Davide Libenzi
  2007-03-07 21:48                   ` Linus Torvalds
  2007-03-07 22:14                   ` Jeremy Fitzhardinge
  0 siblings, 2 replies; 20+ messages in thread
From: Davide Libenzi @ 2007-03-07 21:35 UTC (permalink / raw)
  To: Jeremy Fitzhardinge
  Cc: Linus Torvalds, Stephen Rothwell, Linux Kernel Mailing List,
	Andrew Morton

On Wed, 7 Mar 2007, Jeremy Fitzhardinge wrote:

> Davide Libenzi wrote:
> > If you think a signal as a generic event source, than you see how more 
> > instances can attach to it and receive them.
> > You can have multiple signalfd with whatever sigmasks, even intersecting. 
> > You can pass the fd around, w/out the fear that a standard signal delivery 
> > would race with you on dequeue_signal (making you block after you got a 
> > POLLIN).
> Well, I think using both this mechanism and normal signal delivery would
> be pretty much insane.  But if you do, then the only sane way to use the
> interface is if each signal is delivered once - and only once - by
> whatever mechanism (otherwise how would you be able to distinguish
> between the same signal duplicated vs two signals which look very
> similar?).  If you're really worried about losing a signal between poll
> and read you can always make the fd nonblocking.

You have the *choice* to do that:

1) You want standard delivery only:

   - Just dont use signalfd

2) you want signalfd only:

   - Do a sigprocmask(SIG_BLOCK) of the same mask you pass to signalfd

If you want both, you can have it. Race free.



> >  You can have both standard and file based devliery, or you can 
> > not. Up to you. If you don't want to, you block them, otherwise you 
> > don't. It's flexible, and the code is like 20 lines more, and race-free.
> >   
> 
> OK, but if you allow the kernel to duplicate along different delivery
> paths, usermode pretty much has to go to the effort of making sure
> there's only one delivery path in order to keep track of how many
> signals really appeared.
> 
> The only way to make duplication sane is to guarantee that if a signal
> path *can* deliver a signal, it *will* deliver the signal, so that
> usermode has some chance of correlating queued signals along each path.
> 
> But if you do that, then you end up with bad results.  If you have a
> signal blocked, then it means you're obligated to keep the signal queued
> forever in case the signal gets unblocked, even if usermode already got
> it via a signalfd and has no intention of ever unblocking the signal. 
> Similarly with a signalfd that never gets read.  And if you fill the
> pending signal queue, do you start dropping signals?

In the next patch set (v2), if a GFP_ATOMIC slab alloc fails in allocate a 
queue item, I incrememt a lost_sigs variable in the context, and poll(2) 
will return POLLERR.



> > Since the siginfo needed to be delivered too, at that point doing it over 
> > a read(2) would have messed up things [1], so I added a new syscall:
> >
> > int signalfd_dequeue(int fd, siginfo_t *info, long timeo);
> >
> > And the compat_ counter-part.
> >
> >
> >
> > [1] I thought about having a compat-free siginfo to be pulled from 
> >     read(2), but that resulted to be messy, while we already have code to 
> >     ship siginfos to userspace.
> >   
> 
> Hm.  Yeah, that's tough.  If you have a 32-bit and 64-bit process
> reading from the same signalfd source, would you expect them to return
> the same format or different?

The signalfd_dequeue() has a compat layer that uses "struct compat_siginfo".



- Davide



^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [patch 1/4] signalfd v1 - signalfd core ...
  2007-03-07 21:35                 ` Davide Libenzi
@ 2007-03-07 21:48                   ` Linus Torvalds
  2007-03-07 22:01                     ` Ulrich Drepper
  2007-03-07 22:01                     ` Davide Libenzi
  2007-03-07 22:14                   ` Jeremy Fitzhardinge
  1 sibling, 2 replies; 20+ messages in thread
From: Linus Torvalds @ 2007-03-07 21:48 UTC (permalink / raw)
  To: Davide Libenzi
  Cc: Jeremy Fitzhardinge, Stephen Rothwell, Linux Kernel Mailing List,
	Andrew Morton



On Wed, 7 Mar 2007, Davide Libenzi wrote:
> 
> You have the *choice* to do that:
> 
> 1) You want standard delivery only:
> 
>    - Just dont use signalfd
> 
> 2) you want signalfd only:
> 
>    - Do a sigprocmask(SIG_BLOCK) of the same mask you pass to signalfd
> 
> If you want both, you can have it. Race free.

.. but maybe with more code and lots of confusion. I'm still unclear on 
any upsides here. 

Choice is good, but only if it's *useful* choice.

		Linus

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [patch 1/4] signalfd v1 - signalfd core ...
  2007-03-07 21:48                   ` Linus Torvalds
@ 2007-03-07 22:01                     ` Ulrich Drepper
  2007-03-07 22:10                       ` Davide Libenzi
  2007-03-07 22:01                     ` Davide Libenzi
  1 sibling, 1 reply; 20+ messages in thread
From: Ulrich Drepper @ 2007-03-07 22:01 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Davide Libenzi, Jeremy Fitzhardinge, Stephen Rothwell,
	Linux Kernel Mailing List, Andrew Morton

On 3/7/07, Linus Torvalds <torvalds@linux-foundation.org> wrote:
> > 1) You want standard delivery only:
> >
> >    - Just dont use signalfd
> >
> > 2) you want signalfd only:
> >
> >    - Do a sigprocmask(SIG_BLOCK) of the same mask you pass to signalfd
> >
> > If you want both, you can have it. Race free.
>
> .. but maybe with more code and lots of confusion. I'm still unclear on
> any upsides here.
>
> Choice is good, but only if it's *useful* choice.

Not only that.  If you don't force that the signal is blocked when
using signalfd() you are bound to run into problems.  For the same
reason is it required to have signals blocked when you use sigwait()
etc.  Don't try to innovate here, I guarantee you it's going to break
something somewhere.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [patch 1/4] signalfd v1 - signalfd core ...
  2007-03-07 21:48                   ` Linus Torvalds
  2007-03-07 22:01                     ` Ulrich Drepper
@ 2007-03-07 22:01                     ` Davide Libenzi
  1 sibling, 0 replies; 20+ messages in thread
From: Davide Libenzi @ 2007-03-07 22:01 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Jeremy Fitzhardinge, Stephen Rothwell, Linux Kernel Mailing List,
	Andrew Morton

On Wed, 7 Mar 2007, Linus Torvalds wrote:

> 
> 
> On Wed, 7 Mar 2007, Davide Libenzi wrote:
> > 
> > You have the *choice* to do that:
> > 
> > 1) You want standard delivery only:
> > 
> >    - Just dont use signalfd
> > 
> > 2) you want signalfd only:
> > 
> >    - Do a sigprocmask(SIG_BLOCK) of the same mask you pass to signalfd
> > 
> > If you want both, you can have it. Race free.
> 
> .. but maybe with more code and lots of confusion. I'm still unclear on 
> any upsides here. 

Ok, given that you have to place the notifiers (you had a direct call to 
wake_up, same thing - but was not handling the "the sighand has been 
detched" case), your queue_signal() call translates to:

	sq = list_entry(ctx->squeue.next, struct signalfd_sq, lnk);
	list_del(&sq->lnk);

That's pretty much it. There is more code WRT your patch, but there are 
more things handled too (reuse of the fd, notify detachment, send siginfo).
And those has to be handled in any case.



- Davide



^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [patch 1/4] signalfd v1 - signalfd core ...
  2007-03-07 22:01                     ` Ulrich Drepper
@ 2007-03-07 22:10                       ` Davide Libenzi
  2007-03-16  5:16                         ` Ulrich Drepper
  0 siblings, 1 reply; 20+ messages in thread
From: Davide Libenzi @ 2007-03-07 22:10 UTC (permalink / raw)
  To: Ulrich Drepper
  Cc: Linus Torvalds, Jeremy Fitzhardinge, Stephen Rothwell,
	Linux Kernel Mailing List, Andrew Morton

On Wed, 7 Mar 2007, Ulrich Drepper wrote:

> On 3/7/07, Linus Torvalds <torvalds@linux-foundation.org> wrote:
> > > 1) You want standard delivery only:
> > >
> > >    - Just dont use signalfd
> > >
> > > 2) you want signalfd only:
> > >
> > >    - Do a sigprocmask(SIG_BLOCK) of the same mask you pass to signalfd
> > >
> > > If you want both, you can have it. Race free.
> > 
> > .. but maybe with more code and lots of confusion. I'm still unclear on
> > any upsides here.
> > 
> > Choice is good, but only if it's *useful* choice.
> 
> Not only that.  If you don't force that the signal is blocked when
> using signalfd() you are bound to run into problems.  For the same
> reason is it required to have signals blocked when you use sigwait()
> etc.  Don't try to innovate here, I guarantee you it's going to break
> something somewhere.

Let's do this. How about you throw this way one of the case that would 
possibly break, and I test it?
Queues are separeted, so if you have a signal hitting you while in 
signalfd_dequeue, either the signal is not for your set (and you'd get an 
EINTR ATM), or it is for your set too, and signalfd_dequeue will return 
with it.
But again, I'd love to have some insides about possible breakages so that 
I can make test cases for them.



- Davide



^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [patch 1/4] signalfd v1 - signalfd core ...
  2007-03-07 21:35                 ` Davide Libenzi
  2007-03-07 21:48                   ` Linus Torvalds
@ 2007-03-07 22:14                   ` Jeremy Fitzhardinge
  2007-03-07 22:21                     ` Davide Libenzi
  1 sibling, 1 reply; 20+ messages in thread
From: Jeremy Fitzhardinge @ 2007-03-07 22:14 UTC (permalink / raw)
  To: Davide Libenzi
  Cc: Linus Torvalds, Stephen Rothwell, Linux Kernel Mailing List,
	Andrew Morton

Davide Libenzi wrote:
> You have the *choice* to do that:
>
> 1) You want standard delivery only:
>
>    - Just dont use signalfd
>
> 2) you want signalfd only:
>
>    - Do a sigprocmask(SIG_BLOCK) of the same mask you pass to signalfd
>
> If you want both, you can have it. Race free.
>   

It's only usefully race-free if you are guarantee that each signal gets
delivered once, by one path or the other.  Otherwise you get a
non-deterministic number of each signal actually delivered - what
earthly use is that to an application?

    J

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [patch 1/4] signalfd v1 - signalfd core ...
  2007-03-07 22:14                   ` Jeremy Fitzhardinge
@ 2007-03-07 22:21                     ` Davide Libenzi
  0 siblings, 0 replies; 20+ messages in thread
From: Davide Libenzi @ 2007-03-07 22:21 UTC (permalink / raw)
  To: Jeremy Fitzhardinge
  Cc: Linus Torvalds, Stephen Rothwell, Linux Kernel Mailing List,
	Andrew Morton

On Wed, 7 Mar 2007, Jeremy Fitzhardinge wrote:

> Davide Libenzi wrote:
> > You have the *choice* to do that:
> >
> > 1) You want standard delivery only:
> >
> >    - Just dont use signalfd
> >
> > 2) you want signalfd only:
> >
> >    - Do a sigprocmask(SIG_BLOCK) of the same mask you pass to signalfd
> >
> > If you want both, you can have it. Race free.
> >   
> 
> It's only usefully race-free if you are guarantee that each signal gets
> delivered once, by one path or the other.  Otherwise you get a
> non-deterministic number of each signal actually delivered - what
> earthly use is that to an application?

As I mentioned above, you do have the option to do that. With basically 
the same kernel-side code. I mean, I could add a flag to signalfd and do 
the blocking inside there, but that's just pushing stuff into the call.



- Davide



^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [patch 1/4] signalfd v1 - signalfd core ...
  2007-03-07 22:10                       ` Davide Libenzi
@ 2007-03-16  5:16                         ` Ulrich Drepper
  2007-03-16  6:31                           ` Davide Libenzi
  0 siblings, 1 reply; 20+ messages in thread
From: Ulrich Drepper @ 2007-03-16  5:16 UTC (permalink / raw)
  To: Davide Libenzi
  Cc: Linus Torvalds, Jeremy Fitzhardinge, Stephen Rothwell,
	Linux Kernel Mailing List, Andrew Morton

On 3/7/07, Davide Libenzi <davidel@xmailserver.org> wrote:
> Let's do this. How about you throw this way one of the case that would
> possibly break, and I test it?

Since you make such claims I assume your signalfd() implementation
considers a signal delivered once it is reported to an epoll() caller.
 Right?

This is not what you really want, at least not in all cases.  A signal
might be something you want to react on right away.  Unless
pthread_kill() is used it is delivered to the _process_ and not a
specific thread.  But this means if epoll() reports two events to one
thread calling epoll() (one of them being a signal) and this thread is
then stuck processing the other request, the signal is not handled
even though there might be a second or third thread available to
receive the signal.  Those threads have the same right to receive the
signal and the current implementation always looks for the
best/fastest way to deliver the signal.

This means to me that reporting the signal in epoll() does _not_ mark
the signal as handled.  Somehow (probably using the signalfd()
descriptor) the thread must explicitly request the signal to be
delivered.  But if you do this the epoll() handling is fantastically
racy if the signal is not blocked.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [patch 1/4] signalfd v1 - signalfd core ...
  2007-03-16  5:16                         ` Ulrich Drepper
@ 2007-03-16  6:31                           ` Davide Libenzi
  0 siblings, 0 replies; 20+ messages in thread
From: Davide Libenzi @ 2007-03-16  6:31 UTC (permalink / raw)
  To: Ulrich Drepper
  Cc: Linus Torvalds, Jeremy Fitzhardinge, Stephen Rothwell,
	Linux Kernel Mailing List, Andrew Morton

On Thu, 15 Mar 2007, Ulrich Drepper wrote:

> On 3/7/07, Davide Libenzi <davidel@xmailserver.org> wrote:
> > Let's do this. How about you throw this way one of the case that would
> > possibly break, and I test it?
> 
> Since you make such claims I assume your signalfd() implementation
> considers a signal delivered once it is reported to an epoll() caller.
> Right?

The wakeup phase in epoll does not mean the signal is delivered. That 
happens when the caller does a read(2) on the signalfd. The read(2) on the 
signalfd ends up in calling dequeue_signal(), the same function use by the 
kernel to spill out signals to deliver (they peek from the same queue).



> This is not what you really want, at least not in all cases.  A signal
> might be something you want to react on right away.  Unless
> pthread_kill() is used it is delivered to the _process_ and not a
> specific thread.  But this means if epoll() reports two events to one
> thread calling epoll() (one of them being a signal) and this thread is
> then stuck processing the other request, the signal is not handled
> even though there might be a second or third thread available to
> receive the signal.  Those threads have the same right to receive the
> signal and the current implementation always looks for the
> best/fastest way to deliver the signal.

The behaviour depends on the sigmask you pass to signalfd(). You can 
select signals that you want to handle in a standard way, and the ones 
that you want to handle with signalfd.
Typically programs using signalfd() do not want the asyncronous behaviour 
of signals at all (with all the limits you have in the handler), and the 
event dispatch loop never blocks by definition (otherwise you have more 
serious problems than a signal not delivered). They are also very likely 
to be single threaded.



> This means to me that reporting the signal in epoll() does _not_ mark
> the signal as handled.  Somehow (probably using the signalfd()
> descriptor) the thread must explicitly request the signal to be
> delivered.  But if you do this the epoll() handling is fantastically
> racy if the signal is not blocked.

As I said, when a signal hits send_signal (or the queued versions), a 
wakeup is done on the wait queue head poll (or select/epoll) is sleeping 
on. This ends up delivring a POLLIN, but the signal is not fetched (by the 
mean of dequeue_signal) until a read(2) on the signalfd is done.
Since both standard delivery and signalfd's read(2) fish from the same 
queue, you have to block the signals that you want to have the guarantee 
to be able to fetch with a read(2) (signalfd supports O_NONBLOCK also).
If you do not block the signal, you get a wakeup, but you may not find a 
signal to dequeue at the next read(2), because a standard delivery might 
have stole the signal by preceeding you in dequeue_signal.



- Davide



^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2007-03-16  6:35 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-03-07  1:36 [patch 1/4] signalfd v1 - signalfd core Davide Libenzi
2007-03-07  1:43 ` Davide Libenzi
2007-03-07  4:55 ` Stephen Rothwell
2007-03-07  7:11   ` Davide Libenzi
2007-03-07 12:38     ` Stephen Rothwell
2007-03-07 16:57       ` Linus Torvalds
2007-03-07 17:50         ` Davide Libenzi
2007-03-07 20:30           ` Jeremy Fitzhardinge
2007-03-07 20:56             ` Davide Libenzi
2007-03-07 21:26               ` Jeremy Fitzhardinge
2007-03-07 21:35                 ` Davide Libenzi
2007-03-07 21:48                   ` Linus Torvalds
2007-03-07 22:01                     ` Ulrich Drepper
2007-03-07 22:10                       ` Davide Libenzi
2007-03-16  5:16                         ` Ulrich Drepper
2007-03-16  6:31                           ` Davide Libenzi
2007-03-07 22:01                     ` Davide Libenzi
2007-03-07 22:14                   ` Jeremy Fitzhardinge
2007-03-07 22:21                     ` Davide Libenzi
2007-03-07 17:42       ` Davide Libenzi

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.