qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH][RFC] Use signalfd() to work around signal/select race
@ 2008-08-26 20:44 Anthony Liguori
  2008-08-27  9:51 ` [Qemu-devel] " Ian Jackson
  0 siblings, 1 reply; 4+ messages in thread
From: Anthony Liguori @ 2008-08-26 20:44 UTC (permalink / raw)
  To: qemu-devel; +Cc: Anthony Liguori, Ian Jackson, Gerd Hoffman

Here's my patch to work around the signal/select race.  I think it cleans up
the code quite nicely.  While signalfd() is Linux-bias, I think it's a nice
interface and the emulation isn't too bad.

I've only lightly tested.  I'm looking to see what people's reaction would be
to this.

Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>

diff --git a/Makefile b/Makefile
index 9c219c3..e676900 100644
--- a/Makefile
+++ b/Makefile
@@ -177,7 +177,7 @@ QEMU_IMG_BLOCK_OBJS = $(BLOCK_OBJS)
 ifdef CONFIG_WIN32
 QEMU_IMG_BLOCK_OBJS += qemu-img-block-raw-win32.o
 else
-QEMU_IMG_BLOCK_OBJS += nbd.o qemu-img-block-raw-posix.o
+QEMU_IMG_BLOCK_OBJS += nbd.o qemu-img-block-raw-posix.o compatfd.o
 endif
 
 ######################################################################
@@ -195,7 +195,7 @@ qemu-nbd-%.o: %.c
 	$(CC) $(CFLAGS) $(CPPFLAGS) -DQEMU_NBD -c -o $@ $<
 
 qemu-nbd$(EXESUF):  qemu-nbd.o qemu-nbd-nbd.o qemu-img-block.o \
-		    osdep.o qemu-nbd-block-raw-posix.o $(BLOCK_OBJS)
+		    osdep.o qemu-nbd-block-raw-posix.o compatfd.o $(BLOCK_OBJS)
 	$(CC) $(LDFLAGS) -o $@ $^ -lz $(LIBS)
 
 # dyngen host tool
diff --git a/Makefile.target b/Makefile.target
index 2464484..27a9eb6 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -476,7 +476,7 @@ OBJS=vl.o osdep.o monitor.o pci.o loader.o isa_mmio.o machine.o net-checksum.o
 ifdef CONFIG_WIN32
 OBJS+=block-raw-win32.o
 else
-OBJS+=block-raw-posix.o
+OBJS+=block-raw-posix.o compatfd.o
 endif
 
 LIBS+=-lz
diff --git a/block-raw-posix.c b/block-raw-posix.c
index b1fc921..d5a4514 100644
--- a/block-raw-posix.c
+++ b/block-raw-posix.c
@@ -25,8 +25,10 @@
 #if !defined(QEMU_IMG) && !defined(QEMU_NBD)
 #include "qemu-timer.h"
 #include "exec-all.h"
+#include "qemu-char.h"
 #endif
 #include "block_int.h"
+#include "compatfd.h"
 #include <assert.h>
 #ifdef CONFIG_AIO
 #include <aio.h>
@@ -437,52 +439,12 @@ typedef struct RawAIOCB {
     int ret;
 } RawAIOCB;
 
+static int aio_sig_fd = -1;
 static int aio_sig_num = SIGUSR2;
 static RawAIOCB *first_aio; /* AIO issued */
 static int aio_initialized = 0;
 
-static void aio_signal_handler(int signum)
-{
-#if !defined(QEMU_IMG) && !defined(QEMU_NBD)
-    CPUState *env = cpu_single_env;
-    if (env) {
-        /* stop the currently executing cpu because a timer occured */
-        cpu_interrupt(env, CPU_INTERRUPT_EXIT);
-#ifdef USE_KQEMU
-        if (env->kqemu_enabled) {
-            kqemu_cpu_interrupt(env);
-        }
-#endif
-    }
-#endif
-}
-
-void qemu_aio_init(void)
-{
-    struct sigaction act;
-
-    aio_initialized = 1;
-
-    sigfillset(&act.sa_mask);
-    act.sa_flags = 0; /* do not restart syscalls to interrupt select() */
-    act.sa_handler = aio_signal_handler;
-    sigaction(aio_sig_num, &act, NULL);
-
-#if defined(__GLIBC__) && defined(__linux__)
-    {
-        /* XXX: aio thread exit seems to hang on RedHat 9 and this init
-           seems to fix the problem. */
-        struct aioinit ai;
-        memset(&ai, 0, sizeof(ai));
-        ai.aio_threads = 1;
-        ai.aio_num = 1;
-        ai.aio_idle_time = 365 * 100000;
-        aio_init(&ai);
-    }
-#endif
-}
-
-void qemu_aio_poll(void)
+static void qemu_aio_poll(void *opaque)
 {
     RawAIOCB *acb, **pacb;
     int ret;
@@ -523,49 +485,66 @@ void qemu_aio_poll(void)
  the_end: ;
 }
 
+void qemu_aio_init(void)
+{
+    sigset_t mask;
+
+    aio_initialized = 1;
+
+    /* Make sure to block AIO signal */
+    sigemptyset(&mask);
+    sigaddset(&mask, aio_sig_num);
+    sigprocmask(SIG_BLOCK, &mask, NULL);
+    
+    aio_sig_fd = qemu_signalfd(&mask);
+#if !defined(QEMU_IMG) && !defined(QEMU_NBD)
+    qemu_set_fd_handler2(aio_sig_fd, NULL, qemu_aio_poll, NULL, NULL);
+#endif
+
+#if defined(__GLIBC__) && defined(__linux__)
+    {
+        /* XXX: aio thread exit seems to hang on RedHat 9 and this init
+           seems to fix the problem. */
+        struct aioinit ai;
+        memset(&ai, 0, sizeof(ai));
+        ai.aio_threads = 1;
+        ai.aio_num = 1;
+        ai.aio_idle_time = 365 * 100000;
+        aio_init(&ai);
+    }
+#endif
+}
+
 /* Wait for all IO requests to complete.  */
 void qemu_aio_flush(void)
 {
-    qemu_aio_wait_start();
-    qemu_aio_poll();
+    qemu_aio_poll(NULL);
     while (first_aio) {
         qemu_aio_wait();
     }
-    qemu_aio_wait_end();
-}
-
-/* wait until at least one AIO was handled */
-static sigset_t wait_oset;
-
-void qemu_aio_wait_start(void)
-{
-    sigset_t set;
-
-    if (!aio_initialized)
-        qemu_aio_init();
-    sigemptyset(&set);
-    sigaddset(&set, aio_sig_num);
-    sigprocmask(SIG_BLOCK, &set, &wait_oset);
 }
 
 void qemu_aio_wait(void)
 {
-    sigset_t set;
-    int nb_sigs;
+    int ret;
 
 #if !defined(QEMU_IMG) && !defined(QEMU_NBD)
     if (qemu_bh_poll())
         return;
 #endif
-    sigemptyset(&set);
-    sigaddset(&set, aio_sig_num);
-    sigwait(&set, &nb_sigs);
-    qemu_aio_poll();
-}
 
-void qemu_aio_wait_end(void)
-{
-    sigprocmask(SIG_SETMASK, &wait_oset, NULL);
+    do {
+        fd_set rdfds;
+
+        FD_ZERO(&rdfds);
+        FD_SET(aio_sig_fd, &rdfds);
+
+        ret = select(aio_sig_fd + 1, &rdfds, NULL, NULL, NULL);
+        if (ret == -1 && errno == EINTR)
+            continue;
+    } while (ret == 0);
+
+    qemu_aio_poll(NULL);
 }
 
 static RawAIOCB *raw_aio_setup(BlockDriverState *bs,
@@ -703,18 +682,10 @@ void qemu_aio_init(void)
 {
 }
 
-void qemu_aio_poll(void)
-{
-}
-
 void qemu_aio_flush(void)
 {
 }
 
-void qemu_aio_wait_start(void)
-{
-}
-
 void qemu_aio_wait(void)
 {
 #if !defined(QEMU_IMG) && !defined(QEMU_NBD)
@@ -722,10 +693,6 @@ void qemu_aio_wait(void)
 #endif
 }
 
-void qemu_aio_wait_end(void)
-{
-}
-
 #endif /* CONFIG_AIO */
 
 static void raw_close(BlockDriverState *bs)
diff --git a/block-raw-win32.c b/block-raw-win32.c
index 43d3f6c..7b88dcf 100644
--- a/block-raw-win32.c
+++ b/block-raw-win32.c
@@ -350,18 +350,10 @@ void qemu_aio_init(void)
 {
 }
 
-void qemu_aio_poll(void)
-{
-}
-
 void qemu_aio_flush(void)
 {
 }
 
-void qemu_aio_wait_start(void)
-{
-}
-
 void qemu_aio_wait(void)
 {
 #ifndef QEMU_IMG
@@ -369,10 +361,6 @@ void qemu_aio_wait(void)
 #endif
 }
 
-void qemu_aio_wait_end(void)
-{
-}
-
 BlockDriver bdrv_raw = {
     "raw",
     sizeof(BDRVRawState),
diff --git a/block.c b/block.c
index d02b705..844a785 100644
--- a/block.c
+++ b/block.c
@@ -1281,17 +1281,15 @@ static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
     BlockDriverAIOCB *acb;
 
     async_ret = NOT_DONE;
-    qemu_aio_wait_start();
     acb = bdrv_aio_read(bs, sector_num, buf, nb_sectors,
                         bdrv_rw_em_cb, &async_ret);
-    if (acb == NULL) {
-        qemu_aio_wait_end();
+    if (acb == NULL)
         return -1;
-    }
+
     while (async_ret == NOT_DONE) {
         qemu_aio_wait();
     }
-    qemu_aio_wait_end();
+
     return async_ret;
 }
 
@@ -1302,17 +1300,13 @@ static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
     BlockDriverAIOCB *acb;
 
     async_ret = NOT_DONE;
-    qemu_aio_wait_start();
     acb = bdrv_aio_write(bs, sector_num, buf, nb_sectors,
                          bdrv_rw_em_cb, &async_ret);
-    if (acb == NULL) {
-        qemu_aio_wait_end();
+    if (acb == NULL)
         return -1;
-    }
     while (async_ret == NOT_DONE) {
         qemu_aio_wait();
     }
-    qemu_aio_wait_end();
     return async_ret;
 }
 
diff --git a/block.h b/block.h
index fa741b5..0443585 100644
--- a/block.h
+++ b/block.h
@@ -90,11 +90,8 @@ BlockDriverAIOCB *bdrv_aio_write(BlockDriverState *bs, int64_t sector_num,
 void bdrv_aio_cancel(BlockDriverAIOCB *acb);
 
 void qemu_aio_init(void);
-void qemu_aio_poll(void);
 void qemu_aio_flush(void);
-void qemu_aio_wait_start(void);
 void qemu_aio_wait(void);
-void qemu_aio_wait_end(void);
 
 int qemu_key_check(BlockDriverState *bs, const char *name);
 
diff --git a/compatfd.c b/compatfd.c
new file mode 100644
index 0000000..46b0ae7
--- /dev/null
+++ b/compatfd.c
@@ -0,0 +1,128 @@
+/*
+ * signalfd/eventfd compatibility
+ *
+ * Copyright IBM, Corp. 2008
+ *
+ * Authors:
+ *  Anthony Liguori   <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu-common.h"
+#include "compatfd.h"
+
+#include <sys/syscall.h>
+#include <pthread.h>
+
+struct sigfd_compat_info
+{
+    sigset_t mask;
+    int fd;
+};
+
+static void *sigwait_compat(void *opaque)
+{
+    struct sigfd_compat_info *info = opaque;
+    int err;
+    sigset_t all;
+
+    sigfillset(&all);
+    sigprocmask(SIG_BLOCK, &all, NULL);
+
+    do {
+	siginfo_t siginfo;
+
+	err = sigwaitinfo(&info->mask, &siginfo);
+	if (err == -1 && errno == EINTR) {
+            err = 0;
+            continue;
+        }
+
+	if (err > 0) {
+	    char buffer[128];
+	    size_t offset = 0;
+
+	    memcpy(buffer, &err, sizeof(err));
+	    while (offset < sizeof(buffer)) {
+		ssize_t len;
+
+		len = write(info->fd, buffer + offset,
+			    sizeof(buffer) - offset);
+		if (len == -1 && errno == EINTR)
+		    continue;
+
+		if (len <= 0) {
+		    err = -1;
+		    break;
+		}
+
+		offset += len;
+	    }
+	}
+    } while (err >= 0);
+
+    return NULL;
+}
+
+static int qemu_signalfd_compat(const sigset_t *mask)
+{
+    pthread_attr_t attr;
+    pthread_t tid;
+    struct sigfd_compat_info *info;
+    int fds[2];
+
+    info = malloc(sizeof(*info));
+    if (info == NULL) {
+	errno = ENOMEM;
+	return -1;
+    }
+
+    if (pipe(fds) == -1) {
+	free(info);
+	return -1;
+    }
+
+    memcpy(&info->mask, mask, sizeof(*mask));
+    info->fd = fds[1];
+
+    pthread_attr_init(&attr);
+    pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
+
+    pthread_create(&tid, &attr, sigwait_compat, info);
+
+    pthread_attr_destroy(&attr);
+
+    return fds[0];
+}
+
+int qemu_signalfd(const sigset_t *mask)
+{
+#if defined(SYS_signalfd)
+    int ret;
+
+    ret = syscall(SYS_signalfd, -1, mask, _NSIG / 8);
+    if (!(ret == -1 && errno == ENOSYS))
+	return ret;
+#endif
+
+    return qemu_signalfd_compat(mask);
+}
+
+int qemu_eventfd(int *fds)
+{
+#if defined(SYS_eventfd)
+    int ret;
+
+    ret = syscall(SYS_eventfd, 0);
+    if (ret >= 0) {
+	fds[0] = fds[1] = ret;
+	return 0;
+    } else if (!(ret == -1 && errno == ENOSYS))
+	return ret;
+#endif
+
+    return pipe(fds);
+}
diff --git a/vl.c b/vl.c
index 7ca8420..6f51d53 100644
--- a/vl.c
+++ b/vl.c
@@ -7470,7 +7470,6 @@ void main_loop_wait(int timeout)
         slirp_select_poll(&rfds, &wfds, &xfds);
     }
 #endif
-    qemu_aio_poll();
 
     if (vm_running) {
         if (likely(!(cur_cpu->singlestep_enabled & SSTEP_NOTIMER)))

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [Qemu-devel] Re: [PATCH][RFC] Use signalfd() to work around signal/select race
  2008-08-26 20:44 [Qemu-devel] [PATCH][RFC] Use signalfd() to work around signal/select race Anthony Liguori
@ 2008-08-27  9:51 ` Ian Jackson
  2008-08-27 14:06   ` Anthony Liguori
  0 siblings, 1 reply; 4+ messages in thread
From: Ian Jackson @ 2008-08-27  9:51 UTC (permalink / raw)
  To: Anthony Liguori; +Cc: qemu-devel, Gerd Hoffman

Anthony Liguori writes ("[PATCH][RFC] Use signalfd() to work around signal/select race"):
> Here's my patch to work around the signal/select race.  I think it cleans up
> the code quite nicely.  While signalfd() is Linux-bias, I think it's a nice
> interface and the emulation isn't too bad.
> 
> I've only lightly tested.  I'm looking to see what people's reaction would be
> to this.

For the avoidance of any doubt, following my criticisms of Anthony's
approach: I think it's essential that we have at least one of
Anthony's or my fixes to this problem.

Obviously I prefer my patch because I wrote it, but other people might
prefer it because it's textually much smaller, closer to the existing
code, and doesn't use threads.

Also, my patch has been pretty thoroughly tested, including some
performance stress testing, as part of the Xen 3.3.0 release.

Ian.

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [Qemu-devel] Re: [PATCH][RFC] Use signalfd() to work around signal/select race
  2008-08-27  9:51 ` [Qemu-devel] " Ian Jackson
@ 2008-08-27 14:06   ` Anthony Liguori
  2008-08-27 14:44     ` Ian Jackson
  0 siblings, 1 reply; 4+ messages in thread
From: Anthony Liguori @ 2008-08-27 14:06 UTC (permalink / raw)
  To: Ian Jackson; +Cc: qemu-devel, Gerd Hoffman

Ian Jackson wrote:
> Anthony Liguori writes ("[PATCH][RFC] Use signalfd() to work around signal/select race"):
>   
>> Here's my patch to work around the signal/select race.  I think it cleans up
>> the code quite nicely.  While signalfd() is Linux-bias, I think it's a nice
>> interface and the emulation isn't too bad.
>>
>> I've only lightly tested.  I'm looking to see what people's reaction would be
>> to this.
>>     
>
> For the avoidance of any doubt, following my criticisms of Anthony's
> approach: I think it's essential that we have at least one of
> Anthony's or my fixes to this problem.
>
> Obviously I prefer my patch because I wrote it, but other people might
> prefer it because it's textually much smaller, closer to the existing
> code, and doesn't use threads.
>
> Also, my patch has been pretty thoroughly tested, including some
> performance stress testing, as part of the Xen 3.3.0 release.
>   

FWIW, I still don't see your patch.  It's not on gmane so I don't think 
it's a problem on my side.

Regards,

Anthony Liguori

> Ian.
>   

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [Qemu-devel] Re: [PATCH][RFC] Use signalfd() to work around signal/select race
  2008-08-27 14:06   ` Anthony Liguori
@ 2008-08-27 14:44     ` Ian Jackson
  0 siblings, 0 replies; 4+ messages in thread
From: Ian Jackson @ 2008-08-27 14:44 UTC (permalink / raw)
  To: Anthony Liguori; +Cc: qemu-devel, Gerd Hoffman

Anthony Liguori writes ("Re: [PATCH][RFC] Use signalfd() to work around signal/select race"):
> FWIW, I still don't see your patch.  It's not on gmane so I don't think 
> it's a problem on my side.

Thanks for pointing that out.  git-format-patch had written a random
hex number in the From_ line.  formail didn't strip it out as I was
hoping.  exim took it as the envelope sender.  Presumably Citrix's M$
mail system then offered it to lists.nongnu.org which rejected it, and
probably Citrix's system then discarded it.  Adding -f to my sendmail
invocation seems to have done the trick.

Ian.

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2008-08-27 14:44 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-08-26 20:44 [Qemu-devel] [PATCH][RFC] Use signalfd() to work around signal/select race Anthony Liguori
2008-08-27  9:51 ` [Qemu-devel] " Ian Jackson
2008-08-27 14:06   ` Anthony Liguori
2008-08-27 14:44     ` Ian Jackson

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).