From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from mailman by lists.gnu.org with tmda-scanned (Exim 4.43) id 1KY5of-0000hp-NW for qemu-devel@nongnu.org; Tue, 26 Aug 2008 17:11:05 -0400 Received: from exim by lists.gnu.org with spam-scanned (Exim 4.43) id 1KY5oe-0000hJ-Lx for qemu-devel@nongnu.org; Tue, 26 Aug 2008 17:11:05 -0400 Received: from [199.232.76.173] (port=44644 helo=monty-python.gnu.org) by lists.gnu.org with esmtp (Exim 4.43) id 1KY5TJ-0003z1-83 for qemu-devel@nongnu.org; Tue, 26 Aug 2008 16:49:01 -0400 Received: from e5.ny.us.ibm.com ([32.97.182.145]:52130) by monty-python.gnu.org with esmtps (TLS-1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.60) (envelope-from ) id 1KY5TH-00064o-Qf for qemu-devel@nongnu.org; Tue, 26 Aug 2008 16:49:00 -0400 Received: from d01relay04.pok.ibm.com (d01relay04.pok.ibm.com [9.56.227.236]) by e5.ny.us.ibm.com (8.13.8/8.13.8) with ESMTP id m7QKjU0O022640 for ; Tue, 26 Aug 2008 16:45:30 -0400 Received: from d01av03.pok.ibm.com (d01av03.pok.ibm.com [9.56.224.217]) by d01relay04.pok.ibm.com (8.13.8/8.13.8/NCO v9.0) with ESMTP id m7QKjUqr236480 for ; Tue, 26 Aug 2008 16:45:30 -0400 Received: from d01av03.pok.ibm.com (loopback [127.0.0.1]) by d01av03.pok.ibm.com (8.12.11.20060308/8.13.3) with ESMTP id m7QKjUVT010005 for ; Tue, 26 Aug 2008 16:45:30 -0400 From: Anthony Liguori Date: Tue, 26 Aug 2008 15:44:46 -0500 Message-Id: <1219783486-20209-1-git-send-email-aliguori@us.ibm.com> Subject: [Qemu-devel] [PATCH][RFC] Use signalfd() to work around signal/select race Reply-To: qemu-devel@nongnu.org List-Id: qemu-devel.nongnu.org List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: qemu-devel@nongnu.org Cc: Anthony Liguori , Ian Jackson , Gerd Hoffman Here's my patch to work around the signal/select race. I think it cleans up the code quite nicely. While signalfd() is Linux-bias, I think it's a nice interface and the emulation isn't too bad. I've only lightly tested. I'm looking to see what people's reaction would be to this. Signed-off-by: Anthony Liguori diff --git a/Makefile b/Makefile index 9c219c3..e676900 100644 --- a/Makefile +++ b/Makefile @@ -177,7 +177,7 @@ QEMU_IMG_BLOCK_OBJS = $(BLOCK_OBJS) ifdef CONFIG_WIN32 QEMU_IMG_BLOCK_OBJS += qemu-img-block-raw-win32.o else -QEMU_IMG_BLOCK_OBJS += nbd.o qemu-img-block-raw-posix.o +QEMU_IMG_BLOCK_OBJS += nbd.o qemu-img-block-raw-posix.o compatfd.o endif ###################################################################### @@ -195,7 +195,7 @@ qemu-nbd-%.o: %.c $(CC) $(CFLAGS) $(CPPFLAGS) -DQEMU_NBD -c -o $@ $< qemu-nbd$(EXESUF): qemu-nbd.o qemu-nbd-nbd.o qemu-img-block.o \ - osdep.o qemu-nbd-block-raw-posix.o $(BLOCK_OBJS) + osdep.o qemu-nbd-block-raw-posix.o compatfd.o $(BLOCK_OBJS) $(CC) $(LDFLAGS) -o $@ $^ -lz $(LIBS) # dyngen host tool diff --git a/Makefile.target b/Makefile.target index 2464484..27a9eb6 100644 --- a/Makefile.target +++ b/Makefile.target @@ -476,7 +476,7 @@ OBJS=vl.o osdep.o monitor.o pci.o loader.o isa_mmio.o machine.o net-checksum.o ifdef CONFIG_WIN32 OBJS+=block-raw-win32.o else -OBJS+=block-raw-posix.o +OBJS+=block-raw-posix.o compatfd.o endif LIBS+=-lz diff --git a/block-raw-posix.c b/block-raw-posix.c index b1fc921..d5a4514 100644 --- a/block-raw-posix.c +++ b/block-raw-posix.c @@ -25,8 +25,10 @@ #if !defined(QEMU_IMG) && !defined(QEMU_NBD) #include "qemu-timer.h" #include "exec-all.h" +#include "qemu-char.h" #endif #include "block_int.h" +#include "compatfd.h" #include #ifdef CONFIG_AIO #include @@ -437,52 +439,12 @@ typedef struct RawAIOCB { int ret; } RawAIOCB; +static int aio_sig_fd = -1; static int aio_sig_num = SIGUSR2; static RawAIOCB *first_aio; /* AIO issued */ static int aio_initialized = 0; -static void aio_signal_handler(int signum) -{ -#if !defined(QEMU_IMG) && !defined(QEMU_NBD) - CPUState *env = cpu_single_env; - if (env) { - /* stop the currently executing cpu because a timer occured */ - cpu_interrupt(env, CPU_INTERRUPT_EXIT); -#ifdef USE_KQEMU - if (env->kqemu_enabled) { - kqemu_cpu_interrupt(env); - } -#endif - } -#endif -} - -void qemu_aio_init(void) -{ - struct sigaction act; - - aio_initialized = 1; - - sigfillset(&act.sa_mask); - act.sa_flags = 0; /* do not restart syscalls to interrupt select() */ - act.sa_handler = aio_signal_handler; - sigaction(aio_sig_num, &act, NULL); - -#if defined(__GLIBC__) && defined(__linux__) - { - /* XXX: aio thread exit seems to hang on RedHat 9 and this init - seems to fix the problem. */ - struct aioinit ai; - memset(&ai, 0, sizeof(ai)); - ai.aio_threads = 1; - ai.aio_num = 1; - ai.aio_idle_time = 365 * 100000; - aio_init(&ai); - } -#endif -} - -void qemu_aio_poll(void) +static void qemu_aio_poll(void *opaque) { RawAIOCB *acb, **pacb; int ret; @@ -523,49 +485,66 @@ void qemu_aio_poll(void) the_end: ; } +void qemu_aio_init(void) +{ + sigset_t mask; + + aio_initialized = 1; + + /* Make sure to block AIO signal */ + sigemptyset(&mask); + sigaddset(&mask, aio_sig_num); + sigprocmask(SIG_BLOCK, &mask, NULL); + + aio_sig_fd = qemu_signalfd(&mask); +#if !defined(QEMU_IMG) && !defined(QEMU_NBD) + qemu_set_fd_handler2(aio_sig_fd, NULL, qemu_aio_poll, NULL, NULL); +#endif + +#if defined(__GLIBC__) && defined(__linux__) + { + /* XXX: aio thread exit seems to hang on RedHat 9 and this init + seems to fix the problem. */ + struct aioinit ai; + memset(&ai, 0, sizeof(ai)); + ai.aio_threads = 1; + ai.aio_num = 1; + ai.aio_idle_time = 365 * 100000; + aio_init(&ai); + } +#endif +} + /* Wait for all IO requests to complete. */ void qemu_aio_flush(void) { - qemu_aio_wait_start(); - qemu_aio_poll(); + qemu_aio_poll(NULL); while (first_aio) { qemu_aio_wait(); } - qemu_aio_wait_end(); -} - -/* wait until at least one AIO was handled */ -static sigset_t wait_oset; - -void qemu_aio_wait_start(void) -{ - sigset_t set; - - if (!aio_initialized) - qemu_aio_init(); - sigemptyset(&set); - sigaddset(&set, aio_sig_num); - sigprocmask(SIG_BLOCK, &set, &wait_oset); } void qemu_aio_wait(void) { - sigset_t set; - int nb_sigs; + int ret; #if !defined(QEMU_IMG) && !defined(QEMU_NBD) if (qemu_bh_poll()) return; #endif - sigemptyset(&set); - sigaddset(&set, aio_sig_num); - sigwait(&set, &nb_sigs); - qemu_aio_poll(); -} -void qemu_aio_wait_end(void) -{ - sigprocmask(SIG_SETMASK, &wait_oset, NULL); + do { + fd_set rdfds; + + FD_ZERO(&rdfds); + FD_SET(aio_sig_fd, &rdfds); + + ret = select(aio_sig_fd + 1, &rdfds, NULL, NULL, NULL); + if (ret == -1 && errno == EINTR) + continue; + } while (ret == 0); + + qemu_aio_poll(NULL); } static RawAIOCB *raw_aio_setup(BlockDriverState *bs, @@ -703,18 +682,10 @@ void qemu_aio_init(void) { } -void qemu_aio_poll(void) -{ -} - void qemu_aio_flush(void) { } -void qemu_aio_wait_start(void) -{ -} - void qemu_aio_wait(void) { #if !defined(QEMU_IMG) && !defined(QEMU_NBD) @@ -722,10 +693,6 @@ void qemu_aio_wait(void) #endif } -void qemu_aio_wait_end(void) -{ -} - #endif /* CONFIG_AIO */ static void raw_close(BlockDriverState *bs) diff --git a/block-raw-win32.c b/block-raw-win32.c index 43d3f6c..7b88dcf 100644 --- a/block-raw-win32.c +++ b/block-raw-win32.c @@ -350,18 +350,10 @@ void qemu_aio_init(void) { } -void qemu_aio_poll(void) -{ -} - void qemu_aio_flush(void) { } -void qemu_aio_wait_start(void) -{ -} - void qemu_aio_wait(void) { #ifndef QEMU_IMG @@ -369,10 +361,6 @@ void qemu_aio_wait(void) #endif } -void qemu_aio_wait_end(void) -{ -} - BlockDriver bdrv_raw = { "raw", sizeof(BDRVRawState), diff --git a/block.c b/block.c index d02b705..844a785 100644 --- a/block.c +++ b/block.c @@ -1281,17 +1281,15 @@ static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num, BlockDriverAIOCB *acb; async_ret = NOT_DONE; - qemu_aio_wait_start(); acb = bdrv_aio_read(bs, sector_num, buf, nb_sectors, bdrv_rw_em_cb, &async_ret); - if (acb == NULL) { - qemu_aio_wait_end(); + if (acb == NULL) return -1; - } + while (async_ret == NOT_DONE) { qemu_aio_wait(); } - qemu_aio_wait_end(); + return async_ret; } @@ -1302,17 +1300,13 @@ static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num, BlockDriverAIOCB *acb; async_ret = NOT_DONE; - qemu_aio_wait_start(); acb = bdrv_aio_write(bs, sector_num, buf, nb_sectors, bdrv_rw_em_cb, &async_ret); - if (acb == NULL) { - qemu_aio_wait_end(); + if (acb == NULL) return -1; - } while (async_ret == NOT_DONE) { qemu_aio_wait(); } - qemu_aio_wait_end(); return async_ret; } diff --git a/block.h b/block.h index fa741b5..0443585 100644 --- a/block.h +++ b/block.h @@ -90,11 +90,8 @@ BlockDriverAIOCB *bdrv_aio_write(BlockDriverState *bs, int64_t sector_num, void bdrv_aio_cancel(BlockDriverAIOCB *acb); void qemu_aio_init(void); -void qemu_aio_poll(void); void qemu_aio_flush(void); -void qemu_aio_wait_start(void); void qemu_aio_wait(void); -void qemu_aio_wait_end(void); int qemu_key_check(BlockDriverState *bs, const char *name); diff --git a/compatfd.c b/compatfd.c new file mode 100644 index 0000000..46b0ae7 --- /dev/null +++ b/compatfd.c @@ -0,0 +1,128 @@ +/* + * signalfd/eventfd compatibility + * + * Copyright IBM, Corp. 2008 + * + * Authors: + * Anthony Liguori + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ + +#include "qemu-common.h" +#include "compatfd.h" + +#include +#include + +struct sigfd_compat_info +{ + sigset_t mask; + int fd; +}; + +static void *sigwait_compat(void *opaque) +{ + struct sigfd_compat_info *info = opaque; + int err; + sigset_t all; + + sigfillset(&all); + sigprocmask(SIG_BLOCK, &all, NULL); + + do { + siginfo_t siginfo; + + err = sigwaitinfo(&info->mask, &siginfo); + if (err == -1 && errno == EINTR) { + err = 0; + continue; + } + + if (err > 0) { + char buffer[128]; + size_t offset = 0; + + memcpy(buffer, &err, sizeof(err)); + while (offset < sizeof(buffer)) { + ssize_t len; + + len = write(info->fd, buffer + offset, + sizeof(buffer) - offset); + if (len == -1 && errno == EINTR) + continue; + + if (len <= 0) { + err = -1; + break; + } + + offset += len; + } + } + } while (err >= 0); + + return NULL; +} + +static int qemu_signalfd_compat(const sigset_t *mask) +{ + pthread_attr_t attr; + pthread_t tid; + struct sigfd_compat_info *info; + int fds[2]; + + info = malloc(sizeof(*info)); + if (info == NULL) { + errno = ENOMEM; + return -1; + } + + if (pipe(fds) == -1) { + free(info); + return -1; + } + + memcpy(&info->mask, mask, sizeof(*mask)); + info->fd = fds[1]; + + pthread_attr_init(&attr); + pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED); + + pthread_create(&tid, &attr, sigwait_compat, info); + + pthread_attr_destroy(&attr); + + return fds[0]; +} + +int qemu_signalfd(const sigset_t *mask) +{ +#if defined(SYS_signalfd) + int ret; + + ret = syscall(SYS_signalfd, -1, mask, _NSIG / 8); + if (!(ret == -1 && errno == ENOSYS)) + return ret; +#endif + + return qemu_signalfd_compat(mask); +} + +int qemu_eventfd(int *fds) +{ +#if defined(SYS_eventfd) + int ret; + + ret = syscall(SYS_eventfd, 0); + if (ret >= 0) { + fds[0] = fds[1] = ret; + return 0; + } else if (!(ret == -1 && errno == ENOSYS)) + return ret; +#endif + + return pipe(fds); +} diff --git a/vl.c b/vl.c index 7ca8420..6f51d53 100644 --- a/vl.c +++ b/vl.c @@ -7470,7 +7470,6 @@ void main_loop_wait(int timeout) slirp_select_poll(&rfds, &wfds, &xfds); } #endif - qemu_aio_poll(); if (vm_running) { if (likely(!(cur_cpu->singlestep_enabled & SSTEP_NOTIMER)))