* [PATCH 1/6] Add support for eventfd() (v3)
@ 2008-05-07 16:55 Anthony Liguori
2008-05-07 16:55 ` [PATCH 2/6] Replace SIGUSR1 in io-thread with " Anthony Liguori
` (5 more replies)
0 siblings, 6 replies; 7+ messages in thread
From: Anthony Liguori @ 2008-05-07 16:55 UTC (permalink / raw)
To: kvm-devel; +Cc: Anthony Liguori, Marcelo Tosatti, Avi Kivity
This patch adds compatibility code so that we can make use of eventfd() within
QEMU. eventfd() is a pretty useful mechanism as it allows multiple
notifications to be batched in a single system call.
We emulate eventfd() using a standard pipe().
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
diff --git a/qemu/Makefile.target b/qemu/Makefile.target
index bb4b9a3..46654f3 100644
--- a/qemu/Makefile.target
+++ b/qemu/Makefile.target
@@ -208,7 +208,7 @@ CPPFLAGS+=-I$(SRC_PATH)/tcg/sparc
endif
ifeq ($(USE_KVM), 1)
-LIBOBJS+=qemu-kvm.o
+LIBOBJS+=qemu-kvm.o kvm-compatfd.o
endif
ifdef CONFIG_SOFTFLOAT
LIBOBJS+=fpu/softfloat.o
diff --git a/qemu/kvm-compatfd.c b/qemu/kvm-compatfd.c
new file mode 100644
index 0000000..1b030ba
--- /dev/null
+++ b/qemu/kvm-compatfd.c
@@ -0,0 +1,33 @@
+/*
+ * signalfd/eventfd compatibility
+ *
+ * Copyright IBM, Corp. 2008
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu-common.h"
+#include "qemu-kvm.h"
+
+#include <sys/syscall.h>
+
+int kvm_eventfd(int *fds)
+{
+#if defined(SYS_eventfd)
+ int ret;
+
+ ret = syscall(SYS_eventfd, 0);
+ if (ret >= 0) {
+ fds[0] = fds[1] = ret;
+ return 0;
+ } else if (!(ret == -1 && errno == ENOSYS))
+ return ret;
+#endif
+
+ return pipe(fds);
+}
diff --git a/qemu/qemu-kvm.h b/qemu/qemu-kvm.h
index 024a653..8fa3c1b 100644
--- a/qemu/qemu-kvm.h
+++ b/qemu/qemu-kvm.h
@@ -79,6 +79,8 @@ int handle_powerpc_dcr_read(int vcpu, uint32_t dcrn, uint32_t *data);
int handle_powerpc_dcr_write(int vcpu,uint32_t dcrn, uint32_t data);
#endif
+int kvm_eventfd(int *fds);
+
#define ALIGN(x, y) (((x)+(y)-1) & ~((y)-1))
#define BITMAP_SIZE(m) (ALIGN(((m)>>TARGET_PAGE_BITS), HOST_LONG_BITS) / 8)
-------------------------------------------------------------------------
This SF.net email is sponsored by the 2008 JavaOne(SM) Conference
Don't miss this year's exciting event. There's still time to save $100.
Use priority code J8TL2D2.
http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 2/6] Replace SIGUSR1 in io-thread with eventfd() (v3)
2008-05-07 16:55 [PATCH 1/6] Add support for eventfd() (v3) Anthony Liguori
@ 2008-05-07 16:55 ` Anthony Liguori
2008-05-07 16:55 ` [PATCH 3/6] Add support for signalfd() (v3) Anthony Liguori
` (4 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: Anthony Liguori @ 2008-05-07 16:55 UTC (permalink / raw)
To: kvm-devel; +Cc: Anthony Liguori, Marcelo Tosatti, Avi Kivity
It's a little odd to use signals to raise a notification on a file descriptor
when we can just work directly with a file descriptor instead. This patch
converts the SIGUSR1 based notification in the io-thread to instead use an
eventfd file descriptor.
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
diff --git a/qemu/qemu-kvm.c b/qemu/qemu-kvm.c
index 9a9bf59..7134e56 100644
--- a/qemu/qemu-kvm.c
+++ b/qemu/qemu-kvm.c
@@ -15,6 +15,8 @@ int kvm_pit = 1;
#include <string.h>
#include "hw/hw.h"
#include "sysemu.h"
+#include "qemu-common.h"
+#include "console.h"
#include "qemu-kvm.h"
#include <libkvm.h>
@@ -61,6 +63,7 @@ struct vcpu_info {
} vcpu_info[256];
pthread_t io_thread;
+static int io_thread_fd = -1;
static inline unsigned long kvm_get_thread_id(void)
{
@@ -213,7 +216,7 @@ static int kvm_eat_signal(struct qemu_kvm_signal_table *waitset, CPUState *env,
if (env && vcpu_info[env->cpu_index].stop) {
vcpu_info[env->cpu_index].stop = 0;
vcpu_info[env->cpu_index].stopped = 1;
- pthread_kill(io_thread, SIGUSR1);
+ qemu_kvm_notify_work();
}
pthread_mutex_unlock(&qemu_mutex);
@@ -418,7 +421,6 @@ static void qemu_kvm_init_signal_tables(void)
kvm_add_signal(&io_signal_table, SIGIO);
kvm_add_signal(&io_signal_table, SIGALRM);
- kvm_add_signal(&io_signal_table, SIGUSR1);
kvm_add_signal(&io_signal_table, SIGUSR2);
kvm_add_signal(&vcpu_signal_table, SIG_IPI);
@@ -440,8 +442,51 @@ int kvm_init_ap(void)
void qemu_kvm_notify_work(void)
{
- if (io_thread)
- pthread_kill(io_thread, SIGUSR1);
+ uint64_t value = 1;
+ char buffer[8];
+ size_t offset = 0;
+
+ if (io_thread_fd == -1)
+ return;
+
+ memcpy(buffer, &value, sizeof(value));
+
+ while (offset < 8) {
+ ssize_t len;
+
+ len = write(io_thread_fd, buffer + offset, 8 - offset);
+ if (len == -1 && errno == EINTR)
+ continue;
+
+ if (len <= 0)
+ break;
+
+ offset += len;
+ }
+
+ if (offset != 8)
+ fprintf(stderr, "failed to notify io thread\n");
+}
+
+/* Used to break IO thread out of select */
+static void io_thread_wakeup(void *opaque)
+{
+ int fd = (unsigned long)opaque;
+ char buffer[8];
+ size_t offset = 0;
+
+ while (offset < 8) {
+ ssize_t len;
+
+ len = read(fd, buffer + offset, 8 - offset);
+ if (len == -1 && errno == EINTR)
+ continue;
+
+ if (len <= 0)
+ break;
+
+ offset += len;
+ }
}
/*
@@ -452,8 +497,20 @@ void qemu_kvm_notify_work(void)
int kvm_main_loop(void)
{
+ int fds[2];
+
io_thread = pthread_self();
qemu_system_ready = 1;
+
+ if (kvm_eventfd(fds) == -1) {
+ fprintf(stderr, "failed to create eventfd\n");
+ return -errno;
+ }
+
+ qemu_set_fd_handler2(fds[0], NULL, io_thread_wakeup, NULL,
+ (void *)(unsigned long)fds[0]);
+
+ io_thread_fd = fds[1];
pthread_mutex_unlock(&qemu_mutex);
pthread_cond_broadcast(&qemu_system_cond);
-------------------------------------------------------------------------
This SF.net email is sponsored by the 2008 JavaOne(SM) Conference
Don't miss this year's exciting event. There's still time to save $100.
Use priority code J8TL2D2.
http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 3/6] Add support for signalfd() (v3)
2008-05-07 16:55 [PATCH 1/6] Add support for eventfd() (v3) Anthony Liguori
2008-05-07 16:55 ` [PATCH 2/6] Replace SIGUSR1 in io-thread with " Anthony Liguori
@ 2008-05-07 16:55 ` Anthony Liguori
2008-05-07 16:55 ` [PATCH 4/6] Use signalfd() in io-thread (v3) Anthony Liguori
` (3 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: Anthony Liguori @ 2008-05-07 16:55 UTC (permalink / raw)
To: kvm-devel; +Cc: Anthony Liguori, Marcelo Tosatti, Avi Kivity
This patch adds compatibility code so that we can use signalfd() within QEMU.
signalfd() provides a mechanism to receive signal notification through a
file descriptor. This is very useful in eliminating the signal/select race
condition.
If signalfd() isn't available, we spawn a thread that uses sigwaitinfo() to
emulate it.
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
diff --git a/qemu/kvm-compatfd.c b/qemu/kvm-compatfd.c
index 1b030ba..b1311e2 100644
--- a/qemu/kvm-compatfd.c
+++ b/qemu/kvm-compatfd.c
@@ -15,6 +15,97 @@
#include "qemu-kvm.h"
#include <sys/syscall.h>
+#include <pthread.h>
+
+struct sigfd_compat_info
+{
+ sigset_t mask;
+ int fd;
+};
+
+static void *sigwait_compat(void *opaque)
+{
+ struct sigfd_compat_info *info = opaque;
+ int err;
+
+ sigprocmask(SIG_BLOCK, &info->mask, NULL);
+
+ do {
+ siginfo_t siginfo;
+
+ err = sigwaitinfo(&info->mask, &siginfo);
+ if (err == -1 && errno == EINTR)
+ continue;
+
+ if (err > 0) {
+ char buffer[128];
+ size_t offset = 0;
+
+ memcpy(buffer, &err, sizeof(err));
+ while (offset < sizeof(buffer)) {
+ ssize_t len;
+
+ len = write(info->fd, buffer + offset,
+ sizeof(buffer) - offset);
+ if (len == -1 && errno == EINTR)
+ continue;
+
+ if (len <= 0) {
+ err = -1;
+ break;
+ }
+
+ offset += len;
+ }
+ }
+ } while (err >= 0);
+
+ return NULL;
+}
+
+static int kvm_signalfd_compat(const sigset_t *mask)
+{
+ pthread_attr_t attr;
+ pthread_t tid;
+ struct sigfd_compat_info *info;
+ int fds[2];
+
+ info = malloc(sizeof(*info));
+ if (info == NULL) {
+ errno = ENOMEM;
+ return -1;
+ }
+
+ if (pipe(fds) == -1) {
+ free(info);
+ return -1;
+ }
+
+ memcpy(&info->mask, mask, sizeof(*mask));
+ info->fd = fds[1];
+
+ pthread_attr_init(&attr);
+ pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
+
+ pthread_create(&tid, &attr, sigwait_compat, info);
+
+ pthread_attr_destroy(&attr);
+
+ return fds[0];
+}
+
+int kvm_signalfd(const sigset_t *mask)
+{
+#if defined(SYS_signalfd)
+ int ret;
+
+ ret = syscall(SYS_signalfd, -1, mask, _NSIG / 8);
+ if (!(ret == -1 && errno == ENOSYS))
+ return ret;
+#endif
+
+ return kvm_signalfd_compat(mask);
+}
int kvm_eventfd(int *fds)
{
diff --git a/qemu/qemu-kvm.h b/qemu/qemu-kvm.h
index 8fa3c1b..a0dd4a8 100644
--- a/qemu/qemu-kvm.h
+++ b/qemu/qemu-kvm.h
@@ -10,6 +10,8 @@
#include "cpu.h"
+#include <signal.h>
+
int kvm_main_loop(void);
int kvm_qemu_init(void);
int kvm_qemu_create_context(void);
@@ -79,6 +81,16 @@ int handle_powerpc_dcr_read(int vcpu, uint32_t dcrn, uint32_t *data);
int handle_powerpc_dcr_write(int vcpu,uint32_t dcrn, uint32_t data);
#endif
+#if !defined(SYS_signalfd)
+struct signalfd_siginfo {
+ uint32_t ssi_signo;
+ uint8_t pad[124];
+};
+#else
+#include <linux/signalfd.h>
+#endif
+
+int kvm_signalfd(const sigset_t *mask);
int kvm_eventfd(int *fds);
#define ALIGN(x, y) (((x)+(y)-1) & ~((y)-1))
-------------------------------------------------------------------------
This SF.net email is sponsored by the 2008 JavaOne(SM) Conference
Don't miss this year's exciting event. There's still time to save $100.
Use priority code J8TL2D2.
http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 4/6] Use signalfd() in io-thread (v3)
2008-05-07 16:55 [PATCH 1/6] Add support for eventfd() (v3) Anthony Liguori
2008-05-07 16:55 ` [PATCH 2/6] Replace SIGUSR1 in io-thread with " Anthony Liguori
2008-05-07 16:55 ` [PATCH 3/6] Add support for signalfd() (v3) Anthony Liguori
@ 2008-05-07 16:55 ` Anthony Liguori
2008-05-07 16:55 ` [PATCH 5/6] Interrupt io thread in qemu_set_fd_handler2 (v3) Anthony Liguori
` (2 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: Anthony Liguori @ 2008-05-07 16:55 UTC (permalink / raw)
To: kvm-devel; +Cc: Anthony Liguori, Marcelo Tosatti, Avi Kivity
This patch reworks the IO thread to use signalfd() instead of sigtimedwait().
This will eliminate the need to use SIGIO everywhere.
Since v2, I've fixed a nasty bug in qemu_kvm_aio_wait(). We can't use
main_loop_wait() to sleep if it's at all possible we're being called from
a handler in main_loop_wait() (which is the case with qemu_kvm_aio_wait()).
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
diff --git a/qemu/qemu-kvm.c b/qemu/qemu-kvm.c
index 7134e56..492c3c4 100644
--- a/qemu/qemu-kvm.c
+++ b/qemu/qemu-kvm.c
@@ -17,6 +17,7 @@ int kvm_pit = 1;
#include "sysemu.h"
#include "qemu-common.h"
#include "console.h"
+#include "block.h"
#include "qemu-kvm.h"
#include <libkvm.h>
@@ -36,18 +37,11 @@ pthread_mutex_t qemu_mutex = PTHREAD_MUTEX_INITIALIZER;
pthread_cond_t qemu_aio_cond = PTHREAD_COND_INITIALIZER;
pthread_cond_t qemu_vcpu_cond = PTHREAD_COND_INITIALIZER;
pthread_cond_t qemu_system_cond = PTHREAD_COND_INITIALIZER;
+pthread_cond_t qemu_pause_cond = PTHREAD_COND_INITIALIZER;
__thread struct vcpu_info *vcpu;
static int qemu_system_ready;
-struct qemu_kvm_signal_table {
- sigset_t sigset;
- sigset_t negsigset;
-};
-
-static struct qemu_kvm_signal_table io_signal_table;
-static struct qemu_kvm_signal_table vcpu_signal_table;
-
#define SIG_IPI (SIGRTMIN+4)
struct vcpu_info {
@@ -64,6 +58,7 @@ struct vcpu_info {
pthread_t io_thread;
static int io_thread_fd = -1;
+static int io_thread_sigfd = -1;
static inline unsigned long kvm_get_thread_id(void)
{
@@ -172,37 +167,23 @@ static int has_work(CPUState *env)
return kvm_arch_has_work(env);
}
-static int kvm_process_signal(int si_signo)
-{
- struct sigaction sa;
-
- switch (si_signo) {
- case SIGUSR2:
- pthread_cond_signal(&qemu_aio_cond);
- break;
- case SIGALRM:
- case SIGIO:
- sigaction(si_signo, NULL, &sa);
- sa.sa_handler(si_signo);
- break;
- }
-
- return 1;
-}
-
-static int kvm_eat_signal(struct qemu_kvm_signal_table *waitset, CPUState *env,
- int timeout)
+static int kvm_eat_signal(CPUState *env, int timeout)
{
struct timespec ts;
int r, e, ret = 0;
siginfo_t siginfo;
+ sigset_t waitset;
ts.tv_sec = timeout / 1000;
ts.tv_nsec = (timeout % 1000) * 1000000;
- r = sigtimedwait(&waitset->sigset, &siginfo, &ts);
+ sigemptyset(&waitset);
+ sigaddset(&waitset, SIG_IPI);
+
+ r = sigtimedwait(&waitset, &siginfo, &ts);
if (r == -1 && (errno == EAGAIN || errno == EINTR) && !timeout)
return 0;
e = errno;
+
pthread_mutex_lock(&qemu_mutex);
if (env && vcpu)
cpu_single_env = vcpu->env;
@@ -211,12 +192,12 @@ static int kvm_eat_signal(struct qemu_kvm_signal_table *waitset, CPUState *env,
exit(1);
}
if (r != -1)
- ret = kvm_process_signal(siginfo.si_signo);
+ ret = 1;
if (env && vcpu_info[env->cpu_index].stop) {
vcpu_info[env->cpu_index].stop = 0;
vcpu_info[env->cpu_index].stopped = 1;
- qemu_kvm_notify_work();
+ pthread_cond_signal(&qemu_pause_cond);
}
pthread_mutex_unlock(&qemu_mutex);
@@ -227,14 +208,13 @@ static int kvm_eat_signal(struct qemu_kvm_signal_table *waitset, CPUState *env,
static void kvm_eat_signals(CPUState *env, int timeout)
{
int r = 0;
- struct qemu_kvm_signal_table *waitset = &vcpu_signal_table;
- while (kvm_eat_signal(waitset, env, 0))
+ while (kvm_eat_signal(env, 0))
r = 1;
if (!r && timeout) {
- r = kvm_eat_signal(waitset, env, timeout);
+ r = kvm_eat_signal(env, timeout);
if (r)
- while (kvm_eat_signal(waitset, env, 0))
+ while (kvm_eat_signal(env, 0))
;
}
}
@@ -266,12 +246,8 @@ static void pause_all_threads(void)
vcpu_info[i].stop = 1;
pthread_kill(vcpu_info[i].thread, SIG_IPI);
}
- while (!all_threads_paused()) {
- pthread_mutex_unlock(&qemu_mutex);
- kvm_eat_signal(&io_signal_table, NULL, 1000);
- pthread_mutex_lock(&qemu_mutex);
- cpu_single_env = NULL;
- }
+ while (!all_threads_paused())
+ pthread_cond_wait(&qemu_pause_cond, &qemu_mutex);
}
static void resume_all_threads(void)
@@ -310,6 +286,12 @@ static void setup_kernel_sigmask(CPUState *env)
{
sigset_t set;
+ sigemptyset(&set);
+ sigaddset(&set, SIGUSR2);
+ sigaddset(&set, SIGIO);
+ sigaddset(&set, SIGALRM);
+ sigprocmask(SIG_BLOCK, &set, NULL);
+
sigprocmask(SIG_BLOCK, NULL, &set);
sigdelset(&set, SIG_IPI);
@@ -346,7 +328,7 @@ static int kvm_main_loop_cpu(CPUState *env)
cpu_single_env = env;
while (1) {
while (!has_work(env))
- kvm_main_loop_wait(env, 10);
+ kvm_main_loop_wait(env, 1000);
if (env->interrupt_request & CPU_INTERRUPT_HARD)
env->hflags &= ~HF_HALTED_MASK;
if (!kvm_irqchip_in_kernel(kvm_context) && info->sipi_needed)
@@ -394,18 +376,6 @@ static void *ap_main_loop(void *_env)
return NULL;
}
-static void qemu_kvm_init_signal_table(struct qemu_kvm_signal_table *sigtab)
-{
- sigemptyset(&sigtab->sigset);
- sigfillset(&sigtab->negsigset);
-}
-
-static void kvm_add_signal(struct qemu_kvm_signal_table *sigtab, int signum)
-{
- sigaddset(&sigtab->sigset, signum);
- sigdelset(&sigtab->negsigset, signum);
-}
-
void kvm_init_new_ap(int cpu, CPUState *env)
{
pthread_create(&vcpu_info[cpu].thread, NULL, ap_main_loop, env);
@@ -414,27 +384,12 @@ void kvm_init_new_ap(int cpu, CPUState *env)
pthread_cond_wait(&qemu_vcpu_cond, &qemu_mutex);
}
-static void qemu_kvm_init_signal_tables(void)
-{
- qemu_kvm_init_signal_table(&io_signal_table);
- qemu_kvm_init_signal_table(&vcpu_signal_table);
-
- kvm_add_signal(&io_signal_table, SIGIO);
- kvm_add_signal(&io_signal_table, SIGALRM);
- kvm_add_signal(&io_signal_table, SIGUSR2);
-
- kvm_add_signal(&vcpu_signal_table, SIG_IPI);
-
- sigprocmask(SIG_BLOCK, &io_signal_table.sigset, NULL);
-}
-
int kvm_init_ap(void)
{
#ifdef TARGET_I386
kvm_tpr_opt_setup();
#endif
qemu_add_vm_change_state_handler(kvm_vm_state_change_handler, NULL);
- qemu_kvm_init_signal_tables();
signal(SIG_IPI, sig_ipi_handler);
return 0;
@@ -468,6 +423,61 @@ void qemu_kvm_notify_work(void)
fprintf(stderr, "failed to notify io thread\n");
}
+static int received_signal;
+
+/* QEMU relies on periodically breaking out of select via EINTR to poll for IO
+ and timer signals. Since we're now using a file descriptor to handle
+ signals, select() won't be interrupted by a signal. We need to forcefully
+ break the select() loop when a signal is received hence
+ kvm_check_received_signal(). */
+
+int kvm_check_received_signal(void)
+{
+ if (received_signal) {
+ received_signal = 0;
+ return 1;
+ }
+
+ return 0;
+}
+
+/* If we have signalfd, we mask out the signals we want to handle and then
+ * use signalfd to listen for them. We rely on whatever the current signal
+ * handler is to dispatch the signals when we receive them.
+ */
+
+static void sigfd_handler(void *opaque)
+{
+ int fd = (unsigned long)opaque;
+ struct signalfd_siginfo info;
+ struct sigaction action;
+ ssize_t len;
+
+ while (1) {
+ do {
+ len = read(fd, &info, sizeof(info));
+ } while (len == -1 && errno == EINTR);
+
+ if (len == -1 && errno == EAGAIN)
+ break;
+
+ if (len != sizeof(info)) {
+ printf("read from sigfd returned %ld: %m\n", len);
+ return;
+ }
+
+ sigaction(info.ssi_signo, NULL, &action);
+ if (action.sa_handler)
+ action.sa_handler(info.ssi_signo);
+
+ if (info.ssi_signo == SIGUSR2) {
+ pthread_cond_signal(&qemu_aio_cond);
+ }
+ }
+
+ received_signal = 1;
+}
+
/* Used to break IO thread out of select */
static void io_thread_wakeup(void *opaque)
{
@@ -487,17 +497,15 @@ static void io_thread_wakeup(void *opaque)
offset += len;
}
-}
-/*
- * The IO thread has all signals that inform machine events
- * blocked (io_signal_table), so it won't get interrupted
- * while processing in main_loop_wait().
- */
+ received_signal = 1;
+}
int kvm_main_loop(void)
{
int fds[2];
+ sigset_t mask;
+ int sigfd;
io_thread = pthread_self();
qemu_system_ready = 1;
@@ -511,15 +519,31 @@ int kvm_main_loop(void)
(void *)(unsigned long)fds[0]);
io_thread_fd = fds[1];
- pthread_mutex_unlock(&qemu_mutex);
+
+ sigemptyset(&mask);
+ sigaddset(&mask, SIGIO);
+ sigaddset(&mask, SIGALRM);
+ sigaddset(&mask, SIGUSR2);
+ sigprocmask(SIG_BLOCK, &mask, NULL);
+
+ sigfd = kvm_signalfd(&mask);
+ if (sigfd == -1) {
+ fprintf(stderr, "failed to create signalfd\n");
+ return -errno;
+ }
+
+ fcntl(sigfd, F_SETFL, O_NONBLOCK);
+
+ qemu_set_fd_handler2(sigfd, NULL, sigfd_handler, NULL,
+ (void *)(unsigned long)sigfd);
pthread_cond_broadcast(&qemu_system_cond);
+ io_thread_sigfd = sigfd;
+ cpu_single_env = NULL;
+
while (1) {
- kvm_eat_signal(&io_signal_table, NULL, 1000);
- pthread_mutex_lock(&qemu_mutex);
- cpu_single_env = NULL;
- main_loop_wait(0);
+ main_loop_wait(1000);
if (qemu_shutdown_requested())
break;
else if (qemu_powerdown_requested())
@@ -528,7 +552,6 @@ int kvm_main_loop(void)
pthread_kill(vcpu_info[0].thread, SIG_IPI);
qemu_kvm_reset_requested = 1;
}
- pthread_mutex_unlock(&qemu_mutex);
}
pause_all_threads();
@@ -891,10 +914,21 @@ void qemu_kvm_aio_wait(void)
CPUState *cpu_single = cpu_single_env;
if (!cpu_single_env) {
- pthread_mutex_unlock(&qemu_mutex);
- kvm_eat_signal(&io_signal_table, NULL, 1000);
- pthread_mutex_lock(&qemu_mutex);
- cpu_single_env = NULL;
+ if (io_thread_sigfd != -1) {
+ fd_set rfds;
+ int ret;
+
+ FD_ZERO(&rfds);
+ FD_SET(io_thread_sigfd, &rfds);
+
+ /* this is a rare case where we do want to hold qemu_mutex
+ * while sleeping. We cannot allow anything else to run
+ * right now. */
+ ret = select(io_thread_sigfd + 1, &rfds, NULL, NULL, NULL);
+ if (ret > 0 && FD_ISSET(io_thread_sigfd, &rfds))
+ sigfd_handler((void *)(unsigned long)io_thread_sigfd);
+ }
+ qemu_aio_poll();
} else {
pthread_cond_wait(&qemu_aio_cond, &qemu_mutex);
cpu_single_env = cpu_single;
@@ -921,3 +955,14 @@ void kvm_cpu_destroy_phys_mem(target_phys_addr_t start_addr,
{
kvm_destroy_phys_mem(kvm_context, start_addr, size);
}
+
+void kvm_mutex_unlock(void)
+{
+ pthread_mutex_unlock(&qemu_mutex);
+}
+
+void kvm_mutex_lock(void)
+{
+ pthread_mutex_lock(&qemu_mutex);
+ cpu_single_env = NULL;
+}
diff --git a/qemu/qemu-kvm.h b/qemu/qemu-kvm.h
index a0dd4a8..df573ec 100644
--- a/qemu/qemu-kvm.h
+++ b/qemu/qemu-kvm.h
@@ -12,6 +12,8 @@
#include <signal.h>
+#include <signal.h>
+
int kvm_main_loop(void);
int kvm_qemu_init(void);
int kvm_qemu_create_context(void);
@@ -111,4 +113,28 @@ extern kvm_context_t kvm_context;
#define qemu_kvm_pit_in_kernel() (0)
#endif
+void kvm_mutex_unlock(void);
+void kvm_mutex_lock(void);
+
+static inline void kvm_sleep_begin(void)
+{
+ if (kvm_enabled())
+ kvm_mutex_unlock();
+}
+
+static inline void kvm_sleep_end(void)
+{
+ if (kvm_enabled())
+ kvm_mutex_lock();
+}
+
+int kvm_check_received_signal(void);
+
+static inline int kvm_received_signal(void)
+{
+ if (kvm_enabled())
+ return kvm_check_received_signal();
+ return 0;
+}
+
#endif
diff --git a/qemu/vl.c b/qemu/vl.c
index 3fcf6b6..541aacc 100644
--- a/qemu/vl.c
+++ b/qemu/vl.c
@@ -7271,6 +7271,23 @@ void qemu_register_boot_set(QEMUBootSetHandler *func)
qemu_boot_set_handler = func;
}
+static int qemu_select(int max_fd, fd_set *rfds, fd_set *wfds, fd_set *xfds,
+ struct timeval *tv)
+{
+ int ret;
+
+ /* KVM holds a mutex while QEMU code is running, we need hooks to
+ release the mutex whenever QEMU code sleeps. */
+
+ kvm_sleep_begin();
+
+ ret = select(max_fd, rfds, wfds, xfds, tv);
+
+ kvm_sleep_end();
+
+ return ret;
+}
+
void main_loop_wait(int timeout)
{
IOHandlerRecord *ioh;
@@ -7342,11 +7359,12 @@ void main_loop_wait(int timeout)
}
}
- tv.tv_sec = 0;
#ifdef _WIN32
+ tv.tv_sec = 0;
tv.tv_usec = 0;
#else
- tv.tv_usec = timeout * 1000;
+ tv.tv_sec = timeout / 1000;
+ tv.tv_usec = (timeout % 1000) * 1000;
#endif
#if defined(CONFIG_SLIRP)
if (slirp_inited) {
@@ -7354,7 +7372,7 @@ void main_loop_wait(int timeout)
}
#endif
moreio:
- ret = select(nfds + 1, &rfds, &wfds, &xfds, &tv);
+ ret = qemu_select(nfds + 1, &rfds, &wfds, &xfds, &tv);
if (ret > 0) {
IOHandlerRecord **pioh;
int more = 0;
@@ -7383,7 +7401,7 @@ void main_loop_wait(int timeout)
} else
pioh = &ioh->next;
}
- if (more)
+ if (more && !kvm_received_signal())
goto moreio;
}
#if defined(CONFIG_SLIRP)
-------------------------------------------------------------------------
This SF.net email is sponsored by the 2008 JavaOne(SM) Conference
Don't miss this year's exciting event. There's still time to save $100.
Use priority code J8TL2D2.
http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 5/6] Interrupt io thread in qemu_set_fd_handler2 (v3)
2008-05-07 16:55 [PATCH 1/6] Add support for eventfd() (v3) Anthony Liguori
` (2 preceding siblings ...)
2008-05-07 16:55 ` [PATCH 4/6] Use signalfd() in io-thread (v3) Anthony Liguori
@ 2008-05-07 16:55 ` Anthony Liguori
2008-05-07 16:55 ` [PATCH 6/6] Only select once per-main_loop iteration (v3) Anthony Liguori
2008-05-08 17:16 ` [PATCH 1/6] Add support for eventfd() (v3) Avi Kivity
5 siblings, 0 replies; 7+ messages in thread
From: Anthony Liguori @ 2008-05-07 16:55 UTC (permalink / raw)
To: kvm-devel; +Cc: Anthony Liguori, Marcelo Tosatti, Avi Kivity
The select() in the IO thread may wait a long time before rebuilding the
fd set. Whenever we do something that changes the fd set, we should interrupt
the IO thread.
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
diff --git a/qemu/vl.c b/qemu/vl.c
index 1192759..e9f0ca4 100644
--- a/qemu/vl.c
+++ b/qemu/vl.c
@@ -260,6 +260,16 @@ static int event_pending = 1;
#define TFR(expr) do { if ((expr) != -1) break; } while (errno == EINTR)
+/* KVM runs the main loop in a separate thread. If we update one of the lists
+ * that are polled before or after select(), we need to make sure to break out
+ * of the select() to ensure the new item is serviced.
+ */
+static void main_loop_break(void)
+{
+ if (kvm_enabled())
+ qemu_kvm_notify_work();
+}
+
void decorate_application_name(char *appname, int max_len)
{
if (kvm_enabled())
@@ -5680,6 +5690,7 @@ int qemu_set_fd_handler2(int fd,
ioh->opaque = opaque;
ioh->deleted = 0;
}
+ main_loop_break();
return 0;
}
@@ -7606,8 +7617,7 @@ void qemu_bh_schedule(QEMUBH *bh)
if (env) {
cpu_interrupt(env, CPU_INTERRUPT_EXIT);
}
- if (kvm_enabled())
- qemu_kvm_notify_work();
+ main_loop_break();
}
void qemu_bh_cancel(QEMUBH *bh)
-------------------------------------------------------------------------
This SF.net email is sponsored by the 2008 JavaOne(SM) Conference
Don't miss this year's exciting event. There's still time to save $100.
Use priority code J8TL2D2.
http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 6/6] Only select once per-main_loop iteration (v3)
2008-05-07 16:55 [PATCH 1/6] Add support for eventfd() (v3) Anthony Liguori
` (3 preceding siblings ...)
2008-05-07 16:55 ` [PATCH 5/6] Interrupt io thread in qemu_set_fd_handler2 (v3) Anthony Liguori
@ 2008-05-07 16:55 ` Anthony Liguori
2008-05-08 17:16 ` [PATCH 1/6] Add support for eventfd() (v3) Avi Kivity
5 siblings, 0 replies; 7+ messages in thread
From: Anthony Liguori @ 2008-05-07 16:55 UTC (permalink / raw)
To: kvm-devel; +Cc: Anthony Liguori, Marcelo Tosatti, Avi Kivity
QEMU is rather aggressive about exhausting the wait period when selecting.
This is fine when the wait period is low and when there are significant delays
in-between selects as it improves IO throughput.
With the IO thread, there is a very small delay between selects and our wait
period for select is very large. This patch changes main_loop_wait to only
select once before doing the various other things in the main loop. This
generally improves responsiveness of things like SDL but also improves
individual file descriptor throughput quite dramatically.
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
diff --git a/qemu/qemu-kvm.c b/qemu/qemu-kvm.c
index 492c3c4..cc8f292 100644
--- a/qemu/qemu-kvm.c
+++ b/qemu/qemu-kvm.c
@@ -423,24 +423,6 @@ void qemu_kvm_notify_work(void)
fprintf(stderr, "failed to notify io thread\n");
}
-static int received_signal;
-
-/* QEMU relies on periodically breaking out of select via EINTR to poll for IO
- and timer signals. Since we're now using a file descriptor to handle
- signals, select() won't be interrupted by a signal. We need to forcefully
- break the select() loop when a signal is received hence
- kvm_check_received_signal(). */
-
-int kvm_check_received_signal(void)
-{
- if (received_signal) {
- received_signal = 0;
- return 1;
- }
-
- return 0;
-}
-
/* If we have signalfd, we mask out the signals we want to handle and then
* use signalfd to listen for them. We rely on whatever the current signal
* handler is to dispatch the signals when we receive them.
@@ -474,8 +456,6 @@ static void sigfd_handler(void *opaque)
pthread_cond_signal(&qemu_aio_cond);
}
}
-
- received_signal = 1;
}
/* Used to break IO thread out of select */
@@ -497,8 +477,6 @@ static void io_thread_wakeup(void *opaque)
offset += len;
}
-
- received_signal = 1;
}
int kvm_main_loop(void)
diff --git a/qemu/qemu-kvm.h b/qemu/qemu-kvm.h
index df573ec..21606e9 100644
--- a/qemu/qemu-kvm.h
+++ b/qemu/qemu-kvm.h
@@ -128,13 +128,4 @@ static inline void kvm_sleep_end(void)
kvm_mutex_lock();
}
-int kvm_check_received_signal(void);
-
-static inline int kvm_received_signal(void)
-{
- if (kvm_enabled())
- return kvm_check_received_signal();
- return 0;
-}
-
#endif
diff --git a/qemu/vl.c b/qemu/vl.c
index 4e25366..a1aa270 100644
--- a/qemu/vl.c
+++ b/qemu/vl.c
@@ -7381,23 +7381,18 @@ void main_loop_wait(int timeout)
slirp_select_fill(&nfds, &rfds, &wfds, &xfds);
}
#endif
- moreio:
ret = qemu_select(nfds + 1, &rfds, &wfds, &xfds, &tv);
if (ret > 0) {
IOHandlerRecord **pioh;
- int more = 0;
for(ioh = first_io_handler; ioh != NULL; ioh = ioh->next) {
if (!ioh->deleted && ioh->fd_read && FD_ISSET(ioh->fd, &rfds)) {
ioh->fd_read(ioh->opaque);
- if (!ioh->fd_read_poll || ioh->fd_read_poll(ioh->opaque))
- more = 1;
- else
+ if (!(ioh->fd_read_poll && ioh->fd_read_poll(ioh->opaque)))
FD_CLR(ioh->fd, &rfds);
}
if (!ioh->deleted && ioh->fd_write && FD_ISSET(ioh->fd, &wfds)) {
ioh->fd_write(ioh->opaque);
- more = 1;
}
}
@@ -7411,8 +7406,6 @@ void main_loop_wait(int timeout)
} else
pioh = &ioh->next;
}
- if (more && !kvm_received_signal())
- goto moreio;
}
#if defined(CONFIG_SLIRP)
if (slirp_inited) {
-------------------------------------------------------------------------
This SF.net email is sponsored by the 2008 JavaOne(SM) Conference
Don't miss this year's exciting event. There's still time to save $100.
Use priority code J8TL2D2.
http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone
^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [PATCH 1/6] Add support for eventfd() (v3)
2008-05-07 16:55 [PATCH 1/6] Add support for eventfd() (v3) Anthony Liguori
` (4 preceding siblings ...)
2008-05-07 16:55 ` [PATCH 6/6] Only select once per-main_loop iteration (v3) Anthony Liguori
@ 2008-05-08 17:16 ` Avi Kivity
5 siblings, 0 replies; 7+ messages in thread
From: Avi Kivity @ 2008-05-08 17:16 UTC (permalink / raw)
To: Anthony Liguori; +Cc: kvm-devel, Marcelo Tosatti
Anthony Liguori wrote:
> This patch adds compatibility code so that we can make use of eventfd() within
> QEMU. eventfd() is a pretty useful mechanism as it allows multiple
> notifications to be batched in a single system call.
>
> We emulate eventfd() using a standard pipe().
>
Applied all six patches; thanks.
--
Do not meddle in the internals of kernels, for they are subtle and quick to panic.
-------------------------------------------------------------------------
This SF.net email is sponsored by the 2008 JavaOne(SM) Conference
Don't miss this year's exciting event. There's still time to save $100.
Use priority code J8TL2D2.
http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone
^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2008-05-08 17:16 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-05-07 16:55 [PATCH 1/6] Add support for eventfd() (v3) Anthony Liguori
2008-05-07 16:55 ` [PATCH 2/6] Replace SIGUSR1 in io-thread with " Anthony Liguori
2008-05-07 16:55 ` [PATCH 3/6] Add support for signalfd() (v3) Anthony Liguori
2008-05-07 16:55 ` [PATCH 4/6] Use signalfd() in io-thread (v3) Anthony Liguori
2008-05-07 16:55 ` [PATCH 5/6] Interrupt io thread in qemu_set_fd_handler2 (v3) Anthony Liguori
2008-05-07 16:55 ` [PATCH 6/6] Only select once per-main_loop iteration (v3) Anthony Liguori
2008-05-08 17:16 ` [PATCH 1/6] Add support for eventfd() (v3) Avi Kivity
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.