* [Qemu-devel] [PATCH 2/3] aio: Introduce aio_context_setup
2015-10-12 9:55 [Qemu-devel] [PATCH 0/3] aio: Use epoll in aio_poll() Fam Zheng
2015-10-12 9:55 ` [Qemu-devel] [PATCH 1/3] aio: Move AioHandler struct to header Fam Zheng
@ 2015-10-12 9:55 ` Fam Zheng
2015-10-12 9:55 ` [Qemu-devel] [PATCH 3/3] aio: Introduce aio-epoll.c Fam Zheng
2 siblings, 0 replies; 6+ messages in thread
From: Fam Zheng @ 2015-10-12 9:55 UTC (permalink / raw)
To: qemu-devel; +Cc: kwolf, pbonzini, qemu-block, Stefan Hajnoczi
This is the place to initialize platform-specific bits of AioContext.
Signed-off-by: Fam Zheng <famz@redhat.com>
---
aio-posix.c | 4 ++++
aio-win32.c | 4 ++++
async.c | 14 ++++++++++++--
include/block/aio-internal.h | 2 ++
4 files changed, 22 insertions(+), 2 deletions(-)
diff --git a/aio-posix.c b/aio-posix.c
index 7ae54fc..4fd2383 100644
--- a/aio-posix.c
+++ b/aio-posix.c
@@ -288,3 +288,7 @@ bool aio_poll(AioContext *ctx, bool blocking)
return progress;
}
+
+void aio_context_setup(AioContext *ctx, Error **errp)
+{
+}
diff --git a/aio-win32.c b/aio-win32.c
index f018934..7873141 100644
--- a/aio-win32.c
+++ b/aio-win32.c
@@ -353,3 +353,7 @@ bool aio_poll(AioContext *ctx, bool blocking)
aio_context_release(ctx);
return progress;
}
+
+void aio_context_setup(AioContext *ctx, Error **errp)
+{
+}
diff --git a/async.c b/async.c
index efce14b..72cdc9b 100644
--- a/async.c
+++ b/async.c
@@ -27,6 +27,7 @@
#include "block/thread-pool.h"
#include "qemu/main-loop.h"
#include "qemu/atomic.h"
+#include "block/aio-internal.h"
/***********************************************************/
/* bottom halves (can be seen as timers which expire ASAP) */
@@ -320,12 +321,18 @@ AioContext *aio_context_new(Error **errp)
{
int ret;
AioContext *ctx;
+ Error *local_err = NULL;
+
ctx = (AioContext *) g_source_new(&aio_source_funcs, sizeof(AioContext));
+ aio_context_setup(ctx, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ goto fail;
+ }
ret = event_notifier_init(&ctx->notifier, false);
if (ret < 0) {
- g_source_destroy(&ctx->source);
error_setg_errno(errp, -ret, "Failed to initialize event notifier");
- return NULL;
+ goto fail;
}
g_source_set_can_recurse(&ctx->source, true);
aio_set_event_notifier(ctx, &ctx->notifier,
@@ -339,6 +346,9 @@ AioContext *aio_context_new(Error **errp)
ctx->notify_dummy_bh = aio_bh_new(ctx, notify_dummy_bh, NULL);
return ctx;
+fail:
+ g_source_destroy(&ctx->source);
+ return NULL;
}
void aio_context_ref(AioContext *ctx)
diff --git a/include/block/aio-internal.h b/include/block/aio-internal.h
index 2ffbcdc..f50a37c 100644
--- a/include/block/aio-internal.h
+++ b/include/block/aio-internal.h
@@ -27,4 +27,6 @@ struct AioHandler {
QLIST_ENTRY(AioHandler) node;
};
+void aio_context_setup(AioContext *ctx, Error **errp);
+
#endif
--
2.6.1
^ permalink raw reply related [flat|nested] 6+ messages in thread* [Qemu-devel] [PATCH 3/3] aio: Introduce aio-epoll.c
2015-10-12 9:55 [Qemu-devel] [PATCH 0/3] aio: Use epoll in aio_poll() Fam Zheng
2015-10-12 9:55 ` [Qemu-devel] [PATCH 1/3] aio: Move AioHandler struct to header Fam Zheng
2015-10-12 9:55 ` [Qemu-devel] [PATCH 2/3] aio: Introduce aio_context_setup Fam Zheng
@ 2015-10-12 9:55 ` Fam Zheng
2015-10-12 10:06 ` Paolo Bonzini
2 siblings, 1 reply; 6+ messages in thread
From: Fam Zheng @ 2015-10-12 9:55 UTC (permalink / raw)
To: qemu-devel; +Cc: kwolf, pbonzini, qemu-block, Stefan Hajnoczi
To minimize code duplication, epoll is hooked into aio-posix's
aio_poll() instead of implementing a separate aio_poll(). This approach
also has the advantage of allowing both compile-time and run-time
switching between the two:
1) If the configure script didn't find epoll, the libqemustub.a nop
functions will be used, so the usual ppoll is selected.
2) When QEMU starts with a small number of fds in the event loop, ppoll
is used.
3) When QEMU starts with a big number of fds, or when more devices are
hot plugged after starting up, epoll automatically kicks in after the
number of fds hits the threshold.
4) Some fds may not support epoll, such as tty-based stdio. In this
case, we can fall back to ppoll.
Signed-off-by: Fam Zheng <famz@redhat.com>
---
Makefile.objs | 1 +
aio-epoll.c | 150 +++++++++++++++++++++++++++++++++++++++++++
aio-posix.c | 16 ++++-
include/block/aio-internal.h | 15 +++++
include/block/aio.h | 5 ++
stubs/Makefile.objs | 1 +
stubs/aio-epoll.c | 37 +++++++++++
7 files changed, 223 insertions(+), 2 deletions(-)
create mode 100644 aio-epoll.c
create mode 100644 stubs/aio-epoll.c
diff --git a/Makefile.objs b/Makefile.objs
index bc43e5c..8f401b7 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -10,6 +10,7 @@ util-obj-y += qmp-introspect.o qapi-types.o qapi-visit.o qapi-event.o
block-obj-y = async.o thread-pool.o
block-obj-y += nbd.o block.o blockjob.o
block-obj-y += main-loop.o iohandler.o qemu-timer.o
+block-obj-$(CONFIG_EPOLL) += aio-epoll.o
block-obj-$(CONFIG_POSIX) += aio-posix.o
block-obj-$(CONFIG_WIN32) += aio-win32.o
block-obj-y += block/
diff --git a/aio-epoll.c b/aio-epoll.c
new file mode 100644
index 0000000..4557dcb
--- /dev/null
+++ b/aio-epoll.c
@@ -0,0 +1,150 @@
+/*
+ * QEMU aio implementation
+ *
+ * Copyright Red Hat, Inc, 2015
+ *
+ * Authors:
+ * Fam Zheng <famz@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+
+#include "qemu-common.h"
+#include "block/block.h"
+#include "qemu/queue.h"
+#include "block/aio-internal.h"
+#include <sys/epoll.h>
+
+/* The fd number threshold to switch to epoll */
+#define EPOLL_ENABLE_THRESHOLD 64
+
+static void aio_epoll_disable(AioContext *ctx)
+{
+ ctx->epoll_available = false;
+ if (!ctx->epoll_enabled) {
+ return;
+ }
+ ctx->epoll_enabled = false;
+ close(ctx->epollfd);
+}
+
+static inline int epoll_events_from_pfd(int pfd_events)
+{
+ return (pfd_events & G_IO_IN ? EPOLLIN : 0) |
+ (pfd_events & G_IO_OUT ? EPOLLOUT : 0) |
+ (pfd_events & G_IO_HUP ? EPOLLHUP : 0) |
+ (pfd_events & G_IO_ERR ? EPOLLERR : 0);
+}
+
+static bool aio_epoll_try_enable(AioContext *ctx)
+{
+ AioHandler *node;
+ struct epoll_event event;
+ if (!ctx->epoll_available) {
+ return false;
+ }
+
+ QLIST_FOREACH(node, &ctx->aio_handlers, node) {
+ int r;
+ if (node->deleted || !node->pfd.events) {
+ continue;
+ }
+ event.events = epoll_events_from_pfd(node->pfd.events);
+ event.data.ptr = node;
+ r = epoll_ctl(ctx->epollfd, EPOLL_CTL_ADD, node->pfd.fd, &event);
+ if (r) {
+ return false;
+ }
+ }
+ ctx->epoll_enabled = true;
+ return true;
+}
+
+void aio_epoll_update(AioContext *ctx, AioHandler *node, bool is_new)
+{
+ struct epoll_event event;
+ int r;
+
+ if (!ctx->epoll_enabled) {
+ return;
+ }
+ if (!node->pfd.events) {
+ r = epoll_ctl(ctx->epollfd, EPOLL_CTL_DEL, node->pfd.fd, &event);
+ assert(!r);
+ } else {
+ event.data.ptr = node;
+ event.events = epoll_events_from_pfd(node->pfd.events);
+ if (is_new) {
+ r = epoll_ctl(ctx->epollfd, EPOLL_CTL_ADD, node->pfd.fd, &event);
+ if (r) {
+ aio_epoll_disable(ctx);
+ }
+ } else {
+ r = epoll_ctl(ctx->epollfd, EPOLL_CTL_MOD, node->pfd.fd, &event);
+ assert(!r);
+ }
+ }
+}
+
+int aio_epoll(AioContext *ctx, GPollFD *pfds, unsigned npfd, int64_t timeout)
+{
+ AioHandler *node;
+ int i, ret = 0;
+ struct epoll_event events[128];
+
+ assert(npfd == 1);
+ assert(pfds[0].fd == ctx->epollfd);
+ if (timeout > 0) {
+ ret = qemu_poll_ns(pfds, npfd, timeout);
+ }
+ if (timeout <= 0 || ret > 0) {
+ ret = epoll_wait(ctx->epollfd, events,
+ sizeof(events) / sizeof(events[0]),
+ timeout);
+ if (ret <= 0) {
+ goto out;
+ }
+ for (i = 0; i < ret; i++) {
+ int ev = events[i].events;
+ node = events[i].data.ptr;
+ node->pfd.revents = (ev & EPOLLIN ? G_IO_IN : 0) |
+ (ev & EPOLLOUT ? G_IO_OUT : 0) |
+ (ev & EPOLLHUP ? G_IO_HUP : 0) |
+ (ev & EPOLLERR ? G_IO_ERR : 0);
+ }
+ }
+out:
+ return ret;
+}
+
+bool aio_epoll_check_poll(AioContext *ctx, GPollFD *pfds,
+ unsigned npfd, int64_t timeout)
+{
+ if (!ctx->epoll_available) {
+ return false;
+ }
+ if (ctx->epoll_enabled) {
+ return true;
+ }
+ if (npfd >= EPOLL_ENABLE_THRESHOLD && aio_epoll_try_enable(ctx)) {
+ return true;
+ } else {
+ aio_epoll_disable(ctx);
+ return false;
+ }
+}
+
+void aio_context_setup_epoll(AioContext *ctx, Error **errp)
+{
+ assert(!ctx->epollfd);
+ ctx->epollfd = epoll_create1(EPOLL_CLOEXEC);
+ if (ctx->epollfd == -1) {
+ ctx->epoll_available = false;
+ } else {
+ ctx->epoll_available = true;
+ }
+}
diff --git a/aio-posix.c b/aio-posix.c
index 4fd2383..2c0fe29 100644
--- a/aio-posix.c
+++ b/aio-posix.c
@@ -39,6 +39,7 @@ void aio_set_fd_handler(AioContext *ctx,
void *opaque)
{
AioHandler *node;
+ bool is_new = false;
node = find_aio_handler(ctx, fd);
@@ -68,6 +69,7 @@ void aio_set_fd_handler(AioContext *ctx,
QLIST_INSERT_HEAD(&ctx->aio_handlers, node, node);
g_source_add_poll(&ctx->source, &node->pfd);
+ is_new = true;
}
/* Update handler with latest information */
node->io_read = io_read;
@@ -78,6 +80,7 @@ void aio_set_fd_handler(AioContext *ctx,
node->pfd.events |= (io_write ? G_IO_OUT | G_IO_ERR : 0);
}
+ aio_epoll_update(ctx, node, is_new);
aio_notify(ctx);
}
@@ -248,7 +251,7 @@ bool aio_poll(AioContext *ctx, bool blocking)
/* fill pollfds */
QLIST_FOREACH(node, &ctx->aio_handlers, node) {
- if (!node->deleted && node->pfd.events) {
+ if (!node->deleted && node->pfd.events && !ctx->epoll_enabled) {
add_pollfd(node);
}
}
@@ -259,7 +262,15 @@ bool aio_poll(AioContext *ctx, bool blocking)
if (timeout) {
aio_context_release(ctx);
}
- ret = qemu_poll_ns((GPollFD *)pollfds, npfd, timeout);
+ if (aio_epoll_check_poll(ctx, pollfds, npfd, timeout)) {
+ npfd = 0;
+ AioHandler epoll_handler = { 0 };
+ aio_epoll_fill(ctx, &epoll_handler);
+ add_pollfd(&epoll_handler);
+ ret = aio_epoll(ctx, pollfds, npfd, timeout);
+ } else {
+ ret = qemu_poll_ns(pollfds, npfd, timeout);
+ }
if (blocking) {
atomic_sub(&ctx->notify_me, 2);
}
@@ -291,4 +302,5 @@ bool aio_poll(AioContext *ctx, bool blocking)
void aio_context_setup(AioContext *ctx, Error **errp)
{
+ aio_context_setup_epoll(ctx, errp);
}
diff --git a/include/block/aio-internal.h b/include/block/aio-internal.h
index f50a37c..330647c 100644
--- a/include/block/aio-internal.h
+++ b/include/block/aio-internal.h
@@ -29,4 +29,19 @@ struct AioHandler {
void aio_context_setup(AioContext *ctx, Error **errp);
+void aio_epoll_update(AioContext *ctx, AioHandler *node, bool is_new);
+
+int aio_epoll(AioContext *ctx, GPollFD *pfds, unsigned npfd, int64_t timeout);
+
+bool aio_epoll_check_poll(AioContext *ctx, GPollFD *pfds,
+ unsigned npfd, int64_t timeout);
+
+void aio_context_setup_epoll(AioContext *ctx, Error **errp);
+
+static inline void aio_epoll_fill(AioContext *ctx, AioHandler *node)
+{
+ node->pfd.fd = ctx->epollfd;
+ node->pfd.events = G_IO_IN | G_IO_OUT | G_IO_HUP | G_IO_ERR;
+}
+
#endif
diff --git a/include/block/aio.h b/include/block/aio.h
index 400b1b0..e4e0563 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -122,6 +122,11 @@ struct AioContext {
/* TimerLists for calling timers - one per clock type */
QEMUTimerListGroup tlg;
+
+ /* Fields below are used by aio-epoll. */
+ int epollfd;
+ bool epoll_enabled;
+ bool epoll_available;
};
/**
diff --git a/stubs/Makefile.objs b/stubs/Makefile.objs
index 85e4e81..c2c90a2 100644
--- a/stubs/Makefile.objs
+++ b/stubs/Makefile.objs
@@ -39,3 +39,4 @@ stub-obj-y += cpus.o
stub-obj-y += kvm.o
stub-obj-y += qmp_pc_dimm_device_list.o
stub-obj-y += target-monitor-defs.o
+stub-obj-y += aio-epoll.o
diff --git a/stubs/aio-epoll.c b/stubs/aio-epoll.c
new file mode 100644
index 0000000..b482897
--- /dev/null
+++ b/stubs/aio-epoll.c
@@ -0,0 +1,37 @@
+/*
+ * QEMU aio epoll stub functions
+ *
+ * Copyright Red Hat, Inc, 2015
+ *
+ * Authors:
+ * Fam Zheng <famz@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+
+#include "qemu-common.h"
+#include "block/aio-internal.h"
+
+void aio_epoll_update(AioContext *ctx, AioHandler *node, bool is_new)
+{
+}
+
+int aio_epoll(AioContext *ctx, GPollFD *pfds, unsigned npfd, int64_t timeout)
+{
+ assert(false);
+}
+
+bool aio_epoll_check_poll(AioContext *ctx, GPollFD *pfds,
+ unsigned npfd, int64_t timeout)
+{
+ return false;
+}
+
+void aio_context_setup_epoll(AioContext *ctx, Error **errp)
+{
+}
+
--
2.6.1
^ permalink raw reply related [flat|nested] 6+ messages in thread