* [Qemu-devel] [PATCH for-2.3 1/5] poll: Introduce QEMU Poll API
2014-11-25 8:07 [Qemu-devel] [PATCH for-2.3 0/5] aio: Support epoll by introducing qemu_poll abstraction Fam Zheng
@ 2014-11-25 8:07 ` Fam Zheng
2014-11-25 8:07 ` [Qemu-devel] [PATCH for-2.3 2/5] posix-aio: Use QEMU poll interface Fam Zheng
` (4 subsequent siblings)
5 siblings, 0 replies; 11+ messages in thread
From: Fam Zheng @ 2014-11-25 8:07 UTC (permalink / raw)
To: qemu-devel; +Cc: Kevin Wolf, Paolo Bonzini, Stefan Hajnoczi
This is an abstraction of the underlying poll implementation. A glib
(g_poll based) implementation is included.
Signed-off-by: Fam Zheng <famz@redhat.com>
---
Makefile.objs | 2 +-
include/qemu/poll.h | 40 +++++++++++++++
include/qemu/typedefs.h | 2 +
poll-glib.c | 130 ++++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 173 insertions(+), 1 deletion(-)
create mode 100644 include/qemu/poll.h
create mode 100644 poll-glib.c
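A rough usage sketch of the new interface (not part of the patch; fd,
handler_state, handle_event and running are placeholders for whatever the
caller uses, and only revents/opaque are read from the returned events):

    QEMUPoll *qpoll = qemu_poll_new();
    qemu_poll_add(qpoll, fd, G_IO_IN, handler_state);

    while (running) {
        /* timeout is in nanoseconds; 10 * SCALE_MS == 10 ms */
        int nready = qemu_poll(qpoll, 10 * SCALE_MS);
        if (nready > 0) {
            QEMUPollEvent events[16];
            int i, n = qemu_poll_get_events(qpoll, events, 16);
            for (i = 0; i < n; i++) {
                /* only revents and opaque are guaranteed valid here */
                handle_event(events[i].opaque, events[i].revents);
            }
        }
    }

    qemu_poll_del(qpoll, fd);
    qemu_poll_free(qpoll);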
diff --git a/Makefile.objs b/Makefile.objs
index 18fd35c..57184eb 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -9,7 +9,7 @@ util-obj-y = util/ qobject/ qapi/ qapi-types.o qapi-visit.o qapi-event.o
block-obj-y = async.o thread-pool.o
block-obj-y += nbd.o block.o blockjob.o
block-obj-y += main-loop.o iohandler.o qemu-timer.o
-block-obj-$(CONFIG_POSIX) += aio-posix.o
+block-obj-$(CONFIG_POSIX) += aio-posix.o poll-glib.o
block-obj-$(CONFIG_WIN32) += aio-win32.o
block-obj-y += block/
block-obj-y += qemu-io-cmds.o
diff --git a/include/qemu/poll.h b/include/qemu/poll.h
new file mode 100644
index 0000000..597975f
--- /dev/null
+++ b/include/qemu/poll.h
@@ -0,0 +1,40 @@
+#ifndef QEMU_POLL_H
+#define QEMU_POLL_H
+
+#include "qemu/typedefs.h"
+#include "qemu-common.h"
+
+struct QEMUPollEvent {
+ int fd;
+ int events;
+ int revents;
+ void *opaque;
+};
+
+QEMUPoll *qemu_poll_new(void);
+void qemu_poll_free(QEMUPoll *qpoll);
+int qemu_poll(QEMUPoll *qpoll, int64_t timeout_ns);
+
+/* Add an fd to poll. Return -EEXIST if fd already registered. */
+int qemu_poll_add(QEMUPoll *qpoll, int fd, int gio_events, void *opaque);
+
+/* Delete a previously added fd. Return -ENOENT if fd not registered. */
+int qemu_poll_del(QEMUPoll *qpoll, int fd);
+
+/**
+ * A shortcut to:
+ * 1) remove all the existing fds;
+ * 2) add all fds in @fds, setting each fd's opaque pointer to the GPollFD
+ * struct itself.
+ */
+int qemu_poll_set_fds(QEMUPoll *qpoll, GPollFD *fds, int nfds);
+
+/**
+ * Query the events from the last qemu_poll. ONLY revents and opaque are valid in
+ * @events after return.
+ */
+int qemu_poll_get_events(QEMUPoll *qpoll,
+ QEMUPollEvent *events,
+ int max_events);
+
+#endif
diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h
index 3475177..f9a5e60 100644
--- a/include/qemu/typedefs.h
+++ b/include/qemu/typedefs.h
@@ -7,6 +7,8 @@ typedef struct QEMUTimer QEMUTimer;
typedef struct QEMUTimerListGroup QEMUTimerListGroup;
typedef struct QEMUFile QEMUFile;
typedef struct QEMUBH QEMUBH;
+typedef struct QEMUPoll QEMUPoll;
+typedef struct QEMUPollEvent QEMUPollEvent;
typedef struct AioContext AioContext;
diff --git a/poll-glib.c b/poll-glib.c
new file mode 100644
index 0000000..64fde69
--- /dev/null
+++ b/poll-glib.c
@@ -0,0 +1,130 @@
+/*
+ * g_poll implementation for QEMU Poll API
+ *
+ * Copyright Red Hat, Inc. 2014
+ *
+ * Authors:
+ * Fam Zheng <famz@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu-common.h"
+#include "qemu/timer.h"
+#include "qemu/poll.h"
+
+struct QEMUPoll {
+ /* Array of GPollFD for g_poll() */
+ GArray *gpollfds;
+
+ /* Array of opaque pointers, should be in sync with gpollfds */
+ GArray *opaque;
+};
+
+QEMUPoll *qemu_poll_new(void)
+{
+ QEMUPoll *qpoll = g_new0(QEMUPoll, 1);
+ qpoll->gpollfds = g_array_new(FALSE, FALSE, sizeof(GPollFD));
+ qpoll->opaque = g_array_new(FALSE, FALSE, sizeof(void *));
+ return qpoll;
+}
+
+
+void qemu_poll_free(QEMUPoll *qpoll)
+{
+ g_array_unref(qpoll->gpollfds);
+ g_array_unref(qpoll->opaque);
+ g_free(qpoll);
+}
+
+int qemu_poll(QEMUPoll *qpoll, int64_t timeout_ns)
+{
+ int i;
+ for (i = 0; i < qpoll->gpollfds->len; i++) {
+ GPollFD *p = &g_array_index(qpoll->gpollfds, GPollFD, i);
+ p->revents = 0;
+ }
+ return g_poll((GPollFD *)qpoll->gpollfds->data,
+ qpoll->gpollfds->len,
+ qemu_timeout_ns_to_ms(timeout_ns));
+}
+
+/* Add an fd to poll. Return -EEXIST if fd already registered. */
+int qemu_poll_add(QEMUPoll *qpoll, int fd, int gio_events, void *opaque)
+{
+ int i;
+ GPollFD pfd = (GPollFD) {
+ .fd = fd,
+ .revents = 0,
+ .events = gio_events,
+ };
+
+ for (i = 0; i < qpoll->gpollfds->len; i++) {
+ GPollFD *p = &g_array_index(qpoll->gpollfds, GPollFD, i);
+ if (p->fd == fd) {
+ return -EEXIST;
+ }
+ }
+ g_array_append_val(qpoll->gpollfds, pfd);
+ g_array_append_val(qpoll->opaque, opaque);
+ assert(qpoll->gpollfds->len == qpoll->opaque->len);
+ return 0;
+}
+
+/* Delete a previously added fd. Return -ENOENT if fd not registered. */
+int qemu_poll_del(QEMUPoll *qpoll, int fd)
+{
+ int i;
+ int found = -1;
+
+ for (i = 0; i < qpoll->gpollfds->len; i++) {
+ GPollFD *p = &g_array_index(qpoll->gpollfds, GPollFD, i);
+ if (p->fd == fd) {
+ found = i;
+ break;
+ }
+ }
+ if (found >= 0) {
+ g_array_remove_index(qpoll->gpollfds, found);
+ g_array_remove_index(qpoll->opaque, found);
+ assert(qpoll->gpollfds->len == qpoll->opaque->len);
+ return 0;
+ } else {
+ return -ENOENT;
+ }
+}
+
+int qemu_poll_set_fds(QEMUPoll *qpoll, GPollFD *fds, int nfds)
+{
+ int i;
+ g_array_set_size(qpoll->gpollfds, 0);
+ g_array_set_size(qpoll->opaque, 0);
+ for (i = 0; i < nfds; i++) {
+ void *opaque = &fds[i];
+ g_array_append_val(qpoll->gpollfds, fds[i]);
+ g_array_append_val(qpoll->opaque, opaque);
+ assert(qpoll->gpollfds->len == qpoll->opaque->len);
+ }
+ return nfds;
+}
+
+int qemu_poll_get_events(QEMUPoll *qpoll,
+ QEMUPollEvent *events,
+ int max_events)
+{
+ int i;
+ int r = 0;
+ for (i = 0; i < qpoll->gpollfds->len && r < max_events; i++) {
+ GPollFD *fd = &g_array_index(qpoll->gpollfds, GPollFD, i);
+ if (fd->revents & fd->events) {
+ events[r].fd = fd->fd;
+ events[r].revents = fd->revents;
+ events[r].events = fd->events;
+ events[r].opaque = g_array_index(qpoll->opaque, void *, i);
+ r++;
+ }
+ }
+ return r;
+}
--
1.9.3
* [Qemu-devel] [PATCH for-2.3 2/5] posix-aio: Use QEMU poll interface
2014-11-25 8:07 [Qemu-devel] [PATCH for-2.3 0/5] aio: Support epoll by introducing qemu_poll abstraction Fam Zheng
2014-11-25 8:07 ` [Qemu-devel] [PATCH for-2.3 1/5] poll: Introduce QEMU Poll API Fam Zheng
@ 2014-11-25 8:07 ` Fam Zheng
2014-11-25 8:07 ` [Qemu-devel] [PATCH for-2.3 3/5] poll: Add epoll implementation for qemu_poll Fam Zheng
` (3 subsequent siblings)
5 siblings, 0 replies; 11+ messages in thread
From: Fam Zheng @ 2014-11-25 8:07 UTC (permalink / raw)
To: qemu-devel; +Cc: Kevin Wolf, Paolo Bonzini, Stefan Hajnoczi
The AIO handler list is only modified by aio_set_fd_handler, so we can
easily add and delete poll fds there. Initialize a QEMUPoll and keep track
of all the fds, so we don't need to rebuild a GPollFD array for g_poll in
aio_poll.
Signed-off-by: Fam Zheng <famz@redhat.com>
---
aio-posix.c | 52 +++++++++++++++++++++-------------------------------
async.c | 5 +++--
include/block/aio.h | 7 +++++--
3 files changed, 29 insertions(+), 35 deletions(-)
diff --git a/aio-posix.c b/aio-posix.c
index d3ac06e..32323b3 100644
--- a/aio-posix.c
+++ b/aio-posix.c
@@ -17,6 +17,7 @@
#include "block/block.h"
#include "qemu/queue.h"
#include "qemu/sockets.h"
+#include "qemu/poll.h"
struct AioHandler
{
@@ -55,6 +56,7 @@ void aio_set_fd_handler(AioContext *ctx,
/* Are we deleting the fd handler? */
if (!io_read && !io_write) {
if (node) {
+ qemu_poll_del(ctx->qpoll, fd);
g_source_remove_poll(&ctx->source, &node->pfd);
/* If the lock is held, just mark the node as deleted */
@@ -71,13 +73,15 @@ void aio_set_fd_handler(AioContext *ctx,
}
}
} else {
- if (node == NULL) {
+ if (node) {
+ /* Remove the old */
+ qemu_poll_del(ctx->qpoll, fd);
+ g_source_remove_poll(&ctx->source, &node->pfd);
+ } else {
/* Alloc and insert if it's not already there */
node = g_malloc0(sizeof(AioHandler));
node->pfd.fd = fd;
QLIST_INSERT_HEAD(&ctx->aio_handlers, node, node);
-
- g_source_add_poll(&ctx->source, &node->pfd);
}
/* Update handler with latest information */
node->io_read = io_read;
@@ -87,6 +91,8 @@ void aio_set_fd_handler(AioContext *ctx,
node->pfd.events = (io_read ? G_IO_IN | G_IO_HUP | G_IO_ERR : 0);
node->pfd.events |= (io_write ? G_IO_OUT | G_IO_ERR : 0);
+ qemu_poll_add(ctx->qpoll, fd, node->pfd.events, node);
+ g_source_add_poll(&ctx->source, &node->pfd);
}
aio_notify(ctx);
@@ -191,6 +197,7 @@ bool aio_poll(AioContext *ctx, bool blocking)
AioHandler *node;
bool was_dispatching;
int ret;
+ int i;
bool progress;
was_dispatching = ctx->dispatching;
@@ -208,38 +215,21 @@ bool aio_poll(AioContext *ctx, bool blocking)
*/
aio_set_dispatching(ctx, !blocking);
- ctx->walking_handlers++;
-
- g_array_set_size(ctx->pollfds, 0);
-
- /* fill pollfds */
- QLIST_FOREACH(node, &ctx->aio_handlers, node) {
- node->pollfds_idx = -1;
- if (!node->deleted && node->pfd.events) {
- GPollFD pfd = {
- .fd = node->pfd.fd,
- .events = node->pfd.events,
- };
- node->pollfds_idx = ctx->pollfds->len;
- g_array_append_val(ctx->pollfds, pfd);
- }
- }
-
- ctx->walking_handlers--;
-
/* wait until next event */
- ret = qemu_poll_ns((GPollFD *)ctx->pollfds->data,
- ctx->pollfds->len,
- blocking ? aio_compute_timeout(ctx) : 0);
+ ret = qemu_poll(ctx->qpoll, blocking ? aio_compute_timeout(ctx) : 0);
/* if we have any readable fds, dispatch event */
if (ret > 0) {
- QLIST_FOREACH(node, &ctx->aio_handlers, node) {
- if (node->pollfds_idx != -1) {
- GPollFD *pfd = &g_array_index(ctx->pollfds, GPollFD,
- node->pollfds_idx);
- node->pfd.revents = pfd->revents;
- }
+ int r;
+ g_array_set_size(ctx->events, ret);
+ r = qemu_poll_get_events(ctx->qpoll,
+ (QEMUPollEvent *)ctx->events->data,
+ ret);
+ assert(r == ret);
+ for (i = 0; i < r; i++) {
+ QEMUPollEvent *e = &g_array_index(ctx->events, QEMUPollEvent, i);
+ node = e->opaque;
+ node->pfd.revents = e->revents;
}
}
diff --git a/async.c b/async.c
index 6e1b282..443a674 100644
--- a/async.c
+++ b/async.c
@@ -27,6 +27,7 @@
#include "block/thread-pool.h"
#include "qemu/main-loop.h"
#include "qemu/atomic.h"
+#include "qemu/poll.h"
/***********************************************************/
/* bottom halves (can be seen as timers which expire ASAP) */
@@ -232,7 +233,6 @@ aio_ctx_finalize(GSource *source)
event_notifier_cleanup(&ctx->notifier);
rfifolock_destroy(&ctx->lock);
qemu_mutex_destroy(&ctx->bh_lock);
- g_array_free(ctx->pollfds, TRUE);
timerlistgroup_deinit(&ctx->tlg);
}
@@ -300,10 +300,11 @@ AioContext *aio_context_new(Error **errp)
error_setg_errno(errp, -ret, "Failed to initialize event notifier");
return NULL;
}
+ ctx->qpoll = qemu_poll_new();
+ ctx->events = g_array_new(FALSE, FALSE, sizeof(QEMUPollEvent));
aio_set_event_notifier(ctx, &ctx->notifier,
(EventNotifierHandler *)
event_notifier_test_and_clear);
- ctx->pollfds = g_array_new(FALSE, FALSE, sizeof(GPollFD));
ctx->thread_pool = NULL;
qemu_mutex_init(&ctx->bh_lock);
rfifolock_init(&ctx->lock, aio_rfifolock_cb, ctx);
diff --git a/include/block/aio.h b/include/block/aio.h
index 6bf0e04..c36b8c1 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -82,8 +82,11 @@ struct AioContext {
/* Used for aio_notify. */
EventNotifier notifier;
- /* GPollFDs for aio_poll() */
- GArray *pollfds;
+ /* qemu_poll context */
+ QEMUPoll *qpoll;
+
+ /* QEMUPollEvents for qemu_poll_get_events() */
+ GArray *events;
/* Thread pool for performing work and receiving completion callbacks */
struct ThreadPool *thread_pool;
--
1.9.3
* [Qemu-devel] [PATCH for-2.3 3/5] poll: Add epoll implementation for qemu_poll
2014-11-25 8:07 [Qemu-devel] [PATCH for-2.3 0/5] aio: Support epoll by introducing qemu_poll abstraction Fam Zheng
2014-11-25 8:07 ` [Qemu-devel] [PATCH for-2.3 1/5] poll: Introduce QEMU Poll API Fam Zheng
2014-11-25 8:07 ` [Qemu-devel] [PATCH for-2.3 2/5] posix-aio: Use QEMU poll interface Fam Zheng
@ 2014-11-25 8:07 ` Fam Zheng
2014-11-25 13:38 ` Stefan Hajnoczi
2014-11-25 8:07 ` [Qemu-devel] [PATCH for-2.3 4/5] main-loop: Replace qemu_poll_ns with qemu_poll Fam Zheng
` (2 subsequent siblings)
5 siblings, 1 reply; 11+ messages in thread
From: Fam Zheng @ 2014-11-25 8:07 UTC (permalink / raw)
To: qemu-devel; +Cc: Kevin Wolf, Paolo Bonzini, Stefan Hajnoczi
This implements qemu_poll with epoll. The only complex part is
qemu_poll_set_fds, which syncs the epoll fd via epoll_ctl by computing the
symmetric difference of the previous and new fd sets: for example, if the
old set is {A, B, C} and the new set is {B, C, D}, then D is added (and B
or C is re-added if its event mask changed), while A, which no longer
appears, is deleted.
Signed-off-by: Fam Zheng <famz@redhat.com>
---
Makefile.objs | 4 +-
poll-linux.c | 216 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 219 insertions(+), 1 deletion(-)
create mode 100644 poll-linux.c
diff --git a/Makefile.objs b/Makefile.objs
index 57184eb..ea86cb8 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -9,7 +9,9 @@ util-obj-y = util/ qobject/ qapi/ qapi-types.o qapi-visit.o qapi-event.o
block-obj-y = async.o thread-pool.o
block-obj-y += nbd.o block.o blockjob.o
block-obj-y += main-loop.o iohandler.o qemu-timer.o
-block-obj-$(CONFIG_POSIX) += aio-posix.o poll-glib.o
+block-obj-$(CONFIG_POSIX) += aio-posix.o
+block-obj-$(CONFIG_EPOLL) += poll-linux.o
+block-obj-$(if $(CONFIG_EPOLL),n,$(CONFIG_POSIX)) += poll-glib.o
block-obj-$(CONFIG_WIN32) += aio-win32.o
block-obj-y += block/
block-obj-y += qemu-io-cmds.o
diff --git a/poll-linux.c b/poll-linux.c
new file mode 100644
index 0000000..a5fc201
--- /dev/null
+++ b/poll-linux.c
@@ -0,0 +1,216 @@
+/*
+ * epoll implementation for QEMU Poll API
+ *
+ * Copyright Red Hat, Inc. 2014
+ *
+ * Authors:
+ * Fam Zheng <famz@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include <sys/epoll.h>
+#include <glib.h>
+#include "qemu-common.h"
+#include "qemu/timer.h"
+#include "qemu/poll.h"
+
+struct QEMUPoll {
+ int epollfd;
+ struct epoll_event *events;
+ int max_events;
+ int nevents;
+ GHashTable *fds;
+};
+
+QEMUPoll *qemu_poll_new(void)
+{
+ int epollfd;
+ QEMUPoll *qpoll = g_new0(QEMUPoll, 1);
+ epollfd = epoll_create1(EPOLL_CLOEXEC);
+ if (epollfd < 0) {
+ perror("epoll_create1:");
+ abort();
+ }
+ qpoll->epollfd = epollfd;
+ qpoll->max_events = 1;
+ g_free(qpoll->events);
+ qpoll->events = g_new(struct epoll_event, 1);
+ qpoll->fds = g_hash_table_new_full(g_int_hash, g_int_equal,
+ NULL, g_free);
+ return qpoll;
+}
+
+void qemu_poll_free(QEMUPoll *qpoll)
+{
+ g_free(qpoll->events);
+ g_hash_table_destroy(qpoll->fds);
+ close(qpoll->epollfd);
+ g_free(qpoll);
+}
+
+int qemu_poll(QEMUPoll *qpoll, int64_t timeout_ns)
+{
+ int r;
+ r = epoll_wait(qpoll->epollfd,
+ qpoll->events,
+ qpoll->max_events,
+ qemu_timeout_ns_to_ms(timeout_ns));
+ qpoll->nevents = r;
+ return r;
+}
+
+static inline uint32_t epoll_event_from_gio_events(int gio_events)
+{
+
+ return (gio_events & G_IO_IN ? EPOLLIN : 0) |
+ (gio_events & G_IO_OUT ? EPOLLOUT : 0) |
+ (gio_events & G_IO_ERR ? EPOLLERR : 0) |
+ (gio_events & G_IO_HUP ? EPOLLHUP : 0);
+}
+
+
+/* Add an fd to poll. Return -EEXIST if fd already registered. */
+int qemu_poll_add(QEMUPoll *qpoll, int fd, int gio_events, void *opaque)
+{
+ int ret;
+ struct epoll_event ev;
+ QEMUPollEvent *e;
+
+ ev.events = epoll_event_from_gio_events(gio_events);
+ ev.data.fd = fd;
+ ret = epoll_ctl(qpoll->epollfd, EPOLL_CTL_ADD, fd, &ev);
+ if (ret) {
+ ret = -errno;
+ return ret;
+ }
+ qpoll->max_events++;
+ qpoll->events = g_renew(struct epoll_event,
+ qpoll->events,
+ qpoll->max_events);
+ /* Shouldn't get here if the fd is already added, since epoll_ctl would
+ * have failed with EEXIST above; assert to be sure */
+ assert(g_hash_table_lookup(qpoll->fds, &fd) == NULL);
+ e = g_new0(QEMUPollEvent, 1);
+ e->fd = fd;
+ e->events = gio_events;
+ e->opaque = opaque;
+ g_hash_table_insert(qpoll->fds, &e->fd, e);
+ return ret;
+}
+
+/* Delete a previously added fd. Return -ENOENT if fd not registered. */
+int qemu_poll_del(QEMUPoll *qpoll, int fd)
+{
+ int ret;
+
+ if (!g_hash_table_lookup(qpoll->fds, &fd)) {
+ ret = -ENOENT;
+ goto out;
+ }
+ ret = epoll_ctl(qpoll->epollfd, EPOLL_CTL_DEL, fd, NULL);
+ if (ret) {
+ ret = -errno;
+ goto out;
+ }
+ qpoll->max_events--;
+ qpoll->events = g_renew(struct epoll_event,
+ qpoll->events,
+ qpoll->max_events);
+out:
+ g_hash_table_remove(qpoll->fds, &fd);
+ return ret;
+}
+
+static void qemu_poll_copy_fd(gpointer key, gpointer value, gpointer user_data)
+{
+ GHashTable *dst = user_data;
+ QEMUPollEvent *event, *copy;
+
+ event = value;
+ copy = g_new(QEMUPollEvent, 1);
+ *copy = *event;
+ g_hash_table_insert(dst, &copy->fd, copy);
+}
+
+static void qemu_poll_del_fd(gpointer key, gpointer value, gpointer user_data)
+{
+ QEMUPoll *qpoll = user_data;
+ QEMUPollEvent *event = value;
+
+ qemu_poll_del(qpoll, event->fd);
+}
+
+int qemu_poll_set_fds(QEMUPoll *qpoll, GPollFD *fds, int nfds)
+{
+ int i;
+ int updated = 0;
+ int ret = nfds;
+ int old_size = g_hash_table_size(qpoll->fds);
+
+ for (i = 0; i < nfds; i++) {
+ int r;
+ GPollFD *fd = &fds[i];
+ QEMUPollEvent *e = g_hash_table_lookup(qpoll->fds, &fd->fd);
+ if (e) {
+ updated++;
+ assert(e->fd == fd->fd);
+ if (e->events == fd->events) {
+ e->opaque = fd;
+ continue;
+ }
+ r = qemu_poll_del(qpoll, fd->fd);
+ if (r < 0) {
+ ret = r;
+ break;
+ }
+ }
+ r = qemu_poll_add(qpoll, fd->fd, fd->events, fd);
+ if (r < 0) {
+ ret = r;
+ break;
+ }
+ }
+
+ if (updated < old_size) {
+ GHashTable *fds_copy;
+
+ fds_copy = g_hash_table_new_full(g_int_hash, g_int_equal, NULL, g_free);
+ g_hash_table_foreach(qpoll->fds, qemu_poll_copy_fd, fds_copy);
+
+ /* Some fds need to be removed, as they are not seen in new fds */
+ for (i = 0; i < nfds; i++) {
+ GPollFD *fd = &fds[i];
+ g_hash_table_remove(fds_copy, &fd->fd);
+ }
+
+ g_hash_table_foreach(fds_copy, qemu_poll_del_fd, qpoll);
+ g_hash_table_destroy(fds_copy);
+ }
+ return ret;
+}
+
+int qemu_poll_get_events(QEMUPoll *qpoll,
+ QEMUPollEvent *events,
+ int max_events)
+{
+ int i;
+ QEMUPollEvent *p;
+ struct epoll_event *ev;
+ int fd;
+
+ for (i = 0; i < MIN(qpoll->nevents, max_events); i++) {
+ ev = &qpoll->events[i];
+ fd = ev->data.fd;
+ p = g_hash_table_lookup(qpoll->fds, &fd);
+ assert(p);
+
+ events[i].revents = ev->events;
+ events[i].opaque = p->opaque;
+ events[i].fd = fd;
+ events[i].events = p->events;
+ }
+ return i;
+}
--
1.9.3
* Re: [Qemu-devel] [PATCH for-2.3 3/5] poll: Add epoll implementation for qemu_poll
2014-11-25 8:07 ` [Qemu-devel] [PATCH for-2.3 3/5] poll: Add epoll implementation for qemu_poll Fam Zheng
@ 2014-11-25 13:38 ` Stefan Hajnoczi
2014-11-26 1:48 ` Fam Zheng
0 siblings, 1 reply; 11+ messages in thread
From: Stefan Hajnoczi @ 2014-11-25 13:38 UTC (permalink / raw)
To: Fam Zheng; +Cc: Kevin Wolf, Paolo Bonzini, qemu-devel
On Tue, Nov 25, 2014 at 04:07:57PM +0800, Fam Zheng wrote:
> +QEMUPoll *qemu_poll_new(void)
> +{
> + int epollfd;
> + QEMUPoll *qpoll = g_new0(QEMUPoll, 1);
> + epollfd = epoll_create1(EPOLL_CLOEXEC);
> + if (epollfd < 0) {
> + perror("epoll_create1:");
> + abort();
> + }
> + qpoll->epollfd = epollfd;
> + qpoll->max_events = 1;
> + g_free(qpoll->events);
What is the point of this g_free() call?
* Re: [Qemu-devel] [PATCH for-2.3 3/5] poll: Add epoll implementation for qemu_poll
2014-11-25 13:38 ` Stefan Hajnoczi
@ 2014-11-26 1:48 ` Fam Zheng
0 siblings, 0 replies; 11+ messages in thread
From: Fam Zheng @ 2014-11-26 1:48 UTC (permalink / raw)
To: Stefan Hajnoczi; +Cc: Kevin Wolf, Paolo Bonzini, qemu-devel
On Tue, 11/25 13:38, Stefan Hajnoczi wrote:
> On Tue, Nov 25, 2014 at 04:07:57PM +0800, Fam Zheng wrote:
> > +QEMUPoll *qemu_poll_new(void)
> > +{
> > + int epollfd;
> > + QEMUPoll *qpoll = g_new0(QEMUPoll, 1);
> > + epollfd = epoll_create1(EPOLL_CLOEXEC);
> > + if (epollfd < 0) {
> > + perror("epoll_create1:");
> > + abort();
> > + }
> > + qpoll->epollfd = epollfd;
> > + qpoll->max_events = 1;
> > + g_free(qpoll->events);
>
> What is the point of this g_free() call?
Bad rebase, I guess. Dropping.
Fam
* [Qemu-devel] [PATCH for-2.3 4/5] main-loop: Replace qemu_poll_ns with qemu_poll
2014-11-25 8:07 [Qemu-devel] [PATCH for-2.3 0/5] aio: Support epoll by introducing qemu_poll abstraction Fam Zheng
` (2 preceding siblings ...)
2014-11-25 8:07 ` [Qemu-devel] [PATCH for-2.3 3/5] poll: Add epoll implementation for qemu_poll Fam Zheng
@ 2014-11-25 8:07 ` Fam Zheng
2014-11-25 8:07 ` [Qemu-devel] [PATCH for-2.3 5/5] tests: Add test case for qemu_poll Fam Zheng
2014-11-25 13:52 ` [Qemu-devel] [PATCH for-2.3 0/5] aio: Support epoll by introducing qemu_poll abstraction Stefan Hajnoczi
5 siblings, 0 replies; 11+ messages in thread
From: Fam Zheng @ 2014-11-25 8:07 UTC (permalink / raw)
To: qemu-devel; +Cc: Kevin Wolf, Paolo Bonzini, Stefan Hajnoczi
qemu_poll_set_fds() followed by qemu_poll() does the same thing as
qemu_poll_ns(), and when epoll is available, it is faster.
Signed-off-by: Fam Zheng <famz@redhat.com>
---
include/qemu/timer.h | 13 -------------
main-loop.c | 35 ++++++++++++++++++++++++++++++-----
qemu-timer.c | 21 ---------------------
3 files changed, 30 insertions(+), 39 deletions(-)
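The conversion in both call sites below follows the same sequence, roughly
equivalent to this hypothetical helper (illustration only, not code from the
patch):

    static int poll_fds_once(QEMUPoll *qpoll, GPollFD *fds, int nfds,
                             int64_t timeout_ns)
    {
        QEMUPollEvent events[1024];
        int ret, i;

        qemu_poll_set_fds(qpoll, fds, nfds);
        ret = qemu_poll(qpoll, timeout_ns);
        if (ret > 0) {
            ret = MIN(ret, (int)(sizeof(events) / sizeof(events[0])));
            ret = qemu_poll_get_events(qpoll, events, ret);
            for (i = 0; i < ret; i++) {
                /* qemu_poll_set_fds() stored the GPollFD itself in opaque */
                GPollFD *fd = events[i].opaque;
                fd->revents = events[i].revents;
            }
        }
        return ret;
    }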
diff --git a/include/qemu/timer.h b/include/qemu/timer.h
index 5f5210d..cd3371d 100644
--- a/include/qemu/timer.h
+++ b/include/qemu/timer.h
@@ -646,19 +646,6 @@ void timer_put(QEMUFile *f, QEMUTimer *ts);
int qemu_timeout_ns_to_ms(int64_t ns);
/**
- * qemu_poll_ns:
- * @fds: Array of file descriptors
- * @nfds: number of file descriptors
- * @timeout: timeout in nanoseconds
- *
- * Perform a poll like g_poll but with a timeout in nanoseconds.
- * See g_poll documentation for further details.
- *
- * Returns: number of fds ready
- */
-int qemu_poll_ns(GPollFD *fds, guint nfds, int64_t timeout);
-
-/**
* qemu_soonest_timeout:
* @timeout1: first timeout in nanoseconds (or -1 for infinite)
* @timeout2: second timeout in nanoseconds (or -1 for infinite)
diff --git a/main-loop.c b/main-loop.c
index 981bcb5..b70f929 100644
--- a/main-loop.c
+++ b/main-loop.c
@@ -29,6 +29,7 @@
#include "slirp/libslirp.h"
#include "qemu/main-loop.h"
#include "block/aio.h"
+#include "qemu/poll.h"
#ifndef _WIN32
@@ -130,6 +131,7 @@ void qemu_notify_event(void)
}
static GArray *gpollfds;
+static QEMUPoll *qpoll;
int qemu_init_main_loop(Error **errp)
{
@@ -150,6 +152,7 @@ int qemu_init_main_loop(Error **errp)
return -EMFILE;
}
gpollfds = g_array_new(FALSE, FALSE, sizeof(GPollFD));
+ qpoll = qemu_poll_new();
src = aio_get_g_source(qemu_aio_context);
g_source_attach(src, NULL);
g_source_unref(src);
@@ -207,6 +210,8 @@ static int os_host_main_loop_wait(int64_t timeout)
{
int ret;
static int spin_counter;
+ QEMUPollEvent events[1024];
+ int r, i;
glib_pollfds_fill(&timeout);
@@ -236,7 +241,17 @@ static int os_host_main_loop_wait(int64_t timeout)
spin_counter++;
}
- ret = qemu_poll_ns((GPollFD *)gpollfds->data, gpollfds->len, timeout);
+ qemu_poll_set_fds(qpoll, (GPollFD *)gpollfds->data, gpollfds->len);
+ ret = qemu_poll(qpoll, timeout);
+ if (ret > 0) {
+ ret = MIN(ret, sizeof(events) / sizeof(QEMUPollEvent));
+ r = qemu_poll_get_events(qpoll, events, ret);
+ for (i = 0; i < r; i++) {
+ GPollFD *fd = events[i].opaque;
+ fd->revents = events[i].revents;
+ }
+ ret = r;
+ }
if (timeout) {
qemu_mutex_lock_iothread();
@@ -389,9 +404,11 @@ static int os_host_main_loop_wait(int64_t timeout)
{
GMainContext *context = g_main_context_default();
GPollFD poll_fds[1024 * 2]; /* this is probably overkill */
+ QEMUPollEvent poll_events[1024 * 2];
int select_ret = 0;
- int g_poll_ret, ret, i, n_poll_fds;
+ int nevents, ret, r, i, n_poll_fds;
PollingEntry *pe;
+ QEMUPollEvent *events;
WaitObjects *w = &wait_objects;
gint poll_timeout;
int64_t poll_timeout_ns;
@@ -441,10 +458,18 @@ static int os_host_main_loop_wait(int64_t timeout)
poll_timeout_ns = qemu_soonest_timeout(poll_timeout_ns, timeout);
qemu_mutex_unlock_iothread();
- g_poll_ret = qemu_poll_ns(poll_fds, n_poll_fds + w->num, poll_timeout_ns);
+
+ qemu_poll_set_fds(qpoll, (GPollFD *)poll_fds, n_poll_fds + w->num);
+ nevents = qemu_poll(qpoll, poll_timeout_ns);
qemu_mutex_lock_iothread();
- if (g_poll_ret > 0) {
+ if (nevents > 0) {
+ r = qemu_poll_get_events(qpoll, poll_events, nevents);
+ for (i = 0; i < r; i++) {
+ GPollFD *fd = poll_events[i].opaque;
+ fd->revents = poll_events[i].revents;
+ }
+
for (i = 0; i < w->num; i++) {
w->revents[i] = poll_fds[n_poll_fds + i].revents;
}
@@ -459,7 +484,7 @@ static int os_host_main_loop_wait(int64_t timeout)
g_main_context_dispatch(context);
}
- return select_ret || g_poll_ret;
+ return select_ret || nevents;
}
#endif
diff --git a/qemu-timer.c b/qemu-timer.c
index 00a5d35..4500235 100644
--- a/qemu-timer.c
+++ b/qemu-timer.c
@@ -303,27 +303,6 @@ int qemu_timeout_ns_to_ms(int64_t ns)
return (int) ms;
}
-
-/* qemu implementation of g_poll which uses a nanosecond timeout but is
- * otherwise identical to g_poll
- */
-int qemu_poll_ns(GPollFD *fds, guint nfds, int64_t timeout)
-{
-#ifdef CONFIG_PPOLL
- if (timeout < 0) {
- return ppoll((struct pollfd *)fds, nfds, NULL, NULL);
- } else {
- struct timespec ts;
- ts.tv_sec = timeout / 1000000000LL;
- ts.tv_nsec = timeout % 1000000000LL;
- return ppoll((struct pollfd *)fds, nfds, &ts, NULL);
- }
-#else
- return g_poll(fds, nfds, qemu_timeout_ns_to_ms(timeout));
-#endif
-}
-
-
void timer_init(QEMUTimer *ts,
QEMUTimerList *timer_list, int scale,
QEMUTimerCB *cb, void *opaque)
--
1.9.3
* [Qemu-devel] [PATCH for-2.3 5/5] tests: Add test case for qemu_poll
2014-11-25 8:07 [Qemu-devel] [PATCH for-2.3 0/5] aio: Support epoll by introducing qemu_poll abstraction Fam Zheng
` (3 preceding siblings ...)
2014-11-25 8:07 ` [Qemu-devel] [PATCH for-2.3 4/5] main-loop: Replace qemu_poll_ns with qemu_poll Fam Zheng
@ 2014-11-25 8:07 ` Fam Zheng
2014-11-25 13:52 ` [Qemu-devel] [PATCH for-2.3 0/5] aio: Support epoll by introducing qemu_poll abstraction Stefan Hajnoczi
5 siblings, 0 replies; 11+ messages in thread
From: Fam Zheng @ 2014-11-25 8:07 UTC (permalink / raw)
To: qemu-devel; +Cc: Kevin Wolf, Paolo Bonzini, Stefan Hajnoczi
Signed-off-by: Fam Zheng <famz@redhat.com>
---
tests/Makefile | 2 +
tests/test-poll.c | 272 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 274 insertions(+)
create mode 100644 tests/test-poll.c
diff --git a/tests/Makefile b/tests/Makefile
index 16f0e4c..79f399d 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -41,6 +41,7 @@ check-unit-$(CONFIG_POSIX) += tests/test-rfifolock$(EXESUF)
check-unit-y += tests/test-throttle$(EXESUF)
gcov-files-test-aio-$(CONFIG_WIN32) = aio-win32.c
gcov-files-test-aio-$(CONFIG_POSIX) = aio-posix.c
+check-unit-y += tests/test-poll$(EXESUF)
check-unit-y += tests/test-thread-pool$(EXESUF)
gcov-files-test-thread-pool-y = thread-pool.c
gcov-files-test-hbitmap-y = util/hbitmap.c
@@ -241,6 +242,7 @@ tests/check-qjson$(EXESUF): tests/check-qjson.o libqemuutil.a libqemustub.a
tests/check-qom-interface$(EXESUF): tests/check-qom-interface.o $(qom-core-obj) libqemuutil.a libqemustub.a
tests/test-coroutine$(EXESUF): tests/test-coroutine.o $(block-obj-y) libqemuutil.a libqemustub.a
tests/test-aio$(EXESUF): tests/test-aio.o $(block-obj-y) libqemuutil.a libqemustub.a
+tests/test-poll$(EXESUF): tests/test-poll.o $(block-obj-y) libqemuutil.a libqemustub.a
tests/test-rfifolock$(EXESUF): tests/test-rfifolock.o libqemuutil.a libqemustub.a
tests/test-throttle$(EXESUF): tests/test-throttle.o $(block-obj-y) libqemuutil.a libqemustub.a
tests/test-thread-pool$(EXESUF): tests/test-thread-pool.o $(block-obj-y) libqemuutil.a libqemustub.a
diff --git a/tests/test-poll.c b/tests/test-poll.c
new file mode 100644
index 0000000..75074d8
--- /dev/null
+++ b/tests/test-poll.c
@@ -0,0 +1,272 @@
+/*
+ * QTest testcase for QEMU poll
+ *
+ * Copyright Red Hat, Inc. 2014
+ *
+ * Authors:
+ * Fam Zheng <famz@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include <glib.h>
+#include "qemu/poll.h"
+#include "qemu/event_notifier.h"
+
+static EventNotifier *poll_add_one(QEMUPoll *qpoll)
+{
+ EventNotifier *e = g_new(EventNotifier, 1);
+
+ event_notifier_init(e, false);
+ qemu_poll_add(qpoll, event_notifier_get_fd(e),
+ G_IO_IN,
+ NULL);
+ return e;
+}
+
+static int poll_del_one(QEMUPoll *qpoll, EventNotifier *e)
+{
+ int r = qemu_poll_del(qpoll, event_notifier_get_fd(e));
+ event_notifier_cleanup(e);
+ g_free(e);
+ return r;
+}
+
+static void test_poll_single(void)
+{
+ QEMUPoll *qpoll;
+ EventNotifier *e;
+ int r, i;
+
+ qpoll = qemu_poll_new();
+ g_assert(qpoll);
+
+ e = poll_add_one(qpoll);
+
+ /* Write some data and poll */
+ event_notifier_set(e);
+ r = qemu_poll(qpoll, 0);
+ g_assert_cmpint(r, ==, 1);
+
+ /* Clear data and poll */
+ r = event_notifier_test_and_clear(e);
+ g_assert(r);
+ r = qemu_poll(qpoll, 0);
+ g_assert_cmpint(r, ==, 0);
+
+ /* Write a lot of data and poll */
+ for (i = 0; i < 10000; i++) {
+ event_notifier_set(e);
+ }
+ r = qemu_poll(qpoll, 0);
+ g_assert_cmpint(r, ==, 1);
+
+ /* Clear data and poll again */
+ r = event_notifier_test_and_clear(e);
+ g_assert(r);
+ r = qemu_poll(qpoll, 0);
+ g_assert_cmpint(r, ==, 0);
+
+ /* Clean up */
+ poll_del_one(qpoll, e);
+ qemu_poll_free(qpoll);
+}
+
+static void test_poll_multiple(void)
+{
+ QEMUPoll *qpoll;
+ const int N = 32;
+ EventNotifier *e[N];
+ QEMUPollEvent events[N];
+ int r, s, i;
+
+ qpoll = qemu_poll_new();
+ g_assert(qpoll);
+
+ for (i = 0; i < N; i++) {
+ e[i] = poll_add_one(qpoll);
+ }
+
+ /* Write some data for each and poll */
+ for (i = 0; i < N; i++) {
+ event_notifier_set(e[i]);
+ }
+ r = qemu_poll(qpoll, 0);
+ g_assert_cmpint(r, ==, N);
+
+ /* Clear data and poll */
+ for (i = 0; i < N; i++) {
+ r = event_notifier_test_and_clear(e[i]);
+ g_assert(r);
+ }
+ r = qemu_poll(qpoll, 0);
+ g_assert_cmpint(r, ==, 0);
+
+ /* Write some data for first half and poll */
+ for (i = 0; i < N / 2; i++) {
+ event_notifier_set(e[i]);
+ }
+ r = qemu_poll(qpoll, 0);
+ g_assert_cmpint(r, ==, N / 2);
+ s = qemu_poll_get_events(qpoll, events, N);
+ g_assert_cmpint(s, ==, r);
+ for (i = 0; i < s; i++) {
+ g_assert(events[i].revents & G_IO_IN);
+ }
+
+ /* Clean up */
+ for (i = 0; i < N; i++) {
+ poll_del_one(qpoll, e[i]);
+ }
+ qemu_poll_free(qpoll);
+}
+
+static void test_poll_add_del(void)
+{
+ QEMUPoll *qpoll;
+ EventNotifier *e1, *e2;
+ QEMUPollEvent events[2];
+ int r;
+
+ qpoll = qemu_poll_new();
+ g_assert(qpoll);
+
+ e1 = poll_add_one(qpoll);
+ e2 = poll_add_one(qpoll);
+
+ /* Write some data for each and poll */
+ event_notifier_set(e1);
+ event_notifier_set(e2);
+ r = qemu_poll(qpoll, 0);
+ g_assert_cmpint(r, ==, 2);
+
+ /* Clear e1 and poll */
+ r = event_notifier_test_and_clear(e1);
+ r = qemu_poll(qpoll, 0);
+ g_assert_cmpint(r, ==, 1);
+ r = qemu_poll_get_events(qpoll, events, 2);
+ g_assert_cmpint(r, ==, 1);
+ g_assert_cmpint(events[0].fd, ==, event_notifier_get_fd(e2));
+
+ /* Write to both but remove one and poll the other */
+ event_notifier_set(e1);
+ event_notifier_set(e2);
+ qemu_poll_del(qpoll, event_notifier_get_fd(e2));
+ r = qemu_poll(qpoll, 0);
+ g_assert_cmpint(r, ==, 1);
+ r = qemu_poll_get_events(qpoll, events, 2);
+ g_assert_cmpint(r, ==, 1);
+ g_assert_cmpint(events[0].fd, ==, event_notifier_get_fd(e1));
+
+ r = qemu_poll_del(qpoll, event_notifier_get_fd(e2));
+ g_assert_cmpint(r, ==, -ENOENT);
+
+ /* Add it back and poll both */
+ qemu_poll_add(qpoll, event_notifier_get_fd(e2),
+ G_IO_IN, NULL);
+ r = qemu_poll(qpoll, 0);
+ g_assert_cmpint(r, ==, 2);
+
+ event_notifier_test_and_clear(e1);
+ event_notifier_test_and_clear(e2);
+
+ r = qemu_poll_add(qpoll, event_notifier_get_fd(e2),
+ G_IO_IN, NULL);
+ g_assert_cmpint(r, ==, -EEXIST);
+
+ /* Clean up */
+ poll_del_one(qpoll, e1);
+ poll_del_one(qpoll, e2);
+ qemu_poll_free(qpoll);
+}
+
+static void gpollfd_fill(GPollFD *fds, int start, EventNotifier **e, int n)
+{
+ int i;
+ for (i = 0; i < n; i++) {
+ fds[i + start] = (GPollFD) {
+ .fd = event_notifier_get_fd(e[i]),
+ .events = G_IO_IN,
+ };
+ }
+}
+
+static void test_poll_set_fds(void)
+{
+ QEMUPoll *qpoll;
+ const int M = 20;
+ const int N = 10;
+ EventNotifier *em[M], *en[N];
+ GPollFD fds[M + N];
+ int i, r;
+
+ qpoll = qemu_poll_new();
+ g_assert(qpoll);
+
+ for (i = 0; i < N; i++) {
+ en[i] = poll_add_one(qpoll);
+ }
+
+ for (i = 0; i < M; i++) {
+ em[i] = poll_add_one(qpoll);
+ }
+
+ gpollfd_fill(fds, 0, em, M);
+ gpollfd_fill(fds, M, en, N);
+
+ /* Set N */
+ for (i = 0; i < N; i++) {
+ event_notifier_set(en[i]);
+ }
+
+ /* Poll M + N should get N */
+ r = qemu_poll_set_fds(qpoll, fds, M + N);
+ g_assert_cmpint(r, ==, M + N);
+ r = qemu_poll(qpoll, 0);
+ g_assert_cmpint(r, ==, N);
+
+ /* Poll M should get 0 */
+ r = qemu_poll_set_fds(qpoll, fds, M);
+ g_assert_cmpint(r, ==, M);
+ r = qemu_poll(qpoll, 0);
+ g_assert_cmpint(r, ==, 0);
+
+ /* Poll N should get N */
+ r = qemu_poll_set_fds(qpoll, fds + M, N);
+ g_assert_cmpint(r, ==, N);
+ r = qemu_poll(qpoll, 0);
+ g_assert_cmpint(r, ==, N);
+
+ /* Poll M + N / 2 should get N / 2 */
+ r = qemu_poll_set_fds(qpoll, fds, M + N / 2);
+ g_assert_cmpint(r, ==, M + N / 2);
+ r = qemu_poll(qpoll, 0);
+ g_assert_cmpint(r, ==, N / 2);
+
+ /* Clean up */
+ for (i = 0; i < M; i++) {
+ poll_del_one(qpoll, em[i]);
+ }
+
+ for (i = 0; i < N; i++) {
+ poll_del_one(qpoll, en[i]);
+ }
+ qemu_poll_free(qpoll);
+}
+
+int main(int argc, char **argv)
+{
+ int ret;
+
+ g_test_init(&argc, &argv, NULL);
+
+ g_test_add_func("/poll/single", test_poll_single);
+ g_test_add_func("/poll/multiple", test_poll_multiple);
+ g_test_add_func("/poll/add-del", test_poll_add_del);
+ g_test_add_func("/poll/set-fds", test_poll_set_fds);
+
+ ret = g_test_run();
+
+ return ret;
+}
--
1.9.3
* Re: [Qemu-devel] [PATCH for-2.3 0/5] aio: Support epoll by introducing qemu_poll abstraction
2014-11-25 8:07 [Qemu-devel] [PATCH for-2.3 0/5] aio: Support epoll by introducing qemu_poll abstraction Fam Zheng
` (4 preceding siblings ...)
2014-11-25 8:07 ` [Qemu-devel] [PATCH for-2.3 5/5] tests: Add test case for qemu_poll Fam Zheng
@ 2014-11-25 13:52 ` Stefan Hajnoczi
2014-11-25 13:56 ` Paolo Bonzini
2014-11-26 5:27 ` Fam Zheng
5 siblings, 2 replies; 11+ messages in thread
From: Stefan Hajnoczi @ 2014-11-25 13:52 UTC (permalink / raw)
To: Fam Zheng; +Cc: Kevin Wolf, Paolo Bonzini, qemu-devel
On Tue, Nov 25, 2014 at 04:07:54PM +0800, Fam Zheng wrote:
> ppoll(2) doesn't scale as well as epoll: The elapsed time of the syscall is
> linear to the number of fd's we poll, which hurts performance a bit when the
> number of devices are many, or when a virtio device registers many virtqueues
> (virtio-serial, for instance).
>
> To show some data from my test on current master:
>
> - As a base point (10~20 fd's), it takes 22000 ns for each qemu_poll_ns.
> - Add 10 virtio-serial, which adds some six hundred fds to the main loop.
> The time spent in qemu_poll_ns goes up to 75000 ns.
>
> This series introduces qemu_poll, which is implemented with g_poll and epoll,
> decided at configure time with CONFIG_EPOLL.
>
> After this change, the times to do the same thing with qemu_poll (more
> precisely, with a sequence of qemu_poll_set_fds(), qemu_poll(),
> qemu_poll_get_events() followed by syncing back to gpollfds), are reduced to
> 21000 ns and 25000 ns, respectively.
>
> We are still not O(1) because as a transition, the qemu_poll_set_fds before
> qemu_poll is not optimized out yet.
You didn't mention the change from nanosecond to millisecond timeouts.
QEMU did not use g_poll() for a long time because g_poll() only provides
milliseconds. It seems this patch series undoes the work that has been
done to keep nanosecond timeouts in QEMU.
Do you think it is okay to forget about <1 ms timeout precision?
If we go ahead with this, we'll need to rethink other timeouts in QEMU.
For example, is there a point in setting timer slack to 1 ns if we
cannot even specify ns wait times?
Perhaps timerfd is needed before we can use epoll. Hopefully the
overall performance effect will be positive with epoll + timerfd,
compared to ppoll().
Stefan
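For illustration, the timerfd idea mentioned above could look roughly like
the following untested sketch (it assumes timeout_ns > 0, skips error
handling, and the timerfd itself shows up among the returned events and
would have to be filtered out by the caller):

    #include <stdint.h>
    #include <sys/epoll.h>
    #include <sys/timerfd.h>
    #include <unistd.h>

    /* Wait on @epollfd for up to @timeout_ns by arming a timerfd, instead
     * of relying on epoll_wait()'s millisecond timeout argument. */
    static int epoll_wait_ns(int epollfd, struct epoll_event *events,
                             int max_events, int64_t timeout_ns)
    {
        int tfd = timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC);
        struct itimerspec its = {
            .it_value = {
                .tv_sec  = timeout_ns / 1000000000LL,
                .tv_nsec = timeout_ns % 1000000000LL,
            },
        };
        struct epoll_event ev = { .events = EPOLLIN, .data.fd = tfd };
        int n;

        timerfd_settime(tfd, 0, &its, NULL);
        epoll_ctl(epollfd, EPOLL_CTL_ADD, tfd, &ev);
        /* block indefinitely; the armed timerfd bounds the wait */
        n = epoll_wait(epollfd, events, max_events, -1);
        epoll_ctl(epollfd, EPOLL_CTL_DEL, tfd, NULL);
        close(tfd);
        return n;
    }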
* Re: [Qemu-devel] [PATCH for-2.3 0/5] aio: Support epoll by introducing qemu_poll abstraction
2014-11-25 13:52 ` [Qemu-devel] [PATCH for-2.3 0/5] aio: Support epoll by introducing qemu_poll abstraction Stefan Hajnoczi
@ 2014-11-25 13:56 ` Paolo Bonzini
2014-11-26 5:27 ` Fam Zheng
1 sibling, 0 replies; 11+ messages in thread
From: Paolo Bonzini @ 2014-11-25 13:56 UTC (permalink / raw)
To: Stefan Hajnoczi, Fam Zheng; +Cc: Kevin Wolf, qemu-devel
On 25/11/2014 14:52, Stefan Hajnoczi wrote:
> Do you think it is okay to forget about <1 ms timeout precision?
>
> If we go ahead with this, we'll need to rethink other timeouts in
> QEMU. For example, is there a point in setting timer slack to 1 ns
> if we cannot even specify ns wait times?
>
> Perhaps timerfd is needed before we can use epoll. Hopefully the
> overall performance effect will be positive with epoll + timerfd,
> compared to ppoll().
You can also use POLLIN with the epoll file descriptor, i.e. do ppoll
followed (if there's no timeout) by epoll_wait with zero timeout.
Paolo
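For illustration, this suggestion amounts to roughly the following untested
sketch: keep every fd registered with epoll, put only the epoll fd itself
into ppoll() so the nanosecond timeout is preserved, then drain the ready
events with a zero-timeout epoll_wait():

    #define _GNU_SOURCE     /* for ppoll() */
    #include <poll.h>
    #include <stdint.h>
    #include <sys/epoll.h>
    #include <time.h>

    static int poll_epollfd_ns(int epollfd, struct epoll_event *events,
                               int max_events, int64_t timeout_ns)
    {
        struct pollfd pfd = { .fd = epollfd, .events = POLLIN };
        struct timespec ts = {
            .tv_sec  = timeout_ns / 1000000000LL,
            .tv_nsec = timeout_ns % 1000000000LL,
        };
        /* ppoll() supplies the ns timeout; NULL means block forever */
        int r = ppoll(&pfd, 1, timeout_ns < 0 ? NULL : &ts, NULL);

        if (r <= 0) {
            return r;
        }
        /* the epoll fd is readable, so this call does not block */
        return epoll_wait(epollfd, events, max_events, 0);
    }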
* Re: [Qemu-devel] [PATCH for-2.3 0/5] aio: Support epoll by introducing qemu_poll abstraction
2014-11-25 13:52 ` [Qemu-devel] [PATCH for-2.3 0/5] aio: Support epoll by introducing qemu_poll abstraction Stefan Hajnoczi
2014-11-25 13:56 ` Paolo Bonzini
@ 2014-11-26 5:27 ` Fam Zheng
1 sibling, 0 replies; 11+ messages in thread
From: Fam Zheng @ 2014-11-26 5:27 UTC (permalink / raw)
To: Stefan Hajnoczi; +Cc: Kevin Wolf, Paolo Bonzini, qemu-devel
On Tue, 11/25 13:52, Stefan Hajnoczi wrote:
> On Tue, Nov 25, 2014 at 04:07:54PM +0800, Fam Zheng wrote:
> > ppoll(2) doesn't scale as well as epoll: The elapsed time of the syscall is
> > linear to the number of fd's we poll, which hurts performance a bit when the
> > number of devices are many, or when a virtio device registers many virtqueues
> > (virtio-serial, for instance).
> >
> > To show some data from my test on current master:
> >
> > - As a base point (10~20 fd's), it takes 22000 ns for each qemu_poll_ns.
> > - Add 10 virtio-serial, which adds some six hundred fds to the main loop.
> > The time spent in qemu_poll_ns goes up to 75000 ns.
> >
> > This series introduces qemu_poll, which is implemented with g_poll and epoll,
> > decided at configure time with CONFIG_EPOLL.
> >
> > After this change, the times to do the same thing with qemu_poll (more
> > precisely, with a sequence of qemu_poll_set_fds(), qemu_poll(),
> > qemu_poll_get_events() followed by syncing back to gpollfds), are reduced to
> > 21000 ns and 25000 ns, respectively.
> >
> > We are still not O(1) because as a transition, the qemu_poll_set_fds before
> > qemu_poll is not optimized out yet.
>
> You didn't mention the change from nanosecond to millisecond timeouts.
>
> QEMU did not use g_poll() for a long time because g_poll() only provides
> milliseconds. It seems this patch series undoes the work that has been
> done to keep nanosecond timeouts in QEMU.
>
> Do you think it is okay to forget about <1 ms timeout precision?
>
> If we go ahead with this, we'll need to rethink other timeouts in QEMU.
> For example, is there a point in setting timer slack to 1 ns if we
> cannot even specify ns wait times?
>
> Perhaps timerfd is needed before we can use epoll. Hopefully the
> overall performance effect will be positive with epoll + timerfd,
> compared to ppoll().
>
Good point! Thanks. I'll look into it.
Fam