* [Qemu-devel] [RFC PATCH 0/2] port network layer onto glib
@ 2013-03-13 5:59 Liu Ping Fan
2013-03-13 5:59 ` [Qemu-devel] [RFC PATCH 1/2] net: port tap " Liu Ping Fan
` (2 more replies)
From: Liu Ping Fan @ 2013-03-13 5:59 UTC
To: qemu-devel
Cc: Stefan Hajnoczi, Paolo Bonzini, mdroth, Anthony Liguori,
Michael S. Tsirkin
This series aims to port the network backend onto glib, and
prepares for moving towards making the network layer multi-threaded.
A brief overview of the whole aim and plan is documented at
http://wiki.qemu.org/Features/network_reentrant
In this series, each NetClientState is attached to a GSource.
At first, I used AioContext instead of GSource, but after discussion,
I think that with GSource we can integrate with glib more closely.
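[Editor's note: for readers unfamiliar with glib's event loop, the pattern
this series builds on looks roughly like the sketch below: a custom GSource
wrapping a GPollFD, attached to a GMainContext. This is an illustration
only, assuming the glib 2.x API; the names FdSource, fd_funcs, on_ready and
attach_fd are hypothetical and do not appear in the patches.]

    #include <glib.h>

    typedef struct {
        GSource source;   /* must be first so GSource* casts work */
        GPollFD gfd;
    } FdSource;

    static gboolean fd_prepare(GSource *src, gint *timeout)
    {
        *timeout = -1;    /* no timeout of our own; block on the fd */
        return FALSE;     /* not ready yet, poll first */
    }

    static gboolean fd_check(GSource *src)
    {
        FdSource *s = (FdSource *)src;
        return (s->gfd.revents & s->gfd.events) != 0;
    }

    static gboolean fd_dispatch(GSource *src, GSourceFunc cb, gpointer data)
    {
        return cb ? cb(data) : FALSE;
    }

    static GSourceFuncs fd_funcs = { fd_prepare, fd_check, fd_dispatch, NULL };

    static gboolean on_ready(gpointer data)
    {
        /* handle I/O on the fd here */
        return TRUE;      /* keep the source installed */
    }

    static void attach_fd(int fd)
    {
        FdSource *s = (FdSource *)g_source_new(&fd_funcs, sizeof(FdSource));
        s->gfd.fd = fd;
        s->gfd.events = G_IO_IN;
        g_source_add_poll(&s->source, &s->gfd);
        g_source_set_callback(&s->source, on_ready, s, NULL);
        g_source_attach(&s->source, NULL);  /* NULL = default GMainContext */
    }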
Liu Ping Fan (2):
net: port tap onto glib
net: port hub onto glib
hw/qdev-properties-system.c | 1 +
include/net/net.h | 24 +++++++++++
include/net/queue.h | 14 +++++++
net/hub.c | 34 +++++++++++++++-
net/net.c | 91 +++++++++++++++++++++++++++++++++++++++++++
net/queue.c | 4 +-
net/tap.c | 57 ++++++++++++++++++++++-----
7 files changed, 210 insertions(+), 15 deletions(-)
--
1.7.4.4
* [Qemu-devel] [RFC PATCH 1/2] net: port tap onto glib
2013-03-13 5:59 [Qemu-devel] [RFC PATCH 0/2] port network layer onto glib Liu Ping Fan
@ 2013-03-13 5:59 ` Liu Ping Fan
2013-03-13 5:59 ` [Qemu-devel] [RFC PATCH 2/2] net: port hub " Liu Ping Fan
2013-03-13 10:58 ` [Qemu-devel] [RFC PATCH 0/2] port network layer " Paolo Bonzini
From: Liu Ping Fan @ 2013-03-13 5:59 UTC
To: qemu-devel
Cc: Stefan Hajnoczi, Paolo Bonzini, mdroth, Anthony Liguori,
Michael S. Tsirkin
Bind each NetClientState to a GSource (i.e., a NetClientSource). Currently,
these GSources are attached to the default context, but in the future, after
resolving the races between the handlers and the interface exposed by
NetClientInfo, and other re-entrancy issues, we can run each NetClientState
on a different GMainContext.
Signed-off-by: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
---
include/net/net.h | 21 ++++++++++++
net/net.c | 89 +++++++++++++++++++++++++++++++++++++++++++++++++++++
net/tap.c | 57 +++++++++++++++++++++++++++------
3 files changed, 156 insertions(+), 11 deletions(-)
diff --git a/include/net/net.h b/include/net/net.h
index cb049a1..4f3eed1 100644
--- a/include/net/net.h
+++ b/include/net/net.h
@@ -44,6 +44,7 @@ typedef ssize_t (NetReceiveIOV)(NetClientState *, const struct iovec *, int);
typedef void (NetCleanup) (NetClientState *);
typedef void (LinkStatusChanged)(NetClientState *);
typedef void (NetClientDestructor)(NetClientState *);
+typedef void (NetClientBindCtx)(NetClientState *, GMainContext *);
typedef struct NetClientInfo {
NetClientOptionsKind type;
@@ -55,8 +56,22 @@ typedef struct NetClientInfo {
NetCleanup *cleanup;
LinkStatusChanged *link_status_changed;
NetPoll *poll;
+ NetClientBindCtx *bind_ctx;
} NetClientInfo;
+typedef bool (*Pollable)(void *opaque);
+
+typedef struct NetClientSource {
+ GSource source;
+ /* FIXME: expand to an array in the future */
+ GPollFD gfd;
+ bool last_rd_sts;
+ bool last_wr_sts;
+ Pollable readable;
+ Pollable writeable;
+ void *opaque;
+} NetClientSource;
+
struct NetClientState {
NetClientInfo *info;
int link_down;
@@ -69,6 +84,7 @@ struct NetClientState {
unsigned receive_disabled : 1;
NetClientDestructor *destructor;
unsigned int queue_index;
+ NetClientSource *nsrc;
};
typedef struct NICState {
@@ -155,6 +171,9 @@ extern int default_net;
extern const char *legacy_tftp_prefix;
extern const char *legacy_bootp_filename;
+void net_init_gsource(NetClientState *nc, NetClientSource *nsrc, int fd,
+ int events, Pollable rd, Pollable wr, GSourceFunc f, void *opaque);
+
int net_client_init(QemuOpts *opts, int is_netdev, Error **errp);
int net_client_parse(QemuOptsList *opts_list, const char *str);
int net_init_clients(void);
@@ -190,4 +209,6 @@ unsigned compute_mcast_idx(const uint8_t *ep);
.offset = vmstate_offset_macaddr(_state, _field), \
}
+extern GSourceFuncs net_gsource_funcs;
+
#endif
diff --git a/net/net.c b/net/net.c
index f3d67f8..ce68267 100644
--- a/net/net.c
+++ b/net/net.c
@@ -299,6 +299,9 @@ static void qemu_free_net_client(NetClientState *nc)
}
g_free(nc->name);
g_free(nc->model);
+ if (nc->nsrc) {
+ g_source_remove(g_source_get_id(&nc->nsrc->source));
+ }
if (nc->destructor) {
nc->destructor(nc);
}
@@ -558,6 +561,92 @@ qemu_sendv_packet(NetClientState *nc, const struct iovec *iov, int iovcnt)
return qemu_sendv_packet_async(nc, iov, iovcnt, NULL);
}
+static gboolean prepare(GSource *src, gint *time)
+{
+ NetClientSource *nsrc = (NetClientSource *)src;
+ bool rd, wr, change = false;
+
+ if (!nsrc->readable && !nsrc->writeable) {
+ return false;
+ }
+ if (nsrc->readable) {
+ rd = nsrc->readable(nsrc->opaque);
+ if (nsrc->last_rd_sts != rd) {
+ nsrc->last_rd_sts = rd;
+ change = true;
+ }
+ }
+ if (nsrc->writeable) {
+ wr = nsrc->writeable(nsrc->opaque);
+ if (nsrc->last_wr_sts != wr) {
+ nsrc->last_wr_sts = wr;
+ change = true;
+ }
+ }
+ if (!change) {
+ return false;
+ }
+
+ g_source_remove_poll(src, &nsrc->gfd);
+ if (rd) {
+ nsrc->gfd.events |= G_IO_IN;
+ } else {
+ nsrc->gfd.events &= ~G_IO_IN;
+ }
+ if (wr) {
+ nsrc->gfd.events |= G_IO_OUT;
+ } else {
+ nsrc->gfd.events &= ~G_IO_OUT;
+ }
+ g_source_add_poll(src, &nsrc->gfd);
+ return false;
+}
+
+static gboolean check(GSource *src)
+{
+ NetClientSource *nsrc = (NetClientSource *)src;
+
+ if (nsrc->gfd.revents & nsrc->gfd.events) {
+ return true;
+ }
+ return false;
+}
+
+static gboolean dispatch(GSource *src, GSourceFunc cb, gpointer data)
+{
+ gboolean ret = false;
+
+ if (cb) {
+ ret = cb(data);
+ }
+ return ret;
+}
+
+GSourceFuncs net_gsource_funcs = {
+ prepare,
+ check,
+ dispatch,
+ NULL
+};
+
+void net_init_gsource(NetClientState *nc, NetClientSource *nsrc, int fd,
+ int events, Pollable rd, Pollable wr, GSourceFunc f, void *opaque)
+{
+ nsrc->gfd.fd = fd;
+ nsrc->gfd.events = events;
+ nsrc->opaque = opaque;
+ nsrc->readable = rd;
+ nsrc->writeable = wr;
+ nsrc->last_rd_sts = false;
+ nsrc->last_wr_sts = false;
+ nc->nsrc = nsrc;
+ /* add the PollFD now if it won't change; otherwise leave it to GSource prepare */
+ if (!rd && !wr) {
+ g_source_add_poll(&nsrc->source, &nsrc->gfd);
+ }
+ g_source_set_callback(&nsrc->source, f, nsrc, NULL);
+}
+
NetClientState *qemu_find_netdev(const char *id)
{
NetClientState *nc;
diff --git a/net/tap.c b/net/tap.c
index daab350..a7ba666 100644
--- a/net/tap.c
+++ b/net/tap.c
@@ -70,25 +70,48 @@ static int tap_can_send(void *opaque);
static void tap_send(void *opaque);
static void tap_writable(void *opaque);
-static void tap_update_fd_handler(TAPState *s)
+static bool readable(void *opaque)
{
- qemu_set_fd_handler2(s->fd,
- s->read_poll && s->enabled ? tap_can_send : NULL,
- s->read_poll && s->enabled ? tap_send : NULL,
- s->write_poll && s->enabled ? tap_writable : NULL,
- s);
+ TAPState *s = (TAPState *)opaque;
+
+ if (s->enabled && s->read_poll &&
+ tap_can_send(s)) {
+ return true;
+ }
+ return false;
+}
+
+static bool writeable(void *opaque)
+{
+ TAPState *s = (TAPState *)opaque;
+
+ if (s->enabled && s->write_poll) {
+ return true;
+ }
+ return false;
+}
+
+static gboolean tap_handler(gpointer data)
+{
+ NetClientSource *nsrc = (NetClientSource *)data;
+
+ if (nsrc->gfd.revents & G_IO_IN) {
+ tap_send(nsrc->opaque);
+ }
+ if (nsrc->gfd.revents & G_IO_OUT) {
+ tap_writable(nsrc->opaque);
+ }
+ return true;
}
static void tap_read_poll(TAPState *s, bool enable)
{
s->read_poll = enable;
- tap_update_fd_handler(s);
}
static void tap_write_poll(TAPState *s, bool enable)
{
s->write_poll = enable;
- tap_update_fd_handler(s);
}
static void tap_writable(void *opaque)
@@ -298,6 +321,7 @@ static void tap_cleanup(NetClientState *nc)
static void tap_poll(NetClientState *nc, bool enable)
{
TAPState *s = DO_UPCAST(TAPState, nc, nc);
+ /* FIXME: when the tap backend runs on another thread, disabling must be synchronous */
tap_read_poll(s, enable);
tap_write_poll(s, enable);
}
@@ -309,6 +333,11 @@ int tap_get_fd(NetClientState *nc)
return s->fd;
}
+static void tap_bind_ctx(NetClientState *nc, GMainContext *ctx)
+{
+ g_source_attach(&nc->nsrc->source, ctx);
+}
+
/* fd support */
static NetClientInfo net_tap_info = {
@@ -319,6 +348,7 @@ static NetClientInfo net_tap_info = {
.receive_iov = tap_receive_iov,
.poll = tap_poll,
.cleanup = tap_cleanup,
+ .bind_ctx = tap_bind_ctx,
};
static TAPState *net_tap_fd_init(NetClientState *peer,
@@ -596,13 +626,18 @@ static int net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer,
int vnet_hdr, int fd)
{
TAPState *s;
+ NetClientSource *nsrc;
s = net_tap_fd_init(peer, model, name, fd, vnet_hdr);
if (!s) {
close(fd);
return -1;
}
-
+ nsrc = (NetClientSource *)g_source_new(&net_gsource_funcs,
+ sizeof(NetClientSource));
+ net_init_gsource(&s->nc, nsrc, s->fd, G_IO_IN|G_IO_OUT,
+ readable, writeable, tap_handler, s);
+ s->nc.info->bind_ctx(&s->nc, NULL);
if (tap_set_sndbuf(s->fd, tap) < 0) {
return -1;
}
@@ -843,8 +878,8 @@ int tap_enable(NetClientState *nc)
} else {
ret = tap_fd_enable(s->fd);
if (ret == 0) {
+ /* FIXME: will be made synchronous to ensure the handler is not called */
s->enabled = true;
- tap_update_fd_handler(s);
}
return ret;
}
@@ -861,8 +896,8 @@ int tap_disable(NetClientState *nc)
ret = tap_fd_disable(s->fd);
if (ret == 0) {
qemu_purge_queued_packets(nc);
+ /* FIXME: will be made synchronous to ensure the handler is not called */
s->enabled = false;
- tap_update_fd_handler(s);
}
return ret;
}
--
1.7.4.4
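[Editor's note: to see how the prepare/check/dispatch callbacks in this
patch are invoked, a minimal sketch of driving the default GMainContext
(the context the patch attaches to via bind_ctx with ctx == NULL) follows.
It is illustrative only; QEMU's real main loop integrates glib iteration
into main-loop.c rather than looping like this.]

    #include <glib.h>

    /* One blocking iteration: glib calls each attached source's
     * prepare(), g_poll()s the registered GPollFDs, then calls
     * check() and, where that returns TRUE, dispatch(), which runs
     * the callback (tap_handler in the patch above). */
    static void run_default_context(void)
    {
        GMainContext *ctx = g_main_context_default();
        for (;;) {
            g_main_context_iteration(ctx, TRUE);
        }
    }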
* [Qemu-devel] [RFC PATCH 2/2] net: port hub onto glib
2013-03-13 5:59 [Qemu-devel] [RFC PATCH 0/2] port network layer onto glib Liu Ping Fan
2013-03-13 5:59 ` [Qemu-devel] [RFC PATCH 1/2] net: port tap " Liu Ping Fan
@ 2013-03-13 5:59 ` Liu Ping Fan
2013-03-13 10:58 ` [Qemu-devel] [RFC PATCH 0/2] port network layer " Paolo Bonzini
From: Liu Ping Fan @ 2013-03-13 5:59 UTC
To: qemu-devel
Cc: Stefan Hajnoczi, Paolo Bonzini, mdroth, Anthony Liguori,
Michael S. Tsirkin
From: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
Attach a GSource to each hubport. Currently the GSource is attached
to the default main context, and the hub still runs on the iothread, but
in the future, after making the whole layer thread-safe, we will allow
ports to run on different threads (GMainContexts).
Signed-off-by: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
---
hw/qdev-properties-system.c | 1 +
include/net/net.h | 7 +++++--
include/net/queue.h | 14 ++++++++++++++
net/hub.c | 34 ++++++++++++++++++++++++++++++++--
net/net.c | 1 +
net/queue.c | 4 ++--
6 files changed, 55 insertions(+), 6 deletions(-)
diff --git a/hw/qdev-properties-system.c b/hw/qdev-properties-system.c
index ce3af22..bb2448d 100644
--- a/hw/qdev-properties-system.c
+++ b/hw/qdev-properties-system.c
@@ -307,6 +307,7 @@ static void set_vlan(Object *obj, Visitor *v, void *opaque,
name, prop->info->name);
return;
}
+ hubport->info->bind_ctx(hubport, NULL);
*ptr = hubport;
}
diff --git a/include/net/net.h b/include/net/net.h
index 4f3eed1..3f01711 100644
--- a/include/net/net.h
+++ b/include/net/net.h
@@ -65,10 +65,13 @@ typedef struct NetClientSource {
GSource source;
/* FIXME: expand to an array in the future */
GPollFD gfd;
- bool last_rd_sts;
- bool last_wr_sts;
+
Pollable readable;
Pollable writeable;
+ /* status returned by the Pollable callbacks last time */
+ bool last_rd_sts;
+ bool last_wr_sts;
+
void *opaque;
} NetClientSource;
diff --git a/include/net/queue.h b/include/net/queue.h
index fc02b33..f60e57f 100644
--- a/include/net/queue.h
+++ b/include/net/queue.h
@@ -38,6 +38,20 @@ NetQueue *qemu_new_net_queue(void *opaque);
void qemu_del_net_queue(NetQueue *queue);
+void qemu_net_queue_append(NetQueue *queue,
+ NetClientState *sender,
+ unsigned flags,
+ const uint8_t *buf,
+ size_t size,
+ NetPacketSent *sent_cb);
+
+void qemu_net_queue_append_iov(NetQueue *queue,
+ NetClientState *sender,
+ unsigned flags,
+ const struct iovec *iov,
+ int iovcnt,
+ NetPacketSent *sent_cb);
+
ssize_t qemu_net_queue_send(NetQueue *queue,
NetClientState *sender,
unsigned flags,
diff --git a/net/hub.c b/net/hub.c
index df32074..ab47b1b 100644
--- a/net/hub.c
+++ b/net/hub.c
@@ -31,6 +31,7 @@ typedef struct NetHubPort {
QLIST_ENTRY(NetHubPort) next;
NetHub *hub;
int id;
+ EventNotifier e;
} NetHubPort;
struct NetHub {
@@ -52,7 +53,9 @@ static ssize_t net_hub_receive(NetHub *hub, NetHubPort *source_port,
continue;
}
- qemu_send_packet(&port->nc, buf, len);
+ qemu_net_queue_append(port->nc.peer->send_queue, &port->nc,
+ QEMU_NET_PACKET_FLAG_NONE, buf, len, NULL);
+ event_notifier_set(&port->e);
}
return len;
}
@@ -68,7 +71,9 @@ static ssize_t net_hub_receive_iov(NetHub *hub, NetHubPort *source_port,
continue;
}
- qemu_sendv_packet(&port->nc, iov, iovcnt);
+ qemu_net_queue_append_iov(port->nc.peer->send_queue, &port->nc,
+ QEMU_NET_PACKET_FLAG_NONE, iov, iovcnt, NULL);
+ event_notifier_set(&port->e);
}
return len;
}
@@ -129,6 +134,11 @@ static void net_hub_port_cleanup(NetClientState *nc)
QLIST_REMOVE(port, next);
}
+static void net_hub_port_bind(NetClientState *nc, GMainContext *ctx)
+{
+ g_source_attach(&nc->nsrc->source, ctx);
+}
+
static NetClientInfo net_hub_port_info = {
.type = NET_CLIENT_OPTIONS_KIND_HUBPORT,
.size = sizeof(NetHubPort),
@@ -136,14 +146,29 @@ static NetClientInfo net_hub_port_info = {
.receive = net_hub_port_receive,
.receive_iov = net_hub_port_receive_iov,
.cleanup = net_hub_port_cleanup,
+ .bind_ctx = net_hub_port_bind,
};
+static gboolean hub_port_handler(gpointer data)
+{
+ NetClientSource *nsrc = (NetClientSource *)data;
+ NetHubPort *port = (NetHubPort *)nsrc->opaque;
+
+ if (nsrc->gfd.revents & G_IO_IN) {
+ event_notifier_test_and_clear(&port->e);
+ qemu_net_queue_flush(port->nc.peer->send_queue);
+ return true;
+ }
+ return false;
+}
+
static NetHubPort *net_hub_port_new(NetHub *hub, const char *name)
{
NetClientState *nc;
NetHubPort *port;
int id = hub->num_ports++;
char default_name[128];
+ NetClientSource *nsrc;
if (!name) {
snprintf(default_name, sizeof(default_name),
@@ -155,6 +180,11 @@ static NetHubPort *net_hub_port_new(NetHub *hub, const char *name)
port = DO_UPCAST(NetHubPort, nc, nc);
port->id = id;
port->hub = hub;
+ event_notifier_init(&port->e, 0);
+ nsrc = (NetClientSource *)g_source_new(&net_gsource_funcs,
+ sizeof(NetClientSource));
+ net_init_gsource(nc, nsrc, event_notifier_get_fd(&port->e), G_IO_IN,
+ NULL, NULL, hub_port_handler, port);
QLIST_INSERT_HEAD(&hub->ports, port, next);
diff --git a/net/net.c b/net/net.c
index fd5269f..646cad8 100644
--- a/net/net.c
+++ b/net/net.c
@@ -871,6 +871,7 @@ static int net_client_init1(const void *object, int is_netdev, Error **errp)
(opts->kind != NET_CLIENT_OPTIONS_KIND_NIC ||
!opts->nic->has_netdev)) {
peer = net_hub_add_port(u.net->has_vlan ? u.net->vlan : 0, NULL);
+ peer->info->bind_ctx(peer, NULL);
}
if (net_client_init_fun[opts->kind](opts, name, peer) < 0) {
diff --git a/net/queue.c b/net/queue.c
index 859d02a..67959f8 100644
--- a/net/queue.c
+++ b/net/queue.c
@@ -87,7 +87,7 @@ void qemu_del_net_queue(NetQueue *queue)
g_free(queue);
}
-static void qemu_net_queue_append(NetQueue *queue,
+void qemu_net_queue_append(NetQueue *queue,
NetClientState *sender,
unsigned flags,
const uint8_t *buf,
@@ -110,7 +110,7 @@ static void qemu_net_queue_append(NetQueue *queue,
QTAILQ_INSERT_TAIL(&queue->packets, packet, entry);
}
-static void qemu_net_queue_append_iov(NetQueue *queue,
+void qemu_net_queue_append_iov(NetQueue *queue,
NetClientState *sender,
unsigned flags,
const struct iovec *iov,
--
1.7.4.4
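[Editor's note: the patch pairs each hub port's packet queue with an
EventNotifier whose fd wakes the port's GSource. On Linux, QEMU's
EventNotifier is backed by an eventfd; the underlying wakeup pattern is
roughly the following sketch. Linux-specific; the names notifier_new,
notifier_set and notifier_clear are hypothetical.]

    #include <stdint.h>
    #include <unistd.h>
    #include <sys/eventfd.h>

    /* Create the notifier; its fd goes into the GSource's GPollFD
     * with events = G_IO_IN (cf. net_init_gsource above). */
    static int notifier_new(void)
    {
        return eventfd(0, EFD_NONBLOCK);
    }

    /* Producer: after appending a packet to the peer's queue,
     * bump the counter so the fd becomes readable. */
    static void notifier_set(int efd)
    {
        uint64_t one = 1;
        ssize_t r = write(efd, &one, sizeof(one));
        (void)r;
    }

    /* Consumer, from the poll handler: reading resets the counter
     * to zero; then flush the queue (cf. hub_port_handler). */
    static void notifier_clear(int efd)
    {
        uint64_t count;
        ssize_t r = read(efd, &count, sizeof(count));
        (void)r;
    }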
* Re: [Qemu-devel] [RFC PATCH 0/2] port network layer onto glib
2013-03-13 5:59 [Qemu-devel] [RFC PATCH 0/2] port network layer onto glib Liu Ping Fan
2013-03-13 5:59 ` [Qemu-devel] [RFC PATCH 1/2] net: port tap " Liu Ping Fan
2013-03-13 5:59 ` [Qemu-devel] [RFC PATCH 2/2] net: port hub " Liu Ping Fan
@ 2013-03-13 10:58 ` Paolo Bonzini
2013-03-13 12:34 ` Anthony Liguori
2013-03-14 14:08 ` liu ping fan
From: Paolo Bonzini @ 2013-03-13 10:58 UTC
To: Liu Ping Fan
Cc: mdroth, Stefan Hajnoczi, qemu-devel, Anthony Liguori,
Michael S. Tsirkin
On 13/03/2013 06:59, Liu Ping Fan wrote:
> This series aims to port the network backend onto glib, and
> prepares for moving towards making the network layer multi-threaded.
> A brief overview of the whole aim and plan is documented at
> http://wiki.qemu.org/Features/network_reentrant
>
> In this series, each NetClientState is attached to a GSource.
> At first, I used AioContext instead of GSource, but after discussion,
> I think that with GSource we can integrate with glib more closely.
Integrating with glib by itself is pointless. What is the *benefit*?
We have a pretty good idea of how to make multithreaded device models
using AioContext, since we are using it for the block layer and
virtio-blk dataplane. Doing the same work twice, on two different
frameworks, doesn't seem like a very good idea.
Paolo
* Re: [Qemu-devel] [RFC PATCH 0/2] port network layer onto glib
2013-03-13 10:58 ` [Qemu-devel] [RFC PATCH 0/2] port network layer " Paolo Bonzini
@ 2013-03-13 12:34 ` Anthony Liguori
2013-03-13 16:21 ` Paolo Bonzini
2013-03-14 10:04 ` Peter Maydell
2013-03-14 14:08 ` liu ping fan
From: Anthony Liguori @ 2013-03-13 12:34 UTC
To: Paolo Bonzini, Liu Ping Fan
Cc: mdroth, Stefan Hajnoczi, qemu-devel, Michael S. Tsirkin
Paolo Bonzini <pbonzini@redhat.com> writes:
> On 13/03/2013 06:59, Liu Ping Fan wrote:
>> This series aims to port the network backend onto glib, and
>> prepares for moving towards making the network layer multi-threaded.
>> A brief overview of the whole aim and plan is documented at
>> http://wiki.qemu.org/Features/network_reentrant
>>
>> In this series, each NetClientState is attached to a GSource.
>> At first, I used AioContext instead of GSource, but after discussion,
>> I think that with GSource we can integrate with glib more closely.
>
> Integrating with glib by itself is pointless. What is the *benefit*?
>
> We have a pretty good idea of how to make multithreaded device models
> using AioContext, since we are using it for the block layer and
> virtio-blk dataplane. Doing the same work twice, on two different
> frameworks, doesn't seem like a very good idea.
Hrm, I had thought on previous threads there was clear agreement that we
did not want to use AioContext outside of the block layer.
I think we certainly all agree that moving to a thread aware event loop
is a necessary step toward multi-threading. I think the only question
is whether to use AioContext or glib.
AioContext is necessary for the block layer because the block layer
still has synchronous I/O. I think we should aim to replace all sync
I/O in the long term with coroutine based I/O. That lets us eliminate
AioContexts entirely, which is nice as the semantics are subtle.
I think that's a solid argument for glib over AioContext. The former is
well understood, documented, and makes unit testing easier.
Did you have a specific concern with using glib vs. AioContext? Is it
about reusing code in the block layer where AioContext is required?
Regards,
Anthony Liguori
>
> Paolo
* Re: [Qemu-devel] [RFC PATCH 0/2] port network layer onto glib
2013-03-13 12:34 ` Anthony Liguori
@ 2013-03-13 16:21 ` Paolo Bonzini
2013-03-13 17:06 ` mdroth
2013-03-13 17:23 ` Anthony Liguori
2013-03-14 10:04 ` Peter Maydell
From: Paolo Bonzini @ 2013-03-13 16:21 UTC
To: Anthony Liguori
Cc: mdroth, Stefan Hajnoczi, Michael S. Tsirkin, Liu Ping Fan,
qemu-devel
On 13/03/2013 13:34, Anthony Liguori wrote:
> Paolo Bonzini <pbonzini@redhat.com> writes:
>
>> On 13/03/2013 06:59, Liu Ping Fan wrote:
>>> This series aims to port the network backend onto glib, and
>>> prepares for moving towards making the network layer multi-threaded.
>>> A brief overview of the whole aim and plan is documented at
>>> http://wiki.qemu.org/Features/network_reentrant
>>>
>>> In this series, each NetClientState is attached to a GSource.
>>> At first, I used AioContext instead of GSource, but after discussion,
>>> I think that with GSource we can integrate with glib more closely.
>>
>> Integrating with glib by itself is pointless. What is the *benefit*?
>>
>> We have a pretty good idea of how to make multithreaded device models
>> using AioContext, since we are using it for the block layer and
>> virtio-blk dataplane. Doing the same work twice, on two different
>> frameworks, doesn't seem like a very good idea.
>
> Hrm, I had thought on previous threads there was clear agreement that we
> did not want to use AioContext outside of the block layer.
>
> I think we certainly all agree that moving to a thread aware event loop
> is a necessary step toward multi-threading. I think the only question
> is whether to use AioContext or glib.
>
> AioContext is necessary for the block layer because the block layer
> still has synchronous I/O. I think we should aim to replace all sync
> I/O in the long term with coroutine based I/O. That lets us eliminate
> AioContexts entirely, which is nice as the semantics are subtle.
>
> I think that's a solid argument for glib over AioContext. The former is
> well understood, documented, and makes unit testing easier.
I don't see anything particularly subtle in AioContext, except
qemu_bh_schedule_idle and the flush callback. The flush callback only
has a naming problem really, it is a relic of qemu_aio_flush().
qemu_bh_schedule_idle could disappear if we converted the floppy disk
drive to AIO; patches existed for that but then the poster disappeared.
glib's main loop has its share of subtleness (GMainLoop vs.
GMainContext, anyone?), and AioContext's code is vastly simpler than
GMainLoop's. AioContext is also documented and unit tested, with tests
for both standalone and GSource operation. Unit tests for AioContext
users are trivial to write, we have one in test-thread-pool.
> Did you have a specific concern with using glib vs. AioContext? Is it
> about reusing code in the block layer where AioContext is required?
In the short term yes, code duplication is a concern. We already have
two implementations of virtio. I would like the dataplane virtio code to
grow everything else that needs to be in all dataplane-style devices
(for example, things such as setting up the guest<->host notifiers), and
the hw/virtio.c API implemented on top of it (or dead altogether).
Usage of AioContext is pretty much forced by the block layer.
However, I'm more worried by overhead. GMainLoop is great because
everybody plugs into it. It enabled the GTK+ front-end, it let us reuse
GIOChannel for chardev flow control, and one can similarly think of
integrating Avahi for example. However, I think it's mostly useful for
simple-minded non-performance-critical code. QEMU has worked great in
almost all scenarios with only one non-VCPU thread, and if we are going
to move stuff to other threads we should only do that because we want
performance and control. I'm not at all confident that GMainLoop can
provide them.
On the contrary, AioContext really does two things (g_poll and bottom
halves) and does them fast. For really high-performance scenarios, such
as the ones virtio-blk-dataplane was written for, I'd be surprised if
glib's main loop had the same performance as AioContext. Also,
AioContext could be easily converted to use epoll, while we don't have
the same level of control on glib's main loop.
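[Editor's note: a minimal sketch of what "converted to use epoll" could
look like, for readers unfamiliar with the difference: unlike g_poll(),
which is handed the full fd list on every call, epoll registers fds once
and then waits. Linux-specific; loop_add_fd and loop_wait are hypothetical
names.]

    #include <sys/epoll.h>

    /* Register an fd once; epfd comes from epoll_create1(0). */
    static void loop_add_fd(int epfd, int fd)
    {
        struct epoll_event ev = { .events = EPOLLIN | EPOLLOUT };
        ev.data.fd = fd;
        epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &ev);
    }

    /* Wait for one batch of events and dispatch them. */
    static void loop_wait(int epfd)
    {
        struct epoll_event ready[16];
        int i, n = epoll_wait(epfd, ready, 16, -1 /* block */);
        for (i = 0; i < n; i++) {
            /* call the handler registered for ready[i].data.fd */
        }
    }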
Of course I will easily change my mind if I see patches that show the
contrary. :)
Paolo
* Re: [Qemu-devel] [RFC PATCH 0/2] port network layer onto glib
2013-03-13 16:21 ` Paolo Bonzini
@ 2013-03-13 17:06 ` mdroth
2013-03-13 17:31 ` Paolo Bonzini
2013-03-13 17:23 ` Anthony Liguori
1 sibling, 1 reply; 31+ messages in thread
From: mdroth @ 2013-03-13 17:06 UTC (permalink / raw)
To: Paolo Bonzini
Cc: Stefan Hajnoczi, Michael S. Tsirkin, Liu Ping Fan,
Anthony Liguori, qemu-devel
On Wed, Mar 13, 2013 at 05:21:02PM +0100, Paolo Bonzini wrote:
> On 13/03/2013 13:34, Anthony Liguori wrote:
> > Paolo Bonzini <pbonzini@redhat.com> writes:
> >
> >> On 13/03/2013 06:59, Liu Ping Fan wrote:
> >>> This series aims to port the network backend onto glib, and
> >>> prepares for moving towards making the network layer multi-threaded.
> >>> A brief overview of the whole aim and plan is documented at
> >>> http://wiki.qemu.org/Features/network_reentrant
> >>>
> >>> In this series, each NetClientState is attached to a GSource.
> >>> At first, I used AioContext instead of GSource, but after discussion,
> >>> I think that with GSource we can integrate with glib more closely.
> >>
> >> Integrating with glib by itself is pointless. What is the *benefit*?
> >>
> >> We have a pretty good idea of how to make multithreaded device models
> >> using AioContext, since we are using it for the block layer and
> >> virtio-blk dataplane. Doing the same work twice, on two different
> >> frameworks, doesn't seem like a very good idea.
> >
> > Hrm, I had thought on previous threads there was clear agreement that we
> > did not want to use AioContext outside of the block layer.
> >
> > I think we certainly all agree that moving to a thread aware event loop
> > is a necessary step toward multi-threading. I think the only question
> > is whether to use AioContext or glib.
> >
> > AioContext is necessary for the block layer because the block layer
> > still has synchronous I/O. I think we should aim to replace all sync
> > I/O in the long term with coroutine based I/O. That lets us eliminate
> > AioContexts entirely, which is nice as the semantics are subtle.
> >
> > I think that's a solid argument for glib over AioContext. The former is
> > well understood, documented, and makes unit testing easier.
>
> I don't see anything particularly subtle in AioContext, except
> qemu_bh_schedule_idle and the flush callback. The flush callback only
> has a naming problem really, it is a relic of qemu_aio_flush().
> qemu_bh_schedule_idle could disappear if we converted the floppy disk
> drive to AIO; patches existed for that but then the poster disappeared.
>
> glib's main loop has its share of subtleness (GMainLoop vs.
> GMainContext, anyone?), and AioContext's code is vastly simpler than
> GMainLoop's. AioContext is also documented and unit tested, with tests
> for both standalone and GSource operation. Unit tests for AioContext
> users are trivial to write, we have one in test-thread-pool.
>
> > Did you have a specific concern with using glib vs. AioContext? Is it
> > about reusing code in the block layer where AioContext is required?
>
> In the short term yes, code duplication is a concern. We already have
> two implementations of virtio. I would like the dataplane virtio code to
> grow everything else that needs to be in all dataplane-style devices
> (for example, things such as setting up the guest<->host notifiers), and
> the hw/virtio.c API implemented on top of it (or dead altogether).
> Usage of AioContext is pretty much forced by the block layer.
>
> However, I'm more worried by overhead. GMainLoop is great because
> everybody plugs into it. It enabled the GTK+ front-end, it let us reuse
> GIOChannel for chardev flow control, and one can similarly think of
> integrating Avahi for example. However, I think it's mostly useful for
> simple-minded non-performance-critical code. QEMU has worked great in
> almost all scenarios with only one non-VCPU thread, and if we are going
> to move stuff to other threads we should only do that because we want
> performance and control. I'm not at all confident that GMainLoop can
> provide them.
But isn't there also an effort to make virtio-blk/virtio-net a model for
threaded devices/subsystems in general, as opposed to "accelerators" for
specific use-cases like tap-based backends? I think this is the main
question, because most of the planning seems contingent on that. And it
seems to me that if the answer is no, then we need to consider the fact
that vhost-net seems to serve this purpose already.
If the answer is yes, don't we also need to look at things like interaction
between slirp and a threaded network device? Based on comments in the
other thread, I thought it was agreed that slirp was a particular example
for something that should be rolled into a GMainContext loop as opposed
to an AioContext based one?
To me this suggests that some event loops will ultimately drive
GMainContext handlers in addition to AioContexts (with the latter perhaps
being driven at a higher priority with PI mutexes and whatever else that
entails). This is already a requirement for the QEMU main loop, so perhaps
that event loop can be moved to common code to lessen the subtleties
between running in a dataplane thread as opposed to the io thread.
What would be nice is if the difference between the iothread's event
loop and a dataplane (or QMP/VNC/etc) thread's event loop was simply the
set of AioContexts/GMainContexts that it drives. We could do that purely
with AioContexts as well, but that rules out a large class of
backends that offloaded event loops can interact with, such as Chardevs,
so I think modelling how to handle both will provide a threading model
that scales better with other devices/subsystems.
>
> On the contrary, AioContext really does two things (g_poll and bottom
> halves) and does them fast. For really high-performance scenarios, such
> as the ones virtio-blk-dataplane was written for, I'd be surprised if
> glib's main loop had the same performance as AioContext. Also,
> AioContext could be easily converted to use epoll, while we don't have
> the same level of control on glib's main loop.
>
> Of course I will easily change my mind if I see patches that show the
> contrary. :)
>
> Paolo
>
* Re: [Qemu-devel] [RFC PATCH 0/2] port network layer onto glib
2013-03-13 16:21 ` Paolo Bonzini
2013-03-13 17:06 ` mdroth
@ 2013-03-13 17:23 ` Anthony Liguori
2013-03-13 17:35 ` Paolo Bonzini
From: Anthony Liguori @ 2013-03-13 17:23 UTC
To: Paolo Bonzini
Cc: mdroth, Stefan Hajnoczi, Michael S. Tsirkin, Liu Ping Fan,
qemu-devel
Paolo Bonzini <pbonzini@redhat.com> writes:
> On 13/03/2013 13:34, Anthony Liguori wrote:
>> Paolo Bonzini <pbonzini@redhat.com> writes:
>>
>>> On 13/03/2013 06:59, Liu Ping Fan wrote:
>>>> This series aims to port the network backend onto glib, and
>>>> prepares for moving towards making the network layer multi-threaded.
>>>> A brief overview of the whole aim and plan is documented at
>>>> http://wiki.qemu.org/Features/network_reentrant
>>>>
>>>> In this series, each NetClientState is attached to a GSource.
>>>> At first, I used AioContext instead of GSource, but after discussion,
>>>> I think that with GSource we can integrate with glib more closely.
>>>
>>> Integrating with glib by itself is pointless. What is the *benefit*?
>>>
>>> We have a pretty good idea of how to make multithreaded device models
>>> using AioContext, since we are using it for the block layer and
>>> virtio-blk dataplane. Doing the same work twice, on two different
>>> frameworks, doesn't seem like a very good idea.
>>
>> Hrm, I had thought on previous threads there was clear agreement that we
>> did not want to use AioContext outside of the block layer.
>>
>> I think we certainly all agree that moving to a thread aware event loop
>> is a necessary step toward multi-threading. I think the only question
>> is whether to use AioContext or glib.
>>
>> AioContext is necessary for the block layer because the block layer
>> still has synchronous I/O. I think we should aim to replace all sync
>> I/O in the long term with coroutine based I/O. That lets us eliminate
>> AioContexts entirely, which is nice as the semantics are subtle.
>>
>> I think that's a solid argument for glib over AioContext. The former is
>> well understood, documented, and makes unit testing easier.
>
> I don't see anything particularly subtle in AioContext, except
> qemu_bh_schedule_idle and the flush callback. The flush callback only
> has a naming problem really, it is a relic of qemu_aio_flush().
> qemu_bh_schedule_idle could disappear if we converted the floppy disk
> drive to AIO; patches existed for that but then the poster
> disappeared.
I think the nesting is also a bit strange.
When nesting occurs, only the bottom halves and fd events that were
registered in the current depth or greater are invoked. This is very
different from how nesting works with glib and any other event loop
implementation I've worked with.
Normally, all events are executed when nesting. I think our behavior
creates very subtle and fragile code that tries to make nesting not a
point of re-entrancy. I'm not entirely convinced that we've even gotten
this right historically.
This is my biggest concern with AioContext and using it more throughout
QEMU.
> glib's main loop has its share of subtleness (GMainLoop vs.
> GMainContext, anyone?),
Ack. I understand where it comes from but it's silly.
> and AioContext's code is vastly simpler than GMainLoop's.
For now.
> AioContext is also documented and unit tested, with tests
> for both standalone and GSource operation. Unit tests for AioContext
> users are trivial to write, we have one in test-thread-pool.
>
>> Did you have a specific concern with using glib vs. AioContext? Is it
>> about reusing code in the block layer where AioContext is required?
>
> In the short term yes, code duplication is a concern. We already have
> two implementations of virtio.
I share your concern but in the opposite direction. We have three main
loops today.
> I would like the dataplane virtio code to
> grow everything else that needs to be in all dataplane-style devices
> (for example, things such as setting up the guest<->host notifiers), and
> the hw/virtio.c API implemented on top of it (or dead altogether).
> Usage of AioContext is pretty much forced by the block layer.
I don't think that AioContext is the right answer because it makes it
too easy to shoot yourself in the foot.
> However, I'm more worried by overhead. GMainLoop is great because
> everybody plugs into it. It enabled the GTK+ front-end, it let us reuse
> GIOChannel for chardev flow control, and one can similarly think of
> integrating Avahi for example. However, I think it's mostly useful for
> simple-minded non-performance-critical code. QEMU has worked great in
> almost all scenarios with only one non-VCPU thread, and if we are going
> to move stuff to other threads we should only do that because we want
> performance and control. I'm not at all confident that GMainLoop can
> provide them.
I guess my view on this is that GMainLoop is a well structure and sane
implementation. Its got very few entry points and sane hooks. If we
decide we need epoll or something like that, it would be very easy to
make a drop-in replacement for it.
AioContext will encourage bad habits IMHO. I don't think it's the right
base for !block layer and I think we should aim to eliminate it within
the block layer over time too.
Regards,
Anthony Liguori
> On the contrary, AioContext really does two things (g_poll and bottom
> halves) and does them fast. For really high-performance scenarios, such
> as the ones virtio-blk-dataplane was written for, I'd be surprised if
> glib's main loop had the same performance as AioContext. Also,
> AioContext could be easily converted to use epoll, while we don't have
> the same level of control on glib's main loop.
>
> Of course I will easily change my mind if I see patches that show the
> contrary. :)
>
> Paolo
* Re: [Qemu-devel] [RFC PATCH 0/2] port network layer onto glib
2013-03-13 17:06 ` mdroth
@ 2013-03-13 17:31 ` Paolo Bonzini
2013-03-13 17:52 ` Michael S. Tsirkin
From: Paolo Bonzini @ 2013-03-13 17:31 UTC
To: mdroth
Cc: Stefan Hajnoczi, Michael S. Tsirkin, Liu Ping Fan,
Anthony Liguori, qemu-devel
On 13/03/2013 18:06, mdroth wrote:
> But isn't there also an effort to make virtio-blk/virtio-net a model for
> threaded devices/subsystems in general, as opposed to "accelerators" for
> specific use-cases like tap-based backends? I think this is the main
> question, because most of the planning seems contingent on that. And it
> seems to me that if the answer is no, then we need to consider the fact
> that vhost-net seems to serve this purpose already.
The answer is yes, no doubt. And also as playgrounds for making the
backends thread-safe. Of course it is okay to start them as
accelerators for specific use-cases, as long as:
* there is a plan for a final design that works for all back-ends. With
block we were really lucky and each single step provided new features;
but we can still do the same with other subsystems too if we have an idea
of how to get there.
* the amount of duplicated code is limited to the minimum (and hopefully
there is a plan to eradicate duplication altogether). Duplicated code
between non-threaded and threaded versions is not bad; duplicated code
across subsystems is much worse.
> If the answer is yes, don't we also need to look at things like interaction
> between slirp and a threaded network device? Based on comments in the
> other thread, I thought it was agreed that slirp was a particular example
> for something that should be rolled into a GMainContext loop as opposed
> to an AioContext based one?
Slirp is a mess. Right now it's neither AioContext nor GMainContext.
But GMainContext is not a silver bullet, because on Win32 it only takes
HANDLEs (just like AioContext), not sockets. GIOChannel is where glib
has the code to provide HANDLEs for sockets (IIRC it uses a buffer that
is managed from a separate thread). Both solutions are fine:
* add sockets to AioContext. Not really network-specific, as it would
enable network block backends (NBD, Curl, ...) on Win32.
* use a GMainLoop for the virtio-net-dataplane event loop. Convert
Slirp to use GIOChannels and add an AioContext (as a GSource) for the
virtio stuff.
I think the first is easier, also because a large part of the code is
already available in main-loop.c.
> To me this suggests that some event loops will ultimately drive
> GMainContext handlers in addition to AioContexts (with the latter perhaps
> being driven at a higher priority with PI mutexs and whatever else that
> entails).
It's possible. I cannot yet say.
The important point is that glib does not have any magic built in. The
risk is to confuse NIH with code in need of refactoring. If we have
NIH, we change to use some other library. If code is ugly and ad-hoc,
we refactor it and then treat it as just another reusable component.
AioContext is the latter, or at least that was my plan. :)
> What would be nice is if the difference between the iothread's event
> loop and a dataplane (or QMP/VNC/etc) thread's event loop was simply the
> set of AioContexts/GMainContexts that it drives.
100% agreed so far (except s/GMainContexts/GSources)...
> We could do that purely
> with AioContexts as well, but that rules out a large class of
> backends that offloaded event loops can interact with, such as Chardevs,
> so I think modelling how to handle both will provide a threading model
> that scales better with other devices/subsystems.
... but I think the "no magic" argument applies here too. After all, we
only have a handful of subsystems. If chardevs are not performance
critical, they can keep running in the main thread.
If one day we find out that we need a real-time serial port, and glib
just doesn't cut it, we shouldn't be ashamed of ripping GIOChannels out,
hand-writing the same stuff, and using a dedicated AioContext. Of
course by the time we get there we'll have unit tests/qtests to make
sure we do not regress. Right??? :)
Paolo
* Re: [Qemu-devel] [RFC PATCH 0/2] port network layer onto glib
2013-03-13 17:23 ` Anthony Liguori
@ 2013-03-13 17:35 ` Paolo Bonzini
2013-03-13 17:52 ` Anthony Liguori
2013-03-13 17:58 ` Anthony Liguori
From: Paolo Bonzini @ 2013-03-13 17:35 UTC
To: Anthony Liguori
Cc: mdroth, Stefan Hajnoczi, Michael S. Tsirkin, Liu Ping Fan,
qemu-devel
On 13/03/2013 18:23, Anthony Liguori wrote:
> I think the nesting is also a bit strange.
Nesting's gone since we added coroutines. :)
>> and AioContext's code is vastly simpler than GMainLoop's.
>
> For now.
Fair enough. :)
>> AioContext is also documented and unit tested, with tests
>> for both standalone and GSource operation. Unit tests for AioContext
>> users are trivial to write, we have one in test-thread-pool.
>>
>>> Did you have a specific concern with using glib vs. AioContext? Is it
>>> about reusing code in the block layer where AioContext is required?
>>
>> In the short term yes, code duplication is a concern. We already have
>> two implementations of virtio.
>
> I share your concern but in the opposite direction. We have three main
> loops today.
Yes, and two of them (main-loop.c/qemu-timer.c and async.c) can be merged.
>> I would like the dataplane virtio code to
>> grow everything else that needs to be in all dataplane-style devices
>> (for example, things such as setting up the guest<->host notifiers), and
>> the hw/virtio.c API implemented on top of it (or dead altogether).
>> Usage of AioContext is pretty much forced by the block layer.
>
> I don't think that AioContext is the right answer because it makes it
> too easy to shoot yourself in the foot.
See above, if nesting is the problem it's gone.
Paolo
* Re: [Qemu-devel] [RFC PATCH 0/2] port network layer onto glib
2013-03-13 17:31 ` Paolo Bonzini
@ 2013-03-13 17:52 ` Michael S. Tsirkin
2013-03-13 18:09 ` Anthony Liguori
From: Michael S. Tsirkin @ 2013-03-13 17:52 UTC
To: Paolo Bonzini
Cc: qemu-devel, Stefan Hajnoczi, mdroth, Anthony Liguori,
Liu Ping Fan
On Wed, Mar 13, 2013 at 06:31:57PM +0100, Paolo Bonzini wrote:
> > We could do that purely
> > with AioContexts as well, but that rules out a large class of
> > backends that offloaded event loops can interact with, such as Chardevs,
> > so I think modelling how to handle both will provide a threading model
> > that scales better with other devices/subsystems.
>
> .. but I think the "no magic" argument applies here too. After all we
> only have a handful of subsystems. If chardevs are not performance
> critical, they can keep running in the main thread.
>
> If one day we find out that we need a real-time serial port, and glib
> just doesn't cut it, we shouldn't be ashamed of ripping GIOChannels out,
> hand-writing the same stuff, and using a dedicated AioContext. Of
> course by the time we get there we'll have unit tests/qtests to make
> sure we do not regress. Right??? :)
>
> Paolo
Since you mention the serial port, just wanted to say that while its
bandwidth requirements are not high, we do need to improve its latency.
ATM whenever someone tries to use the emulated serial, guest experiences
stalls and worst case latency jumps, and it's a pain point for many
users.
--
MST
* Re: [Qemu-devel] [RFC PATCH 0/2] port network layer onto glib
2013-03-13 17:35 ` Paolo Bonzini
@ 2013-03-13 17:52 ` Anthony Liguori
2013-03-14 9:29 ` Stefan Hajnoczi
2013-03-13 17:58 ` Anthony Liguori
From: Anthony Liguori @ 2013-03-13 17:52 UTC
To: Paolo Bonzini
Cc: mdroth, Stefan Hajnoczi, Michael S. Tsirkin, Liu Ping Fan,
qemu-devel
Paolo Bonzini <pbonzini@redhat.com> writes:
> On 13/03/2013 18:23, Anthony Liguori wrote:
>> I think the nesting is also a bit strange.
>
> Nesting's gone since we added coroutines. :)
Okay, I owe AioContext a deeper look then.
Regards,
Anthony Liguori
>>> I would like the dataplane virtio code to
>>> grow everything else that needs to be in all dataplane-style devices
>>> (for example, things such as setting up the guest<->host notifiers), and
>>> the hw/virtio.c API implemented on top of it (or dead altogether).
>>> Usage of AioContext is pretty much forced by the block layer.
>>
>> I don't think that AioContext is the right answer because it makes it
>> too easy to shoot yourself in the foot.
>
> See above, if nesting is the problem it's gone.
>
> Paolo
* Re: [Qemu-devel] [RFC PATCH 0/2] port network layer onto glib
2013-03-13 17:35 ` Paolo Bonzini
2013-03-13 17:52 ` Anthony Liguori
@ 2013-03-13 17:58 ` Anthony Liguori
2013-03-13 18:08 ` Paolo Bonzini
From: Anthony Liguori @ 2013-03-13 17:58 UTC
To: Paolo Bonzini
Cc: mdroth, Stefan Hajnoczi, Michael S. Tsirkin, Liu Ping Fan,
qemu-devel
Paolo Bonzini <pbonzini@redhat.com> writes:
> On 13/03/2013 18:23, Anthony Liguori wrote:
>> I think the nesting is also a bit strange.
>
> Nesting's gone since we added coroutines. :)
Okay, deeper isn't that hard apparently.
There's not a lot in AioContext. Specifically:
1) It has no facility for timer events
2) It's tied to file descriptors (only a problem for win32)
3) The fd support is tied to read/write without an extensibility
mechanism for other fd events
4) It's got no mechanism to interact with signals
5) It's not obvious how we would integrate with polling-based callbacks
like we have in the character layer and networking layer.
So I agree it's simple but I don't think it can reasonably stay simple.
I think if we added all of the above, the best we would expect to end
up with is something that looked like glib.
As it stands, the lack of (5) would make it extremely difficult to
convert the networking layer.
Regards,
Anthony Liguori
>
>>> and AioContext's code is vastly simpler than GMainLoop's.
>>
>> For now.
>
> Fair enough. :)
>
>>> AioContext is also documented and unit tested, with tests
>>> for both standalone and GSource operation. Unit tests for AioContext
>>> users are trivial to write, we have one in test-thread-pool.
>>>
>>>> Did you have a specific concern with using glib vs. AioContext? Is it
>>>> about reusing code in the block layer where AioContext is required?
>>>
>>> In the short term yes, code duplication is a concern. We already have
>>> two implementation of virtio.
>>
>> I share your concern but in the opposite direction. We have three main
>> loops today.
>
> Yes, and two of them (main-loop.c/qemu-timer.c and async.c) can be merged.
>
>>> I would like the dataplane virtio code to
>>> grow everything else that needs to be in all dataplane-style devices
>>> (for example, things such as setting up the guest<->host notifiers), and
>>> the hw/virtio.c API implemented on top of it (or dead altogether).
>>> Usage of AioContext is pretty much forced by the block layer.
>>
>> I don't think that AioContext is the right answer because it makes it
>> too easy to shoot yourself in the foot.
>
> See above, if nesting is the problem it's gone.
>
> Paolo
* Re: [Qemu-devel] [RFC PATCH 0/2] port network layer onto glib
2013-03-13 17:58 ` Anthony Liguori
@ 2013-03-13 18:08 ` Paolo Bonzini
2013-03-13 18:51 ` Anthony Liguori
From: Paolo Bonzini @ 2013-03-13 18:08 UTC
To: Anthony Liguori
Cc: mdroth, Stefan Hajnoczi, Michael S. Tsirkin, Liu Ping Fan,
qemu-devel
> 1) It has no facility for timer events
Yup, it's on the todo list.
> 2) It's tied to file descriptors (only a problem for win32)
The other way round: it's not tied to file descriptors for win32,
which is already a problem for e.g. networked backends. main-loop.c
has the code that is needed, but it's low on the todo list.
Note that tap-win32 for example doesn't need this.
> 3) The fd support is tied to read/write without an extensibility
> mechanism for other fd events
Yes. Though internally it uses g_poll, so it's "just" a matter
of defining the right API.
> 4) It's got no mechanism to interact with signals
signalfd? (GSource doesn't have any such mechanism directly).
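[Editor's note: a minimal sketch of the signalfd approach mentioned here:
the signal is blocked for normal delivery and instead reported through a
file descriptor that any poll-based loop, AioContext or GMainContext
alike, can watch. Linux-specific; sigchld_fd is a hypothetical name.]

    #include <signal.h>
    #include <sys/signalfd.h>

    /* Turn SIGCHLD into a pollable fd. */
    static int sigchld_fd(void)
    {
        sigset_t mask;
        sigemptyset(&mask);
        sigaddset(&mask, SIGCHLD);
        /* block normal delivery; the signal arrives via the fd only */
        sigprocmask(SIG_BLOCK, &mask, NULL);
        return signalfd(-1, &mask, SFD_NONBLOCK);
    }
    /* The fd handler then reads one struct signalfd_siginfo per
     * pending signal. */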
> 5) It's not obvious how we would integrate with polling-based
> callbacks like we have in the character layer and networking layer.
Isn't io_flush one such mechanism? Right now it applies to both
io_read and io_write, but really it is never used for io_write.
Also, this and (3) might be the same problem.
> So I agree it's simple but I don't think it can reasonably stay simple.
> I think if we added all of the above, the best we would expect to end
> up with is something that looked like glib.
>
> As it stands, the lack of (5) would make it extremely difficult to
> convert the networking layer.
Quite possible, I've never looked very much at the networking layer.
Paolo
> Regards,
>
> Anthony Liguori
>
> >
> >>> and AioContext's code is vastly simpler than GMainLoop's.
> >>
> >> For now.
> >
> > Fair enough. :)
> >
> >>> AioContext is also documented and unit tested, with tests
> >>> for both standalone and GSource operation. Unit tests for
> >>> AioContext
> >>> users are trivial to write, we have one in test-thread-pool.
> >>>
> >>>> Did you have a specific concern with using glib vs. AioContext?
> >>>> Is it
> >>>> about reusing code in the block layer where AioContext is
> >>>> required?
> >>>
> >>> In the short term yes, code duplication is a concern. We already
> >>> have
> >>> two implementations of virtio.
> >>
> >> I share your concern but in the opposite direction. We have three
> >> main
> >> loops today.
> >
> > Yes, and two of them (main-loop.c/qemu-timer.c and async.c) can be
> > merged.
> >
> >>> I would like the dataplane virtio code to
> >>> grow everything else that needs to be in all dataplane-style
> >>> devices
> >>> (for example, things such as setting up the guest<->host
> >>> notifiers), and
> >>> the hw/virtio.c API implemented on top of it (or dead
> >>> altogether).
> >>> Usage of AioContext is pretty much forced by the block layer.
> >>
> >> I don't think that AioContext is the right answer because it makes
> >> it
> >> too easy to shoot yourself in the foot.
> >
> > See above, if nesting is the problem it's gone.
> >
> > Paolo
>
* Re: [Qemu-devel] [RFC PATCH 0/2] port network layer onto glib
2013-03-13 17:52 ` Michael S. Tsirkin
@ 2013-03-13 18:09 ` Anthony Liguori
From: Anthony Liguori @ 2013-03-13 18:09 UTC
To: Michael S. Tsirkin, Paolo Bonzini
Cc: qemu-devel, Stefan Hajnoczi, mdroth, Liu Ping Fan
"Michael S. Tsirkin" <mst@redhat.com> writes:
> On Wed, Mar 13, 2013 at 06:31:57PM +0100, Paolo Bonzini wrote:
>> > We could do that purely
>> > with AioContexts as well, but that rules out a large class of
>> > backends that offloaded event loops can interact with, such as Chardevs,
>> > so I think modelling how to handle both will provide a threading model
>> > that scales better with other devices/subsystems.
>>
>> .. but I think the "no magic" argument applies here too. After all we
>> only have a handful of subsystems. If chardevs are not performance
>> critical, they can keep running in the main thread.
>>
>> If one day we find out that we need a real-time serial port, and glib
>> just doesn't cut it, we shouldn't be ashamed of ripping GIOChannels out,
>> hand-writing the same stuff, and using a dedicated AioContext. Of
>> course by the time we get there we'll have unit tests/qtests to make
>> sure we do not regress. Right??? :)
>>
>> Paolo
>
> Since you mention serial port, just wanted to say that while it's
> bandwidth requirements are not high, we do need to improve its latency.
> ATM whenever someone tries to use the emulated serial, guest experiences
> stalls and worst case latency jumps, and it's a pain point for many
> users.
Can you be more specific? I'm not familiar with this issue.
Regards,
Anthony Liguori
>
> --
> MST
* Re: [Qemu-devel] [RFC PATCH 0/2] port network layer onto glib
2013-03-13 18:08 ` Paolo Bonzini
@ 2013-03-13 18:51 ` Anthony Liguori
From: Anthony Liguori @ 2013-03-13 18:51 UTC
To: Paolo Bonzini
Cc: mdroth, Stefan Hajnoczi, Michael S. Tsirkin, Liu Ping Fan,
qemu-devel
Paolo Bonzini <pbonzini@redhat.com> writes:
>> 1) It has no facility for timer events
>
> Yup, it's on the todo list.
>
>> 2) It's tied to file descriptors (only a problem for win32)
>
> The other way round: it's not tied to file descriptors for win32,
> which is already a problem for e.g. networked backends. main-loop.c
> has the code that is needed, but it's low on the todo list.
Okay, I'll change this to:
2) It doesn't have a cross-platform API for file descriptor/handle
events.
If we tried to use AioContext for networking, the result would be that
we'd break win32. That's the point I was trying to make.
> Note that tap-win32 for example doesn't need this.
Because tap-win32 doesn't share any code with tap :-/ This is
unfortunate really.
>> 3) The fd support is tied to read/write without an extensibility
>> mechanism for other fd events
>
> Yes. Though internally it uses g_poll, so it's "just" a matter
> of defining the right API.
Ack especially the "just" part :-)
>
>> 4) It's got no mechanism to interact with signals
>
> signalfd? (GSource doesn't have any such mechanism directly).
What I was referring to was the GChildWatch support. I don't think we
make extensive use of SIGCHLD today though so perhaps that's a minor point.
>
>> 5) It's not obvious how we would integrate with polling-based
>> callbacks like we have in the character layer and networking layer.
>
> Isn't io_flush one such mechanism? Right now it applies to both
> io_read and io_write, but really it is never used for io_write.
Sort of but it's not extensible. GSource really gets this right from an
abstraction point of view. It was pretty straightforward to convert
the character layer by just writing a GSource that allowed polling.
> Also, this and (3) might be the same problem.
I agree. I think it needs a GSource-like abstraction which is why I've
been arguing to just copy glib's main loop routines outright. They've
solved this problem already :-)
>> So I agree it's simple but I don't think it can reasonably stay simple.
>> I think if we added all of the above, the best we would expect to end
>> up with is something that looked like glib.
>>
>> As it stands, the lack of (5) would make it extremely difficult to
>> convert the networking layer.
>
> Quite possible, I've never looked very much at the networking layer.
There's another thing that argues in your favor--or at least for !glib
directly.
I think we want to make the main loops first-class QOM objects. Right
now all we support is 1-thread per dataplane but I'm positive we're
going to want to be able to group devices on threads.
Being able to express AioContexts as properties and even having a
command line interface to create threads would be really nice.
Something like:
qemu -object io-thread,id=thread0 \
-device virtio-blk,queue[0]=thread0 \
-device virtio-net,rx[0]=thread0,tx=thread0
Likewise, from a libvirt PoV, being able to do:
qom-get /threads/thread0.tid
Is a real nice touch for pinning purposes.
Regards,
Anthony Liguori
>
> Paolo
>
>> Regards,
>>
>> Anthony Liguori
>>
>> >
>> >>> and AioContext's code is vastly simpler than GMainLoop's.
>> >>
>> >> For now.
>> >
>> > Fair enough. :)
>> >
>> >>> AioContext is also documented and unit tested, with tests
>> >>> for both standalone and GSource operation. Unit tests for
>> >>> AioContext
>> >>> users are trivial to write, we have one in test-thread-pool.
>> >>>
>> >>>> Did you have a specific concern with using glib vs. AioContext?
>> >>>> Is it
>> >>>> about reusing code in the block layer where AioContext is
>> >>>> required?
>> >>>
>> >>> In the short term yes, code duplication is a concern. We already
>> >>> have
>> >>> two implementations of virtio.
>> >>
>> >> I share your concern but in the opposite direction. We have three
>> >> main
>> >> loops today.
>> >
>> > Yes, and two of them (main-loop.c/qemu-timer.c and async.c) can be
>> > merged.
>> >
>> >>> I would like the dataplane virtio code to
>> >>> grow everything else that needs to be in all dataplane-style
>> >>> devices
>> >>> (for example, things such as setting up the guest<->host
>> >>> notifiers), and
>> >>> the hw/virtio.c API implemented on top of it (or dead
>> >>> altogether).
>> >>> Usage of AioContext is pretty much forced by the block layer.
>> >>
>> >> I don't think that AioContext is the right answer because it makes
>> >> it
>> >> too easy to shoot yourself in the foot.
>> >
>> > See above, if nesting is the problem it's gone.
>> >
>> > Paolo
>>
* Re: [Qemu-devel] [RFC PATCH 0/2] port network layer onto glib
2013-03-13 17:52 ` Anthony Liguori
@ 2013-03-14 9:29 ` Stefan Hajnoczi
2013-03-14 9:53 ` Paolo Bonzini
0 siblings, 1 reply; 31+ messages in thread
From: Stefan Hajnoczi @ 2013-03-14 9:29 UTC (permalink / raw)
To: Anthony Liguori
Cc: mdroth, Paolo Bonzini, Michael S. Tsirkin, Liu Ping Fan,
qemu-devel
On Wed, Mar 13, 2013 at 12:52:11PM -0500, Anthony Liguori wrote:
> Paolo Bonzini <pbonzini@redhat.com> writes:
>
> > On 13/03/2013 18:23, Anthony Liguori wrote:
> >> I think the nesting is also a bit strange.
> >
> > Nesting's gone since we added coroutines. :)
>
> Okay, I owe AioContext a deeper look then.
We still have one level of nesting - the mainloop vs the AioContext
aio_poll().
After all, we still have synchronous I/O operations like bdrv_read(),
where the file descriptors and BHs execute only inside the AioContext's
aio_poll() while the other mainloop concepts (iohandlers, timers,
slirp) are suspended.
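A simplified sketch of that nesting (signatures abbreviated; the real
code goes through coroutines, and the -EINPROGRESS sentinel is just for
illustration):

    /* Synchronous wrapper around an async read: spin aio_poll() until
       the completion callback fires.  Only aio fd handlers and BHs make
       progress inside this loop; iohandlers, timers and slirp do not. */
    #include <errno.h>

    static void sync_read_cb(void *opaque, int ret)
    {
        *(int *)opaque = ret;          /* 0 on success, -errno on error */
    }

    static int sync_read(AioContext *ctx, BlockDriverState *bs,
                         int64_t sector, QEMUIOVector *qiov, int nb_sectors)
    {
        int ret = -EINPROGRESS;
        bdrv_aio_readv(bs, sector, qiov, nb_sectors, sync_read_cb, &ret);
        while (ret == -EINPROGRESS) {
            aio_poll(ctx, true);       /* the nested event loop */
        }
        return ret;
    }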
We need to do something about mainloop vs AioContext. There should only
be one interface to add a file descriptor; today we have iohandler and
aio.
Stefan
* Re: [Qemu-devel] [RFC PATCH 0/2] port network layer onto glib
2013-03-14 9:29 ` Stefan Hajnoczi
@ 2013-03-14 9:53 ` Paolo Bonzini
0 siblings, 0 replies; 31+ messages in thread
From: Paolo Bonzini @ 2013-03-14 9:53 UTC (permalink / raw)
To: Stefan Hajnoczi
Cc: mdroth, Michael S. Tsirkin, Liu Ping Fan, Anthony Liguori,
qemu-devel
On 14/03/2013 10:29, Stefan Hajnoczi wrote:
> > Okay, I owe AioContext a deeper look then.
>
> We still have one level of nesting - the mainloop vs the AioContext
> aio_poll().
That's a different thing, and it can be solved quite easily. As soon as
each BDS has its own AioContext, the synchronous operations can simply
poll that AioContext instead of nesting inside the mainloop.
> We need to do something about mainloop vs AioContext. There should only
> be one interface to add a file descriptor, today we have iohandler and
> aio.
Even better, the "other" handlers used by VFIO, VNC, etc. should not use
any global state. They should simply be yet another GSource or
AioContext (depending on what's easier).
Furthermore, each QEMUClock should be a separate timerfd (a timer queue
on Windows; emulated using a thread on non-Linux POSIX systems), so that
it can be easily added to an AioContext or wrapped by a GSource.
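For the Linux case the idea is just this (sketch, error handling
omitted):

    /* A clock exposed as a pollable fd via Linux timerfd. */
    #include <stdint.h>
    #include <sys/timerfd.h>
    #include <time.h>
    #include <unistd.h>

    static int timer_fd_arm(int64_t ns)
    {
        int fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK | TFD_CLOEXEC);
        struct itimerspec its = {
            .it_value.tv_sec  = ns / 1000000000LL,
            .it_value.tv_nsec = ns % 1000000000LL,
        };
        timerfd_settime(fd, 0, &its, NULL);
        return fd;                     /* readable once the deadline passes */
    }

    static uint64_t timer_fd_ack(int fd)
    {
        uint64_t expirations = 0;
        read(fd, &expirations, sizeof(expirations));  /* clears readiness */
        return expirations;
    }

The resulting fd can then be handed to the AioContext's fd-handler
interface or wrapped in a GPollFD for a GSource, so timers stop being a
special case in the loop.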
Paolo
* Re: [Qemu-devel] [RFC PATCH 0/2] port network layer onto glib
2013-03-13 12:34 ` Anthony Liguori
2013-03-13 16:21 ` Paolo Bonzini
@ 2013-03-14 10:04 ` Peter Maydell
2013-03-14 10:53 ` Paolo Bonzini
` (2 more replies)
1 sibling, 3 replies; 31+ messages in thread
From: Peter Maydell @ 2013-03-14 10:04 UTC (permalink / raw)
To: Anthony Liguori
Cc: mdroth, Michael S. Tsirkin, Stefan Hajnoczi, qemu-devel,
Liu Ping Fan, Paolo Bonzini
On 13 March 2013 12:34, Anthony Liguori <anthony@codemonkey.ws> wrote:
> AioContext is necessary for the block layer because the block layer
> still has synchronous I/O. I think we should aim to replace all sync
> I/O in the long term with coroutine based I/O.
I think coroutines are dreadful and we should really not be moving
towards greater use of them. They're just really really not portable
and they don't fit with the C language, and they're a constant source
of problems. (For instance I have a bug I need to look into where we
seem to hang using the gthread coroutine backend but not sigaltstack.)
Use threads, or a genuinely asynchronous API, or a select/poll loop
with callbacks, but not more coroutines please.
-- PMM
* Re: [Qemu-devel] [RFC PATCH 0/2] port network layer onto glib
2013-03-14 10:04 ` Peter Maydell
@ 2013-03-14 10:53 ` Paolo Bonzini
2013-03-14 11:00 ` Peter Maydell
2013-03-15 9:13 ` Stefan Hajnoczi
2013-03-19 9:30 ` Markus Armbruster
2 siblings, 1 reply; 31+ messages in thread
From: Paolo Bonzini @ 2013-03-14 10:53 UTC (permalink / raw)
To: Peter Maydell
Cc: mdroth, Michael S. Tsirkin, Stefan Hajnoczi, qemu-devel,
Liu Ping Fan, Anthony Liguori
On 14/03/2013 11:04, Peter Maydell wrote:
> On 13 March 2013 12:34, Anthony Liguori <anthony@codemonkey.ws> wrote:
>> AioContext is necessary for the block layer because the block layer
>> still has synchronous I/O. I think we should aim to replace all sync
>> I/O in the long term with coroutine based I/O.
>
> I think coroutines are dreadful and we should really not be moving
> towards greater use of them. They're just really really not portable
> and they don't fit with the C language, and they're a constant source
> of problems. (For instance I have a bug I need to look into where we
> seem to hang using the gthread coroutine backend but not sigaltstack.)
The gthread coroutine backend is really more for debugging than anything
else. It works for qemu-io/img, but not for QEMU. Good that you
actually found proof. :)
Paolo
> Use threads, or a genuinely asynchronous API, or a select/poll loop
> with callbacks, but not more coroutines please.
>
> -- PMM
>
* Re: [Qemu-devel] [RFC PATCH 0/2] port network layer onto glib
2013-03-14 10:53 ` Paolo Bonzini
@ 2013-03-14 11:00 ` Peter Maydell
2013-03-14 11:04 ` Paolo Bonzini
0 siblings, 1 reply; 31+ messages in thread
From: Peter Maydell @ 2013-03-14 11:00 UTC (permalink / raw)
To: Paolo Bonzini
Cc: mdroth, Michael S. Tsirkin, Stefan Hajnoczi, qemu-devel,
Liu Ping Fan, Anthony Liguori
On 14 March 2013 10:53, Paolo Bonzini <pbonzini@redhat.com> wrote:
> On 14/03/2013 11:04, Peter Maydell wrote:
>> I think coroutines are dreadful and we should really not be moving
>> towards greater use of them. They're just really really not portable
>> and they don't fit with the C language, and they're a constant source
>> of problems. (For instance I have a bug I need to look into where we
>> seem to hang using the gthread coroutine backend but not sigaltstack.)
>
> The gthread coroutine backend is really more for debugging than anything
> else. It works for qemu-io/img, but not for QEMU. Good that you
> actually found proof. :)
If it's not supposed to work we shouldn't let configure default
to it (and actually I'd argue we should drop it completely...)
-- PMM
* Re: [Qemu-devel] [RFC PATCH 0/2] port network layer onto glib
2013-03-14 11:00 ` Peter Maydell
@ 2013-03-14 11:04 ` Paolo Bonzini
2013-03-14 11:26 ` Peter Maydell
0 siblings, 1 reply; 31+ messages in thread
From: Paolo Bonzini @ 2013-03-14 11:04 UTC (permalink / raw)
To: Peter Maydell
Cc: mdroth, Michael S. Tsirkin, Stefan Hajnoczi, qemu-devel,
Liu Ping Fan, Anthony Liguori
On 14/03/2013 12:00, Peter Maydell wrote:
>>> >> I think coroutines are dreadful and we should really not be moving
>>> >> towards greater use of them. They're just really really not portable
>>> >> and they don't fit with the C language, and they're a constant source
>>> >> of problems. (For instance I have a bug I need to look into where we
>>> >> seem to hang using the gthread coroutine backend but not sigaltstack.)
>> >
>> > The gthread coroutine backend is really more for debugging than anything
>> > else. It works for qemu-io/img, but not for QEMU. Good that you
>> > actually found proof. :)
> If it's not supposed to work we shouldn't let configure default
> to it
Agreed. I thought it didn't anymore.
> (and actually I'd argue we should drop it completely...)
I'd do the same, but Kevin said he's using it for debugging qemu-io/img.
Paolo
* Re: [Qemu-devel] [RFC PATCH 0/2] port network layer onto glib
2013-03-14 11:04 ` Paolo Bonzini
@ 2013-03-14 11:26 ` Peter Maydell
0 siblings, 0 replies; 31+ messages in thread
From: Peter Maydell @ 2013-03-14 11:26 UTC (permalink / raw)
To: Paolo Bonzini
Cc: mdroth, Michael S. Tsirkin, Stefan Hajnoczi, qemu-devel,
Liu Ping Fan, Anthony Liguori
On 14 March 2013 11:04, Paolo Bonzini <pbonzini@redhat.com> wrote:
>>> > The gthread coroutine backend is really more for debugging than anything
>>> > else. It works for qemu-io/img, but not for QEMU. Good that you
>>> > actually found proof. :)
>> If it's not supposed to work we shouldn't let configure default
>> to it
>
> Agreed. I thought it didn't anymore.
Nope, we still go ucontext -> gthread (and for platforms which don't
support ucontext we'll go straight to gthread).
I think we should probably be ucontext -> sigaltstack. The configure
code for this is a bit dodgy anyway (for instance there are paths
through it that can end up not setting coroutine_backend at all).
I'll submit a patch to tidy it up and not have gthread as our fallback.
-- PMM
* Re: [Qemu-devel] [RFC PATCH 0/2] port network layer onto glib
2013-03-13 10:58 ` [Qemu-devel] [RFC PATCH 0/2] port network layer " Paolo Bonzini
2013-03-13 12:34 ` Anthony Liguori
@ 2013-03-14 14:08 ` liu ping fan
2013-03-14 14:18 ` Paolo Bonzini
1 sibling, 1 reply; 31+ messages in thread
From: liu ping fan @ 2013-03-14 14:08 UTC (permalink / raw)
To: Paolo Bonzini
Cc: mdroth, Stefan Hajnoczi, qemu-devel, Anthony Liguori,
Michael S. Tsirkin
On Wed, Mar 13, 2013 at 6:58 PM, Paolo Bonzini <pbonzini@redhat.com> wrote:
> On 13/03/2013 06:59, Liu Ping Fan wrote:
>> This series aims to port the network backend onto glib, and
>> prepares for moving towards making the network layer multi-threaded.
>> The overall aim and plan are documented at
>> http://wiki.qemu.org/Features/network_reentrant
>>
>> In this series, each NetClientState is attached to a GSource.
>> At first, I used AioContext instead of GSource, but after discussion
>> I think that with GSource we can integrate with glib more closely.
>
> Integrating with glib by itself is pointless. What is the *benefit*?
>
> We have a pretty good idea of how to make multithreaded device models
> using AioContext, since we are using it for the block layer and
> virtio-blk dataplane. Doing the same work twice, on two different
> frameworks, doesn't seem like a very good idea.
>
One thing is that AioContext lacks something which can handle
IOCanReadHandler.
Regards,
Pingfan
> Paolo
* Re: [Qemu-devel] [RFC PATCH 0/2] port network layer onto glib
2013-03-14 14:08 ` liu ping fan
@ 2013-03-14 14:18 ` Paolo Bonzini
0 siblings, 0 replies; 31+ messages in thread
From: Paolo Bonzini @ 2013-03-14 14:18 UTC (permalink / raw)
To: liu ping fan
Cc: Stefan Hajnoczi, Michael S. Tsirkin, mdroth, Anthony Liguori,
qemu-devel
On 14/03/2013 15:08, liu ping fan wrote:
> On Wed, Mar 13, 2013 at 6:58 PM, Paolo Bonzini <pbonzini@redhat.com> wrote:
>> On 13/03/2013 06:59, Liu Ping Fan wrote:
>>> This series aims to port the network backend onto glib, and
>>> prepares for moving towards making the network layer multi-threaded.
>>> The overall aim and plan are documented at
>>> http://wiki.qemu.org/Features/network_reentrant
>>>
>>> In this series, each NetClientState is attached to a GSource.
>>> At first, I used AioContext instead of GSource, but after discussion
>>> I think that with GSource we can integrate with glib more closely.
>>
>> Integrating with glib by itself is pointless. What is the *benefit*?
>>
>> We have a pretty good idea of how to make multithreaded device models
>> using AioContext, since we are using it for the block layer and
>> virtio-blk dataplane. Doing the same work twice, on two different
>> frameworks, doesn't seem like a very good idea.
>>
> One thing is that AioContext lacks something which can handle
> IOCanReadHandler.
See reply to Anthony. io_flush is very similar to that. Right now it
affects both reading and writing, but I think it wouldn't be a problem
to make it affect io_read only.
Paolo
* Re: [Qemu-devel] [RFC PATCH 0/2] port network layer onto glib
2013-03-14 10:04 ` Peter Maydell
2013-03-14 10:53 ` Paolo Bonzini
@ 2013-03-15 9:13 ` Stefan Hajnoczi
2013-03-19 9:30 ` Markus Armbruster
2 siblings, 0 replies; 31+ messages in thread
From: Stefan Hajnoczi @ 2013-03-15 9:13 UTC (permalink / raw)
To: Peter Maydell
Cc: mdroth, Michael S. Tsirkin, qemu-devel, Liu Ping Fan,
Anthony Liguori, Paolo Bonzini
On Thu, Mar 14, 2013 at 10:04:23AM +0000, Peter Maydell wrote:
> On 13 March 2013 12:34, Anthony Liguori <anthony@codemonkey.ws> wrote:
> > AioContext is necessary for the block layer because the block layer
> > still has synchronous I/O. I think we should aim to replace all sync
> > I/O in the long term with coroutine based I/O.
>
> I think coroutines are dreadful and we should really not be moving
> towards greater use of them. They're just really really not portable
> and they don't fit with the C language, and they're a constant source
> of problems. (For instance I have a bug I need to look into where we
> seem to hang using the gthread coroutine backend but not sigaltstack.)
>
> Use threads, or a genuinely asynchronous API, or a select/poll loop
> with callbacks, but not more coroutines please.
You're right that coroutines make it harder to support QEMU across host
operating systems.
Why we have coroutines
======================
That said, we need an alternative mechanism for writing sequential code.
If we went through the effort of converting all coroutine and blocking
code to asynchronous callbacks, the codebase would be very hard to
understand:
Straightforward operations would be split into many little callbacks.
Local variables would turn into state that gets passed between
callbacks. This sort of code is much harder to reason about and is why
coroutines were introduced.
Coroutines allow us to keep the block layer and some of its users
readable. Removing them would not only be a big effort, but would also
make the codebase worse.
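A toy example, not QEMU code (the helpers are assumed), of the same
two-step operation written both ways:

    #include <stdlib.h>

    struct conn;
    struct msg;

    /* Assumed helpers for both variants. */
    int  read_header(struct conn *c);
    int  read_body(struct conn *c, struct msg *m, int len);
    typedef void (*LoadDone)(void *opaque, int ret);
    void read_body_async(struct conn *c, struct msg *m, int len,
                         LoadDone cb, void *opaque);

    /* Sequential (coroutine) style: 'len' is an ordinary local and the
       control flow reads top to bottom, even if each call yields. */
    static int load_msg(struct conn *c, struct msg *m)
    {
        int len = read_header(c);
        if (len < 0) {
            return len;
        }
        return read_body(c, m, len);
    }

    /* Callback style: the same logic split across functions, with 'len'
       and the parameters hoisted into heap-allocated state. */
    struct load_state {
        struct conn *c;
        struct msg *m;
        LoadDone done;
        void *opaque;
    };

    static void header_cb(void *opaque, int len)
    {
        struct load_state *s = opaque;
        if (len < 0) {
            s->done(s->opaque, len);
        } else {
            read_body_async(s->c, s->m, len, s->done, s->opaque);
        }
        free(s);
    }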
An alternative to stack switching
=================================
I was looking at a second approach when considering the options:
continuation-passing style transformation. The following research
project implements an extension to C for writing sequential code that
gets converted to callbacks:
http://www.pps.univ-paris-diderot.fr/~kerneis/software/cpc/
Quick summary of how it works:
Continuation-passing functions are marked with coroutine_fn, similar to
how we already do it in QEMU. There are APIs to create new coroutines
and to yield. The trick is that stack switching is not used, so the
approach is portable C. Instead, the code is translated into callbacks
at build time and local variables are automatically kept on the heap in
a "thunk" buffer.
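Roughly, the translation turns a sequential function into a state
machine whose "stack frame" lives on the heap. A hand-written sketch of
the shape of the generated code (cpc's real output differs, and the
start_* helpers are assumed):

    #include <stdlib.h>

    struct thunk {
        int state;     /* which resume point comes next */
        int len;       /* a former local variable */
        /* former parameters would live here too */
    };

    void start_read_header(struct thunk *t);  /* assumed: re-enters step() */
    void start_read_body(struct thunk *t);    /* assumed: re-enters step() */

    void step(struct thunk *t)
    {
        switch (t->state) {
        case 0:                       /* code up to the first yield */
            t->state = 1;
            start_read_header(t);     /* fills in t->len, calls step() back */
            return;
        case 1:                       /* code between the two yields */
            t->state = 2;
            start_read_body(t);
            return;
        case 2:                       /* after the last yield: clean up */
            free(t);
            return;
        }
    }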
I chatted with the authors and it looks like a good approach. The
problems are:
1. It uses an OCaml compiler framework. Should this be part of gcc or an
external tool that is easily consumed on all QEMU host platforms?
2. Debug information - need to make sure that callbacks map back to
sequential code.
3. Maturity - cpc isn't used much to my knowledge.
I'm willing to mentor a Summer of Code project that attempts to replace
coroutines with cpc. There's no guarantee it can replace coroutines due
to the issues I listed, but I think it's worth a shot.
Peter: Does this approach address your concerns about coroutines?
Thoughts about using a build-time tool instead of run-time stack
switching?
Stefan
* Re: [Qemu-devel] [RFC PATCH 0/2] port network layer onto glib
2013-03-14 10:04 ` Peter Maydell
2013-03-14 10:53 ` Paolo Bonzini
2013-03-15 9:13 ` Stefan Hajnoczi
@ 2013-03-19 9:30 ` Markus Armbruster
2013-03-19 10:12 ` Peter Maydell
2 siblings, 1 reply; 31+ messages in thread
From: Markus Armbruster @ 2013-03-19 9:30 UTC (permalink / raw)
To: Peter Maydell
Cc: mdroth, Michael S. Tsirkin, Stefan Hajnoczi, qemu-devel,
Liu Ping Fan, Anthony Liguori, Paolo Bonzini
Peter Maydell <peter.maydell@linaro.org> writes:
> On 13 March 2013 12:34, Anthony Liguori <anthony@codemonkey.ws> wrote:
>> AioContext is necessary for the block layer because the block layer
>> still has synchronous I/O. I think we should aim to replace all sync
>> I/O in the long term with coroutine based I/O.
>
> I think coroutines are dreadful and we should really not be moving
> towards greater use of them. They're just really really not portable
> and they don't fit with the C language, and they're a constant source
> of problems. (For instance I have a bug I need to look into where we
> seem to hang using the gthread coroutine backend but not sigaltstack.)
>
> Use threads, or a genuinely asynchronous API, or a select/poll loop
> with callbacks, but not more coroutines please.
I disagree.
Coroutines are a perfectly pedestrian control flow construct. Stefan
already explained why we use them, and why callbacks are not a viable
alternative when things get hairy.
Coroutines fit about as well with C as threads, namely not really.
Unfortunate, but switching to threads won't improve that aspect one bit,
just add "interesting" concurrency and performance problems to the mix.
If portable coroutines in C really was an intractable problem, the
solution could not be "no more coroutines, please", only "no coroutines,
period". As long as we have to pay the price for coroutines anyway, I
can't see why we should deny ourselves the benefits.
C programs have been doing coroutines since forever, using either
hand-coded assembly language stack switching, sigaltstack() trickery,
ucontext(), w32 fibers, or some coroutine library built on top of these.
We chose "all of the above" (except for assembly). No wonder we got
more problems.
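For reference, the ucontext() flavour fits in a few lines (minimal,
error handling omitted, and exactly as unportable as discussed):

    #include <stdio.h>
    #include <ucontext.h>

    static ucontext_t caller, coro;
    static char stack[64 * 1024];

    static void entry(void)
    {
        printf("coroutine: first run\n");
        swapcontext(&coro, &caller);     /* yield back to the caller */
        printf("coroutine: resumed\n");
    }                                    /* returning follows uc_link */

    int main(void)
    {
        getcontext(&coro);
        coro.uc_stack.ss_sp = stack;
        coro.uc_stack.ss_size = sizeof(stack);
        coro.uc_link = &caller;
        makecontext(&coro, entry, 0);
        swapcontext(&caller, &coro);     /* enter the coroutine */
        printf("main: coroutine yielded\n");
        swapcontext(&caller, &coro);     /* resume it */
        return 0;
    }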
Some of our portability problems come from our heroic (quixotic?) quest
to keep QEMU portable to the widest range of hosts, without sacrificing
performance on any of them. Sometimes it takes a tough man to make a
tender chicken.
* Re: [Qemu-devel] [RFC PATCH 0/2] port network layer onto glib
2013-03-19 9:30 ` Markus Armbruster
@ 2013-03-19 10:12 ` Peter Maydell
2013-03-19 10:34 ` Paolo Bonzini
0 siblings, 1 reply; 31+ messages in thread
From: Peter Maydell @ 2013-03-19 10:12 UTC (permalink / raw)
To: Markus Armbruster
Cc: mdroth, Michael S. Tsirkin, Stefan Hajnoczi, qemu-devel,
Liu Ping Fan, Anthony Liguori, Paolo Bonzini
On 19 March 2013 09:30, Markus Armbruster <armbru@redhat.com> wrote:
> Coroutines are a perfectly pedestrian control flow construct.
In some languages, sure. Not in C, and we're writing C.
> Coroutines fit about as well with C as threads, namely not really.
Threads are supported by the language runtime provided on all
the systems we support, which is why they are reasonably usable.
When you've persuaded glibc, MacOSX libc and Windows to implement
coroutines please come back and let me know :-)
> If portable coroutines in C really was an intractable problem, the
> solution could not be "no more coroutines, please", only "no coroutines,
> period". As long as we have to pay the price for coroutines anyway, I
> can't see why we should deny ourselves the benefits.
I'd like to see coroutines gone completely. The first step is not
to let them get used more than they are already.
> C programs have been doing coroutines since forever, using either hand
> coded assembly language stack switching, sigaltstack() trickery,
> ucontext(), w32 fibers, or some coroutine library built on top of these.
...and there's a wide range of really nasty options because none
of them are actually decent solutions to the problem.
-- PMM
* Re: [Qemu-devel] [RFC PATCH 0/2] port network layer onto glib
2013-03-19 10:12 ` Peter Maydell
@ 2013-03-19 10:34 ` Paolo Bonzini
2013-03-19 10:38 ` Peter Maydell
0 siblings, 1 reply; 31+ messages in thread
From: Paolo Bonzini @ 2013-03-19 10:34 UTC (permalink / raw)
To: Peter Maydell
Cc: mdroth, Michael S. Tsirkin, Stefan Hajnoczi, qemu-devel,
Markus Armbruster, Anthony Liguori, Liu Ping Fan
On 19/03/2013 11:12, Peter Maydell wrote:
> On 19 March 2013 09:30, Markus Armbruster <armbru@redhat.com> wrote:
>> Coroutines are a perfectly pedestrian control flow construct.
>
> In some languages, sure. Not in C, and we're writing C.
>
>> Coroutines fit about as well with C as threads, namely not really.
>
> Threads are supported by the language runtime provided on all
> the systems we support, which is why they are reasonably usable.
> When you've persuaded glibc, MacOSX libc and Windows to implement
> coroutines please come back and let me know :-)
Windows supports them (it calls them fibers) and glibc does on many
architectures (all but ARM, basically).
Paolo
>> If portable coroutines in C really was an intractable problem, the
>> solution could not be "no more coroutines, please", only "no coroutines,
>> period". As long as we have to pay the price for coroutines anyway, I
>> can't see why we should deny ourselves the benefits.
>
> I'd like to see coroutines gone completely. The first step is not
> to let them get used more than they are already.
>
>> C programs have been doing coroutines since forever, using either hand
>> coded assembly language stack switching, sigaltstack() trickery,
>> ucontext(), w32 fibers, or some coroutine library built on top of these.
>
> ...and there's a wide range of really nasty options because none
> of them are actually decent solutions to the problem.
>
> -- PMM
>
* Re: [Qemu-devel] [RFC PATCH 0/2] port network layer onto glib
2013-03-19 10:34 ` Paolo Bonzini
@ 2013-03-19 10:38 ` Peter Maydell
2013-03-19 10:45 ` Paolo Bonzini
0 siblings, 1 reply; 31+ messages in thread
From: Peter Maydell @ 2013-03-19 10:38 UTC (permalink / raw)
To: Paolo Bonzini
Cc: mdroth, Michael S. Tsirkin, Stefan Hajnoczi, qemu-devel,
Markus Armbruster, Anthony Liguori, Liu Ping Fan
On 19 March 2013 10:34, Paolo Bonzini <pbonzini@redhat.com> wrote:
> On 19/03/2013 11:12, Peter Maydell wrote:
>> Threads are supported by the language runtime provided on all
>> the systems we support, which is why they are reasonably usable.
>> When you've persuaded glibc, MacOSX libc and Windows to implement
>> coroutines please come back and let me know :-)
>
> Windows supports them (it calls them fibers) and glibc does on many
> architectures (all but ARM, basically).
If you mean ucontext, I'm not sure I'd call that coroutine
support at the library level (and we did implement it on
ARM glibc).
-- PMM
* Re: [Qemu-devel] [RFC PATCH 0/2] port network layer onto glib
2013-03-19 10:38 ` Peter Maydell
@ 2013-03-19 10:45 ` Paolo Bonzini
0 siblings, 0 replies; 31+ messages in thread
From: Paolo Bonzini @ 2013-03-19 10:45 UTC (permalink / raw)
To: Peter Maydell
Cc: mdroth, Michael S. Tsirkin, Stefan Hajnoczi, qemu-devel,
Markus Armbruster, Anthony Liguori, Liu Ping Fan
On 19/03/2013 11:38, Peter Maydell wrote:
> On 19 March 2013 10:34, Paolo Bonzini <pbonzini@redhat.com> wrote:
>> On 19/03/2013 11:12, Peter Maydell wrote:
>>> Threads are supported by the language runtime provided on all
>>> the systems we support, which is why they are reasonably usable.
>>> When you've persuaded glibc, MacOSX libc and Windows to implement
>>> coroutines please come back and let me know :-)
>>
>> Windows supports them (it calls them fibers) and glibc does on many
>> architectures (all but ARM, basically).
>
> If you mean ucontext, I'm not sure I'd call that coroutine
> support at the library level (and we did implement it on
> ARM glibc).
Yes, I mean ucontext, more precisely makecontext/setcontext. Portably
creating a new stack is really the crux of coroutine support.
Paolo