qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH 0/3] another round of qemu-char fixes
@ 2013-04-16 11:10 Paolo Bonzini
  2013-04-16 11:10 ` [Qemu-devel] [PATCH 1/3] qemu-char: use consistent idiom for removing sources Paolo Bonzini
                   ` (3 more replies)
  0 siblings, 4 replies; 12+ messages in thread
From: Paolo Bonzini @ 2013-04-16 11:10 UTC (permalink / raw)
  To: qemu-devel; +Cc: kraxel

I'm happy to say that I'm not touching IOWatchPoll (well, almost: only
for consistency).  Instead, these patches try to make the code consistent
(thus avoiding CRITICAL messages from glib) and to fix detection of pty
connections; Gerd reported that it went berserk with polling.  I think it
is the same failure reported by Michael Hines, but I couldn't reproduce
it with "-serial pty", only with the monitor.

Paolo Bonzini (3):
  qemu-char: use consistent idiom for removing sources
  qemu-char: simplify pty polling
  qemu-char: correct return value from chr_read functions

 qemu-char.c | 95 ++++++++++++++++++++++++++++++++++++-------------------------
 1 file changed, 56 insertions(+), 39 deletions(-)

-- 
1.8.1.4

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [Qemu-devel] [PATCH 1/3] qemu-char: use consistent idiom for removing sources
  2013-04-16 11:10 [Qemu-devel] [PATCH 0/3] another round of qemu-char fixes Paolo Bonzini
@ 2013-04-16 11:10 ` Paolo Bonzini
  2013-04-16 11:10 ` [Qemu-devel] [PATCH 2/3] qemu-char: simplify pty polling Paolo Bonzini
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 12+ messages in thread
From: Paolo Bonzini @ 2013-04-16 11:10 UTC (permalink / raw)
  To: qemu-devel; +Cc: kraxel

Always check that the source is active, and zero the tag afterwards.

The occurrence in pty_chr_state will trigger with the next patch, the
others are just theoretical.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 qemu-char.c | 32 +++++++++++++++++++++++---------
 1 file changed, 23 insertions(+), 9 deletions(-)

diff --git a/qemu-char.c b/qemu-char.c
index 728ed9b..552a498 100644
--- a/qemu-char.c
+++ b/qemu-char.c
@@ -644,9 +644,11 @@ static gboolean io_watch_poll_dispatch(GSource *source, GSourceFunc callback,
 static void io_watch_poll_finalize(GSource *source)
 {
     IOWatchPoll *iwp = io_watch_poll_from_source(source);
-    g_source_destroy(iwp->src);
-    g_source_unref(iwp->src);
-    iwp->src = NULL;
+    if (iwp->src) {
+        g_source_destroy(iwp->src);
+        g_source_unref(iwp->src);
+        iwp->src = NULL;
+    }
 }
 
 static GSourceFuncs io_watch_poll_funcs = {
@@ -816,6 +818,7 @@ static void fd_chr_update_read_handler(CharDriverState *chr)
 
     if (s->fd_in_tag) {
         g_source_remove(s->fd_in_tag);
+        s->fd_in_tag = 0;
     }
 
     if (s->fd_in) {
@@ -1148,8 +1151,10 @@ static void pty_chr_state(CharDriverState *chr, int connected)
     PtyCharDriver *s = chr->opaque;
 
     if (!connected) {
-        g_source_remove(s->fd_tag);
-        s->fd_tag = 0;
+        if (s->fd_tag) {
+            g_source_remove(s->fd_tag);
+            s->fd_tag = 0;
+        }
         s->connected = 0;
         s->polling = 0;
         /* (re-)connect poll interval for idle guests: once per second.
@@ -1171,12 +1176,14 @@ static void pty_chr_close(struct CharDriverState *chr)
 
     if (s->fd_tag) {
         g_source_remove(s->fd_tag);
+        s->fd_tag = 0;
     }
     fd = g_io_channel_unix_get_fd(s->fd);
     g_io_channel_unref(s->fd);
     close(fd);
     if (s->timer_tag) {
         g_source_remove(s->timer_tag);
+        s->timer_tag = 0;
     }
     g_free(s);
     qemu_chr_be_event(chr, CHR_EVENT_CLOSED);
@@ -2277,6 +2284,7 @@ static void udp_chr_close(CharDriverState *chr)
     NetCharDriver *s = chr->opaque;
     if (s->tag) {
         g_source_remove(s->tag);
+        s->tag = 0;
     }
     if (s->chan) {
         g_io_channel_unref(s->chan);
@@ -2508,8 +2516,10 @@ static gboolean tcp_chr_read(GIOChannel *chan, GIOCondition cond, void *opaque)
         if (s->listen_chan) {
             s->listen_tag = g_io_add_watch(s->listen_chan, G_IO_IN, tcp_chr_accept, chr);
         }
-        g_source_remove(s->tag);
-        s->tag = 0;
+        if (s->tag) {
+            g_source_remove(s->tag);
+            s->tag = 0;
+        }
         g_io_channel_unref(s->chan);
         s->chan = NULL;
         closesocket(s->fd);
@@ -2570,8 +2580,10 @@ static int tcp_chr_add_client(CharDriverState *chr, int fd)
         socket_set_nodelay(fd);
     s->fd = fd;
     s->chan = io_channel_from_socket(fd);
-    g_source_remove(s->listen_tag);
-    s->listen_tag = 0;
+    if (s->listen_tag) {
+        g_source_remove(s->listen_tag);
+        s->listen_tag = 0;
+    }
     tcp_chr_connect(chr);
 
     return 0;
@@ -2621,6 +2633,7 @@ static void tcp_chr_close(CharDriverState *chr)
     if (s->fd >= 0) {
         if (s->tag) {
             g_source_remove(s->tag);
+            s->tag = 0;
         }
         if (s->chan) {
             g_io_channel_unref(s->chan);
@@ -2630,6 +2643,7 @@ static void tcp_chr_close(CharDriverState *chr)
     if (s->listen_fd >= 0) {
         if (s->listen_tag) {
             g_source_remove(s->listen_tag);
+            s->listen_tag = 0;
         }
         if (s->listen_chan) {
             g_io_channel_unref(s->listen_chan);
-- 
1.8.1.4

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [Qemu-devel] [PATCH 2/3] qemu-char: simplify pty polling
  2013-04-16 11:10 [Qemu-devel] [PATCH 0/3] another round of qemu-char fixes Paolo Bonzini
  2013-04-16 11:10 ` [Qemu-devel] [PATCH 1/3] qemu-char: use consistent idiom for removing sources Paolo Bonzini
@ 2013-04-16 11:10 ` Paolo Bonzini
  2013-04-16 11:10 ` [Qemu-devel] [PATCH 3/3] qemu-char: correct return value from chr_read functions Paolo Bonzini
  2013-04-16 11:22 ` [Qemu-devel] [PATCH 0/3] another round of qemu-char fixes Gerd Hoffmann
  3 siblings, 0 replies; 12+ messages in thread
From: Paolo Bonzini @ 2013-04-16 11:10 UTC (permalink / raw)
  To: qemu-devel; +Cc: kraxel

There is no need to use a timer and pty_chr_read to detect a connected
pty.  It is simpler to just call g_poll periodically and check for POLLHUP.
It is done once per second, and only if the pty is disconnected, so it
is cheap enough.

Tested with "-monitor pty" and "-serial mon:pty", both of which work
correctly and do not freeze QEMU.  (How to test ptys?  "socat -,raw,echo=0
/dev/pts/4,raw").

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 qemu-char.c | 41 +++++++++++++++++------------------------
 1 file changed, 17 insertions(+), 24 deletions(-)

diff --git a/qemu-char.c b/qemu-char.c
index 552a498..d14888d 100644
--- a/qemu-char.c
+++ b/qemu-char.c
@@ -1028,7 +1028,6 @@ typedef struct {
     GIOChannel *fd;
     guint fd_tag;
     int connected;
-    int polling;
     int read_bytes;
     guint timer_tag;
 } PtyCharDriver;
@@ -1044,12 +1043,6 @@ static gboolean pty_chr_timer(gpointer opaque)
     if (s->connected) {
         goto out;
     }
-    if (s->polling) {
-        /* If we arrive here without polling being cleared due
-         * read returning -EIO, then we are (re-)connected */
-        pty_chr_state(chr, 1);
-        goto out;
-    }
 
     /* Next poll ... */
     pty_chr_update_read_handler(chr);
@@ -1128,22 +1121,17 @@ static gboolean pty_chr_read(GIOChannel *chan, GIOCondition cond, void *opaque)
 static void pty_chr_update_read_handler(CharDriverState *chr)
 {
     PtyCharDriver *s = chr->opaque;
+    GPollFD pfd;
 
-    if (s->fd_tag) {
-        g_source_remove(s->fd_tag);
+    pfd.fd = g_io_channel_unix_get_fd(s->fd);
+    pfd.events = G_IO_OUT;
+    pfd.revents = 0;
+    g_poll(&pfd, 1, 0);
+    if (pfd.revents & G_IO_HUP) {
+        pty_chr_state(chr, 0);
+    } else {
+        pty_chr_state(chr, 1);
     }
-
-    s->fd_tag = io_add_watch_poll(s->fd, pty_chr_read_poll, pty_chr_read, chr);
-    s->polling = 1;
-    /*
-     * Short timeout here: just need wait long enougth that qemu makes
-     * it through the poll loop once.  When reconnected we want a
-     * short timeout so we notice it almost instantly.  Otherwise
-     * read() gives us -EIO instantly, making pty_chr_state() reset the
-     * timeout to the normal (much longer) poll interval before the
-     * timer triggers.
-     */
-    pty_chr_rearm_timer(chr, 10);
 }
 
 static void pty_chr_state(CharDriverState *chr, int connected)
@@ -1156,15 +1144,20 @@ static void pty_chr_state(CharDriverState *chr, int connected)
             s->fd_tag = 0;
         }
         s->connected = 0;
-        s->polling = 0;
         /* (re-)connect poll interval for idle guests: once per second.
          * We check more frequently in case the guests sends data to
          * the virtual device linked to our pty. */
         pty_chr_rearm_timer(chr, 1000);
     } else {
-        if (!s->connected)
+        if (s->timer_tag) {
+            g_source_remove(s->timer_tag);
+            s->timer_tag = 0;
+        }
+        if (!s->connected) {
             qemu_chr_be_generic_open(chr);
-        s->connected = 1;
+            s->connected = 1;
+            s->fd_tag = io_add_watch_poll(s->fd, pty_chr_read_poll, pty_chr_read, chr);
+        }
     }
 }
 
-- 
1.8.1.4

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [Qemu-devel] [PATCH 3/3] qemu-char: correct return value from chr_read functions
  2013-04-16 11:10 [Qemu-devel] [PATCH 0/3] another round of qemu-char fixes Paolo Bonzini
  2013-04-16 11:10 ` [Qemu-devel] [PATCH 1/3] qemu-char: use consistent idiom for removing sources Paolo Bonzini
  2013-04-16 11:10 ` [Qemu-devel] [PATCH 2/3] qemu-char: simplify pty polling Paolo Bonzini
@ 2013-04-16 11:10 ` Paolo Bonzini
  2013-04-16 11:22 ` [Qemu-devel] [PATCH 0/3] another round of qemu-char fixes Gerd Hoffmann
  3 siblings, 0 replies; 12+ messages in thread
From: Paolo Bonzini @ 2013-04-16 11:10 UTC (permalink / raw)
  To: qemu-devel; +Cc: kraxel

Even if a CharDriverState's source is blocked by the front-end,
it must not be dropped. The IOWatchPoll that wraps it will take
care of adding and removing it to the main loop.  Only remove
the source when the channel is closed; and in that case, make sure
that the wrapping IOWatchPoll is removed too.

These should just be theoretical bugs.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 qemu-char.c | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/qemu-char.c b/qemu-char.c
index d14888d..6e897da 100644
--- a/qemu-char.c
+++ b/qemu-char.c
@@ -781,12 +781,16 @@ static gboolean fd_chr_read(GIOChannel *chan, GIOCondition cond, void *opaque)
         len = s->max_size;
     }
     if (len == 0) {
-        return FALSE;
+        return TRUE;
     }
 
     status = g_io_channel_read_chars(chan, (gchar *)buf,
                                      len, &bytes_read, NULL);
     if (status == G_IO_STATUS_EOF) {
+        if (s->fd_in_tag) {
+            g_source_remove(s->fd_in_tag);
+            s->fd_in_tag = 0;
+        }
         qemu_chr_be_event(chr, CHR_EVENT_CLOSED);
         return FALSE;
     }
@@ -1105,8 +1109,9 @@ static gboolean pty_chr_read(GIOChannel *chan, GIOCondition cond, void *opaque)
     len = sizeof(buf);
     if (len > s->read_bytes)
         len = s->read_bytes;
-    if (len == 0)
-        return FALSE;
+    if (len == 0) {
+        return TRUE;
+    }
     status = g_io_channel_read_chars(s->fd, (gchar *)buf, len, &size, NULL);
     if (status != G_IO_STATUS_NORMAL) {
         pty_chr_state(chr, 0);
@@ -2238,13 +2243,18 @@ static gboolean udp_chr_read(GIOChannel *chan, GIOCondition cond, void *opaque)
     gsize bytes_read = 0;
     GIOStatus status;
 
-    if (s->max_size == 0)
-        return FALSE;
+    if (s->max_size == 0) {
+        return TRUE;
+    }
     status = g_io_channel_read_chars(s->chan, (gchar *)s->buf, sizeof(s->buf),
                                      &bytes_read, NULL);
     s->bufcnt = bytes_read;
     s->bufptr = s->bufcnt;
     if (status != G_IO_STATUS_NORMAL) {
+        if (s->tag) {
+            g_source_remove(s->tag);
+            s->tag = 0;
+        }
         return FALSE;
     }
 
@@ -2497,7 +2507,7 @@ static gboolean tcp_chr_read(GIOChannel *chan, GIOCondition cond, void *opaque)
     int len, size;
 
     if (!s->connected || s->max_size <= 0) {
-        return FALSE;
+        return TRUE;
     }
     len = sizeof(buf);
     if (len > s->max_size)
-- 
1.8.1.4

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [Qemu-devel] [PATCH 0/3] another round of qemu-char fixes
  2013-04-16 11:10 [Qemu-devel] [PATCH 0/3] another round of qemu-char fixes Paolo Bonzini
                   ` (2 preceding siblings ...)
  2013-04-16 11:10 ` [Qemu-devel] [PATCH 3/3] qemu-char: correct return value from chr_read functions Paolo Bonzini
@ 2013-04-16 11:22 ` Gerd Hoffmann
  2013-04-17  7:06   ` Gerd Hoffmann
  3 siblings, 1 reply; 12+ messages in thread
From: Gerd Hoffmann @ 2013-04-16 11:22 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: qemu-devel

On 04/16/13 13:10, Paolo Bonzini wrote:
> I'm happy to say that I'm not touching IOWatchPoll (well, almost: only
> for consistency).  Instead, these patches try to make the code consistent
> (thus avoiding CRITICAL messages from glib) and to fix detection of pty
> connections; Gerd reported that it went berserk with polling.  I think it
> is the same failure reported by Michael Hines, but I couldn't reproduce
> it with "-serial pty", only with the monitor.

Things are back to normal with the series applied.

Tested-by: Gerd Hoffmann <kraxel@redhat.com>

cheers,
  Gerd

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Qemu-devel] [PATCH 0/3] another round of qemu-char fixes
  2013-04-16 11:22 ` [Qemu-devel] [PATCH 0/3] another round of qemu-char fixes Gerd Hoffmann
@ 2013-04-17  7:06   ` Gerd Hoffmann
  2013-04-17  9:28     ` Paolo Bonzini
  2013-04-17 10:17     ` Paolo Bonzini
  0 siblings, 2 replies; 12+ messages in thread
From: Gerd Hoffmann @ 2013-04-17  7:06 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: qemu-devel

On 04/16/13 13:22, Gerd Hoffmann wrote:
> On 04/16/13 13:10, Paolo Bonzini wrote:
>> I'm happy to say that I'm not touching IOWatchPoll (well, almost: only
>> for consistency).  Instead, these patches try to make the code consistent
>> (thus avoiding CRITICAL messages from glib) and to fix detection of pty
>> connections; Gerd reported that it went berserk with polling.  I think it
>> is the same failure reported by Michael Hines, but I couldn't reproduce
>> it with "-serial pty", only with the monitor.
> 
> Things are back to normal with the series applied.
> 
> Tested-by: Gerd Hoffmann <kraxel@redhat.com>
> 
> cheers,
>   Gerd

Trapped into the next issue.  Added qmp monitor to the virtual machine,
linked to unix socket.  Start playing with the qom-* scrips in QMP.
qemu hangs after the first qmp-list command (triggered by socket
disconnect?).

cheers,
  Gerd

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Qemu-devel] [PATCH 0/3] another round of qemu-char fixes
  2013-04-17  7:06   ` Gerd Hoffmann
@ 2013-04-17  9:28     ` Paolo Bonzini
  2013-04-17 10:17     ` Paolo Bonzini
  1 sibling, 0 replies; 12+ messages in thread
From: Paolo Bonzini @ 2013-04-17  9:28 UTC (permalink / raw)
  To: Gerd Hoffmann, qemu-devel

Il 17/04/2013 09:06, Gerd Hoffmann ha scritto:
> Trapped into the next issue.  Added qmp monitor to the virtual machine,
> linked to unix socket.  Start playing with the qom-* scrips in QMP.
> qemu hangs after the first qmp-list command (triggered by socket
> disconnect?).

Yeah, I was going to test sockets more today... you beat me...

Paolo

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Qemu-devel] [PATCH 0/3] another round of qemu-char fixes
  2013-04-17  7:06   ` Gerd Hoffmann
  2013-04-17  9:28     ` Paolo Bonzini
@ 2013-04-17 10:17     ` Paolo Bonzini
  2013-04-18  6:14       ` Gerd Hoffmann
  1 sibling, 1 reply; 12+ messages in thread
From: Paolo Bonzini @ 2013-04-17 10:17 UTC (permalink / raw)
  To: Gerd Hoffmann, qemu-devel

Il 17/04/2013 09:06, Gerd Hoffmann ha scritto:
>> > cheers,
>> >   Gerd
> Trapped into the next issue.  Added qmp monitor to the virtual machine,
> linked to unix socket.  Start playing with the qom-* scrips in QMP.
> qemu hangs after the first qmp-list command (triggered by socket
> disconnect?).

Hmm, I cannot reproduce this. 

x86_64-softmmu/qemu-system-x86_64 -qmp unix:/home/pbonzini/qmp.sock,nowait,server -vnc :1

(Tried also without nowait, with SDL or -nographic, with KVM, with an additional
"-serial mon:stdio").

Multiple executions of "./qom-list -s /home/pbonzini/qmp.sock /machine" work,
both with and without these three patches.

Paolo

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Qemu-devel] [PATCH 0/3] another round of qemu-char fixes
  2013-04-17 10:17     ` Paolo Bonzini
@ 2013-04-18  6:14       ` Gerd Hoffmann
  2013-04-18 12:38         ` Paolo Bonzini
  0 siblings, 1 reply; 12+ messages in thread
From: Gerd Hoffmann @ 2013-04-18  6:14 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: qemu-devel

On 04/17/13 12:17, Paolo Bonzini wrote:
> Il 17/04/2013 09:06, Gerd Hoffmann ha scritto:
>>>> cheers,
>>>>   Gerd
>> Trapped into the next issue.  Added qmp monitor to the virtual machine,
>> linked to unix socket.  Start playing with the qom-* scrips in QMP.
>> qemu hangs after the first qmp-list command (triggered by socket
>> disconnect?).
> 
> Hmm, I cannot reproduce this. 
> 
> x86_64-softmmu/qemu-system-x86_64 -qmp unix:/home/pbonzini/qmp.sock,nowait,server -vnc :1
> 
> (Tried also without nowait, with SDL or -nographic, with KVM, with an additional
> "-serial mon:stdio").
> 
> Multiple executions of "./qom-list -s /home/pbonzini/qmp.sock /machine" work,
> both with and without these three patches.

Hmm.  I'm running with a bunch of monitors actually, maybe that makes
the difference.  qemu is started via libvirt:

rincewind root ~# virsh dumpxml fedora-org-virtio
<domain type='kvm' xmlns:qemu='http://libvirt.org/schemas/domain/qemu/1.0'>
  <name>fedora-org-virtio</name>
[ ... ]
  <qemu:commandline>
    <qemu:arg value='-readconfig'/>
    <qemu:arg value='/root/libvirt/fedora-mon.cfg'/>
[ ... ]
rincewind root ~# cat /root/libvirt/fedora-mon.cfg
[chardev "hmp"]
  backend = "socket"
  path = "/root/mon/fedora"
  server = "on"
  wait = "off"

[chardev "qmp"]
  backend = "socket"
  path = "/root/mon/fedora.qmp"
  server = "on"
  wait = "off"

[mon]
  chardev = "hmp"

[mon]
  chardev = "qmp"
  mode = "control"


Hangs here:

(gdb) thread apply all bt

Thread 6 (Thread 0x7f902919b700 (LWP 27869)):
#0  sem_timedwait () at
../nptl/sysdeps/unix/sysv/linux/x86_64/sem_timedwait.S:106
#1  0x00007f90385307f3 in qemu_sem_timedwait (sem=0x7f903abe0978,
ms=<value optimized out>)
    at /home/kraxel/projects/qemu/util/qemu-thread-posix.c:237
#2  0x00007f90383e835e in worker_thread (opaque=0x7f903abe08e0)
    at /home/kraxel/projects/qemu/thread-pool.c:96
#3  0x00007f90366a7851 in start_thread (arg=0x7f902919b700) at
pthread_create.c:301
#4  0x00007f9030bf890d in clone () at
../sysdeps/unix/sysv/linux/x86_64/clone.S:115

Thread 5 (Thread 0x7f901cfc7700 (LWP 27870)):
#0  __lll_lock_wait () at
../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:136
#1  0x00007f90366a9388 in _L_lock_854 () from /lib64/libpthread.so.0
#2  0x00007f90366a9257 in __pthread_mutex_lock (mutex=0x7f9038d5af20) at
pthread_mutex_lock.c:61
#3  0x00007f9038530c99 in qemu_mutex_lock (mutex=<value optimized out>)
    at /home/kraxel/projects/qemu/util/qemu-thread-posix.c:57
#4  0x00007f9038476f95 in kvm_cpu_exec (env=0x7f903fe6f680)
    at /home/kraxel/projects/qemu/kvm-all.c:1558
#5  0x00007f9038420b41 in qemu_kvm_cpu_thread_fn (arg=0x7f903fe6f680)
    at /home/kraxel/projects/qemu/cpus.c:759
#6  0x00007f90366a7851 in start_thread (arg=0x7f901cfc7700) at
pthread_create.c:301
#7  0x00007f9030bf890d in clone () at
../sysdeps/unix/sysv/linux/x86_64/clone.S:115

Thread 4 (Thread 0x7f9017fff700 (LWP 27871)):
#0  0x00007f9030bf0a47 in ioctl () at ../sysdeps/unix/syscall-template.S:82
#1  0x00007f9038474969 in kvm_vcpu_ioctl (cpu=<value optimized out>,
type=<value optimized out>)
    at /home/kraxel/projects/qemu/kvm-all.c:1667
#2  0x00007f9038476f8d in kvm_cpu_exec (env=0x7f903fea1090)
    at /home/kraxel/projects/qemu/kvm-all.c:1556
#3  0x00007f9038420b41 in qemu_kvm_cpu_thread_fn (arg=0x7f903fea1090)
    at /home/kraxel/projects/qemu/cpus.c:759
#4  0x00007f90366a7851 in start_thread (arg=0x7f9017fff700) at
pthread_create.c:301
#5  0x00007f9030bf890d in clone () at
../sysdeps/unix/sysv/linux/x86_64/clone.S:115

Thread 3 (Thread 0x7f90161ff700 (LWP 27881)):
#0  0x00007f9030bef253 in __poll (fds=<value optimized out>, nfds=<value
optimized out>,
    timeout=<value optimized out>) at ../sysdeps/unix/sysv/linux/poll.c:87
#1  0x00007f9031815eac in red_worker_main (arg=<value optimized out>) at
red_worker.c:11871
#2  0x00007f90366a7851 in start_thread (arg=0x7f90161ff700) at
pthread_create.c:301
#3  0x00007f9030bf890d in clone () at
../sysdeps/unix/sysv/linux/x86_64/clone.S:115

Thread 2 (Thread 0x7f8f4bfff700 (LWP 27882)):
#0  0x00007f9030bef253 in __poll (fds=<value optimized out>, nfds=<value
optimized out>,
    timeout=<value optimized out>) at ../sysdeps/unix/sysv/linux/poll.c:87
#1  0x00007f9031815eac in red_worker_main (arg=<value optimized out>) at
red_worker.c:11871
#2  0x00007f90366a7851 in start_thread (arg=0x7f8f4bfff700) at
pthread_create.c:301
#3  0x00007f9030bf890d in clone () at
../sysdeps/unix/sysv/linux/x86_64/clone.S:115

Thread 1 (Thread 0x7f9038188980 (LWP 27849)):
---Type <return> to continue, or q <return> to quit---
#0  __lll_lock_wait () at
../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:136
#1  0x00007f90366a9388 in _L_lock_854 () from /lib64/libpthread.so.0
#2  0x00007f90366a9257 in __pthread_mutex_lock (mutex=0x7f903abb1538) at
pthread_mutex_lock.c:61
#3  0x00007f9037903c37 in ?? () from /lib64/libglib-2.0.so.0
#4  0x00007f90383c5d96 in io_watch_poll_finalize (source=<value
optimized out>)
    at /home/kraxel/projects/qemu/qemu-char.c:648
#5  0x00007f903790382a in ?? () from /lib64/libglib-2.0.so.0
#6  0x00007f9037903b85 in ?? () from /lib64/libglib-2.0.so.0
#7  0x00007f903790616e in g_source_remove () from /lib64/libglib-2.0.so.0
#8  0x00007f90383c6645 in tcp_chr_read (chan=<value optimized out>,
cond=<value optimized out>,
    opaque=0x7f903abb2fd0) at /home/kraxel/projects/qemu/qemu-char.c:2523
#9  0x00007f9037903f0e in g_main_context_dispatch () from
/lib64/libglib-2.0.so.0
#10 0x00007f903839ee09 in glib_pollfds_poll (nonblocking=<value
optimized out>)
    at /home/kraxel/projects/qemu/main-loop.c:187
#11 os_host_main_loop_wait (nonblocking=<value optimized out>)
    at /home/kraxel/projects/qemu/main-loop.c:232
#12 main_loop_wait (nonblocking=<value optimized out>)
    at /home/kraxel/projects/qemu/main-loop.c:468
#13 0x00007f90384168e5 in main_loop (argc=<value optimized out>,
argv=<value optimized out>,
    envp=<value optimized out>) at /home/kraxel/projects/qemu/vl.c:1990
#14 main (argc=<value optimized out>, argv=<value optimized out>,
envp=<value optimized out>)
    at /home/kraxel/projects/qemu/vl.c:4379

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Qemu-devel] [PATCH 0/3] another round of qemu-char fixes
  2013-04-18  6:14       ` Gerd Hoffmann
@ 2013-04-18 12:38         ` Paolo Bonzini
  2013-04-18 12:54           ` Gerd Hoffmann
  2013-04-18 13:11           ` Paolo Bonzini
  0 siblings, 2 replies; 12+ messages in thread
From: Paolo Bonzini @ 2013-04-18 12:38 UTC (permalink / raw)
  To: Gerd Hoffmann; +Cc: Amit Shah, qemu-devel

Il 18/04/2013 08:14, Gerd Hoffmann ha scritto:
> Thread 1 (Thread 0x7f9038188980 (LWP 27849)):
> ---Type <return> to continue, or q <return> to quit---
> #0  __lll_lock_wait () at
> ../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:136
> #1  0x00007f90366a9388 in _L_lock_854 () from /lib64/libpthread.so.0
> #2  0x00007f90366a9257 in __pthread_mutex_lock (mutex=0x7f903abb1538) at
> pthread_mutex_lock.c:61
> #3  0x00007f9037903c37 in ?? () from /lib64/libglib-2.0.so.0
> #4  0x00007f90383c5d96 in io_watch_poll_finalize (source=<value
> optimized out>)
>     at /home/kraxel/projects/qemu/qemu-char.c:648

Hmm, this seems to be recursive locking.  It sounded unlikely, but then
googling for "glib gsource finalize unlock" led to this:

https://bugs.launchpad.net/ubuntu/+source/glib2.0/+bug/887946

and this:

https://mail.gnome.org/archives/commits-list/2010-November/msg01816.html

and this:

https://bugzilla.gnome.org/show_bug.cgi?id=586432
https://bugzilla.gnome.org/show_bug.cgi?id=626702

Comment 6 of the latter bug says that finalize-inside-finalize is in
fact broken without this fix.  If this is RHEL6, you or Amit should file
a bug.

Paolo

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Qemu-devel] [PATCH 0/3] another round of qemu-char fixes
  2013-04-18 12:38         ` Paolo Bonzini
@ 2013-04-18 12:54           ` Gerd Hoffmann
  2013-04-18 13:11           ` Paolo Bonzini
  1 sibling, 0 replies; 12+ messages in thread
From: Gerd Hoffmann @ 2013-04-18 12:54 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: Amit Shah, qemu-devel

  Hi,

> Hmm, this seems to be recursive locking.  It sounded unlikely, but then
> googling for "glib gsource finalize unlock" led to this:

> https://mail.gnome.org/archives/commits-list/2010-November/msg01816.html

> If this is RHEL6, you or Amit should file
> a bug.

Yes, it's RHEL-6.  Filed bug #953551.

Hmm, requiring a patched glib2 isn't ideal.
Is there some easy workaround?
Should we raise the minimum glib2 version?

cheers,
  Gerd

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Qemu-devel] [PATCH 0/3] another round of qemu-char fixes
  2013-04-18 12:38         ` Paolo Bonzini
  2013-04-18 12:54           ` Gerd Hoffmann
@ 2013-04-18 13:11           ` Paolo Bonzini
  1 sibling, 0 replies; 12+ messages in thread
From: Paolo Bonzini @ 2013-04-18 13:11 UTC (permalink / raw)
  To: Gerd Hoffmann; +Cc: Amit Shah, qemu-devel

Il 18/04/2013 14:38, Paolo Bonzini ha scritto:
> Il 18/04/2013 08:14, Gerd Hoffmann ha scritto:
>> Thread 1 (Thread 0x7f9038188980 (LWP 27849)):
>> ---Type <return> to continue, or q <return> to quit---
>> #0  __lll_lock_wait () at
>> ../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:136
>> #1  0x00007f90366a9388 in _L_lock_854 () from /lib64/libpthread.so.0
>> #2  0x00007f90366a9257 in __pthread_mutex_lock (mutex=0x7f903abb1538) at
>> pthread_mutex_lock.c:61
>> #3  0x00007f9037903c37 in ?? () from /lib64/libglib-2.0.so.0
>> #4  0x00007f90383c5d96 in io_watch_poll_finalize (source=<value
>> optimized out>)
>>     at /home/kraxel/projects/qemu/qemu-char.c:648
> 
> Hmm, this seems to be recursive locking.  It sounded unlikely, but then
> googling for "glib gsource finalize unlock" led to this:
> 
> https://bugs.launchpad.net/ubuntu/+source/glib2.0/+bug/887946
> 
> and this:
> 
> https://mail.gnome.org/archives/commits-list/2010-November/msg01816.html
> 
> and this:
> 
> https://bugzilla.gnome.org/show_bug.cgi?id=586432
> https://bugzilla.gnome.org/show_bug.cgi?id=626702
> 
> Comment 6 of the latter bug says that finalize-inside-finalize is in
> fact broken without this fix.  If this is RHEL6, you or Amit should file
> a bug.

Ok, anyway this is not caused by this series.  It is pre-existing and
likely it's what Amit was observing as well.

Filing a RHEL6 bug is probably the right thing to do, but we have to
work around it.  Can you test this:

---------------- 8< --------------------

>From 2db7ad3c07b048493b1bc38a5cc644076c0f68c1 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 18 Apr 2013 15:05:35 +0200
Subject: [PATCH] qemu-char: do not operate on sources from finalize callbacks

Due to a glib bug, the finalize callback is called with the GMainContext
lock held.  Thus, any operation on the context from the callback will
cause recursive locking and a deadlock.

The fix for this is somewhat ugly, because we need to forego polymorphism
and implement our own function to destroy IOWatchPoll sources.  The
right thing to do here would be child sources, but we support older
glib versions that do not have them.  Not coincidentially, glib developers
found and fixed the deadlock as part of implementing child sources.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 qemu-char.c | 54 +++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 39 insertions(+), 15 deletions(-)

diff --git a/qemu-char.c b/qemu-char.c
index 6e897da..ffd08e6 100644
--- a/qemu-char.c
+++ b/qemu-char.c
@@ -643,12 +643,17 @@ static gboolean io_watch_poll_dispatch(GSource *source, GSourceFunc callback,
 
 static void io_watch_poll_finalize(GSource *source)
 {
+    /* Due to a glib bug, removing the last reference to a source
+     * inside a finalize callback causes recursive locking (and a
+     * deadlock).  For this reason, we call io_remove_watch_poll
+     * to remove this source.  At this point, iwp->src must
+     * be NULL, or we would leak it.
+     *
+     * This would be solved much more elegantly by child sources,
+     * but we support older glib versions that do not have them.
+     */
     IOWatchPoll *iwp = io_watch_poll_from_source(source);
-    if (iwp->src) {
-        g_source_destroy(iwp->src);
-        g_source_unref(iwp->src);
-        iwp->src = NULL;
-    }
+    assert(iwp->src == NULL);
 }
 
 static GSourceFuncs io_watch_poll_funcs = {
@@ -679,6 +684,25 @@ static guint io_add_watch_poll(GIOChannel *channel,
     return tag;
 }
 
+static void io_remove_watch_poll(guint tag)
+{
+    GSource *source;
+    IOWatchPoll *iwp;
+
+    g_return_if_fail (tag > 0);
+
+    source = g_main_context_find_source_by_id(NULL, tag);
+    g_return_if_fail (source != NULL);
+
+    iwp = io_watch_poll_from_source(source);
+    if (iwp->src) {
+        g_source_destroy(iwp->src);
+        g_source_unref(iwp->src);
+        iwp->src = NULL;
+    }
+    g_source_destroy(&iwp->parent);
+}
+
 #ifndef _WIN32
 static GIOChannel *io_channel_from_fd(int fd)
 {
@@ -788,7 +812,7 @@ static gboolean fd_chr_read(GIOChannel *chan, GIOCondition cond, void *opaque)
                                      len, &bytes_read, NULL);
     if (status == G_IO_STATUS_EOF) {
         if (s->fd_in_tag) {
-            g_source_remove(s->fd_in_tag);
+            io_remove_watch_poll(s->fd_in_tag);
             s->fd_in_tag = 0;
         }
         qemu_chr_be_event(chr, CHR_EVENT_CLOSED);
@@ -821,7 +845,7 @@ static void fd_chr_update_read_handler(CharDriverState *chr)
     FDCharDriver *s = chr->opaque;
 
     if (s->fd_in_tag) {
-        g_source_remove(s->fd_in_tag);
+        io_remove_watch_poll(s->fd_in_tag);
         s->fd_in_tag = 0;
     }
 
@@ -835,7 +859,7 @@ static void fd_chr_close(struct CharDriverState *chr)
     FDCharDriver *s = chr->opaque;
 
     if (s->fd_in_tag) {
-        g_source_remove(s->fd_in_tag);
+        io_remove_watch_poll(s->fd_in_tag);
         s->fd_in_tag = 0;
     }
 
@@ -1145,7 +1169,7 @@ static void pty_chr_state(CharDriverState *chr, int connected)
 
     if (!connected) {
         if (s->fd_tag) {
-            g_source_remove(s->fd_tag);
+            io_remove_watch_poll(s->fd_tag);
             s->fd_tag = 0;
         }
         s->connected = 0;
@@ -1173,7 +1197,7 @@ static void pty_chr_close(struct CharDriverState *chr)
     int fd;
 
     if (s->fd_tag) {
-        g_source_remove(s->fd_tag);
+        io_remove_watch_poll(s->fd_tag);
         s->fd_tag = 0;
     }
     fd = g_io_channel_unix_get_fd(s->fd);
@@ -2252,7 +2276,7 @@ static gboolean udp_chr_read(GIOChannel *chan, GIOCondition cond, void *opaque)
     s->bufptr = s->bufcnt;
     if (status != G_IO_STATUS_NORMAL) {
         if (s->tag) {
-            g_source_remove(s->tag);
+            io_remove_watch_poll(s->tag);
             s->tag = 0;
         }
         return FALSE;
@@ -2273,7 +2297,7 @@ static void udp_chr_update_read_handler(CharDriverState *chr)
     NetCharDriver *s = chr->opaque;
 
     if (s->tag) {
-        g_source_remove(s->tag);
+        io_remove_watch_poll(s->tag);
         s->tag = 0;
     }
 
@@ -2286,7 +2310,7 @@ static void udp_chr_close(CharDriverState *chr)
 {
     NetCharDriver *s = chr->opaque;
     if (s->tag) {
-        g_source_remove(s->tag);
+        io_remove_watch_poll(s->tag);
         s->tag = 0;
     }
     if (s->chan) {
@@ -2520,7 +2544,7 @@ static gboolean tcp_chr_read(GIOChannel *chan, GIOCondition cond, void *opaque)
             s->listen_tag = g_io_add_watch(s->listen_chan, G_IO_IN, tcp_chr_accept, chr);
         }
         if (s->tag) {
-            g_source_remove(s->tag);
+            io_remove_watch_poll(s->tag);
             s->tag = 0;
         }
         g_io_channel_unref(s->chan);
@@ -2635,7 +2659,7 @@ static void tcp_chr_close(CharDriverState *chr)
     TCPCharDriver *s = chr->opaque;
     if (s->fd >= 0) {
         if (s->tag) {
-            g_source_remove(s->tag);
+            io_remove_watch_poll(s->tag);
             s->tag = 0;
         }
         if (s->chan) {
-- 
1.8.1.4

^ permalink raw reply related	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2013-04-18 13:11 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2013-04-16 11:10 [Qemu-devel] [PATCH 0/3] another round of qemu-char fixes Paolo Bonzini
2013-04-16 11:10 ` [Qemu-devel] [PATCH 1/3] qemu-char: use consistent idiom for removing sources Paolo Bonzini
2013-04-16 11:10 ` [Qemu-devel] [PATCH 2/3] qemu-char: simplify pty polling Paolo Bonzini
2013-04-16 11:10 ` [Qemu-devel] [PATCH 3/3] qemu-char: correct return value from chr_read functions Paolo Bonzini
2013-04-16 11:22 ` [Qemu-devel] [PATCH 0/3] another round of qemu-char fixes Gerd Hoffmann
2013-04-17  7:06   ` Gerd Hoffmann
2013-04-17  9:28     ` Paolo Bonzini
2013-04-17 10:17     ` Paolo Bonzini
2013-04-18  6:14       ` Gerd Hoffmann
2013-04-18 12:38         ` Paolo Bonzini
2013-04-18 12:54           ` Gerd Hoffmann
2013-04-18 13:11           ` Paolo Bonzini

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).