[Qemu-devel] [PATCH v2] block: avoid SIGUSR2

qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed

* [Qemu-devel] [PATCH v2] block: avoid SIGUSR2
@ 2011-09-19 14:37 Frediano Ziglio
  2011-09-19 15:02 ` Paolo Bonzini
                   ` (2 more replies)
  0 siblings, 3 replies; 17+ messages in thread
From: Frediano Ziglio @ 2011-09-19 14:37 UTC (permalink / raw)
  To: aliguori, kwolf; +Cc: qemu-devel, Frediano Ziglio

Now that iothread is always compiled, sending a signal seems to be only an
additional step. This patch also avoids writing to two pipes (one from the
signal handler and one in qemu_service_io).

Works with kvm enabled or disabled. strace output is more readable (fewer syscalls).

Signed-off-by: Frediano Ziglio <freddy77@gmail.com>
---
 cpus.c             |    5 -----
 posix-aio-compat.c |   29 +++++++++--------------------
 2 files changed, 9 insertions(+), 25 deletions(-)

diff --git a/cpus.c b/cpus.c
index 54c188c..d0cfe91 100644
--- a/cpus.c
+++ b/cpus.c
@@ -380,11 +380,6 @@ static int qemu_signal_init(void)
     int sigfd;
     sigset_t set;
 
-    /* SIGUSR2 used by posix-aio-compat.c */
-    sigemptyset(&set);
-    sigaddset(&set, SIGUSR2);
-    pthread_sigmask(SIG_UNBLOCK, &set, NULL);
-
     /*
      * SIG_IPI must be blocked in the main thread and must not be caught
      * by sigwait() in the signal thread. Otherwise, the cpu thread will
diff --git a/posix-aio-compat.c b/posix-aio-compat.c
index 3193dbf..185d5b2 100644
--- a/posix-aio-compat.c
+++ b/posix-aio-compat.c
@@ -42,7 +42,6 @@ struct qemu_paiocb {
     int aio_niov;
     size_t aio_nbytes;
 #define aio_ioctl_cmd   aio_nbytes /* for QEMU_AIO_IOCTL */
-    int ev_signo;
     off_t aio_offset;
 
     QTAILQ_ENTRY(qemu_paiocb) node;
@@ -309,6 +308,8 @@ static ssize_t handle_aiocb_rw(struct qemu_paiocb *aiocb)
     return nbytes;
 }
 
+static void posix_aio_notify_event(void);
+
 static void *aio_thread(void *unused)
 {
     pid_t pid;
@@ -381,7 +382,7 @@ static void *aio_thread(void *unused)
         aiocb->ret = ret;
         mutex_unlock(&lock);
 
-        if (kill(pid, aiocb->ev_signo)) die("kill failed");
+        posix_aio_notify_event();
     }
 
     cur_threads--;
@@ -548,18 +549,14 @@ static int posix_aio_flush(void *opaque)
 
 static PosixAioState *posix_aio_state;
 
-static void aio_signal_handler(int signum)
+static void posix_aio_notify_event(void)
 {
-    if (posix_aio_state) {
-        char byte = 0;
-        ssize_t ret;
-
-        ret = write(posix_aio_state->wfd, &byte, sizeof(byte));
-        if (ret < 0 && errno != EAGAIN)
-            die("write()");
-    }
+    char byte = 0;
+    ssize_t ret;
 
-    qemu_service_io();
+    ret = write(posix_aio_state->wfd, &byte, sizeof(byte));
+    if (ret < 0 && errno != EAGAIN)
+        die("write()");
 }
 
 static void paio_remove(struct qemu_paiocb *acb)
@@ -623,7 +620,6 @@ BlockDriverAIOCB *paio_submit(BlockDriverState *bs, int fd,
         return NULL;
     acb->aio_type = type;
     acb->aio_fildes = fd;
-    acb->ev_signo = SIGUSR2;
 
     if (qiov) {
         acb->aio_iov = qiov->iov;
@@ -651,7 +647,6 @@ BlockDriverAIOCB *paio_ioctl(BlockDriverState *bs, int fd,
         return NULL;
     acb->aio_type = QEMU_AIO_IOCTL;
     acb->aio_fildes = fd;
-    acb->ev_signo = SIGUSR2;
     acb->aio_offset = 0;
     acb->aio_ioctl_buf = buf;
     acb->aio_ioctl_cmd = req;
@@ -665,7 +660,6 @@ BlockDriverAIOCB *paio_ioctl(BlockDriverState *bs, int fd,
 
 int paio_init(void)
 {
-    struct sigaction act;
     PosixAioState *s;
     int fds[2];
     int ret;
@@ -675,11 +669,6 @@ int paio_init(void)
 
     s = g_malloc(sizeof(PosixAioState));
 
-    sigfillset(&act.sa_mask);
-    act.sa_flags = 0; /* do not restart syscalls to interrupt select() */
-    act.sa_handler = aio_signal_handler;
-    sigaction(SIGUSR2, &act, NULL);
-
     s->first_aio = NULL;
     if (qemu_pipe(fds) == -1) {
         fprintf(stderr, "failed to create pipe\n");
-- 
1.7.1

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [Qemu-devel] [PATCH v2] block: avoid SIGUSR2
  2011-09-19 14:37 [Qemu-devel] [PATCH v2] block: avoid SIGUSR2 Frediano Ziglio
@ 2011-09-19 15:02 ` Paolo Bonzini
  2011-09-19 15:11   ` Kevin Wolf
  2011-09-19 15:15 ` Kevin Wolf
  2011-10-27 13:26 ` Kevin Wolf
  2 siblings, 1 reply; 17+ messages in thread
From: Paolo Bonzini @ 2011-09-19 15:02 UTC (permalink / raw)
  To: Frediano Ziglio; +Cc: kwolf, aliguori, qemu-devel

On 09/19/2011 04:37 PM, Frediano Ziglio wrote:
> Now that iothread is always compiled sending a signal seems only an
> additional step. This patch also avoid writing to two pipe (one from signal
> and one in qemu_service_io).
>
> Work with kvm enabled or disabled. strace output is more readable (less syscalls).
>
> Signed-off-by: Frediano Ziglio<freddy77@gmail.com>
> ---
>   cpus.c             |    5 -----
>   posix-aio-compat.c |   29 +++++++++--------------------
>   2 files changed, 9 insertions(+), 25 deletions(-)
>
> diff --git a/cpus.c b/cpus.c
> index 54c188c..d0cfe91 100644
> --- a/cpus.c
> +++ b/cpus.c
> @@ -380,11 +380,6 @@ static int qemu_signal_init(void)
>       int sigfd;
>       sigset_t set;
>
> -    /* SIGUSR2 used by posix-aio-compat.c */
> -    sigemptyset(&set);
> -    sigaddset(&set, SIGUSR2);
> -    pthread_sigmask(SIG_UNBLOCK,&set, NULL);
> -
>       /*
>        * SIG_IPI must be blocked in the main thread and must not be caught
>        * by sigwait() in the signal thread. Otherwise, the cpu thread will
> diff --git a/posix-aio-compat.c b/posix-aio-compat.c
> index 3193dbf..185d5b2 100644
> --- a/posix-aio-compat.c
> +++ b/posix-aio-compat.c
> @@ -42,7 +42,6 @@ struct qemu_paiocb {
>       int aio_niov;
>       size_t aio_nbytes;
>   #define aio_ioctl_cmd   aio_nbytes /* for QEMU_AIO_IOCTL */
> -    int ev_signo;
>       off_t aio_offset;
>
>       QTAILQ_ENTRY(qemu_paiocb) node;
> @@ -309,6 +308,8 @@ static ssize_t handle_aiocb_rw(struct qemu_paiocb *aiocb)
>       return nbytes;
>   }
>
> +static void posix_aio_notify_event(void);
> +
>   static void *aio_thread(void *unused)
>   {
>       pid_t pid;
> @@ -381,7 +382,7 @@ static void *aio_thread(void *unused)
>           aiocb->ret = ret;
>           mutex_unlock(&lock);
>
> -        if (kill(pid, aiocb->ev_signo)) die("kill failed");
> +        posix_aio_notify_event();
>       }
>
>       cur_threads--;
> @@ -548,18 +549,14 @@ static int posix_aio_flush(void *opaque)
>
>   static PosixAioState *posix_aio_state;
>
> -static void aio_signal_handler(int signum)
> +static void posix_aio_notify_event(void)
>   {
> -    if (posix_aio_state) {
> -        char byte = 0;
> -        ssize_t ret;
> -
> -        ret = write(posix_aio_state->wfd,&byte, sizeof(byte));
> -        if (ret<  0&&  errno != EAGAIN)
> -            die("write()");
> -    }
> +    char byte = 0;
> +    ssize_t ret;
>
> -    qemu_service_io();
> +    ret = write(posix_aio_state->wfd,&byte, sizeof(byte));
> +    if (ret<  0&&  errno != EAGAIN)
> +        die("write()");
>   }
>
>   static void paio_remove(struct qemu_paiocb *acb)
> @@ -623,7 +620,6 @@ BlockDriverAIOCB *paio_submit(BlockDriverState *bs, int fd,
>           return NULL;
>       acb->aio_type = type;
>       acb->aio_fildes = fd;
> -    acb->ev_signo = SIGUSR2;
>
>       if (qiov) {
>           acb->aio_iov = qiov->iov;
> @@ -651,7 +647,6 @@ BlockDriverAIOCB *paio_ioctl(BlockDriverState *bs, int fd,
>           return NULL;
>       acb->aio_type = QEMU_AIO_IOCTL;
>       acb->aio_fildes = fd;
> -    acb->ev_signo = SIGUSR2;
>       acb->aio_offset = 0;
>       acb->aio_ioctl_buf = buf;
>       acb->aio_ioctl_cmd = req;
> @@ -665,7 +660,6 @@ BlockDriverAIOCB *paio_ioctl(BlockDriverState *bs, int fd,
>
>   int paio_init(void)
>   {
> -    struct sigaction act;
>       PosixAioState *s;
>       int fds[2];
>       int ret;
> @@ -675,11 +669,6 @@ int paio_init(void)
>
>       s = g_malloc(sizeof(PosixAioState));
>
> -    sigfillset(&act.sa_mask);
> -    act.sa_flags = 0; /* do not restart syscalls to interrupt select() */
> -    act.sa_handler = aio_signal_handler;
> -    sigaction(SIGUSR2,&act, NULL);
> -
>       s->first_aio = NULL;
>       if (qemu_pipe(fds) == -1) {
>           fprintf(stderr, "failed to create pipe\n");

I think it is possible to go a step further, turn 
posix_aio_process_queue into a bottom half and get rid of the pipe 
altogether.  This in turn would remove the only real user of 
io_process_queue in qemu_aio_set_fd_handler.  However, this is already a 
nice improvement.

Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [Qemu-devel] [PATCH v2] block: avoid SIGUSR2
  2011-09-19 15:02 ` Paolo Bonzini
@ 2011-09-19 15:11   ` Kevin Wolf
  2011-09-19 15:25     ` Paolo Bonzini
  0 siblings, 1 reply; 17+ messages in thread
From: Kevin Wolf @ 2011-09-19 15:11 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: aliguori, Frediano Ziglio, Qemu-devel

Am 19.09.2011 17:02, schrieb Paolo Bonzini:
> On 09/19/2011 04:37 PM, Frediano Ziglio wrote:
>> Now that iothread is always compiled sending a signal seems only an
>> additional step. This patch also avoid writing to two pipe (one from signal
>> and one in qemu_service_io).
>>
>> Work with kvm enabled or disabled. strace output is more readable (less syscalls).
>>
>> Signed-off-by: Frediano Ziglio<freddy77@gmail.com>
>> ---
>>   cpus.c             |    5 -----
>>   posix-aio-compat.c |   29 +++++++++--------------------
>>   2 files changed, 9 insertions(+), 25 deletions(-)
>>
>> diff --git a/cpus.c b/cpus.c
>> index 54c188c..d0cfe91 100644
>> --- a/cpus.c
>> +++ b/cpus.c
>> @@ -380,11 +380,6 @@ static int qemu_signal_init(void)
>>       int sigfd;
>>       sigset_t set;
>>
>> -    /* SIGUSR2 used by posix-aio-compat.c */
>> -    sigemptyset(&set);
>> -    sigaddset(&set, SIGUSR2);
>> -    pthread_sigmask(SIG_UNBLOCK,&set, NULL);
>> -
>>       /*
>>        * SIG_IPI must be blocked in the main thread and must not be caught
>>        * by sigwait() in the signal thread. Otherwise, the cpu thread will
>> diff --git a/posix-aio-compat.c b/posix-aio-compat.c
>> index 3193dbf..185d5b2 100644
>> --- a/posix-aio-compat.c
>> +++ b/posix-aio-compat.c
>> @@ -42,7 +42,6 @@ struct qemu_paiocb {
>>       int aio_niov;
>>       size_t aio_nbytes;
>>   #define aio_ioctl_cmd   aio_nbytes /* for QEMU_AIO_IOCTL */
>> -    int ev_signo;
>>       off_t aio_offset;
>>
>>       QTAILQ_ENTRY(qemu_paiocb) node;
>> @@ -309,6 +308,8 @@ static ssize_t handle_aiocb_rw(struct qemu_paiocb *aiocb)
>>       return nbytes;
>>   }
>>
>> +static void posix_aio_notify_event(void);
>> +
>>   static void *aio_thread(void *unused)
>>   {
>>       pid_t pid;
>> @@ -381,7 +382,7 @@ static void *aio_thread(void *unused)
>>           aiocb->ret = ret;
>>           mutex_unlock(&lock);
>>
>> -        if (kill(pid, aiocb->ev_signo)) die("kill failed");
>> +        posix_aio_notify_event();
>>       }
>>
>>       cur_threads--;
>> @@ -548,18 +549,14 @@ static int posix_aio_flush(void *opaque)
>>
>>   static PosixAioState *posix_aio_state;
>>
>> -static void aio_signal_handler(int signum)
>> +static void posix_aio_notify_event(void)
>>   {
>> -    if (posix_aio_state) {
>> -        char byte = 0;
>> -        ssize_t ret;
>> -
>> -        ret = write(posix_aio_state->wfd,&byte, sizeof(byte));
>> -        if (ret<  0&&  errno != EAGAIN)
>> -            die("write()");
>> -    }
>> +    char byte = 0;
>> +    ssize_t ret;
>>
>> -    qemu_service_io();
>> +    ret = write(posix_aio_state->wfd,&byte, sizeof(byte));
>> +    if (ret<  0&&  errno != EAGAIN)
>> +        die("write()");
>>   }
>>
>>   static void paio_remove(struct qemu_paiocb *acb)
>> @@ -623,7 +620,6 @@ BlockDriverAIOCB *paio_submit(BlockDriverState *bs, int fd,
>>           return NULL;
>>       acb->aio_type = type;
>>       acb->aio_fildes = fd;
>> -    acb->ev_signo = SIGUSR2;
>>
>>       if (qiov) {
>>           acb->aio_iov = qiov->iov;
>> @@ -651,7 +647,6 @@ BlockDriverAIOCB *paio_ioctl(BlockDriverState *bs, int fd,
>>           return NULL;
>>       acb->aio_type = QEMU_AIO_IOCTL;
>>       acb->aio_fildes = fd;
>> -    acb->ev_signo = SIGUSR2;
>>       acb->aio_offset = 0;
>>       acb->aio_ioctl_buf = buf;
>>       acb->aio_ioctl_cmd = req;
>> @@ -665,7 +660,6 @@ BlockDriverAIOCB *paio_ioctl(BlockDriverState *bs, int fd,
>>
>>   int paio_init(void)
>>   {
>> -    struct sigaction act;
>>       PosixAioState *s;
>>       int fds[2];
>>       int ret;
>> @@ -675,11 +669,6 @@ int paio_init(void)
>>
>>       s = g_malloc(sizeof(PosixAioState));
>>
>> -    sigfillset(&act.sa_mask);
>> -    act.sa_flags = 0; /* do not restart syscalls to interrupt select() */
>> -    act.sa_handler = aio_signal_handler;
>> -    sigaction(SIGUSR2,&act, NULL);
>> -
>>       s->first_aio = NULL;
>>       if (qemu_pipe(fds) == -1) {
>>           fprintf(stderr, "failed to create pipe\n");
> 
> I think it is possible to go a step further, turn 
> posix_aio_process_queue into a bottom half and get rid of the pipe 
> altogether.  This in turn would remove the only real user of 
> io_process_queue in qemu_aio_set_fd_handler.  However, this is already a 
> nice improvement.

But without the fd, wouldn't the I/O thread possibly wait for much
longer until its select() times out and it starts processing BHs?

Kevin

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [Qemu-devel] [PATCH v2] block: avoid SIGUSR2
  2011-09-19 14:37 [Qemu-devel] [PATCH v2] block: avoid SIGUSR2 Frediano Ziglio
  2011-09-19 15:02 ` Paolo Bonzini
@ 2011-09-19 15:15 ` Kevin Wolf
  2011-10-27 13:26 ` Kevin Wolf
  2 siblings, 0 replies; 17+ messages in thread
From: Kevin Wolf @ 2011-09-19 15:15 UTC (permalink / raw)
  To: Frediano Ziglio; +Cc: aliguori, qemu-devel

Am 19.09.2011 16:37, schrieb Frediano Ziglio:
> Now that iothread is always compiled sending a signal seems only an
> additional step. This patch also avoid writing to two pipe (one from signal
> and one in qemu_service_io).
> 
> Work with kvm enabled or disabled. strace output is more readable (less syscalls).
> 
> Signed-off-by: Frediano Ziglio <freddy77@gmail.com>

Thanks, applied to the block branch.

Kevin

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [Qemu-devel] [PATCH v2] block: avoid SIGUSR2
  2011-09-19 15:11   ` Kevin Wolf
@ 2011-09-19 15:25     ` Paolo Bonzini
  0 siblings, 0 replies; 17+ messages in thread
From: Paolo Bonzini @ 2011-09-19 15:25 UTC (permalink / raw)
  To: Kevin Wolf; +Cc: aliguori, Frediano Ziglio, Qemu-devel

On 09/19/2011 05:11 PM, Kevin Wolf wrote:
>> >  I think it is possible to go a step further, turn
>> >  posix_aio_process_queue into a bottom half and get rid of the pipe
>> >  altogether.  This in turn would remove the only real user of
>> >  io_process_queue in qemu_aio_set_fd_handler.  However, this is already a
>> >  nice improvement.
> But without the fd, wouldn't the I/O thread possibly wait for much
> longer until its select() times out and it starts processing BHs?

Hmm, in qemu_aio_wait yes...  In the normal qemu event loop, however, 
bottom halves exit the select loop with qemu_notify_event().  qemu 
currently has a 1-second timeout for the select, but it should work just 
as well with an infinite timeout.  If it doesn't, it's a bug.

It should be possible to turn posix_aio_process_queue into a bottom 
half, but the pipe is still necessary in order to exit the qemu_aio_wait 
select loop and schedule the bottom half.

Paolo

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [Qemu-devel] [PATCH v2] block: avoid SIGUSR2
  2011-09-19 14:37 [Qemu-devel] [PATCH v2] block: avoid SIGUSR2 Frediano Ziglio
  2011-09-19 15:02 ` Paolo Bonzini
  2011-09-19 15:15 ` Kevin Wolf
@ 2011-10-27 13:26 ` Kevin Wolf
  2011-10-27 13:57   ` Stefan Hajnoczi
  2 siblings, 1 reply; 17+ messages in thread
From: Kevin Wolf @ 2011-10-27 13:26 UTC (permalink / raw)
  To: Frediano Ziglio; +Cc: Paolo Bonzini, aliguori, qemu-devel, Stefan Hajnoczi

Am 19.09.2011 16:37, schrieb Frediano Ziglio:
> Now that iothread is always compiled sending a signal seems only an
> additional step. This patch also avoid writing to two pipe (one from signal
> and one in qemu_service_io).
> 
> Work with kvm enabled or disabled. strace output is more readable (less syscalls).
> 
> Signed-off-by: Frediano Ziglio <freddy77@gmail.com>

Something in this change has bad effects, in the sense that it seems to
break bdrv_read_em.

To be precise, what I'm testing is booting from a DOS installation
floppy (interestingly, on my laptop it just works, but on my other test
box it fails). The first attempt of git bisect pointed at the commit
where we converted bdrv_read/write to coroutines.

However, it turned out that the conversion commit only caused problems
because instead of using a synchronous read() it now goes through
posix-aio-compat.c. The problem is reproducible in pre-coroutine
versions by just commenting out .bdrv_read/.bdrv_write in raw-posix.

Going back a bit more showed that this did work fine a while ago, and
the removal of SIGUSR2 is the first commit in which bdrv_read_em didn't
provide the same behaviour as bdrv_read any more.

I have no idea yet what's really going wrong, but maybe it rings a bell
for one of you?

Kevin

> ---
>  cpus.c             |    5 -----
>  posix-aio-compat.c |   29 +++++++++--------------------
>  2 files changed, 9 insertions(+), 25 deletions(-)
> 
> diff --git a/cpus.c b/cpus.c
> index 54c188c..d0cfe91 100644
> --- a/cpus.c
> +++ b/cpus.c
> @@ -380,11 +380,6 @@ static int qemu_signal_init(void)
>      int sigfd;
>      sigset_t set;
>  
> -    /* SIGUSR2 used by posix-aio-compat.c */
> -    sigemptyset(&set);
> -    sigaddset(&set, SIGUSR2);
> -    pthread_sigmask(SIG_UNBLOCK, &set, NULL);
> -
>      /*
>       * SIG_IPI must be blocked in the main thread and must not be caught
>       * by sigwait() in the signal thread. Otherwise, the cpu thread will
> diff --git a/posix-aio-compat.c b/posix-aio-compat.c
> index 3193dbf..185d5b2 100644
> --- a/posix-aio-compat.c
> +++ b/posix-aio-compat.c
> @@ -42,7 +42,6 @@ struct qemu_paiocb {
>      int aio_niov;
>      size_t aio_nbytes;
>  #define aio_ioctl_cmd   aio_nbytes /* for QEMU_AIO_IOCTL */
> -    int ev_signo;
>      off_t aio_offset;
>  
>      QTAILQ_ENTRY(qemu_paiocb) node;
> @@ -309,6 +308,8 @@ static ssize_t handle_aiocb_rw(struct qemu_paiocb *aiocb)
>      return nbytes;
>  }
>  
> +static void posix_aio_notify_event(void);
> +
>  static void *aio_thread(void *unused)
>  {
>      pid_t pid;
> @@ -381,7 +382,7 @@ static void *aio_thread(void *unused)
>          aiocb->ret = ret;
>          mutex_unlock(&lock);
>  
> -        if (kill(pid, aiocb->ev_signo)) die("kill failed");
> +        posix_aio_notify_event();
>      }
>  
>      cur_threads--;
> @@ -548,18 +549,14 @@ static int posix_aio_flush(void *opaque)
>  
>  static PosixAioState *posix_aio_state;
>  
> -static void aio_signal_handler(int signum)
> +static void posix_aio_notify_event(void)
>  {
> -    if (posix_aio_state) {
> -        char byte = 0;
> -        ssize_t ret;
> -
> -        ret = write(posix_aio_state->wfd, &byte, sizeof(byte));
> -        if (ret < 0 && errno != EAGAIN)
> -            die("write()");
> -    }
> +    char byte = 0;
> +    ssize_t ret;
>  
> -    qemu_service_io();
> +    ret = write(posix_aio_state->wfd, &byte, sizeof(byte));
> +    if (ret < 0 && errno != EAGAIN)
> +        die("write()");
>  }
>  
>  static void paio_remove(struct qemu_paiocb *acb)
> @@ -623,7 +620,6 @@ BlockDriverAIOCB *paio_submit(BlockDriverState *bs, int fd,
>          return NULL;
>      acb->aio_type = type;
>      acb->aio_fildes = fd;
> -    acb->ev_signo = SIGUSR2;
>  
>      if (qiov) {
>          acb->aio_iov = qiov->iov;
> @@ -651,7 +647,6 @@ BlockDriverAIOCB *paio_ioctl(BlockDriverState *bs, int fd,
>          return NULL;
>      acb->aio_type = QEMU_AIO_IOCTL;
>      acb->aio_fildes = fd;
> -    acb->ev_signo = SIGUSR2;
>      acb->aio_offset = 0;
>      acb->aio_ioctl_buf = buf;
>      acb->aio_ioctl_cmd = req;
> @@ -665,7 +660,6 @@ BlockDriverAIOCB *paio_ioctl(BlockDriverState *bs, int fd,
>  
>  int paio_init(void)
>  {
> -    struct sigaction act;
>      PosixAioState *s;
>      int fds[2];
>      int ret;
> @@ -675,11 +669,6 @@ int paio_init(void)
>  
>      s = g_malloc(sizeof(PosixAioState));
>  
> -    sigfillset(&act.sa_mask);
> -    act.sa_flags = 0; /* do not restart syscalls to interrupt select() */
> -    act.sa_handler = aio_signal_handler;
> -    sigaction(SIGUSR2, &act, NULL);
> -
>      s->first_aio = NULL;
>      if (qemu_pipe(fds) == -1) {
>          fprintf(stderr, "failed to create pipe\n");

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [Qemu-devel] [PATCH v2] block: avoid SIGUSR2
  2011-10-27 13:26 ` Kevin Wolf
@ 2011-10-27 13:57   ` Stefan Hajnoczi
  2011-10-27 14:15     ` Kevin Wolf
  0 siblings, 1 reply; 17+ messages in thread
From: Stefan Hajnoczi @ 2011-10-27 13:57 UTC (permalink / raw)
  To: Kevin Wolf; +Cc: Paolo Bonzini, aliguori, Frediano Ziglio, qemu-devel

On Thu, Oct 27, 2011 at 03:26:23PM +0200, Kevin Wolf wrote:
> Am 19.09.2011 16:37, schrieb Frediano Ziglio:
> > Now that iothread is always compiled sending a signal seems only an
> > additional step. This patch also avoid writing to two pipe (one from signal
> > and one in qemu_service_io).
> > 
> > Work with kvm enabled or disabled. strace output is more readable (less syscalls).
> > 
> > Signed-off-by: Frediano Ziglio <freddy77@gmail.com>
> 
> Something in this change has bad effects, in the sense that it seems to
> break bdrv_read_em.

How does it break bdrv_read_em?  Are you seeing QEMU hung with 100% CPU
utilization or deadlocked?

One interesting thing is that qemu_aio_wait() does not release the QEMU
mutex, so we cannot write to a pipe with the mutex held and then spin
waiting for the iothread to do work for us.

Exactly how kill and qemu_notify_event() were different I'm not sure
right now but it could be a factor.

Stefan

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [Qemu-devel] [PATCH v2] block: avoid SIGUSR2
  2011-10-27 13:57   ` Stefan Hajnoczi
@ 2011-10-27 14:15     ` Kevin Wolf
  2011-10-27 14:32       ` Kevin Wolf
  0 siblings, 1 reply; 17+ messages in thread
From: Kevin Wolf @ 2011-10-27 14:15 UTC (permalink / raw)
  To: Stefan Hajnoczi; +Cc: Paolo Bonzini, aliguori, Frediano Ziglio, qemu-devel

Am 27.10.2011 15:57, schrieb Stefan Hajnoczi:
> On Thu, Oct 27, 2011 at 03:26:23PM +0200, Kevin Wolf wrote:
>> Am 19.09.2011 16:37, schrieb Frediano Ziglio:
>>> Now that iothread is always compiled sending a signal seems only an
>>> additional step. This patch also avoid writing to two pipe (one from signal
>>> and one in qemu_service_io).
>>>
>>> Work with kvm enabled or disabled. strace output is more readable (less syscalls).
>>>
>>> Signed-off-by: Frediano Ziglio <freddy77@gmail.com>
>>
>> Something in this change has bad effects, in the sense that it seems to
>> break bdrv_read_em.
> 
> How does it break bdrv_read_em?  Are you seeing QEMU hung with 100% CPU
> utilization or deadlocked?

Sorry, I should have been more detailed here.

No, it's nothing obvious, it must be some subtle side effect. The result
of bdrv_read_em itself seems to be correct (return value and checksum of
the read buffer).

However instead of booting into the DOS setup I only get an error
message "Kein System oder Laufwerksfehler" (don't know how it reads in
English DOS versions), which seems to be produced by the boot sector.

I excluded all of the minor changes, so I'm sure that it's caused by the
switch from kill() to a direct call of the function that writes into the
pipe.

> One interesting thing is that qemu_aio_wait() does not release the QEMU
> mutex, so we cannot write to a pipe with the mutex held and then spin
> waiting for the iothread to do work for us.
> 
> Exactly how kill and qemu_notify_event() were different I'm not sure
> right now but it could be a factor.

This would cause a hang, right? Then it isn't what I'm seeing.

Kevin

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [Qemu-devel] [PATCH v2] block: avoid SIGUSR2
  2011-10-27 14:15     ` Kevin Wolf
@ 2011-10-27 14:32       ` Kevin Wolf
  2011-10-28 11:33         ` Kevin Wolf
  0 siblings, 1 reply; 17+ messages in thread
From: Kevin Wolf @ 2011-10-27 14:32 UTC (permalink / raw)
  To: Stefan Hajnoczi; +Cc: Paolo Bonzini, aliguori, Frediano Ziglio, qemu-devel

Am 27.10.2011 16:15, schrieb Kevin Wolf:
> Am 27.10.2011 15:57, schrieb Stefan Hajnoczi:
>> On Thu, Oct 27, 2011 at 03:26:23PM +0200, Kevin Wolf wrote:
>>> Am 19.09.2011 16:37, schrieb Frediano Ziglio:
>>>> Now that iothread is always compiled sending a signal seems only an
>>>> additional step. This patch also avoid writing to two pipe (one from signal
>>>> and one in qemu_service_io).
>>>>
>>>> Work with kvm enabled or disabled. strace output is more readable (less syscalls).
>>>>
>>>> Signed-off-by: Frediano Ziglio <freddy77@gmail.com>
>>>
>>> Something in this change has bad effects, in the sense that it seems to
>>> break bdrv_read_em.
>>
>> How does it break bdrv_read_em?  Are you seeing QEMU hung with 100% CPU
>> utilization or deadlocked?
> 
> Sorry, I should have been more detailed here.
> 
> No, it's nothing obvious, it must be some subtle side effect. The result
> of bdrv_read_em itself seems to be correct (return value and checksum of
> the read buffer).
> 
> However instead of booting into the DOS setup I only get an error
> message "Kein System oder Laufwerksfehler" (don't know how it reads in
> English DOS versions), which seems to be produced by the boot sector.
> 
> I excluded all of the minor changes, so I'm sure that it's caused by the
> switch from kill() to a direct call of the function that writes into the
> pipe.
> 
>> One interesting thing is that qemu_aio_wait() does not release the QEMU
>> mutex, so we cannot write to a pipe with the mutex held and then spin
>> waiting for the iothread to do work for us.
>>
>> Exactly how kill and qemu_notify_event() were different I'm not sure
>> right now but it could be a factor.
> 
> This would cause a hang, right? Then it isn't what I'm seeing.

While trying out some more things, I added some fprintfs to
posix_aio_process_queue() and suddenly it also fails with the kill()
version. So what has changed might really just be the timing, and it
could be a race somewhere that has always (?) existed.

Kevin

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [Qemu-devel] [PATCH v2] block: avoid SIGUSR2
  2011-10-27 14:32       ` Kevin Wolf
@ 2011-10-28 11:33         ` Kevin Wolf
  2011-10-28 11:35           ` Kevin Wolf
                             ` (2 more replies)
  0 siblings, 3 replies; 17+ messages in thread
From: Kevin Wolf @ 2011-10-28 11:33 UTC (permalink / raw)
  To: Stefan Hajnoczi
  Cc: Lucas Meneghel Rodrigues, aliguori, qemu-devel, Frediano Ziglio,
	Cleber Rosa, Paolo Bonzini

Am 27.10.2011 16:32, schrieb Kevin Wolf:
> Am 27.10.2011 16:15, schrieb Kevin Wolf:
>> Am 27.10.2011 15:57, schrieb Stefan Hajnoczi:
>>> On Thu, Oct 27, 2011 at 03:26:23PM +0200, Kevin Wolf wrote:
>>>> Am 19.09.2011 16:37, schrieb Frediano Ziglio:
>>>>> Now that iothread is always compiled sending a signal seems only an
>>>>> additional step. This patch also avoid writing to two pipe (one from signal
>>>>> and one in qemu_service_io).
>>>>>
>>>>> Work with kvm enabled or disabled. strace output is more readable (less syscalls).
>>>>>
>>>>> Signed-off-by: Frediano Ziglio <freddy77@gmail.com>
>>>>
>>>> Something in this change has bad effects, in the sense that it seems to
>>>> break bdrv_read_em.
>>>
>>> How does it break bdrv_read_em?  Are you seeing QEMU hung with 100% CPU
>>> utilization or deadlocked?
>>
>> Sorry, I should have been more detailed here.
>>
>> No, it's nothing obvious, it must be some subtle side effect. The result
>> of bdrv_read_em itself seems to be correct (return value and checksum of
>> the read buffer).
>>
>> However instead of booting into the DOS setup I only get an error
>> message "Kein System oder Laufwerksfehler" (don't know how it reads in
>> English DOS versions), which seems to be produced by the boot sector.
>>
>> I excluded all of the minor changes, so I'm sure that it's caused by the
>> switch from kill() to a direct call of the function that writes into the
>> pipe.
>>
>>> One interesting thing is that qemu_aio_wait() does not release the QEMU
>>> mutex, so we cannot write to a pipe with the mutex held and then spin
>>> waiting for the iothread to do work for us.
>>>
>>> Exactly how kill and qemu_notify_event() were different I'm not sure
>>> right now but it could be a factor.
>>
>> This would cause a hang, right? Then it isn't what I'm seeing.
> 
> While trying out some more things, I added some fprintfs to
> posix_aio_process_queue() and suddenly it also fails with the kill()
> version. So what has changed might really just be the timing, and it
> could be a race somewhere that has always (?) existed.

Replying to myself again... It looks like there is a problem with
reentrancy in fdctrl_transfer_handler. I think this would have been
guarded by the AsyncContexts before, but we don't have them any more.

qemu-system-x86_64: /root/upstream/qemu/hw/fdc.c:1253:
fdctrl_transfer_handler: Assertion `reentrancy == 0' failed.

Program received signal SIGABRT, Aborted.

(gdb) bt
#0  0x0000003ccd2329a5 in raise () from /lib64/libc.so.6
#1  0x0000003ccd234185 in abort () from /lib64/libc.so.6
#2  0x0000003ccd22b935 in __assert_fail () from /lib64/libc.so.6
#3  0x000000000046ff09 in fdctrl_transfer_handler (opaque=<value
optimized out>, nchan=<value optimized out>, dma_pos=<value optimized out>,
    dma_len=<value optimized out>) at /root/upstream/qemu/hw/fdc.c:1253
#4  0x000000000046702c in channel_run () at /root/upstream/qemu/hw/dma.c:348
#5  DMA_run () at /root/upstream/qemu/hw/dma.c:378
#6  0x000000000040b0e1 in qemu_bh_poll () at async.c:70
#7  0x000000000040aa19 in qemu_aio_wait () at aio.c:147
#8  0x000000000041c355 in bdrv_read_em (bs=0x131fd80, sector_num=19,
buf=<value optimized out>, nb_sectors=1) at block.c:2896
#9  0x000000000041b3d2 in bdrv_read (bs=0x131fd80, sector_num=19,
buf=0x1785a00 "IO      SYS!", nb_sectors=1) at block.c:1062
#10 0x000000000041b3d2 in bdrv_read (bs=0x131f430, sector_num=19,
buf=0x1785a00 "IO      SYS!", nb_sectors=1) at block.c:1062
#11 0x000000000046fbb8 in do_fdctrl_transfer_handler (opaque=0x1785788,
nchan=2, dma_pos=<value optimized out>, dma_len=512)
    at /root/upstream/qemu/hw/fdc.c:1178
#12 0x000000000046fecf in fdctrl_transfer_handler (opaque=<value
optimized out>, nchan=<value optimized out>, dma_pos=<value optimized out>,
    dma_len=<value optimized out>) at /root/upstream/qemu/hw/fdc.c:1255
#13 0x000000000046702c in channel_run () at /root/upstream/qemu/hw/dma.c:348
#14 DMA_run () at /root/upstream/qemu/hw/dma.c:378
#15 0x000000000046e456 in fdctrl_start_transfer (fdctrl=0x1785788,
direction=1) at /root/upstream/qemu/hw/fdc.c:1107
#16 0x0000000000558a41 in kvm_handle_io (env=0x1323ff0) at
/root/upstream/qemu/kvm-all.c:834
#17 kvm_cpu_exec (env=0x1323ff0) at /root/upstream/qemu/kvm-all.c:976
#18 0x000000000053686a in qemu_kvm_cpu_thread_fn (arg=0x1323ff0) at
/root/upstream/qemu/cpus.c:661
#19 0x0000003ccda077e1 in start_thread () from /lib64/libpthread.so.0
#20 0x0000003ccd2e151d in clone () from /lib64/libc.so.6

I'm afraid that we can only avoid things like this reliably if we
convert all devices to be direct users of AIO/coroutines. The current
block layer infrastructure doesn't emulate the behaviour of bdrv_read
accurately as bottom halves can be run in the nested main loop.

For floppy, the following seems to be a quick fix (Lucas, Cleber, does
this solve your problems?), though it's not very satisfying. And I'm not
quite sure yet why it doesn't always happen with kill() in
posix-aio-compat.c.

diff --git a/hw/dma.c b/hw/dma.c
index 8a7302a..1d3b6f1 100644
--- a/hw/dma.c
+++ b/hw/dma.c
@@ -358,6 +358,13 @@ static void DMA_run (void)
     struct dma_cont *d;
     int icont, ichan;
     int rearm = 0;
+    static int running = 0;
+
+    if (running) {
+        goto out;
+    } else {
+        running = 0;
+    }

     d = dma_controllers;

@@ -374,6 +381,8 @@ static void DMA_run (void)
         }
     }

+out:
+    running = 0;
     if (rearm)
         qemu_bh_schedule_idle(dma_bh);
 }

Kevin

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [Qemu-devel] [PATCH v2] block: avoid SIGUSR2
  2011-10-28 11:33         ` Kevin Wolf
@ 2011-10-28 11:35           ` Kevin Wolf
  2011-10-28 11:50           ` Paolo Bonzini
  2011-10-28 12:20           ` Cleber Rosa
  2 siblings, 0 replies; 17+ messages in thread
From: Kevin Wolf @ 2011-10-28 11:35 UTC (permalink / raw)
  To: Stefan Hajnoczi
  Cc: Lucas Meneghel Rodrigues, aliguori, qemu-devel, Frediano Ziglio,
	Cleber Rosa, Paolo Bonzini

Am 28.10.2011 13:33, schrieb Kevin Wolf:
> Am 27.10.2011 16:32, schrieb Kevin Wolf:
>> Am 27.10.2011 16:15, schrieb Kevin Wolf:
>>> Am 27.10.2011 15:57, schrieb Stefan Hajnoczi:
>>>> On Thu, Oct 27, 2011 at 03:26:23PM +0200, Kevin Wolf wrote:
>>>>> Am 19.09.2011 16:37, schrieb Frediano Ziglio:
>>>>>> Now that iothread is always compiled sending a signal seems only an
>>>>>> additional step. This patch also avoid writing to two pipe (one from signal
>>>>>> and one in qemu_service_io).
>>>>>>
>>>>>> Work with kvm enabled or disabled. strace output is more readable (less syscalls).
>>>>>>
>>>>>> Signed-off-by: Frediano Ziglio <freddy77@gmail.com>
>>>>>
>>>>> Something in this change has bad effects, in the sense that it seems to
>>>>> break bdrv_read_em.
>>>>
>>>> How does it break bdrv_read_em?  Are you seeing QEMU hung with 100% CPU
>>>> utilization or deadlocked?
>>>
>>> Sorry, I should have been more detailed here.
>>>
>>> No, it's nothing obvious, it must be some subtle side effect. The result
>>> of bdrv_read_em itself seems to be correct (return value and checksum of
>>> the read buffer).
>>>
>>> However instead of booting into the DOS setup I only get an error
>>> message "Kein System oder Laufwerksfehler" (don't know how it reads in
>>> English DOS versions), which seems to be produced by the boot sector.
>>>
>>> I excluded all of the minor changes, so I'm sure that it's caused by the
>>> switch from kill() to a direct call of the function that writes into the
>>> pipe.
>>>
>>>> One interesting thing is that qemu_aio_wait() does not release the QEMU
>>>> mutex, so we cannot write to a pipe with the mutex held and then spin
>>>> waiting for the iothread to do work for us.
>>>>
>>>> Exactly how kill and qemu_notify_event() were different I'm not sure
>>>> right now but it could be a factor.
>>>
>>> This would cause a hang, right? Then it isn't what I'm seeing.
>>
>> While trying out some more things, I added some fprintfs to
>> posix_aio_process_queue() and suddenly it also fails with the kill()
>> version. So what has changed might really just be the timing, and it
>> could be a race somewhere that has always (?) existed.
> 
> Replying to myself again... It looks like there is a problem with
> reentrancy in fdctrl_transfer_handler. I think this would have been
> guarded by the AsyncContexts before, but we don't have them any more.
> 
> qemu-system-x86_64: /root/upstream/qemu/hw/fdc.c:1253:
> fdctrl_transfer_handler: Assertion `reentrancy == 0' failed.
> 
> Program received signal SIGABRT, Aborted.
> 
> (gdb) bt
> #0  0x0000003ccd2329a5 in raise () from /lib64/libc.so.6
> #1  0x0000003ccd234185 in abort () from /lib64/libc.so.6
> #2  0x0000003ccd22b935 in __assert_fail () from /lib64/libc.so.6
> #3  0x000000000046ff09 in fdctrl_transfer_handler (opaque=<value
> optimized out>, nchan=<value optimized out>, dma_pos=<value optimized out>,
>     dma_len=<value optimized out>) at /root/upstream/qemu/hw/fdc.c:1253
> #4  0x000000000046702c in channel_run () at /root/upstream/qemu/hw/dma.c:348
> #5  DMA_run () at /root/upstream/qemu/hw/dma.c:378
> #6  0x000000000040b0e1 in qemu_bh_poll () at async.c:70
> #7  0x000000000040aa19 in qemu_aio_wait () at aio.c:147
> #8  0x000000000041c355 in bdrv_read_em (bs=0x131fd80, sector_num=19,
> buf=<value optimized out>, nb_sectors=1) at block.c:2896
> #9  0x000000000041b3d2 in bdrv_read (bs=0x131fd80, sector_num=19,
> buf=0x1785a00 "IO      SYS!", nb_sectors=1) at block.c:1062
> #10 0x000000000041b3d2 in bdrv_read (bs=0x131f430, sector_num=19,
> buf=0x1785a00 "IO      SYS!", nb_sectors=1) at block.c:1062
> #11 0x000000000046fbb8 in do_fdctrl_transfer_handler (opaque=0x1785788,
> nchan=2, dma_pos=<value optimized out>, dma_len=512)
>     at /root/upstream/qemu/hw/fdc.c:1178
> #12 0x000000000046fecf in fdctrl_transfer_handler (opaque=<value
> optimized out>, nchan=<value optimized out>, dma_pos=<value optimized out>,
>     dma_len=<value optimized out>) at /root/upstream/qemu/hw/fdc.c:1255
> #13 0x000000000046702c in channel_run () at /root/upstream/qemu/hw/dma.c:348
> #14 DMA_run () at /root/upstream/qemu/hw/dma.c:378
> #15 0x000000000046e456 in fdctrl_start_transfer (fdctrl=0x1785788,
> direction=1) at /root/upstream/qemu/hw/fdc.c:1107
> #16 0x0000000000558a41 in kvm_handle_io (env=0x1323ff0) at
> /root/upstream/qemu/kvm-all.c:834
> #17 kvm_cpu_exec (env=0x1323ff0) at /root/upstream/qemu/kvm-all.c:976
> #18 0x000000000053686a in qemu_kvm_cpu_thread_fn (arg=0x1323ff0) at
> /root/upstream/qemu/cpus.c:661
> #19 0x0000003ccda077e1 in start_thread () from /lib64/libpthread.so.0
> #20 0x0000003ccd2e151d in clone () from /lib64/libc.so.6
> 
> I'm afraid that we can only avoid things like this reliably if we
> convert all devices to be direct users of AIO/coroutines. The current
> block layer infrastructure doesn't emulate the behaviour of bdrv_read
> accurately as bottom halves can be run in the nested main loop.
> 
> For floppy, the following seems to be a quick fix (Lucas, Cleber, does
> this solve your problems?), though it's not very satisfying. And I'm not
> quite sure yet why it doesn't always happen with kill() in
> posix-aio-compat.c.
> 
> diff --git a/hw/dma.c b/hw/dma.c
> index 8a7302a..1d3b6f1 100644
> --- a/hw/dma.c
> +++ b/hw/dma.c
> @@ -358,6 +358,13 @@ static void DMA_run (void)
>      struct dma_cont *d;
>      int icont, ichan;
>      int rearm = 0;
> +    static int running = 0;
> +
> +    if (running) {
> +        goto out;
> +    } else {
> +        running = 0;

running = 1, obviously. I had the fix disabled for testing something.

> +    }
> 
>      d = dma_controllers;
> 
> @@ -374,6 +381,8 @@ static void DMA_run (void)
>          }
>      }
> 
> +out:
> +    running = 0;
>      if (rearm)
>          qemu_bh_schedule_idle(dma_bh);
>  }
> 
> Kevin
> 

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [Qemu-devel] [PATCH v2] block: avoid SIGUSR2
  2011-10-28 11:33         ` Kevin Wolf
  2011-10-28 11:35           ` Kevin Wolf
@ 2011-10-28 11:50           ` Paolo Bonzini
  2011-10-28 12:29             ` Kevin Wolf
  2011-10-28 12:20           ` Cleber Rosa
  2 siblings, 1 reply; 17+ messages in thread
From: Paolo Bonzini @ 2011-10-28 11:50 UTC (permalink / raw)
  To: Kevin Wolf
  Cc: Lucas Meneghel Rodrigues, aliguori, Stefan Hajnoczi, qemu-devel,
	Frediano Ziglio, Cleber Rosa

On 10/28/2011 01:33 PM, Kevin Wolf wrote:
> I'm afraid that we can only avoid things like this reliably if we
> convert all devices to be direct users of AIO/coroutines. The current
> block layer infrastructure doesn't emulate the behaviour of bdrv_read
> accurately as bottom halves can be run in the nested main loop.
>
> For floppy, the following seems to be a quick fix (Lucas, Cleber, does
> this solve your problems?), though it's not very satisfying. And I'm not
> quite sure yet why it doesn't always happen with kill() in
> posix-aio-compat.c.

Another "fix" is to change idle bottom halves (at least the one in 
hw/dma.c) to 10ms timers.

Paolo

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [Qemu-devel] [PATCH v2] block: avoid SIGUSR2
  2011-10-28 11:33         ` Kevin Wolf
  2011-10-28 11:35           ` Kevin Wolf
  2011-10-28 11:50           ` Paolo Bonzini
@ 2011-10-28 12:20           ` Cleber Rosa
  2 siblings, 0 replies; 17+ messages in thread
From: Cleber Rosa @ 2011-10-28 12:20 UTC (permalink / raw)
  To: Kevin Wolf
  Cc: Lucas Meneghel Rodrigues, aliguori, Stefan Hajnoczi, qemu-devel,
	Frediano Ziglio, Paolo Bonzini

On 10/28/2011 08:33 AM, Kevin Wolf wrote:
> Am 27.10.2011 16:32, schrieb Kevin Wolf:
>> Am 27.10.2011 16:15, schrieb Kevin Wolf:
>>> Am 27.10.2011 15:57, schrieb Stefan Hajnoczi:
>>>> On Thu, Oct 27, 2011 at 03:26:23PM +0200, Kevin Wolf wrote:
>>>>> Am 19.09.2011 16:37, schrieb Frediano Ziglio:
>>>>>> Now that iothread is always compiled sending a signal seems only an
>>>>>> additional step. This patch also avoid writing to two pipe (one from signal
>>>>>> and one in qemu_service_io).
>>>>>>
>>>>>> Work with kvm enabled or disabled. strace output is more readable (less syscalls).
>>>>>>
>>>>>> Signed-off-by: Frediano Ziglio<freddy77@gmail.com>
>>>>> Something in this change has bad effects, in the sense that it seems to
>>>>> break bdrv_read_em.
>>>> How does it break bdrv_read_em?  Are you seeing QEMU hung with 100% CPU
>>>> utilization or deadlocked?
>>> Sorry, I should have been more detailed here.
>>>
>>> No, it's nothing obvious, it must be some subtle side effect. The result
>>> of bdrv_read_em itself seems to be correct (return value and checksum of
>>> the read buffer).
>>>
>>> However instead of booting into the DOS setup I only get an error
>>> message "Kein System oder Laufwerksfehler" (don't know how it reads in
>>> English DOS versions), which seems to be produced by the boot sector.
>>>
>>> I excluded all of the minor changes, so I'm sure that it's caused by the
>>> switch from kill() to a direct call of the function that writes into the
>>> pipe.
>>>
>>>> One interesting thing is that qemu_aio_wait() does not release the QEMU
>>>> mutex, so we cannot write to a pipe with the mutex held and then spin
>>>> waiting for the iothread to do work for us.
>>>>
>>>> Exactly how kill and qemu_notify_event() were different I'm not sure
>>>> right now but it could be a factor.
>>> This would cause a hang, right? Then it isn't what I'm seeing.
>> While trying out some more things, I added some fprintfs to
>> posix_aio_process_queue() and suddenly it also fails with the kill()
>> version. So what has changed might really just be the timing, and it
>> could be a race somewhere that has always (?) existed.
> Replying to myself again... It looks like there is a problem with
> reentrancy in fdctrl_transfer_handler. I think this would have been
> guarded by the AsyncContexts before, but we don't have them any more.
>
> qemu-system-x86_64: /root/upstream/qemu/hw/fdc.c:1253:
> fdctrl_transfer_handler: Assertion `reentrancy == 0' failed.
>
> Program received signal SIGABRT, Aborted.
>
> (gdb) bt
> #0  0x0000003ccd2329a5 in raise () from /lib64/libc.so.6
> #1  0x0000003ccd234185 in abort () from /lib64/libc.so.6
> #2  0x0000003ccd22b935 in __assert_fail () from /lib64/libc.so.6
> #3  0x000000000046ff09 in fdctrl_transfer_handler (opaque=<value
> optimized out>, nchan=<value optimized out>, dma_pos=<value optimized out>,
>      dma_len=<value optimized out>) at /root/upstream/qemu/hw/fdc.c:1253
> #4  0x000000000046702c in channel_run () at /root/upstream/qemu/hw/dma.c:348
> #5  DMA_run () at /root/upstream/qemu/hw/dma.c:378
> #6  0x000000000040b0e1 in qemu_bh_poll () at async.c:70
> #7  0x000000000040aa19 in qemu_aio_wait () at aio.c:147
> #8  0x000000000041c355 in bdrv_read_em (bs=0x131fd80, sector_num=19,
> buf=<value optimized out>, nb_sectors=1) at block.c:2896
> #9  0x000000000041b3d2 in bdrv_read (bs=0x131fd80, sector_num=19,
> buf=0x1785a00 "IO      SYS!", nb_sectors=1) at block.c:1062
> #10 0x000000000041b3d2 in bdrv_read (bs=0x131f430, sector_num=19,
> buf=0x1785a00 "IO      SYS!", nb_sectors=1) at block.c:1062
> #11 0x000000000046fbb8 in do_fdctrl_transfer_handler (opaque=0x1785788,
> nchan=2, dma_pos=<value optimized out>, dma_len=512)
>      at /root/upstream/qemu/hw/fdc.c:1178
> #12 0x000000000046fecf in fdctrl_transfer_handler (opaque=<value
> optimized out>, nchan=<value optimized out>, dma_pos=<value optimized out>,
>      dma_len=<value optimized out>) at /root/upstream/qemu/hw/fdc.c:1255
> #13 0x000000000046702c in channel_run () at /root/upstream/qemu/hw/dma.c:348
> #14 DMA_run () at /root/upstream/qemu/hw/dma.c:378
> #15 0x000000000046e456 in fdctrl_start_transfer (fdctrl=0x1785788,
> direction=1) at /root/upstream/qemu/hw/fdc.c:1107
> #16 0x0000000000558a41 in kvm_handle_io (env=0x1323ff0) at
> /root/upstream/qemu/kvm-all.c:834
> #17 kvm_cpu_exec (env=0x1323ff0) at /root/upstream/qemu/kvm-all.c:976
> #18 0x000000000053686a in qemu_kvm_cpu_thread_fn (arg=0x1323ff0) at
> /root/upstream/qemu/cpus.c:661
> #19 0x0000003ccda077e1 in start_thread () from /lib64/libpthread.so.0
> #20 0x0000003ccd2e151d in clone () from /lib64/libc.so.6
>
> I'm afraid that we can only avoid things like this reliably if we
> convert all devices to be direct users of AIO/coroutines. The current
> block layer infrastructure doesn't emulate the behaviour of bdrv_read
> accurately as bottom halves can be run in the nested main loop.
>
> For floppy, the following seems to be a quick fix (Lucas, Cleber, does
> this solve your problems?), though it's not very satisfying. And I'm not
> quite sure yet why it doesn't always happen with kill() in
> posix-aio-compat.c.
>
> diff --git a/hw/dma.c b/hw/dma.c
> index 8a7302a..1d3b6f1 100644
> --- a/hw/dma.c
> +++ b/hw/dma.c
> @@ -358,6 +358,13 @@ static void DMA_run (void)
>       struct dma_cont *d;
>       int icont, ichan;
>       int rearm = 0;
> +    static int running = 0;
> +
> +    if (running) {
> +        goto out;
> +    } else {
> +        running = 0;
> +    }
>
>       d = dma_controllers;
>
> @@ -374,6 +381,8 @@ static void DMA_run (void)
>           }
>       }
>
> +out:
> +    running = 0;
>       if (rearm)
>           qemu_bh_schedule_idle(dma_bh);
>   }
>
> Kevin

Kevin,

In my quick test (compiling qemu.git master + your dma patch, and 
running a FreeDOS floppy image) it does not have any visible difference.

The boot is still stuck after printing "FreeDOS" at the console.

PS: We will trigger a full blown test, with a Windows installation using 
a floppy, but the results with the FreeDOS floppy have been very 
consistent with the full blown test.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [Qemu-devel] [PATCH v2] block: avoid SIGUSR2
  2011-10-28 11:50           ` Paolo Bonzini
@ 2011-10-28 12:29             ` Kevin Wolf
  2011-10-28 12:31               ` Stefan Hajnoczi
  0 siblings, 1 reply; 17+ messages in thread
From: Kevin Wolf @ 2011-10-28 12:29 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: Lucas Meneghel Rodrigues, aliguori, Stefan Hajnoczi, qemu-devel,
	Frediano Ziglio, Cleber Rosa

Am 28.10.2011 13:50, schrieb Paolo Bonzini:
> On 10/28/2011 01:33 PM, Kevin Wolf wrote:
>> I'm afraid that we can only avoid things like this reliably if we
>> convert all devices to be direct users of AIO/coroutines. The current
>> block layer infrastructure doesn't emulate the behaviour of bdrv_read
>> accurately as bottom halves can be run in the nested main loop.
>>
>> For floppy, the following seems to be a quick fix (Lucas, Cleber, does
>> this solve your problems?), though it's not very satisfying. And I'm not
>> quite sure yet why it doesn't always happen with kill() in
>> posix-aio-compat.c.
> 
> Another "fix" is to change idle bottom halves (at least the one in 
> hw/dma.c) to 10ms timers.

Which would be using the fact that timers are only executed in the real
main loop. Which makes me wonder if it would be enough for floppy if we
changed qemu_bh_poll() to take a bool run_idle_bhs that would be true in
the main loop and false in qemu_aio_wait().

Still this wouldn't be a general solution as normal BHs have the very
same problem if they are scheduled before a bdrv_read/write call. To
solve that I guess we'd have to reintroduce AsyncContext, but it has its
own problems and was removed for a reason.

Or we make some serious effort now to convert devices to AIO.

Kevin

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [Qemu-devel] [PATCH v2] block: avoid SIGUSR2
  2011-10-28 12:29             ` Kevin Wolf
@ 2011-10-28 12:31               ` Stefan Hajnoczi
  2011-10-28 15:58                 ` Paolo Bonzini
  2011-10-31  2:10                 ` Zhi Yong Wu
  0 siblings, 2 replies; 17+ messages in thread
From: Stefan Hajnoczi @ 2011-10-28 12:31 UTC (permalink / raw)
  To: Zhi Yong Wu
  Cc: Lucas Meneghel Rodrigues, Kevin Wolf, aliguori, Stefan Hajnoczi,
	qemu-devel, Frediano Ziglio, Cleber Rosa, Paolo Bonzini

On Fri, Oct 28, 2011 at 1:29 PM, Kevin Wolf <kwolf@redhat.com> wrote:
> Am 28.10.2011 13:50, schrieb Paolo Bonzini:
>> On 10/28/2011 01:33 PM, Kevin Wolf wrote:
>>> I'm afraid that we can only avoid things like this reliably if we
>>> convert all devices to be direct users of AIO/coroutines. The current
>>> block layer infrastructure doesn't emulate the behaviour of bdrv_read
>>> accurately as bottom halves can be run in the nested main loop.
>>>
>>> For floppy, the following seems to be a quick fix (Lucas, Cleber, does
>>> this solve your problems?), though it's not very satisfying. And I'm not
>>> quite sure yet why it doesn't always happen with kill() in
>>> posix-aio-compat.c.
>>
>> Another "fix" is to change idle bottom halves (at least the one in
>> hw/dma.c) to 10ms timers.
>
> Which would be using the fact that timers are only executed in the real
> main loop. Which makes me wonder if it would be enough for floppy if we
> changed qemu_bh_poll() to take a bool run_idle_bhs that would be true in
> the main loop and false an qemu_aio_wait().
>
> Still this wouldn't be a general solution as normal BHs have the very
> same problem if they are scheduled before a bdrv_read/write call. To
> solve that I guess we'd have to reintroduce AsyncContext, but it has its
> own problems and was removed for a reason.
>
> Or we make some serious effort now to convert devices to AIO.

Zhi Yong: We were just talking about converting devices to aio.  If
you have time to do that for fdc, sd, or any other synchronous API
users in hw/ that would be helpful.  Please let us know which device
you are refactoring so we don't duplicate work.

Stefan

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [Qemu-devel] [PATCH v2] block: avoid SIGUSR2
  2011-10-28 12:31               ` Stefan Hajnoczi
@ 2011-10-28 15:58                 ` Paolo Bonzini
  2011-10-31  2:10                 ` Zhi Yong Wu
  1 sibling, 0 replies; 17+ messages in thread
From: Paolo Bonzini @ 2011-10-28 15:58 UTC (permalink / raw)
  To: Stefan Hajnoczi
  Cc: Lucas Meneghel Rodrigues, Kevin Wolf, aliguori, Stefan Hajnoczi,
	Peter Maydell, qemu-devel, Michael Walle, Zhi Yong Wu,
	Frediano Ziglio, Cleber Rosa

On 10/28/2011 02:31 PM, Stefan Hajnoczi wrote:
> Zhi Yong: We were just talking about converting devices to aio.  If
> you have time to do that for fdc, sd, or any other synchronous API
> users in hw/ that would be helpful.  Please let us know which device
> you are refactoring so we don't duplicate work.

The problem is not really fdc or sd themselves, but whoever uses the 
result of the synchronous reads---respectively DMA and the SD clients.

Some SD clients talk to the SD card in a relatively confined way and 
have interrupts that they set when the operation is done, so these 
confined parts that talk to the card could be changed to a coroutine and 
locked with a CoMutex.  However, not even all of these can do it (in 
particular, I'm not sure whether ssi-sd.c can).

I'm thinking that the problem with the floppy is really that it mixes 
synchronous and asynchronous parts.  As long as you're entirely 
synchronous you should not have any problem, but as soon as you add 
asynchronicity (via bottom halves) you now have to deal with reentrancy.

"git grep _bh hw/" suggests that this should not be a huge problem; most 
if not all occurrences are related to ptimers, or are in entirely 
asynchronous code (IDE, SCSI, virtio).  Floppy+DMA seems to be the only 
problematic occurrence, and any fix (switch to timers, drop idle BH in 
qemu_aio_wait, reschedule if DMA reenters during I/O, drop BH completely 
and just loop) is as good as the others.

(Actually, another one worth checking is ATAPI, but I don't know the 
code and the standards well enough).

Paolo

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [Qemu-devel] [PATCH v2] block: avoid SIGUSR2
  2011-10-28 12:31               ` Stefan Hajnoczi
  2011-10-28 15:58                 ` Paolo Bonzini
@ 2011-10-31  2:10                 ` Zhi Yong Wu
  1 sibling, 0 replies; 17+ messages in thread
From: Zhi Yong Wu @ 2011-10-31  2:10 UTC (permalink / raw)
  To: Stefan Hajnoczi
  Cc: Lucas Meneghel Rodrigues, Kevin Wolf, aliguori, Stefan Hajnoczi,
	qemu-devel, Frediano Ziglio, Cleber Rosa, Paolo Bonzini

On Fri, Oct 28, 2011 at 01:31:20PM +0100, Stefan Hajnoczi wrote:
>Subject: Re: [Qemu-devel] [PATCH v2] block: avoid SIGUSR2
>From: Stefan Hajnoczi <stefanha@gmail.com>
>To: Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>
>Cc: Paolo Bonzini <pbonzini@redhat.com>, Lucas Meneghel Rodrigues
> <lmr@redhat.com>, aliguori@us.ibm.com, Stefan Hajnoczi
> <stefanha@linux.vnet.ibm.com>, qemu-devel@nongnu.org, Frediano Ziglio
> <freddy77@gmail.com>, Cleber Rosa <crosa@redhat.com>, Kevin Wolf
> <kwolf@redhat.com>
>Content-Type: text/plain; charset=ISO-8859-1
>x-cbid: 11102812-3534-0000-0000-000000FD91EE
>X-IBM-ISS-SpamDetectors: Score=0; BY=0; FL=0; FP=0; FZ=0; HX=0; KW=0; PH=0;
> SC=0; ST=0; TS=0; UL=0; ISC=
>X-IBM-ISS-DetailInfo: BY=3.00000227; HX=3.00000175; KW=3.00000007;
> PH=3.00000001; SC=3.00000001; SDB=6.00082671; UDB=6.00022873;
> UTC=2011-10-28 12:31:35
>X-Xagent-From: stefanha@gmail.com
>X-Xagent-To: wuzhy@linux.vnet.ibm.com
>X-Xagent-Gateway: vmsdvm6.vnet.ibm.com (XAGENTU8 at VMSDVM6)
>
>On Fri, Oct 28, 2011 at 1:29 PM, Kevin Wolf <kwolf@redhat.com> wrote:
>> Am 28.10.2011 13:50, schrieb Paolo Bonzini:
>>> On 10/28/2011 01:33 PM, Kevin Wolf wrote:
>>>> I'm afraid that we can only avoid things like this reliably if we
>>>> convert all devices to be direct users of AIO/coroutines. The current
>>>> block layer infrastructure doesn't emulate the behaviour of bdrv_read
>>>> accurately as bottom halves can be run in the nested main loop.
>>>>
>>>> For floppy, the following seems to be a quick fix (Lucas, Cleber, does
>>>> this solve your problems?), though it's not very satisfying. And I'm not
>>>> quite sure yet why it doesn't always happen with kill() in
>>>> posix-aio-compat.c.
>>>
>>> Another "fix" is to change idle bottom halves (at least the one in
>>> hw/dma.c) to 10ms timers.
>>
>> Which would be using the fact that timers are only executed in the real
>> main loop. Which makes me wonder if it would be enough for floppy if we
>> changed qemu_bh_poll() to take a bool run_idle_bhs that would be true in
>> the main loop and false an qemu_aio_wait().
>>
>> Still this wouldn't be a general solution as normal BHs have the very
>> same problem if they are scheduled before a bdrv_read/write call. To
>> solve that I guess we'd have to reintroduce AsyncContext, but it has its
>> own problems and was removed for a reason.
>>
>> Or we make some serious effort now to convert devices to AIO.
>
>Zhi Yong: We were just talking about converting devices to aio.  If
>you have time to do that for fdc, sd, or any other synchronous API
>users in hw/ that would be helpful.  Please let us know which device
>you are refactoring so we don't duplicate work.
Stefan,
I am working on flash (onenand, CFI), cdrom, sd, fdc, etc. If anyone has good ideas, please let me know. :)


Regards,

Zhi Yong Wu
>
>Stefan
>

^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2011-10-31  2:11 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-09-19 14:37 [Qemu-devel] [PATCH v2] block: avoid SIGUSR2 Frediano Ziglio
2011-09-19 15:02 ` Paolo Bonzini
2011-09-19 15:11   ` Kevin Wolf
2011-09-19 15:25     ` Paolo Bonzini
2011-09-19 15:15 ` Kevin Wolf
2011-10-27 13:26 ` Kevin Wolf
2011-10-27 13:57   ` Stefan Hajnoczi
2011-10-27 14:15     ` Kevin Wolf
2011-10-27 14:32       ` Kevin Wolf
2011-10-28 11:33         ` Kevin Wolf
2011-10-28 11:35           ` Kevin Wolf
2011-10-28 11:50           ` Paolo Bonzini
2011-10-28 12:29             ` Kevin Wolf
2011-10-28 12:31               ` Stefan Hajnoczi
2011-10-28 15:58                 ` Paolo Bonzini
2011-10-31  2:10                 ` Zhi Yong Wu
2011-10-28 12:20           ` Cleber Rosa

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).