All of lore.kernel.org
 help / color / mirror / Atom feed
From: Paolo Bonzini <pbonzini@redhat.com>
To: bharata@linux.vnet.ibm.com
Cc: Kevin Wolf <kwolf@redhat.com>, Vijay Bellur <vbellur@redhat.com>,
	Stefan Hajnoczi <stefanha@gmail.com>,
	qemu-devel@nongnu.org, Stefan Hajnoczi <stefanha@redhat.com>,
	Asias He <asias@redhat.com>,
	MORITA Kazutaka <morita.kazutaka@lab.ntt.co.jp>
Subject: Re: [Qemu-devel] [PATCH] block: Fix race in gluster_finish_aiocb
Date: Fri, 23 Aug 2013 09:33:21 +0200	[thread overview]
Message-ID: <52171041.9030805@redhat.com> (raw)
In-Reply-To: <20130823064804.GG2755@in.ibm.com>

Il 23/08/2013 08:48, Bharata B Rao ha scritto:
> On Wed, Aug 21, 2013 at 05:40:11PM +0200, Paolo Bonzini wrote:
>>
>> We could just use a bottom half, too.  Add a bottom half to acb,
>> schedule it in gluster_finish_aiocb, delete it in the bottom half's own
>> callback.
> 
> I tried this approach (the patch at the end of this mail), but see this
> occasional errors, doesn't happen always. Any clues on what am I missing here ?
> 
> Program received signal SIGSEGV, Segmentation fault.
> [Switching to Thread 0x7fffcbfff700 (LWP 11301)]
> 0x000055555597156b in event_notifier_set (e=0xa4)
>     at util/event_notifier-posix.c:97
> 97	        ret = write(e->wfd, &value, sizeof(value));
> (gdb) bt
> #0  0x000055555597156b in event_notifier_set (e=0xa4)
>     at util/event_notifier-posix.c:97
> #1  0x00005555555dd3cd in aio_notify (ctx=0x0) at async.c:233
> #2  0x00005555555dd00f in qemu_bh_schedule (bh=0x7fff300008e0) at async.c:128
> #3  0x00005555556002da in gluster_finish_aiocb (fd=0x5555562ae5d0, ret=4096, 
>     arg=0x7fffd00419c0) at block/gluster.c:409
> #4  0x00007ffff5e70cdc in glfs_io_async_cbk (ret=4096, frame=0x0, data=
>     0x555556443ee0) at glfs-fops.c:567
> #5  0x00007ffff5c2843e in synctask_wrap (old_task=0x555556365940)
>     at syncop.c:133
> #6  0x00007ffff3b03370 in ?? () from /lib64/libc.so.6
> #7  0x0000000000000000 in ?? ()
> (gdb) up
> #1  0x00005555555dd3cd in aio_notify (ctx=0x0) at async.c:233
> 233	    event_notifier_set(&ctx->notifier);
> (gdb) up
> #2  0x00005555555dd00f in qemu_bh_schedule (bh=0x7fff300008e0) at async.c:128
> 128	    aio_notify(bh->ctx);
> (gdb) p *bh
> $1 = {ctx = 0x0, cb = 0x5555555ffdcd <qemu_gluster_aio_bh>, opaque = 
>     0x7fffd00419c0, next = 0x555556345e70, scheduled = false, idle = false, 
>   deleted = true}

This looks like a use-after-free, with bh->ctx corrupted when freeing
the bottom half.  But it's not at all obvious how it can happen.

I suggest using MALLOC_PERTURB_=42 to check this theory (if it is
correct, most fields will be something like 0x2a2a2a2a2a2a2a2a).  But I
don't see anything clearly wrong in the patch... Thus perhaps it is
simpler to just remove the unreachable error handling code.

Paolo

> 
> 
> gluster: Use BH mechanism for AIO completion
> 
> Replace the existing pipe based AIO completion handing by BH based method.
> 
> Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
> ---
>  block/gluster.c |   69 ++++++-------------------------------------------------
>  1 file changed, 8 insertions(+), 61 deletions(-)
> 
> diff --git a/block/gluster.c b/block/gluster.c
> index 46f36f8..598b335 100644
> --- a/block/gluster.c
> +++ b/block/gluster.c
> @@ -30,7 +30,6 @@ typedef struct GlusterAIOCB {
>  
>  typedef struct BDRVGlusterState {
>      struct glfs *glfs;
> -    int fds[2];
>      struct glfs_fd *fd;
>      int event_reader_pos;
>      GlusterAIOCB *event_acb;
> @@ -231,12 +230,13 @@ out:
>      return NULL;
>  }
>  
> -static void qemu_gluster_complete_aio(GlusterAIOCB *acb, BDRVGlusterState *s)
> +static void qemu_gluster_aio_bh(void *opaque)
>  {
>      int ret;
> +    GlusterAIOCB *acb = opaque;
>      bool *finished = acb->finished;
>      BlockDriverCompletionFunc *cb = acb->common.cb;
> -    void *opaque = acb->common.opaque;
> +    void *cb_opaque = acb->common.opaque;
>  
>      if (!acb->ret || acb->ret == acb->size) {
>          ret = 0; /* Success */
> @@ -246,33 +246,15 @@ static void qemu_gluster_complete_aio(GlusterAIOCB *acb, BDRVGlusterState *s)
>          ret = -EIO; /* Partial read/write - fail it */
>      }
>  
> +    qemu_bh_delete(acb->bh);
>      qemu_aio_release(acb);
> -    cb(opaque, ret);
> +
> +    cb(cb_opaque, ret);
>      if (finished) {
>          *finished = true;
>      }
>  }
>  
> -static void qemu_gluster_aio_event_reader(void *opaque)
> -{
> -    BDRVGlusterState *s = opaque;
> -    ssize_t ret;
> -
> -    do {
> -        char *p = (char *)&s->event_acb;
> -
> -        ret = read(s->fds[GLUSTER_FD_READ], p + s->event_reader_pos,
> -                   sizeof(s->event_acb) - s->event_reader_pos);
> -        if (ret > 0) {
> -            s->event_reader_pos += ret;
> -            if (s->event_reader_pos == sizeof(s->event_acb)) {
> -                s->event_reader_pos = 0;
> -                qemu_gluster_complete_aio(s->event_acb, s);
> -            }
> -        }
> -    } while (ret < 0 && errno == EINTR);
> -}
> -
>  /* TODO Convert to fine grained options */
>  static QemuOptsList runtime_opts = {
>      .name = "gluster",
> @@ -329,17 +311,7 @@ static int qemu_gluster_open(BlockDriverState *bs,  QDict *options,
>      s->fd = glfs_open(s->glfs, gconf->image, open_flags);
>      if (!s->fd) {
>          ret = -errno;
> -        goto out;
> -    }
> -
> -    ret = qemu_pipe(s->fds);
> -    if (ret < 0) {
> -        ret = -errno;
> -        goto out;
>      }
> -    fcntl(s->fds[GLUSTER_FD_READ], F_SETFL, O_NONBLOCK);
> -    qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ],
> -        qemu_gluster_aio_event_reader, NULL, s);
>  
>  out:
>      qemu_opts_del(opts);
> @@ -417,31 +389,10 @@ static const AIOCBInfo gluster_aiocb_info = {
>  static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg)
>  {
>      GlusterAIOCB *acb = (GlusterAIOCB *)arg;
> -    BlockDriverState *bs = acb->common.bs;
> -    BDRVGlusterState *s = bs->opaque;
> -    int retval;
>  
>      acb->ret = ret;
> -    retval = qemu_write_full(s->fds[GLUSTER_FD_WRITE], &acb, sizeof(acb));
> -    if (retval != sizeof(acb)) {
> -        /*
> -         * Gluster AIO callback thread failed to notify the waiting
> -         * QEMU thread about IO completion.
> -         *
> -         * Complete this IO request and make the disk inaccessible for
> -         * subsequent reads and writes.
> -         */
> -        error_report("Gluster failed to notify QEMU about IO completion");
> -
> -        qemu_mutex_lock_iothread(); /* We are in gluster thread context */
> -        acb->common.cb(acb->common.opaque, -EIO);
> -        qemu_aio_release(acb);
> -        close(s->fds[GLUSTER_FD_READ]);
> -        close(s->fds[GLUSTER_FD_WRITE]);
> -        qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ], NULL, NULL, NULL);
> -        bs->drv = NULL; /* Make the disk inaccessible */
> -        qemu_mutex_unlock_iothread();
> -    }
> +    acb->bh = qemu_bh_new(qemu_gluster_aio_bh, acb);
> +    qemu_bh_schedule(acb->bh);
>  }
>  
>  static BlockDriverAIOCB *qemu_gluster_aio_rw(BlockDriverState *bs,
> @@ -592,10 +543,6 @@ static void qemu_gluster_close(BlockDriverState *bs)
>  {
>      BDRVGlusterState *s = bs->opaque;
>  
> -    close(s->fds[GLUSTER_FD_READ]);
> -    close(s->fds[GLUSTER_FD_WRITE]);
> -    qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ], NULL, NULL, NULL);
> -
>      if (s->fd) {
>          glfs_close(s->fd);
>          s->fd = NULL;
> 

  reply	other threads:[~2013-08-23  7:34 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-08-21  2:02 [Qemu-devel] [PATCH] block: Fix race in gluster_finish_aiocb Asias He
2013-08-21  8:16 ` Paolo Bonzini
2013-08-22  9:50   ` Asias He
2013-08-22  9:51     ` Paolo Bonzini
2013-08-23  8:32       ` Asias He
2013-08-23  9:05         ` Paolo Bonzini
2013-08-21 15:24 ` Stefan Hajnoczi
2013-08-21 15:40   ` Paolo Bonzini
2013-08-22  5:59     ` Bharata B Rao
2013-08-22  7:48       ` Stefan Hajnoczi
2013-08-22  9:06         ` Paolo Bonzini
2013-08-22  9:55           ` Bharata B Rao
2013-08-22 10:00             ` Paolo Bonzini
2013-08-22 10:28               ` Bharata B Rao
2013-08-22 11:15                 ` Paolo Bonzini
2013-08-22 13:25                   ` Bharata B Rao
2013-08-22 13:27                     ` Paolo Bonzini
2013-08-22 14:01                       ` Bharata B Rao
2013-08-22 14:52                         ` Paolo Bonzini
2013-08-23  6:48     ` Bharata B Rao
2013-08-23  7:33       ` Paolo Bonzini [this message]
2013-08-23  8:11         ` Bharata B Rao

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=52171041.9030805@redhat.com \
    --to=pbonzini@redhat.com \
    --cc=asias@redhat.com \
    --cc=bharata@linux.vnet.ibm.com \
    --cc=kwolf@redhat.com \
    --cc=morita.kazutaka@lab.ntt.co.jp \
    --cc=qemu-devel@nongnu.org \
    --cc=stefanha@gmail.com \
    --cc=stefanha@redhat.com \
    --cc=vbellur@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.