From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([2001:4830:134:3::10]:35318) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1Xtvj4-0000ub-Pz for qemu-devel@nongnu.org; Thu, 27 Nov 2014 04:51:08 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1Xtviz-00037h-2V for qemu-devel@nongnu.org; Thu, 27 Nov 2014 04:51:02 -0500 Received: from mx-v6.kamp.de ([2a02:248:0:51::16]:60424 helo=mx01.kamp.de) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1Xtviy-00037T-LL for qemu-devel@nongnu.org; Thu, 27 Nov 2014 04:50:57 -0500 Message-ID: <5476F3F1.6000105@kamp.de> Date: Thu, 27 Nov 2014 10:50:41 +0100 From: Peter Lieven MIME-Version: 1.0 References: <1417013204-30676-1-git-send-email-kwolf@redhat.com> <1417013204-30676-3-git-send-email-kwolf@redhat.com> In-Reply-To: <1417013204-30676-3-git-send-email-kwolf@redhat.com> Content-Type: text/plain; charset=windows-1252; format=flowed Content-Transfer-Encoding: 8bit Subject: Re: [Qemu-devel] [RFC PATCH 2/3] raw-posix: Convert Linux AIO submission to coroutines List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: Kevin Wolf , qemu-devel@nongnu.org Cc: pbonzini@redhat.com, ming.lei@canonical.com, stefanha@redhat.com On 26.11.2014 15:46, Kevin Wolf wrote: > This improves the performance of requests because an ACB doesn't need to > be allocated on the heap any more. It also makes the code nicer and > smaller. > > As a side effect, the codepath taken by aio=threads is changed to use > paio_submit_co(). This doesn't change the performance at this point. > > Results of qemu-img bench -t none -c 10000000 [-n] /dev/loop0: > > | aio=native | aio=threads > | before | with patch | before | with patch > ------+----------+------------+----------+------------ > run 1 | 29.921s | 26.932s | 35.286s | 35.447s > run 2 | 29.793s | 26.252s | 35.276s | 35.111s > run 3 | 30.186s | 27.114s | 35.042s | 34.921s > run 4 | 30.425s | 26.600s | 35.169s | 34.968s > run 5 | 30.041s | 26.263s | 35.224s | 35.000s > > TODO: Do some more serious benchmarking in VMs with less variance. > Results of a quick fio run are vaguely positive. I still see the main-loop spun warnings with this patches applied to master. It wasn't there with the original patch from August. ~/git/qemu$ ./qemu-img bench -t none -c 10000000 -n /dev/ram1 Sending 10000000 requests, 4096 bytes each, 64 in parallel main-loop: WARNING: I/O thread spun for 1000 iterations Run completed in 31.947 seconds. Peter > > Signed-off-by: Kevin Wolf > --- > block/linux-aio.c | 70 +++++++++++++++++-------------------------------------- > block/raw-aio.h | 5 ++-- > block/raw-posix.c | 62 ++++++++++++++++++++++-------------------------- > 3 files changed, 52 insertions(+), 85 deletions(-) > > diff --git a/block/linux-aio.c b/block/linux-aio.c > index d92513b..99b259d 100644 > --- a/block/linux-aio.c > +++ b/block/linux-aio.c > @@ -12,6 +12,7 @@ > #include "qemu/queue.h" > #include "block/raw-aio.h" > #include "qemu/event_notifier.h" > +#include "block/coroutine.h" > > #include > > @@ -28,7 +29,7 @@ > #define MAX_QUEUED_IO 128 > > struct qemu_laiocb { > - BlockAIOCB common; > + Coroutine *co; > struct qemu_laio_state *ctx; > struct iocb iocb; > ssize_t ret; > @@ -86,9 +87,9 @@ static void qemu_laio_process_completion(struct qemu_laio_state *s, > } > } > } > - laiocb->common.cb(laiocb->common.opaque, ret); > > - qemu_aio_unref(laiocb); > + laiocb->ret = ret; > + qemu_coroutine_enter(laiocb->co, NULL); > } > > /* The completion BH fetches completed I/O requests and invokes their > @@ -146,30 +147,6 @@ static void qemu_laio_completion_cb(EventNotifier *e) > } > } > > -static void laio_cancel(BlockAIOCB *blockacb) > -{ > - struct qemu_laiocb *laiocb = (struct qemu_laiocb *)blockacb; > - struct io_event event; > - int ret; > - > - if (laiocb->ret != -EINPROGRESS) { > - return; > - } > - ret = io_cancel(laiocb->ctx->ctx, &laiocb->iocb, &event); > - laiocb->ret = -ECANCELED; > - if (ret != 0) { > - /* iocb is not cancelled, cb will be called by the event loop later */ > - return; > - } > - > - laiocb->common.cb(laiocb->common.opaque, laiocb->ret); > -} > - > -static const AIOCBInfo laio_aiocb_info = { > - .aiocb_size = sizeof(struct qemu_laiocb), > - .cancel_async = laio_cancel, > -}; > - > static void ioq_init(LaioQueue *io_q) > { > io_q->size = MAX_QUEUED_IO; > @@ -243,23 +220,21 @@ int laio_io_unplug(BlockDriverState *bs, void *aio_ctx, bool unplug) > return ret; > } > > -BlockAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd, > - int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, > - BlockCompletionFunc *cb, void *opaque, int type) > +int laio_submit_co(BlockDriverState *bs, void *aio_ctx, int fd, > + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, int type) > { > struct qemu_laio_state *s = aio_ctx; > - struct qemu_laiocb *laiocb; > - struct iocb *iocbs; > off_t offset = sector_num * 512; > > - laiocb = qemu_aio_get(&laio_aiocb_info, bs, cb, opaque); > - laiocb->nbytes = nb_sectors * 512; > - laiocb->ctx = s; > - laiocb->ret = -EINPROGRESS; > - laiocb->is_read = (type == QEMU_AIO_READ); > - laiocb->qiov = qiov; > - > - iocbs = &laiocb->iocb; > + struct qemu_laiocb laiocb = { > + .co = qemu_coroutine_self(), > + .nbytes = nb_sectors * 512, > + .ctx = s, > + .is_read = (type == QEMU_AIO_READ), > + .qiov = qiov, > + }; > + struct iocb *iocbs = &laiocb.iocb; > + int ret; > > switch (type) { > case QEMU_AIO_WRITE: > @@ -272,22 +247,21 @@ BlockAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd, > default: > fprintf(stderr, "%s: invalid AIO request type 0x%x.\n", > __func__, type); > - goto out_free_aiocb; > + return -EIO; > } > - io_set_eventfd(&laiocb->iocb, event_notifier_get_fd(&s->e)); > + io_set_eventfd(&laiocb.iocb, event_notifier_get_fd(&s->e)); > > if (!s->io_q.plugged) { > - if (io_submit(s->ctx, 1, &iocbs) < 0) { > - goto out_free_aiocb; > + ret = io_submit(s->ctx, 1, &iocbs); > + if (ret < 0) { > + return ret; > } > } else { > ioq_enqueue(s, iocbs); > } > - return &laiocb->common; > > -out_free_aiocb: > - qemu_aio_unref(laiocb); > - return NULL; > + qemu_coroutine_yield(); > + return laiocb.ret; > } > > void laio_detach_aio_context(void *s_, AioContext *old_context) > diff --git a/block/raw-aio.h b/block/raw-aio.h > index 80681ce..b83c8af 100644 > --- a/block/raw-aio.h > +++ b/block/raw-aio.h > @@ -35,9 +35,8 @@ > #ifdef CONFIG_LINUX_AIO > void *laio_init(void); > void laio_cleanup(void *s); > -BlockAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd, > - int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, > - BlockCompletionFunc *cb, void *opaque, int type); > +int laio_submit_co(BlockDriverState *bs, void *aio_ctx, int fd, > + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, int type); > void laio_detach_aio_context(void *s, AioContext *old_context); > void laio_attach_aio_context(void *s, AioContext *new_context); > void laio_io_plug(BlockDriverState *bs, void *aio_ctx); > diff --git a/block/raw-posix.c b/block/raw-posix.c > index b1af77e..aa16b53 100644 > --- a/block/raw-posix.c > +++ b/block/raw-posix.c > @@ -1075,14 +1075,13 @@ static BlockAIOCB *paio_submit(BlockDriverState *bs, int fd, > return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque); > } > > -static BlockAIOCB *raw_aio_submit(BlockDriverState *bs, > - int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, > - BlockCompletionFunc *cb, void *opaque, int type) > +static int coroutine_fn raw_co_rw(BlockDriverState *bs, int64_t sector_num, > + int nb_sectors, QEMUIOVector *qiov, int type) > { > BDRVRawState *s = bs->opaque; > > if (fd_open(bs) < 0) > - return NULL; > + return -EIO; > > /* > * Check if the underlying device requires requests to be aligned, > @@ -1095,14 +1094,25 @@ static BlockAIOCB *raw_aio_submit(BlockDriverState *bs, > type |= QEMU_AIO_MISALIGNED; > #ifdef CONFIG_LINUX_AIO > } else if (s->use_aio) { > - return laio_submit(bs, s->aio_ctx, s->fd, sector_num, qiov, > - nb_sectors, cb, opaque, type); > + return laio_submit_co(bs, s->aio_ctx, s->fd, sector_num, qiov, > + nb_sectors, type); > #endif > } > } > > - return paio_submit(bs, s->fd, sector_num, qiov, nb_sectors, > - cb, opaque, type); > + return paio_submit_co(bs, s->fd, sector_num, qiov, nb_sectors, type); > +} > + > +static int coroutine_fn raw_co_readv(BlockDriverState *bs, int64_t sector_num, > + int nb_sectors, QEMUIOVector *qiov) > +{ > + return raw_co_rw(bs, sector_num, nb_sectors, qiov, QEMU_AIO_READ); > +} > + > +static int coroutine_fn raw_co_writev(BlockDriverState *bs, int64_t sector_num, > + int nb_sectors, QEMUIOVector *qiov) > +{ > + return raw_co_rw(bs, sector_num, nb_sectors, qiov, QEMU_AIO_WRITE); > } > > static void raw_aio_plug(BlockDriverState *bs) > @@ -1135,22 +1145,6 @@ static void raw_aio_flush_io_queue(BlockDriverState *bs) > #endif > } > > -static BlockAIOCB *raw_aio_readv(BlockDriverState *bs, > - int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, > - BlockCompletionFunc *cb, void *opaque) > -{ > - return raw_aio_submit(bs, sector_num, qiov, nb_sectors, > - cb, opaque, QEMU_AIO_READ); > -} > - > -static BlockAIOCB *raw_aio_writev(BlockDriverState *bs, > - int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, > - BlockCompletionFunc *cb, void *opaque) > -{ > - return raw_aio_submit(bs, sector_num, qiov, nb_sectors, > - cb, opaque, QEMU_AIO_WRITE); > -} > - > static BlockAIOCB *raw_aio_flush(BlockDriverState *bs, > BlockCompletionFunc *cb, void *opaque) > { > @@ -1701,8 +1695,8 @@ static BlockDriver bdrv_file = { > .bdrv_co_get_block_status = raw_co_get_block_status, > .bdrv_co_write_zeroes = raw_co_write_zeroes, > > - .bdrv_aio_readv = raw_aio_readv, > - .bdrv_aio_writev = raw_aio_writev, > + .bdrv_co_readv = raw_co_readv, > + .bdrv_co_writev = raw_co_writev, > .bdrv_aio_flush = raw_aio_flush, > .bdrv_aio_discard = raw_aio_discard, > .bdrv_refresh_limits = raw_refresh_limits, > @@ -2103,8 +2097,8 @@ static BlockDriver bdrv_host_device = { > .create_opts = &raw_create_opts, > .bdrv_co_write_zeroes = hdev_co_write_zeroes, > > - .bdrv_aio_readv = raw_aio_readv, > - .bdrv_aio_writev = raw_aio_writev, > + .bdrv_co_readv = raw_co_readv, > + .bdrv_co_writev = raw_co_writev, > .bdrv_aio_flush = raw_aio_flush, > .bdrv_aio_discard = hdev_aio_discard, > .bdrv_refresh_limits = raw_refresh_limits, > @@ -2252,8 +2246,8 @@ static BlockDriver bdrv_host_floppy = { > .bdrv_create = hdev_create, > .create_opts = &raw_create_opts, > > - .bdrv_aio_readv = raw_aio_readv, > - .bdrv_aio_writev = raw_aio_writev, > + .bdrv_co_readv = raw_co_readv, > + .bdrv_co_writev = raw_co_writev, > .bdrv_aio_flush = raw_aio_flush, > .bdrv_refresh_limits = raw_refresh_limits, > .bdrv_io_plug = raw_aio_plug, > @@ -2383,8 +2377,8 @@ static BlockDriver bdrv_host_cdrom = { > .bdrv_create = hdev_create, > .create_opts = &raw_create_opts, > > - .bdrv_aio_readv = raw_aio_readv, > - .bdrv_aio_writev = raw_aio_writev, > + .bdrv_co_readv = raw_co_readv, > + .bdrv_co_writev = raw_co_writev, > .bdrv_aio_flush = raw_aio_flush, > .bdrv_refresh_limits = raw_refresh_limits, > .bdrv_io_plug = raw_aio_plug, > @@ -2520,8 +2514,8 @@ static BlockDriver bdrv_host_cdrom = { > .bdrv_create = hdev_create, > .create_opts = &raw_create_opts, > > - .bdrv_aio_readv = raw_aio_readv, > - .bdrv_aio_writev = raw_aio_writev, > + .bdrv_co_readv = raw_co_readv, > + .bdrv_co_writev = raw_co_writev, > .bdrv_aio_flush = raw_aio_flush, > .bdrv_refresh_limits = raw_refresh_limits, > .bdrv_io_plug = raw_aio_plug, -- Mit freundlichen Grüßen Peter Lieven ........................................................... KAMP Netzwerkdienste GmbH Vestische Str. 89-91 | 46117 Oberhausen Tel: +49 (0) 208.89 402-50 | Fax: +49 (0) 208.89 402-40 pl@kamp.de | http://www.kamp.de Geschäftsführer: Heiner Lante | Michael Lante Amtsgericht Duisburg | HRB Nr. 12154 USt-Id-Nr.: DE 120607556 ...........................................................