From: Peter Lieven <pl@kamp.de>
To: Kevin Wolf <kwolf@redhat.com>, qemu-devel@nongnu.org
Cc: pbonzini@redhat.com, ming.lei@canonical.com, stefanha@redhat.com
Subject: Re: [Qemu-devel] [RFC PATCH 2/3] raw-posix: Convert Linux AIO submission to coroutines
Date: Thu, 27 Nov 2014 10:50:41 +0100 [thread overview]
Message-ID: <5476F3F1.6000105@kamp.de> (raw)
In-Reply-To: <1417013204-30676-3-git-send-email-kwolf@redhat.com>
On 26.11.2014 15:46, Kevin Wolf wrote:
> This improves the performance of requests because an ACB doesn't need to
> be allocated on the heap any more. It also makes the code nicer and
> smaller.
>
> As a side effect, the codepath taken by aio=threads is changed to use
> paio_submit_co(). This doesn't change the performance at this point.
>
> Results of qemu-img bench -t none -c 10000000 [-n] /dev/loop0:
>
> | aio=native | aio=threads
> | before | with patch | before | with patch
> ------+----------+------------+----------+------------
> run 1 | 29.921s | 26.932s | 35.286s | 35.447s
> run 2 | 29.793s | 26.252s | 35.276s | 35.111s
> run 3 | 30.186s | 27.114s | 35.042s | 34.921s
> run 4 | 30.425s | 26.600s | 35.169s | 34.968s
> run 5 | 30.041s | 26.263s | 35.224s | 35.000s
>
> TODO: Do some more serious benchmarking in VMs with less variance.
> Results of a quick fio run are vaguely positive.
I still see the main-loop spun warnings with this patches applied to master.
It wasn't there with the original patch from August.
~/git/qemu$ ./qemu-img bench -t none -c 10000000 -n /dev/ram1
Sending 10000000 requests, 4096 bytes each, 64 in parallel
main-loop: WARNING: I/O thread spun for 1000 iterations
Run completed in 31.947 seconds.
Peter
>
> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
> ---
> block/linux-aio.c | 70 +++++++++++++++++--------------------------------------
> block/raw-aio.h | 5 ++--
> block/raw-posix.c | 62 ++++++++++++++++++++++--------------------------
> 3 files changed, 52 insertions(+), 85 deletions(-)
>
> diff --git a/block/linux-aio.c b/block/linux-aio.c
> index d92513b..99b259d 100644
> --- a/block/linux-aio.c
> +++ b/block/linux-aio.c
> @@ -12,6 +12,7 @@
> #include "qemu/queue.h"
> #include "block/raw-aio.h"
> #include "qemu/event_notifier.h"
> +#include "block/coroutine.h"
>
> #include <libaio.h>
>
> @@ -28,7 +29,7 @@
> #define MAX_QUEUED_IO 128
>
> struct qemu_laiocb {
> - BlockAIOCB common;
> + Coroutine *co;
> struct qemu_laio_state *ctx;
> struct iocb iocb;
> ssize_t ret;
> @@ -86,9 +87,9 @@ static void qemu_laio_process_completion(struct qemu_laio_state *s,
> }
> }
> }
> - laiocb->common.cb(laiocb->common.opaque, ret);
>
> - qemu_aio_unref(laiocb);
> + laiocb->ret = ret;
> + qemu_coroutine_enter(laiocb->co, NULL);
> }
>
> /* The completion BH fetches completed I/O requests and invokes their
> @@ -146,30 +147,6 @@ static void qemu_laio_completion_cb(EventNotifier *e)
> }
> }
>
> -static void laio_cancel(BlockAIOCB *blockacb)
> -{
> - struct qemu_laiocb *laiocb = (struct qemu_laiocb *)blockacb;
> - struct io_event event;
> - int ret;
> -
> - if (laiocb->ret != -EINPROGRESS) {
> - return;
> - }
> - ret = io_cancel(laiocb->ctx->ctx, &laiocb->iocb, &event);
> - laiocb->ret = -ECANCELED;
> - if (ret != 0) {
> - /* iocb is not cancelled, cb will be called by the event loop later */
> - return;
> - }
> -
> - laiocb->common.cb(laiocb->common.opaque, laiocb->ret);
> -}
> -
> -static const AIOCBInfo laio_aiocb_info = {
> - .aiocb_size = sizeof(struct qemu_laiocb),
> - .cancel_async = laio_cancel,
> -};
> -
> static void ioq_init(LaioQueue *io_q)
> {
> io_q->size = MAX_QUEUED_IO;
> @@ -243,23 +220,21 @@ int laio_io_unplug(BlockDriverState *bs, void *aio_ctx, bool unplug)
> return ret;
> }
>
> -BlockAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
> - int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
> - BlockCompletionFunc *cb, void *opaque, int type)
> +int laio_submit_co(BlockDriverState *bs, void *aio_ctx, int fd,
> + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, int type)
> {
> struct qemu_laio_state *s = aio_ctx;
> - struct qemu_laiocb *laiocb;
> - struct iocb *iocbs;
> off_t offset = sector_num * 512;
>
> - laiocb = qemu_aio_get(&laio_aiocb_info, bs, cb, opaque);
> - laiocb->nbytes = nb_sectors * 512;
> - laiocb->ctx = s;
> - laiocb->ret = -EINPROGRESS;
> - laiocb->is_read = (type == QEMU_AIO_READ);
> - laiocb->qiov = qiov;
> -
> - iocbs = &laiocb->iocb;
> + struct qemu_laiocb laiocb = {
> + .co = qemu_coroutine_self(),
> + .nbytes = nb_sectors * 512,
> + .ctx = s,
> + .is_read = (type == QEMU_AIO_READ),
> + .qiov = qiov,
> + };
> + struct iocb *iocbs = &laiocb.iocb;
> + int ret;
>
> switch (type) {
> case QEMU_AIO_WRITE:
> @@ -272,22 +247,21 @@ BlockAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
> default:
> fprintf(stderr, "%s: invalid AIO request type 0x%x.\n",
> __func__, type);
> - goto out_free_aiocb;
> + return -EIO;
> }
> - io_set_eventfd(&laiocb->iocb, event_notifier_get_fd(&s->e));
> + io_set_eventfd(&laiocb.iocb, event_notifier_get_fd(&s->e));
>
> if (!s->io_q.plugged) {
> - if (io_submit(s->ctx, 1, &iocbs) < 0) {
> - goto out_free_aiocb;
> + ret = io_submit(s->ctx, 1, &iocbs);
> + if (ret < 0) {
> + return ret;
> }
> } else {
> ioq_enqueue(s, iocbs);
> }
> - return &laiocb->common;
>
> -out_free_aiocb:
> - qemu_aio_unref(laiocb);
> - return NULL;
> + qemu_coroutine_yield();
> + return laiocb.ret;
> }
>
> void laio_detach_aio_context(void *s_, AioContext *old_context)
> diff --git a/block/raw-aio.h b/block/raw-aio.h
> index 80681ce..b83c8af 100644
> --- a/block/raw-aio.h
> +++ b/block/raw-aio.h
> @@ -35,9 +35,8 @@
> #ifdef CONFIG_LINUX_AIO
> void *laio_init(void);
> void laio_cleanup(void *s);
> -BlockAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
> - int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
> - BlockCompletionFunc *cb, void *opaque, int type);
> +int laio_submit_co(BlockDriverState *bs, void *aio_ctx, int fd,
> + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, int type);
> void laio_detach_aio_context(void *s, AioContext *old_context);
> void laio_attach_aio_context(void *s, AioContext *new_context);
> void laio_io_plug(BlockDriverState *bs, void *aio_ctx);
> diff --git a/block/raw-posix.c b/block/raw-posix.c
> index b1af77e..aa16b53 100644
> --- a/block/raw-posix.c
> +++ b/block/raw-posix.c
> @@ -1075,14 +1075,13 @@ static BlockAIOCB *paio_submit(BlockDriverState *bs, int fd,
> return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque);
> }
>
> -static BlockAIOCB *raw_aio_submit(BlockDriverState *bs,
> - int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
> - BlockCompletionFunc *cb, void *opaque, int type)
> +static int coroutine_fn raw_co_rw(BlockDriverState *bs, int64_t sector_num,
> + int nb_sectors, QEMUIOVector *qiov, int type)
> {
> BDRVRawState *s = bs->opaque;
>
> if (fd_open(bs) < 0)
> - return NULL;
> + return -EIO;
>
> /*
> * Check if the underlying device requires requests to be aligned,
> @@ -1095,14 +1094,25 @@ static BlockAIOCB *raw_aio_submit(BlockDriverState *bs,
> type |= QEMU_AIO_MISALIGNED;
> #ifdef CONFIG_LINUX_AIO
> } else if (s->use_aio) {
> - return laio_submit(bs, s->aio_ctx, s->fd, sector_num, qiov,
> - nb_sectors, cb, opaque, type);
> + return laio_submit_co(bs, s->aio_ctx, s->fd, sector_num, qiov,
> + nb_sectors, type);
> #endif
> }
> }
>
> - return paio_submit(bs, s->fd, sector_num, qiov, nb_sectors,
> - cb, opaque, type);
> + return paio_submit_co(bs, s->fd, sector_num, qiov, nb_sectors, type);
> +}
> +
> +static int coroutine_fn raw_co_readv(BlockDriverState *bs, int64_t sector_num,
> + int nb_sectors, QEMUIOVector *qiov)
> +{
> + return raw_co_rw(bs, sector_num, nb_sectors, qiov, QEMU_AIO_READ);
> +}
> +
> +static int coroutine_fn raw_co_writev(BlockDriverState *bs, int64_t sector_num,
> + int nb_sectors, QEMUIOVector *qiov)
> +{
> + return raw_co_rw(bs, sector_num, nb_sectors, qiov, QEMU_AIO_WRITE);
> }
>
> static void raw_aio_plug(BlockDriverState *bs)
> @@ -1135,22 +1145,6 @@ static void raw_aio_flush_io_queue(BlockDriverState *bs)
> #endif
> }
>
> -static BlockAIOCB *raw_aio_readv(BlockDriverState *bs,
> - int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
> - BlockCompletionFunc *cb, void *opaque)
> -{
> - return raw_aio_submit(bs, sector_num, qiov, nb_sectors,
> - cb, opaque, QEMU_AIO_READ);
> -}
> -
> -static BlockAIOCB *raw_aio_writev(BlockDriverState *bs,
> - int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
> - BlockCompletionFunc *cb, void *opaque)
> -{
> - return raw_aio_submit(bs, sector_num, qiov, nb_sectors,
> - cb, opaque, QEMU_AIO_WRITE);
> -}
> -
> static BlockAIOCB *raw_aio_flush(BlockDriverState *bs,
> BlockCompletionFunc *cb, void *opaque)
> {
> @@ -1701,8 +1695,8 @@ static BlockDriver bdrv_file = {
> .bdrv_co_get_block_status = raw_co_get_block_status,
> .bdrv_co_write_zeroes = raw_co_write_zeroes,
>
> - .bdrv_aio_readv = raw_aio_readv,
> - .bdrv_aio_writev = raw_aio_writev,
> + .bdrv_co_readv = raw_co_readv,
> + .bdrv_co_writev = raw_co_writev,
> .bdrv_aio_flush = raw_aio_flush,
> .bdrv_aio_discard = raw_aio_discard,
> .bdrv_refresh_limits = raw_refresh_limits,
> @@ -2103,8 +2097,8 @@ static BlockDriver bdrv_host_device = {
> .create_opts = &raw_create_opts,
> .bdrv_co_write_zeroes = hdev_co_write_zeroes,
>
> - .bdrv_aio_readv = raw_aio_readv,
> - .bdrv_aio_writev = raw_aio_writev,
> + .bdrv_co_readv = raw_co_readv,
> + .bdrv_co_writev = raw_co_writev,
> .bdrv_aio_flush = raw_aio_flush,
> .bdrv_aio_discard = hdev_aio_discard,
> .bdrv_refresh_limits = raw_refresh_limits,
> @@ -2252,8 +2246,8 @@ static BlockDriver bdrv_host_floppy = {
> .bdrv_create = hdev_create,
> .create_opts = &raw_create_opts,
>
> - .bdrv_aio_readv = raw_aio_readv,
> - .bdrv_aio_writev = raw_aio_writev,
> + .bdrv_co_readv = raw_co_readv,
> + .bdrv_co_writev = raw_co_writev,
> .bdrv_aio_flush = raw_aio_flush,
> .bdrv_refresh_limits = raw_refresh_limits,
> .bdrv_io_plug = raw_aio_plug,
> @@ -2383,8 +2377,8 @@ static BlockDriver bdrv_host_cdrom = {
> .bdrv_create = hdev_create,
> .create_opts = &raw_create_opts,
>
> - .bdrv_aio_readv = raw_aio_readv,
> - .bdrv_aio_writev = raw_aio_writev,
> + .bdrv_co_readv = raw_co_readv,
> + .bdrv_co_writev = raw_co_writev,
> .bdrv_aio_flush = raw_aio_flush,
> .bdrv_refresh_limits = raw_refresh_limits,
> .bdrv_io_plug = raw_aio_plug,
> @@ -2520,8 +2514,8 @@ static BlockDriver bdrv_host_cdrom = {
> .bdrv_create = hdev_create,
> .create_opts = &raw_create_opts,
>
> - .bdrv_aio_readv = raw_aio_readv,
> - .bdrv_aio_writev = raw_aio_writev,
> + .bdrv_co_readv = raw_co_readv,
> + .bdrv_co_writev = raw_co_writev,
> .bdrv_aio_flush = raw_aio_flush,
> .bdrv_refresh_limits = raw_refresh_limits,
> .bdrv_io_plug = raw_aio_plug,
--
Mit freundlichen Grüßen
Peter Lieven
...........................................................
KAMP Netzwerkdienste GmbH
Vestische Str. 89-91 | 46117 Oberhausen
Tel: +49 (0) 208.89 402-50 | Fax: +49 (0) 208.89 402-40
pl@kamp.de | http://www.kamp.de
Geschäftsführer: Heiner Lante | Michael Lante
Amtsgericht Duisburg | HRB Nr. 12154
USt-Id-Nr.: DE 120607556
...........................................................
next prev parent reply other threads:[~2014-11-27 9:51 UTC|newest]
Thread overview: 21+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-11-26 14:46 [Qemu-devel] [RFC PATCH 0/3] linux-aio: Convert to coroutines Kevin Wolf
2014-11-26 14:46 ` [Qemu-devel] [RFC PATCH 1/3] qemu-img bench Kevin Wolf
2014-11-28 11:49 ` Stefan Hajnoczi
2014-11-28 12:19 ` Kevin Wolf
2014-12-01 11:15 ` Stefan Hajnoczi
2014-11-26 14:46 ` [Qemu-devel] [RFC PATCH 2/3] raw-posix: Convert Linux AIO submission to coroutines Kevin Wolf
2014-11-27 9:50 ` Peter Lieven [this message]
2014-11-28 12:57 ` Kevin Wolf
2014-11-28 13:44 ` Kevin Wolf
2014-11-28 2:59 ` Ming Lei
2014-11-28 7:33 ` Markus Armbruster
2014-11-28 8:12 ` Ming Lei
2014-11-28 8:59 ` Markus Armbruster
2014-11-28 9:15 ` Ming Lei
2014-11-28 9:44 ` Paolo Bonzini
2014-11-28 10:06 ` Kevin Wolf
2014-11-26 14:46 ` [Qemu-devel] [RFC PATCH 3/3] linux-aio: Don't reenter request coroutine recursively Kevin Wolf
2014-12-04 14:37 ` Kevin Wolf
2014-12-04 15:22 ` Ming Lei
2014-12-04 15:39 ` Kevin Wolf
2014-12-04 15:45 ` Ming Lei
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=5476F3F1.6000105@kamp.de \
--to=pl@kamp.de \
--cc=kwolf@redhat.com \
--cc=ming.lei@canonical.com \
--cc=pbonzini@redhat.com \
--cc=qemu-devel@nongnu.org \
--cc=stefanha@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.