qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Kevin Wolf <kwolf@redhat.com>
To: qemu-block@nongnu.org
Cc: kwolf@redhat.com, qemu-devel@nongnu.org
Subject: [Qemu-devel] [PULL 16/39] raw-posix: Switch to bdrv_co_* interfaces
Date: Thu, 16 Jun 2016 16:08:05 +0200	[thread overview]
Message-ID: <1466086108-24868-17-git-send-email-kwolf@redhat.com> (raw)
In-Reply-To: <1466086108-24868-1-git-send-email-kwolf@redhat.com>

In order to use the modern byte-based .bdrv_co_preadv/pwritev()
interface, this patch switches raw-posix to coroutine-based interfaces
as a first step. In terms of semantics and performance, it doesn't make
a difference with the existing code whether we go from a coroutine to a
callback-based interface already in block/io.c or only in linux-aio.c.

As there have been concerns in the past that this change may be a step
in the wrong direction with respect to a possible AIO fast path, the
old callback-based interface for linux-aio is left around and can be
reactivated when a fast path (e.g. directly from virtio-blk dataplane,
bypassing the whole block layer) is implemented.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/linux-aio.c | 87 +++++++++++++++++++++++++++++++++++++++++--------------
 block/raw-aio.h   |  4 +++
 block/raw-posix.c | 59 +++++++++++++++++--------------------
 3 files changed, 96 insertions(+), 54 deletions(-)

diff --git a/block/linux-aio.c b/block/linux-aio.c
index 90ec98e..657577a 100644
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -11,8 +11,10 @@
 #include "qemu-common.h"
 #include "block/aio.h"
 #include "qemu/queue.h"
+#include "block/block.h"
 #include "block/raw-aio.h"
 #include "qemu/event_notifier.h"
+#include "qemu/coroutine.h"
 
 #include <libaio.h>
 
@@ -30,6 +32,7 @@
 
 struct qemu_laiocb {
     BlockAIOCB common;
+    Coroutine *co;
     LinuxAioState *ctx;
     struct iocb iocb;
     ssize_t ret;
@@ -88,9 +91,14 @@ static void qemu_laio_process_completion(struct qemu_laiocb *laiocb)
             }
         }
     }
-    laiocb->common.cb(laiocb->common.opaque, ret);
 
-    qemu_aio_unref(laiocb);
+    laiocb->ret = ret;
+    if (laiocb->co) {
+        qemu_coroutine_enter(laiocb->co, NULL);
+    } else {
+        laiocb->common.cb(laiocb->common.opaque, ret);
+        qemu_aio_unref(laiocb);
+    }
 }
 
 /* The completion BH fetches completed I/O requests and invokes their
@@ -230,22 +238,12 @@ void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s)
     }
 }
 
-BlockAIOCB *laio_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockCompletionFunc *cb, void *opaque, int type)
+static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset,
+                          int type)
 {
-    struct qemu_laiocb *laiocb;
-    struct iocb *iocbs;
-    off_t offset = sector_num * 512;
-
-    laiocb = qemu_aio_get(&laio_aiocb_info, bs, cb, opaque);
-    laiocb->nbytes = nb_sectors * 512;
-    laiocb->ctx = s;
-    laiocb->ret = -EINPROGRESS;
-    laiocb->is_read = (type == QEMU_AIO_READ);
-    laiocb->qiov = qiov;
-
-    iocbs = &laiocb->iocb;
+    LinuxAioState *s = laiocb->ctx;
+    struct iocb *iocbs = &laiocb->iocb;
+    QEMUIOVector *qiov = laiocb->qiov;
 
     switch (type) {
     case QEMU_AIO_WRITE:
@@ -258,7 +256,7 @@ BlockAIOCB *laio_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
     default:
         fprintf(stderr, "%s: invalid AIO request type 0x%x.\n",
                         __func__, type);
-        goto out_free_aiocb;
+        return -EIO;
     }
     io_set_eventfd(&laiocb->iocb, event_notifier_get_fd(&s->e));
 
@@ -268,11 +266,56 @@ BlockAIOCB *laio_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
         (!s->io_q.plugged || s->io_q.n >= MAX_QUEUED_IO)) {
         ioq_submit(s);
     }
-    return &laiocb->common;
 
-out_free_aiocb:
-    qemu_aio_unref(laiocb);
-    return NULL;
+    return 0;
+}
+
+int coroutine_fn laio_co_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
+                                int64_t sector_num, QEMUIOVector *qiov,
+                                int nb_sectors, int type)
+{
+    off_t offset = sector_num * BDRV_SECTOR_SIZE;
+    int ret;
+
+    struct qemu_laiocb laiocb = {
+        .co         = qemu_coroutine_self(),
+        .nbytes     = nb_sectors * BDRV_SECTOR_SIZE,
+        .ctx        = s,
+        .is_read    = (type == QEMU_AIO_READ),
+        .qiov       = qiov,
+    };
+
+    ret = laio_do_submit(fd, &laiocb, offset, type);
+    if (ret < 0) {
+        return ret;
+    }
+
+    qemu_coroutine_yield();
+    return laiocb.ret;
+}
+
+BlockAIOCB *laio_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockCompletionFunc *cb, void *opaque, int type)
+{
+    struct qemu_laiocb *laiocb;
+    off_t offset = sector_num * BDRV_SECTOR_SIZE;
+    int ret;
+
+    laiocb = qemu_aio_get(&laio_aiocb_info, bs, cb, opaque);
+    laiocb->nbytes = nb_sectors * BDRV_SECTOR_SIZE;
+    laiocb->ctx = s;
+    laiocb->ret = -EINPROGRESS;
+    laiocb->is_read = (type == QEMU_AIO_READ);
+    laiocb->qiov = qiov;
+
+    ret = laio_do_submit(fd, laiocb, offset, type);
+    if (ret < 0) {
+        qemu_aio_unref(laiocb);
+        return NULL;
+    }
+
+    return &laiocb->common;
 }
 
 void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context)
diff --git a/block/raw-aio.h b/block/raw-aio.h
index 714714e..03bbfba 100644
--- a/block/raw-aio.h
+++ b/block/raw-aio.h
@@ -15,6 +15,7 @@
 #ifndef QEMU_RAW_AIO_H
 #define QEMU_RAW_AIO_H
 
+#include "qemu/coroutine.h"
 #include "qemu/iov.h"
 
 /* AIO request types */
@@ -38,6 +39,9 @@
 typedef struct LinuxAioState LinuxAioState;
 LinuxAioState *laio_init(void);
 void laio_cleanup(LinuxAioState *s);
+int coroutine_fn laio_co_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
+                                int64_t sector_num, QEMUIOVector *qiov,
+                                int nb_sectors, int type);
 BlockAIOCB *laio_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
         BlockCompletionFunc *cb, void *opaque, int type);
diff --git a/block/raw-posix.c b/block/raw-posix.c
index ce2e20f..cb98769 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -1325,14 +1325,13 @@ static BlockAIOCB *paio_submit(BlockDriverState *bs, int fd,
     return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque);
 }
 
-static BlockAIOCB *raw_aio_submit(BlockDriverState *bs,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockCompletionFunc *cb, void *opaque, int type)
+static int coroutine_fn raw_co_rw(BlockDriverState *bs, int64_t sector_num,
+                                  int nb_sectors, QEMUIOVector *qiov, int type)
 {
     BDRVRawState *s = bs->opaque;
 
     if (fd_open(bs) < 0)
-        return NULL;
+        return -EIO;
 
     /*
      * Check if the underlying device requires requests to be aligned,
@@ -1345,14 +1344,26 @@ static BlockAIOCB *raw_aio_submit(BlockDriverState *bs,
             type |= QEMU_AIO_MISALIGNED;
 #ifdef CONFIG_LINUX_AIO
         } else if (s->use_aio) {
-            return laio_submit(bs, s->aio_ctx, s->fd, sector_num, qiov,
-                               nb_sectors, cb, opaque, type);
+            return laio_co_submit(bs, s->aio_ctx, s->fd, sector_num, qiov,
+                                  nb_sectors, type);
 #endif
         }
     }
 
-    return paio_submit(bs, s->fd, sector_num, qiov, nb_sectors,
-                       cb, opaque, type);
+    return paio_submit_co(bs, s->fd, sector_num * BDRV_SECTOR_SIZE, qiov,
+                          nb_sectors * BDRV_SECTOR_SIZE, type);
+}
+
+static int coroutine_fn raw_co_readv(BlockDriverState *bs, int64_t sector_num,
+                                     int nb_sectors, QEMUIOVector *qiov)
+{
+    return raw_co_rw(bs, sector_num, nb_sectors, qiov, QEMU_AIO_READ);
+}
+
+static int coroutine_fn raw_co_writev(BlockDriverState *bs, int64_t sector_num,
+                                      int nb_sectors, QEMUIOVector *qiov)
+{
+    return raw_co_rw(bs, sector_num, nb_sectors, qiov, QEMU_AIO_WRITE);
 }
 
 static void raw_aio_plug(BlockDriverState *bs)
@@ -1375,22 +1386,6 @@ static void raw_aio_unplug(BlockDriverState *bs)
 #endif
 }
 
-static BlockAIOCB *raw_aio_readv(BlockDriverState *bs,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockCompletionFunc *cb, void *opaque)
-{
-    return raw_aio_submit(bs, sector_num, qiov, nb_sectors,
-                          cb, opaque, QEMU_AIO_READ);
-}
-
-static BlockAIOCB *raw_aio_writev(BlockDriverState *bs,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockCompletionFunc *cb, void *opaque)
-{
-    return raw_aio_submit(bs, sector_num, qiov, nb_sectors,
-                          cb, opaque, QEMU_AIO_WRITE);
-}
-
 static BlockAIOCB *raw_aio_flush(BlockDriverState *bs,
         BlockCompletionFunc *cb, void *opaque)
 {
@@ -1957,8 +1952,8 @@ BlockDriver bdrv_file = {
     .bdrv_co_get_block_status = raw_co_get_block_status,
     .bdrv_co_pwrite_zeroes = raw_co_pwrite_zeroes,
 
-    .bdrv_aio_readv = raw_aio_readv,
-    .bdrv_aio_writev = raw_aio_writev,
+    .bdrv_co_readv          = raw_co_readv,
+    .bdrv_co_writev         = raw_co_writev,
     .bdrv_aio_flush = raw_aio_flush,
     .bdrv_aio_discard = raw_aio_discard,
     .bdrv_refresh_limits = raw_refresh_limits,
@@ -2405,8 +2400,8 @@ static BlockDriver bdrv_host_device = {
     .create_opts         = &raw_create_opts,
     .bdrv_co_pwrite_zeroes = hdev_co_pwrite_zeroes,
 
-    .bdrv_aio_readv	= raw_aio_readv,
-    .bdrv_aio_writev	= raw_aio_writev,
+    .bdrv_co_readv          = raw_co_readv,
+    .bdrv_co_writev         = raw_co_writev,
     .bdrv_aio_flush	= raw_aio_flush,
     .bdrv_aio_discard   = hdev_aio_discard,
     .bdrv_refresh_limits = raw_refresh_limits,
@@ -2535,8 +2530,8 @@ static BlockDriver bdrv_host_cdrom = {
     .bdrv_create         = hdev_create,
     .create_opts         = &raw_create_opts,
 
-    .bdrv_aio_readv     = raw_aio_readv,
-    .bdrv_aio_writev    = raw_aio_writev,
+    .bdrv_co_readv          = raw_co_readv,
+    .bdrv_co_writev         = raw_co_writev,
     .bdrv_aio_flush	= raw_aio_flush,
     .bdrv_refresh_limits = raw_refresh_limits,
     .bdrv_io_plug = raw_aio_plug,
@@ -2670,8 +2665,8 @@ static BlockDriver bdrv_host_cdrom = {
     .bdrv_create        = hdev_create,
     .create_opts        = &raw_create_opts,
 
-    .bdrv_aio_readv     = raw_aio_readv,
-    .bdrv_aio_writev    = raw_aio_writev,
+    .bdrv_co_readv          = raw_co_readv,
+    .bdrv_co_writev         = raw_co_writev,
     .bdrv_aio_flush	= raw_aio_flush,
     .bdrv_refresh_limits = raw_refresh_limits,
     .bdrv_io_plug = raw_aio_plug,
-- 
1.8.3.1

  parent reply	other threads:[~2016-06-16 14:09 UTC|newest]

Thread overview: 43+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-06-16 14:07 [Qemu-devel] [PULL 00/39] Block layer patches Kevin Wolf
2016-06-16 14:07 ` [Qemu-devel] [PULL 01/39] qcow2: Work with bytes in qcow2_get_cluster_offset() Kevin Wolf
2016-06-16 14:07 ` [Qemu-devel] [PULL 02/39] qcow2: Implement .bdrv_co_preadv() Kevin Wolf
2016-06-16 14:07 ` [Qemu-devel] [PULL 03/39] qcow2: Make copy_sectors() byte based Kevin Wolf
2016-06-16 14:07 ` [Qemu-devel] [PULL 04/39] qcow2: Use bytes instead of sectors for QCowL2Meta Kevin Wolf
2016-06-16 14:07 ` [Qemu-devel] [PULL 05/39] qcow2: Implement .bdrv_co_pwritev() Kevin Wolf
2016-06-16 14:07 ` [Qemu-devel] [PULL 06/39] blockdev: clarify error on attempt to open locked tray Kevin Wolf
2016-06-16 14:07 ` [Qemu-devel] [PULL 07/39] hmp: acquire aio_context in hmp_qemu_io Kevin Wolf
2016-06-16 14:07 ` [Qemu-devel] [PULL 08/39] m25p80: fix test on blk_pread() return value Kevin Wolf
2016-06-16 14:07 ` [Qemu-devel] [PULL 09/39] qemu-img bench: Fix uninitialised writethrough mode Kevin Wolf
2016-06-16 14:07 ` [Qemu-devel] [PULL 10/39] block: Avoid bogus flags during mirroring Kevin Wolf
2016-06-16 14:08 ` [Qemu-devel] [PULL 11/39] block: Assert that flags are in range Kevin Wolf
2016-06-16 14:08 ` [Qemu-devel] [PULL 12/39] block: drop support for using qcow[2] encryption with system emulators Kevin Wolf
2016-06-16 14:08 ` [Qemu-devel] [PULL 13/39] block: Byte-based bdrv_co_do_copy_on_readv() Kevin Wolf
2016-06-16 14:08 ` [Qemu-devel] [PULL 14/39] block: Prepare bdrv_aligned_preadv() for byte-aligned requests Kevin Wolf
2016-06-16 14:08 ` [Qemu-devel] [PULL 15/39] block: Prepare bdrv_aligned_pwritev() " Kevin Wolf
2016-06-16 14:08 ` Kevin Wolf [this message]
2016-06-16 14:08 ` [Qemu-devel] [PULL 17/39] raw-posix: Implement .bdrv_co_preadv/pwritev Kevin Wolf
2016-06-16 14:08 ` [Qemu-devel] [PULL 18/39] block: Don't enforce 512 byte minimum alignment Kevin Wolf
2016-06-16 14:08 ` [Qemu-devel] [PULL 19/39] linux-aio: Cancel BH if not needed Kevin Wolf
2016-06-16 14:08 ` [Qemu-devel] [PULL 20/39] doc: Fix mailing list address in tests/qemu-iotests/README Kevin Wolf
2016-06-16 14:08 ` [Qemu-devel] [PULL 21/39] block: Introduce bdrv_preadv() Kevin Wolf
2016-06-16 14:08 ` [Qemu-devel] [PULL 22/39] block: Make .bdrv_load_vmstate() vectored Kevin Wolf
2016-06-16 14:08 ` [Qemu-devel] [PULL 23/39] block: Allow .bdrv_load/save_vmstate() to return 0/-errno Kevin Wolf
2016-06-16 14:08 ` [Qemu-devel] [PULL 24/39] block: Make bdrv_load/save_vmstate coroutine_fns Kevin Wolf
2016-06-16 14:08 ` [Qemu-devel] [PULL 25/39] qcow2: Let vmstate call qcow2_co_preadv/pwrite directly Kevin Wolf
2016-06-16 14:08 ` [Qemu-devel] [PULL 26/39] block: Remove bs->zero_beyond_eof Kevin Wolf
2016-06-16 14:08 ` [Qemu-devel] [PULL 27/39] block: Fix snapshot=on with aio=native Kevin Wolf
2016-06-16 14:08 ` [Qemu-devel] [PULL 28/39] block: use the block job list in bdrv_drain_all() Kevin Wolf
2016-06-16 14:08 ` [Qemu-devel] [PULL 29/39] block: use the block job list in qmp_query_block_jobs() Kevin Wolf
2016-06-16 14:08 ` [Qemu-devel] [PULL 30/39] block: Prevent sleeping jobs from resuming if they have been paused Kevin Wolf
2016-06-16 14:08 ` [Qemu-devel] [PULL 31/39] block: Create the commit block job before reopening any image Kevin Wolf
2016-06-16 14:08 ` [Qemu-devel] [PULL 32/39] iotests: 095: Clean up QEMU before showing image info Kevin Wolf
2016-06-16 14:08 ` [Qemu-devel] [PULL 33/39] rbd:change error_setg() to error_setg_errno() Kevin Wolf
2016-06-16 14:08 ` [Qemu-devel] [PULL 34/39] block: Allow replacement of a BDS by its overlay Kevin Wolf
2016-06-16 14:08 ` [Qemu-devel] [PULL 35/39] block/mirror: Fix target backing BDS Kevin Wolf
2016-06-16 14:08 ` [Qemu-devel] [PULL 36/39] block/null: Implement bdrv_refresh_filename() Kevin Wolf
2016-06-16 14:08 ` [Qemu-devel] [PULL 37/39] iotests: Add test for post-mirror backing chains Kevin Wolf
2016-06-16 14:08 ` [Qemu-devel] [PULL 38/39] iotests: Add test for oVirt-like storage migration Kevin Wolf
2016-06-16 14:08 ` [Qemu-devel] [PULL 39/39] hbitmap: add 'pos < size' asserts Kevin Wolf
2016-06-16 15:06 ` [Qemu-devel] [PULL 00/39] Block layer patches Peter Maydell
2016-06-16 17:04   ` Eric Blake
2016-06-16 17:08     ` Peter Maydell

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1466086108-24868-17-git-send-email-kwolf@redhat.com \
    --to=kwolf@redhat.com \
    --cc=qemu-block@nongnu.org \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).