qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Ming Lei <ming.lei@canonical.com>
To: qemu-devel@nongnu.org, Paolo Bonzini <pbonzini@redhat.com>,
	Stefan Hajnoczi <stefanha@redhat.com>
Cc: Kevin Wolf <kwolf@redhat.com>, Ming Lei <ming.lei@canonical.com>,
	Fam Zheng <famz@redhat.com>,
	"Michael S. Tsirkin" <mst@redhat.com>
Subject: [Qemu-devel] [PATCH RFC 03/14] block: support to bypass qemu coroutine
Date: Thu, 17 Jul 2014 00:31:10 +0800	[thread overview]
Message-ID: <1405528281-23744-4-git-send-email-ming.lei@canonical.com> (raw)
In-Reply-To: <1405528281-23744-1-git-send-email-ming.lei@canonical.com>

This patch adds bypass mode support for the coroutine
in bdrv_co_aio_rw_vector(), which is in the fast path
of lots of block devices, especially for virtio-blk dataplane.

Signed-off-by: Ming Lei <ming.lei@canonical.com>
---
 block.c |  129 +++++++++++++++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 105 insertions(+), 24 deletions(-)

diff --git a/block.c b/block.c
index 128a14f..db7ba37 100644
--- a/block.c
+++ b/block.c
@@ -55,6 +55,21 @@ struct BdrvDirtyBitmap {
     QLIST_ENTRY(BdrvDirtyBitmap) list;
 };
 
+typedef struct CoroutineIOCompletion {
+    Coroutine *coroutine;
+    int ret;
+    bool bypass;
+    QEMUIOVector *bounced_iov;
+} CoroutineIOCompletion;
+
+typedef struct BlockDriverAIOCBCoroutine {
+    BlockDriverAIOCB common;
+    BlockRequest req;
+    bool is_write;
+    bool *done;
+    QEMUBH *bh;
+} BlockDriverAIOCBCoroutine;
+
 #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
 
 #define COROUTINE_POOL_RESERVATION 64 /* number of coroutines to reserve */
@@ -122,6 +137,21 @@ int is_windows_drive(const char *filename)
 }
 #endif
 
+static CoroutineIOCompletion *bdrv_get_co_io_comp(BlockDriverAIOCBCoroutine
+                                                  *acb)
+{
+    return (CoroutineIOCompletion *)((void *)acb +
+               sizeof(BlockDriverAIOCBCoroutine));
+}
+
+static BlockDriverAIOCBCoroutine *bdrv_get_aio_co(CoroutineIOCompletion *co)
+{
+    assert(co->bypass);
+
+    return (BlockDriverAIOCBCoroutine *)((void *)co -
+               sizeof(BlockDriverAIOCBCoroutine));
+}
+
 /* throttling disk I/O limits */
 void bdrv_set_io_limits(BlockDriverState *bs,
                         ThrottleConfig *cfg)
@@ -3074,7 +3104,16 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
             ret = drv->bdrv_co_readv(bs, sector_num, local_sectors,
                                      &local_qiov);
 
-            qemu_iovec_destroy(&local_qiov);
+
+            if (qemu_coroutine_self_bypassed()) {
+                CoroutineIOCompletion *pco = bdrv_get_co_io_comp(
+                                             (BlockDriverAIOCBCoroutine *)
+                                             qemu_coroutine_get_var());
+                pco->bounced_iov = g_malloc(sizeof(QEMUIOVector));
+                *pco->bounced_iov = local_qiov;
+            } else {
+                qemu_iovec_destroy(&local_qiov);
+            }
         } else {
             ret = 0;
         }
@@ -4652,15 +4691,6 @@ static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
     return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
 }
 
-
-typedef struct BlockDriverAIOCBCoroutine {
-    BlockDriverAIOCB common;
-    BlockRequest req;
-    bool is_write;
-    bool *done;
-    QEMUBH* bh;
-} BlockDriverAIOCBCoroutine;
-
 static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
 {
     AioContext *aio_context = bdrv_get_aio_context(blockacb->bs);
@@ -4679,6 +4709,12 @@ static const AIOCBInfo bdrv_em_co_aiocb_info = {
     .cancel             = bdrv_aio_co_cancel_em,
 };
 
+static const AIOCBInfo bdrv_em_co_bypass_aiocb_info = {
+    .aiocb_size         = sizeof(BlockDriverAIOCBCoroutine) +
+                          sizeof(CoroutineIOCompletion),
+    .cancel             = bdrv_aio_co_cancel_em,
+};
+
 static void bdrv_co_em_bh(void *opaque)
 {
     BlockDriverAIOCBCoroutine *acb = opaque;
@@ -4698,6 +4734,12 @@ static void coroutine_fn bdrv_co_do_rw(void *opaque)
 {
     BlockDriverAIOCBCoroutine *acb = opaque;
     BlockDriverState *bs = acb->common.bs;
+    bool bypass = qemu_coroutine_self_bypassed();
+
+    if (bypass) {
+        qemu_coroutine_set_var(acb);
+        memset(bdrv_get_co_io_comp(acb), 0, sizeof(CoroutineIOCompletion));
+    }
 
     if (!acb->is_write) {
         acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
@@ -4707,8 +4749,10 @@ static void coroutine_fn bdrv_co_do_rw(void *opaque)
             acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
     }
 
-    acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
-    qemu_bh_schedule(acb->bh);
+    if (!bypass) {
+        acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
+        qemu_bh_schedule(acb->bh);
+    }
 }
 
 static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
@@ -4722,8 +4766,18 @@ static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
 {
     Coroutine *co;
     BlockDriverAIOCBCoroutine *acb;
+    const AIOCBInfo *aiocb_info;
+    bool bypass;
 
-    acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
+    if (qemu_aio_get_bypass_co(bdrv_get_aio_context(bs))) {
+        aiocb_info = &bdrv_em_co_bypass_aiocb_info;
+        bypass = true;
+    } else {
+        aiocb_info = &bdrv_em_co_aiocb_info;
+        bypass = false;
+    }
+
+    acb = qemu_aio_get(aiocb_info, bs, cb, opaque);
     acb->req.sector = sector_num;
     acb->req.nb_sectors = nb_sectors;
     acb->req.qiov = qiov;
@@ -4731,8 +4785,14 @@ static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
     acb->is_write = is_write;
     acb->done = NULL;
 
-    co = qemu_coroutine_create(bdrv_co_do_rw);
-    qemu_coroutine_enter(co, acb);
+    if (!bypass) {
+        co = qemu_coroutine_create(bdrv_co_do_rw);
+        qemu_coroutine_enter(co, acb);
+    } else {
+        qemu_coroutine_set_bypass(true);
+        bdrv_co_do_rw(acb);
+        qemu_coroutine_set_bypass(false);
+    }
 
     return &acb->common;
 }
@@ -4826,17 +4886,28 @@ void qemu_aio_release(void *p)
 /**************************************************************/
 /* Coroutine block device emulation */
 
-typedef struct CoroutineIOCompletion {
-    Coroutine *coroutine;
-    int ret;
-} CoroutineIOCompletion;
-
 static void bdrv_co_io_em_complete(void *opaque, int ret)
 {
     CoroutineIOCompletion *co = opaque;
 
     co->ret = ret;
-    qemu_coroutine_enter(co->coroutine, NULL);
+
+    if (!co->bypass) {
+        qemu_coroutine_enter(co->coroutine, NULL);
+    } else {
+        BlockDriverAIOCBCoroutine *acb = bdrv_get_aio_co(co);
+
+        acb->common.cb(acb->common.opaque, ret);
+        if (acb->done) {
+            *acb->done = true;
+        }
+        qemu_aio_release(acb);
+
+        if (co->bounced_iov) {
+            qemu_iovec_destroy(co->bounced_iov);
+            g_free(co->bounced_iov);
+        }
+    }
 }
 
 static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
@@ -4846,21 +4917,31 @@ static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
     CoroutineIOCompletion co = {
         .coroutine = qemu_coroutine_self(),
     };
+    CoroutineIOCompletion *pco = &co;
     BlockDriverAIOCB *acb;
 
+    if (qemu_coroutine_bypassed(co.coroutine)) {
+        pco = bdrv_get_co_io_comp((BlockDriverAIOCBCoroutine *)
+                                   qemu_coroutine_get_var());
+        pco->bypass = true;
+    }
+
     if (is_write) {
         acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
-                                       bdrv_co_io_em_complete, &co);
+                                       bdrv_co_io_em_complete, pco);
     } else {
         acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
-                                      bdrv_co_io_em_complete, &co);
+                                      bdrv_co_io_em_complete, pco);
     }
 
     trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
     if (!acb) {
         return -EIO;
     }
-    qemu_coroutine_yield();
+
+    if (!pco->bypass) {
+        qemu_coroutine_yield();
+    }
 
     return co.ret;
 }
-- 
1.7.9.5

  parent reply	other threads:[~2014-07-16 16:32 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-07-16 16:31 [Qemu-devel] [PATCH RFC 00/14] dataplane: performance optimization and multi virtqueue Ming Lei
2014-07-16 16:31 ` [Qemu-devel] [PATCH RFC 01/14] qemu coroutine: support bypass mode Ming Lei
2014-07-16 16:31 ` [Qemu-devel] [PATCH RFC 02/14] qemu aio: prepare for supporting selective bypass coroutine Ming Lei
2014-07-16 16:31 ` Ming Lei [this message]
2014-07-16 16:31 ` [Qemu-devel] [PATCH RFC 04/14] Revert "raw-posix: drop raw_get_aio_fd() since it is no longer used" Ming Lei
2014-07-16 16:31 ` [Qemu-devel] [PATCH RFC 05/14] dataplane: enable selective bypassing coroutine Ming Lei
2014-07-16 16:31 ` [Qemu-devel] [PATCH RFC 06/14] qemu/obj_pool.h: introduce object allocation pool Ming Lei
2014-07-16 16:31 ` [Qemu-devel] [PATCH RFC 07/14] dataplane: use object pool to speed up allocation for virtio blk request Ming Lei
2014-07-16 16:31 ` [Qemu-devel] [PATCH RFC 08/14] linux-aio: fix submit aio as a batch Ming Lei
2014-07-16 16:31 ` [Qemu-devel] [PATCH RFC 09/14] linux-aio: increase max event to 256 Ming Lei
2014-07-16 16:31 ` [Qemu-devel] [PATCH RFC 10/14] linux-aio: remove 'node' from 'struct qemu_laiocb' Ming Lei
2014-07-16 16:31 ` [Qemu-devel] [PATCH RFC 11/14] hw/virtio-pci: introduce num_queues property Ming Lei
2014-07-16 16:31 ` [Qemu-devel] [PATCH RFC 12/14] hw/virtio/virtio-blk.h: introduce VIRTIO_BLK_F_MQ Ming Lei
2014-07-16 16:31 ` [Qemu-devel] [PATCH RFC 13/14] hw/block/virtio-blk: create num_queues vqs if dataplane is enabled Ming Lei
2014-07-16 16:31 ` [Qemu-devel] [PATCH RFC 14/14] dataplane: virtio-blk: support multi virtqueue Ming Lei

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1405528281-23744-4-git-send-email-ming.lei@canonical.com \
    --to=ming.lei@canonical.com \
    --cc=famz@redhat.com \
    --cc=kwolf@redhat.com \
    --cc=mst@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=stefanha@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).