From: Paolo Bonzini <pbonzini@redhat.com>
To: qemu-devel@nongnu.org
Cc: qemu-block@nongnu.org
Subject: [Qemu-devel] [PATCH] block: avoid unnecessary bottom halves
Date: Sat, 28 Mar 2015 07:37:18 +0100
Message-ID: <1427524638-28157-1-git-send-email-pbonzini@redhat.com>

bdrv_aio_* APIs can use coroutines to achieve asynchronicity.  However,
the coroutine may terminate without having yielded back to the caller
(for example because it does something that invokes a nested event loop,
or because it does nothing at all).  In this case, the bdrv_aio_* API
must delay the completion to the next iteration of the main loop,
because the bdrv_aio_* contract guarantees that the callback is never
invoked before bdrv_aio_* returns.
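
To see why this matters, consider a minimal caller sketch (not part of
this patch; my_cb and my_state are placeholder names).  Callers
routinely store the returned acb, e.g. to cancel the request later,
which is only safe if the callback has not yet run:

    BlockAIOCB *acb;

    /*
     * The caller may stash the returned acb, e.g. to cancel the request
     * later with bdrv_aio_cancel().  If the callback had already run
     * (and the acb been unref'd) before bdrv_aio_readv() returned,
     * this store would leave a dangling pointer.
     */
    acb = bdrv_aio_readv(bs, sector_num, qiov, nb_sectors,
                         my_cb, my_state);
    my_state->acb = acb;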

This can be done with a bottom half, and indeed bdrv_aio_* always
uses one for simplicity.  It is possible to gain some performance
(~3%) by avoiding this in the common case.  A new field in the
BlockAIOCBCoroutine struct remains true until the first time the
coroutine yields to its creator, and completion goes through a
new function bdrv_co_complete.  If the flag is false, bdrv_co_complete
invokes the callback immediately.  If it is true, the caller will
notice that the coroutine has completed and schedule the bottom
half itself.
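
Condensed into a standalone sketch (illustrative names only, not the
patch's code), the mechanism is:

    #include <errno.h>     /* -EINPROGRESS */
    #include <stdbool.h>

    struct op {
        bool need_bh;      /* true until the creator has run again */
        int error;         /* -EINPROGRESS while the coroutine runs */
        void (*cb)(void *opaque, int error);
        void *opaque;
    };

    /* Runs in coroutine context once the work has finished. */
    static void complete(struct op *op)
    {
        if (!op->need_bh) {
            /*
             * The creator has already returned to its caller, so it
             * is safe to invoke the callback directly: no bottom half.
             */
            op->cb(op->opaque, op->error);
        }
        /*
         * Otherwise the creator is still on the stack.  When its
         * qemu_coroutine_enter() call returns, it sees that error is
         * no longer -EINPROGRESS and schedules the bottom half itself.
         */
    }
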
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
block.c | 43 ++++++++++++++++++++++++++++++++++---------
1 file changed, 34 insertions(+), 9 deletions(-)
diff --git a/block.c b/block.c
index 0fe97de..3fe0642 100644
--- a/block.c
+++ b/block.c
@@ -4789,6 +4789,7 @@ typedef struct BlockAIOCBCoroutine {
BlockAIOCB common;
BlockRequest req;
bool is_write;
+ bool need_bh;
bool *done;
QEMUBH* bh;
} BlockAIOCBCoroutine;
@@ -4797,14 +4798,32 @@ static const AIOCBInfo bdrv_em_co_aiocb_info = {
.aiocb_size = sizeof(BlockAIOCBCoroutine),
};
+static void bdrv_co_complete(BlockAIOCBCoroutine *acb)
+{
+ if (!acb->need_bh) {
+ acb->common.cb(acb->common.opaque, acb->req.error);
+ qemu_aio_unref(acb);
+ }
+}
+
static void bdrv_co_em_bh(void *opaque)
{
BlockAIOCBCoroutine *acb = opaque;
- acb->common.cb(acb->common.opaque, acb->req.error);
-
+ assert(!acb->need_bh);
qemu_bh_delete(acb->bh);
- qemu_aio_unref(acb);
+ bdrv_co_complete(acb);
+}
+
+static void bdrv_co_maybe_schedule_bh(BlockAIOCBCoroutine *acb)
+{
+ acb->need_bh = false;
+ if (acb->req.error != -EINPROGRESS) {
+ BlockDriverState *bs = acb->common.bs;
+
+ acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
+ qemu_bh_schedule(acb->bh);
+ }
}
/* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
@@ -4821,8 +4840,7 @@ static void coroutine_fn bdrv_co_do_rw(void *opaque)
acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
}
- acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
- qemu_bh_schedule(acb->bh);
+ bdrv_co_complete(acb);
}
static BlockAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
@@ -4838,6 +4856,8 @@ static BlockAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
BlockAIOCBCoroutine *acb;
acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
+ acb->need_bh = true;
+ acb->req.error = -EINPROGRESS;
acb->req.sector = sector_num;
acb->req.nb_sectors = nb_sectors;
acb->req.qiov = qiov;
@@ -4847,6 +4867,7 @@ static BlockAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
co = qemu_coroutine_create(bdrv_co_do_rw);
qemu_coroutine_enter(co, acb);
+ bdrv_co_maybe_schedule_bh(acb);
return &acb->common;
}
@@ -4856,8 +4877,7 @@ static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
BlockDriverState *bs = acb->common.bs;
acb->req.error = bdrv_co_flush(bs);
- acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
- qemu_bh_schedule(acb->bh);
+ bdrv_co_complete(acb);
}
BlockAIOCB *bdrv_aio_flush(BlockDriverState *bs,
@@ -4869,10 +4889,13 @@ BlockAIOCB *bdrv_aio_flush(BlockDriverState *bs,
BlockAIOCBCoroutine *acb;
acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
+ acb->need_bh = true;
+ acb->req.error = -EINPROGRESS;
co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
qemu_coroutine_enter(co, acb);
+ bdrv_co_maybe_schedule_bh(acb);
return &acb->common;
}
@@ -4882,8 +4905,7 @@ static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
BlockDriverState *bs = acb->common.bs;
acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
- acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
- qemu_bh_schedule(acb->bh);
+ bdrv_co_complete(acb);
}
BlockAIOCB *bdrv_aio_discard(BlockDriverState *bs,
@@ -4896,11 +4918,14 @@ BlockAIOCB *bdrv_aio_discard(BlockDriverState *bs,
trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
+ acb->need_bh = true;
+ acb->req.error = -EINPROGRESS;
acb->req.sector = sector_num;
acb->req.nb_sectors = nb_sectors;
co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
qemu_coroutine_enter(co, acb);
+ bdrv_co_maybe_schedule_bh(acb);
return &acb->common;
}
--
2.3.4