* [Qemu-devel] [PATCH RFC 01/14] qemu coroutine: support bypass mode
2014-07-16 16:31 [Qemu-devel] [PATCH RFC 00/14] dataplane: performance optimization and multi virtqueue Ming Lei
@ 2014-07-16 16:31 ` Ming Lei
2014-07-16 16:31 ` [Qemu-devel] [PATCH RFC 02/14] qemu aio: prepare for supporting selective bypass coroutine Ming Lei
` (12 subsequent siblings)
13 siblings, 0 replies; 15+ messages in thread
From: Ming Lei @ 2014-07-16 16:31 UTC (permalink / raw)
To: qemu-devel, Paolo Bonzini, Stefan Hajnoczi
Cc: Kevin Wolf, Ming Lei, Fam Zheng, Michael S. Tsirkin
This patch introduces several APIs that allow bypassing the qemu
coroutine in cases where it is not necessary, for performance's sake.
Signed-off-by: Ming Lei <ming.lei@canonical.com>
---
include/block/coroutine.h | 7 +++++++
include/block/coroutine_int.h | 5 +++++
qemu-coroutine-lock.c | 4 ++--
qemu-coroutine.c | 33 +++++++++++++++++++++++++++++++++
4 files changed, 47 insertions(+), 2 deletions(-)
diff --git a/include/block/coroutine.h b/include/block/coroutine.h
index b9b7f48..9bd64da 100644
--- a/include/block/coroutine.h
+++ b/include/block/coroutine.h
@@ -234,4 +234,11 @@ void coroutine_fn yield_until_fd_readable(int fd);
*/
void qemu_coroutine_adjust_pool_size(int n);
+/* qemu coroutine bypass APIs */
+void qemu_coroutine_set_bypass(bool bypass);
+bool qemu_coroutine_bypassed(Coroutine *self);
+bool qemu_coroutine_self_bypassed(void);
+void qemu_coroutine_set_var(void *var);
+void *qemu_coroutine_get_var(void);
+
#endif /* QEMU_COROUTINE_H */
diff --git a/include/block/coroutine_int.h b/include/block/coroutine_int.h
index f133d65..106d0b2 100644
--- a/include/block/coroutine_int.h
+++ b/include/block/coroutine_int.h
@@ -39,6 +39,11 @@ struct Coroutine {
Coroutine *caller;
QSLIST_ENTRY(Coroutine) pool_next;
+ bool bypass;
+
+ /* only used in bypass mode */
+ void *opaque;
+
/* Coroutines that should be woken up when we yield or terminate */
QTAILQ_HEAD(, Coroutine) co_queue_wakeup;
QTAILQ_ENTRY(Coroutine) co_queue_next;
diff --git a/qemu-coroutine-lock.c b/qemu-coroutine-lock.c
index e4860ae..7c69ff6 100644
--- a/qemu-coroutine-lock.c
+++ b/qemu-coroutine-lock.c
@@ -82,13 +82,13 @@ static bool qemu_co_queue_do_restart(CoQueue *queue, bool single)
bool coroutine_fn qemu_co_queue_next(CoQueue *queue)
{
- assert(qemu_in_coroutine());
+ assert(qemu_in_coroutine() || qemu_coroutine_self_bypassed());
return qemu_co_queue_do_restart(queue, true);
}
void coroutine_fn qemu_co_queue_restart_all(CoQueue *queue)
{
- assert(qemu_in_coroutine());
+ assert(qemu_in_coroutine() || qemu_coroutine_self_bypassed());
qemu_co_queue_do_restart(queue, false);
}
diff --git a/qemu-coroutine.c b/qemu-coroutine.c
index bd574aa..324f5ad 100644
--- a/qemu-coroutine.c
+++ b/qemu-coroutine.c
@@ -157,3 +157,36 @@ void qemu_coroutine_adjust_pool_size(int n)
qemu_mutex_unlock(&pool_lock);
}
+
+void qemu_coroutine_set_bypass(bool bypass)
+{
+ Coroutine *self = qemu_coroutine_self();
+
+ self->bypass = bypass;
+}
+
+bool qemu_coroutine_bypassed(Coroutine *self)
+{
+ return self->bypass;
+}
+
+bool qemu_coroutine_self_bypassed(void)
+{
+ Coroutine *self = qemu_coroutine_self();
+
+ return qemu_coroutine_bypassed(self);
+}
+
+void qemu_coroutine_set_var(void *var)
+{
+ Coroutine *self = qemu_coroutine_self();
+
+ self->opaque = var;
+}
+
+void *qemu_coroutine_get_var(void)
+{
+ Coroutine *self = qemu_coroutine_self();
+
+ return self->opaque;
+}
--
1.7.9.5
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [Qemu-devel] [PATCH RFC 02/14] qemu aio: prepare for supporting selective bypass coroutine
2014-07-16 16:31 [Qemu-devel] [PATCH RFC 00/14] dataplane: performance optimization and multi virtqueue Ming Lei
2014-07-16 16:31 ` [Qemu-devel] [PATCH RFC 01/14] qemu coroutine: support bypass mode Ming Lei
@ 2014-07-16 16:31 ` Ming Lei
2014-07-16 16:31 ` [Qemu-devel] [PATCH RFC 03/14] block: support to bypass qemu coroutine Ming Lei
` (11 subsequent siblings)
13 siblings, 0 replies; 15+ messages in thread
From: Ming Lei @ 2014-07-16 16:31 UTC (permalink / raw)
To: qemu-devel, Paolo Bonzini, Stefan Hajnoczi
Cc: Kevin Wolf, Ming Lei, Fam Zheng, Michael S. Tsirkin
If a device decides that it isn't necessary to apply a coroutine
in its performance-sensitive path, it can call
qemu_aio_set_bypass_co(ctx, true) to bypass those coroutines which
support bypass mode, and just call the function directly.
One example is virtio-blk dataplane.
Signed-off-by: Ming Lei <ming.lei@canonical.com>
---
async.c | 1 +
include/block/aio.h | 13 +++++++++++++
2 files changed, 14 insertions(+)
diff --git a/async.c b/async.c
index 34af0b2..251a074 100644
--- a/async.c
+++ b/async.c
@@ -293,6 +293,7 @@ AioContext *aio_context_new(void)
(EventNotifierHandler *)
event_notifier_test_and_clear);
timerlistgroup_init(&ctx->tlg, aio_timerlist_notify, ctx);
+ qemu_aio_set_bypass_co(ctx, false);
return ctx;
}
diff --git a/include/block/aio.h b/include/block/aio.h
index c23de3c..48d827e 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -87,6 +87,9 @@ struct AioContext {
/* TimerLists for calling timers - one per clock type */
QEMUTimerListGroup tlg;
+
+ /* support selective bypass coroutine */
+ bool bypass_co;
};
/* Used internally to synchronize aio_poll against qemu_bh_schedule. */
@@ -303,4 +306,14 @@ static inline void aio_timer_init(AioContext *ctx,
timer_init(ts, ctx->tlg.tl[type], scale, cb, opaque);
}
+static inline void qemu_aio_set_bypass_co(AioContext *ctx, bool bypass)
+{
+ ctx->bypass_co = bypass;
+}
+
+static inline bool qemu_aio_get_bypass_co(AioContext *ctx)
+{
+ return ctx->bypass_co;
+}
+
#endif
--
1.7.9.5
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [Qemu-devel] [PATCH RFC 03/14] block: support to bypass qemu coroutine
2014-07-16 16:31 [Qemu-devel] [PATCH RFC 00/14] dataplane: performance optimization and multi virtqueue Ming Lei
2014-07-16 16:31 ` [Qemu-devel] [PATCH RFC 01/14] qemu coroutine: support bypass mode Ming Lei
2014-07-16 16:31 ` [Qemu-devel] [PATCH RFC 02/14] qemu aio: prepare for supporting selective bypass coroutine Ming Lei
@ 2014-07-16 16:31 ` Ming Lei
2014-07-16 16:31 ` [Qemu-devel] [PATCH RFC 04/14] Revert "raw-posix: drop raw_get_aio_fd() since it is no longer used" Ming Lei
` (10 subsequent siblings)
13 siblings, 0 replies; 15+ messages in thread
From: Ming Lei @ 2014-07-16 16:31 UTC (permalink / raw)
To: qemu-devel, Paolo Bonzini, Stefan Hajnoczi
Cc: Kevin Wolf, Ming Lei, Fam Zheng, Michael S. Tsirkin
This patch adds bypass mode support for the coroutine
in bdrv_co_aio_rw_vector(), which is in the fast path
of lots of block devices, especially for virtio-blk dataplane.
Signed-off-by: Ming Lei <ming.lei@canonical.com>
---
block.c | 129 +++++++++++++++++++++++++++++++++++++++++++++++++++------------
1 file changed, 105 insertions(+), 24 deletions(-)
diff --git a/block.c b/block.c
index 128a14f..db7ba37 100644
--- a/block.c
+++ b/block.c
@@ -55,6 +55,21 @@ struct BdrvDirtyBitmap {
QLIST_ENTRY(BdrvDirtyBitmap) list;
};
+typedef struct CoroutineIOCompletion {
+ Coroutine *coroutine;
+ int ret;
+ bool bypass;
+ QEMUIOVector *bounced_iov;
+} CoroutineIOCompletion;
+
+typedef struct BlockDriverAIOCBCoroutine {
+ BlockDriverAIOCB common;
+ BlockRequest req;
+ bool is_write;
+ bool *done;
+ QEMUBH *bh;
+} BlockDriverAIOCBCoroutine;
+
#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
#define COROUTINE_POOL_RESERVATION 64 /* number of coroutines to reserve */
@@ -122,6 +137,21 @@ int is_windows_drive(const char *filename)
}
#endif
+static CoroutineIOCompletion *bdrv_get_co_io_comp(BlockDriverAIOCBCoroutine
+ *acb)
+{
+ return (CoroutineIOCompletion *)((void *)acb +
+ sizeof(BlockDriverAIOCBCoroutine));
+}
+
+static BlockDriverAIOCBCoroutine *bdrv_get_aio_co(CoroutineIOCompletion *co)
+{
+ assert(co->bypass);
+
+ return (BlockDriverAIOCBCoroutine *)((void *)co -
+ sizeof(BlockDriverAIOCBCoroutine));
+}
+
/* throttling disk I/O limits */
void bdrv_set_io_limits(BlockDriverState *bs,
ThrottleConfig *cfg)
@@ -3074,7 +3104,16 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
ret = drv->bdrv_co_readv(bs, sector_num, local_sectors,
&local_qiov);
- qemu_iovec_destroy(&local_qiov);
+
+ if (qemu_coroutine_self_bypassed()) {
+ CoroutineIOCompletion *pco = bdrv_get_co_io_comp(
+ (BlockDriverAIOCBCoroutine *)
+ qemu_coroutine_get_var());
+ pco->bounced_iov = g_malloc(sizeof(QEMUIOVector));
+ *pco->bounced_iov = local_qiov;
+ } else {
+ qemu_iovec_destroy(&local_qiov);
+ }
} else {
ret = 0;
}
@@ -4652,15 +4691,6 @@ static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
}
-
-typedef struct BlockDriverAIOCBCoroutine {
- BlockDriverAIOCB common;
- BlockRequest req;
- bool is_write;
- bool *done;
- QEMUBH* bh;
-} BlockDriverAIOCBCoroutine;
-
static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
{
AioContext *aio_context = bdrv_get_aio_context(blockacb->bs);
@@ -4679,6 +4709,12 @@ static const AIOCBInfo bdrv_em_co_aiocb_info = {
.cancel = bdrv_aio_co_cancel_em,
};
+static const AIOCBInfo bdrv_em_co_bypass_aiocb_info = {
+ .aiocb_size = sizeof(BlockDriverAIOCBCoroutine) +
+ sizeof(CoroutineIOCompletion),
+ .cancel = bdrv_aio_co_cancel_em,
+};
+
static void bdrv_co_em_bh(void *opaque)
{
BlockDriverAIOCBCoroutine *acb = opaque;
@@ -4698,6 +4734,12 @@ static void coroutine_fn bdrv_co_do_rw(void *opaque)
{
BlockDriverAIOCBCoroutine *acb = opaque;
BlockDriverState *bs = acb->common.bs;
+ bool bypass = qemu_coroutine_self_bypassed();
+
+ if (bypass) {
+ qemu_coroutine_set_var(acb);
+ memset(bdrv_get_co_io_comp(acb), 0, sizeof(CoroutineIOCompletion));
+ }
if (!acb->is_write) {
acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
@@ -4707,8 +4749,10 @@ static void coroutine_fn bdrv_co_do_rw(void *opaque)
acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
}
- acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
- qemu_bh_schedule(acb->bh);
+ if (!bypass) {
+ acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
+ qemu_bh_schedule(acb->bh);
+ }
}
static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
@@ -4722,8 +4766,18 @@ static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
{
Coroutine *co;
BlockDriverAIOCBCoroutine *acb;
+ const AIOCBInfo *aiocb_info;
+ bool bypass;
- acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
+ if (qemu_aio_get_bypass_co(bdrv_get_aio_context(bs))) {
+ aiocb_info = &bdrv_em_co_bypass_aiocb_info;
+ bypass = true;
+ } else {
+ aiocb_info = &bdrv_em_co_aiocb_info;
+ bypass = false;
+ }
+
+ acb = qemu_aio_get(aiocb_info, bs, cb, opaque);
acb->req.sector = sector_num;
acb->req.nb_sectors = nb_sectors;
acb->req.qiov = qiov;
@@ -4731,8 +4785,14 @@ static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
acb->is_write = is_write;
acb->done = NULL;
- co = qemu_coroutine_create(bdrv_co_do_rw);
- qemu_coroutine_enter(co, acb);
+ if (!bypass) {
+ co = qemu_coroutine_create(bdrv_co_do_rw);
+ qemu_coroutine_enter(co, acb);
+ } else {
+ qemu_coroutine_set_bypass(true);
+ bdrv_co_do_rw(acb);
+ qemu_coroutine_set_bypass(false);
+ }
return &acb->common;
}
@@ -4826,17 +4886,28 @@ void qemu_aio_release(void *p)
/**************************************************************/
/* Coroutine block device emulation */
-typedef struct CoroutineIOCompletion {
- Coroutine *coroutine;
- int ret;
-} CoroutineIOCompletion;
-
static void bdrv_co_io_em_complete(void *opaque, int ret)
{
CoroutineIOCompletion *co = opaque;
co->ret = ret;
- qemu_coroutine_enter(co->coroutine, NULL);
+
+ if (!co->bypass) {
+ qemu_coroutine_enter(co->coroutine, NULL);
+ } else {
+ BlockDriverAIOCBCoroutine *acb = bdrv_get_aio_co(co);
+
+ acb->common.cb(acb->common.opaque, ret);
+ if (acb->done) {
+ *acb->done = true;
+ }
+ qemu_aio_release(acb);
+
+ if (co->bounced_iov) {
+ qemu_iovec_destroy(co->bounced_iov);
+ g_free(co->bounced_iov);
+ }
+ }
}
static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
@@ -4846,21 +4917,31 @@ static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
CoroutineIOCompletion co = {
.coroutine = qemu_coroutine_self(),
};
+ CoroutineIOCompletion *pco = &co;
BlockDriverAIOCB *acb;
+ if (qemu_coroutine_bypassed(co.coroutine)) {
+ pco = bdrv_get_co_io_comp((BlockDriverAIOCBCoroutine *)
+ qemu_coroutine_get_var());
+ pco->bypass = true;
+ }
+
if (is_write) {
acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
- bdrv_co_io_em_complete, &co);
+ bdrv_co_io_em_complete, pco);
} else {
acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
- bdrv_co_io_em_complete, &co);
+ bdrv_co_io_em_complete, pco);
}
trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
if (!acb) {
return -EIO;
}
- qemu_coroutine_yield();
+
+ if (!pco->bypass) {
+ qemu_coroutine_yield();
+ }
return co.ret;
}
--
1.7.9.5
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [Qemu-devel] [PATCH RFC 04/14] Revert "raw-posix: drop raw_get_aio_fd() since it is no longer used"
2014-07-16 16:31 [Qemu-devel] [PATCH RFC 00/14] dataplane: performance optimization and multi virtqueue Ming Lei
` (2 preceding siblings ...)
2014-07-16 16:31 ` [Qemu-devel] [PATCH RFC 03/14] block: support to bypass qemu coroutine Ming Lei
@ 2014-07-16 16:31 ` Ming Lei
2014-07-16 16:31 ` [Qemu-devel] [PATCH RFC 05/14] dataplane: enable selective bypassing coroutine Ming Lei
` (9 subsequent siblings)
13 siblings, 0 replies; 15+ messages in thread
From: Ming Lei @ 2014-07-16 16:31 UTC (permalink / raw)
To: qemu-devel, Paolo Bonzini, Stefan Hajnoczi
Cc: Kevin Wolf, Ming Lei, Fam Zheng, Michael S. Tsirkin
This reverts commit 76ef2cf5493a215efc351f48ae7094d6c183fcac.
Reintroduce the raw_get_aio_fd() helper for enabling
coroutine bypass mode in the case of a raw image.
Signed-off-by: Ming Lei <ming.lei@canonical.com>
---
block/raw-posix.c | 34 ++++++++++++++++++++++++++++++++++
include/block/block.h | 9 +++++++++
2 files changed, 43 insertions(+)
diff --git a/block/raw-posix.c b/block/raw-posix.c
index 2bcc73d..98b9626 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -2419,6 +2419,40 @@ static BlockDriver bdrv_host_cdrom = {
};
#endif /* __FreeBSD__ */
+#ifdef CONFIG_LINUX_AIO
+/**
+ * Return the file descriptor for Linux AIO
+ *
+ * This function is a layering violation and should be removed when it becomes
+ * possible to call the block layer outside the global mutex. It allows the
+ * caller to hijack the file descriptor so I/O can be performed outside the
+ * block layer.
+ */
+int raw_get_aio_fd(BlockDriverState *bs)
+{
+ BDRVRawState *s;
+
+ if (!bs->drv) {
+ return -ENOMEDIUM;
+ }
+
+ if (bs->drv == bdrv_find_format("raw")) {
+ bs = bs->file;
+ }
+
+ /* raw-posix has several protocols so just check for raw_aio_readv */
+ if (bs->drv->bdrv_aio_readv != raw_aio_readv) {
+ return -ENOTSUP;
+ }
+
+ s = bs->opaque;
+ if (!s->use_aio) {
+ return -ENOTSUP;
+ }
+ return s->fd;
+}
+#endif /* CONFIG_LINUX_AIO */
+
static void bdrv_file_init(void)
{
/*
diff --git a/include/block/block.h b/include/block/block.h
index 32d3676..8d15693 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -482,6 +482,15 @@ void bdrv_op_block_all(BlockDriverState *bs, Error *reason);
void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason);
bool bdrv_op_blocker_is_empty(BlockDriverState *bs);
+#ifdef CONFIG_LINUX_AIO
+int raw_get_aio_fd(BlockDriverState *bs);
+#else
+static inline int raw_get_aio_fd(BlockDriverState *bs)
+{
+ return -ENOTSUP;
+}
+#endif
+
enum BlockAcctType {
BDRV_ACCT_READ,
BDRV_ACCT_WRITE,
--
1.7.9.5
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [Qemu-devel] [PATCH RFC 05/14] dataplane: enable selective bypassing coroutine
2014-07-16 16:31 [Qemu-devel] [PATCH RFC 00/14] dataplane: performance optimization and multi virtqueue Ming Lei
` (3 preceding siblings ...)
2014-07-16 16:31 ` [Qemu-devel] [PATCH RFC 04/14] Revert "raw-posix: drop raw_get_aio_fd() since it is no longer used" Ming Lei
@ 2014-07-16 16:31 ` Ming Lei
2014-07-16 16:31 ` [Qemu-devel] [PATCH RFC 06/14] qemu/obj_pool.h: introduce object allocation pool Ming Lei
` (8 subsequent siblings)
13 siblings, 0 replies; 15+ messages in thread
From: Ming Lei @ 2014-07-16 16:31 UTC (permalink / raw)
To: qemu-devel, Paolo Bonzini, Stefan Hajnoczi
Cc: Kevin Wolf, Ming Lei, Fam Zheng, Michael S. Tsirkin
This patch enables selective bypassing for the
coroutine in bdrv_co_aio_rw_vector() if the image
format is raw.
With this patch, a ~16% throughput improvement can
be observed in my laptop-based VM test, and a ~7%
improvement is observed in the server-based VM.
Signed-off-by: Ming Lei <ming.lei@canonical.com>
---
hw/block/dataplane/virtio-blk.c | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index d6ba65c..2093e4a 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -28,6 +28,7 @@ struct VirtIOBlockDataPlane {
bool started;
bool starting;
bool stopping;
+ bool raw_format;
VirtIOBlkConf *blk;
@@ -193,6 +194,8 @@ void virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *blk,
error_setg(&s->blocker, "block device is in use by data plane");
bdrv_op_block_all(blk->conf.bs, s->blocker);
+ s->raw_format = (raw_get_aio_fd(blk->conf.bs) >= 0);
+
*dataplane = s;
}
@@ -262,6 +265,10 @@ void virtio_blk_data_plane_start(VirtIOBlockDataPlane *s)
/* Kick right away to begin processing requests already in vring */
event_notifier_set(virtio_queue_get_host_notifier(vq));
+ if (s->raw_format) {
+ qemu_aio_set_bypass_co(s->ctx, true);
+ }
+
/* Get this show started by hooking up our callbacks */
aio_context_acquire(s->ctx);
aio_set_event_notifier(s->ctx, &s->host_notifier, handle_notify);
@@ -291,6 +298,9 @@ void virtio_blk_data_plane_stop(VirtIOBlockDataPlane *s)
aio_context_release(s->ctx);
+ if (s->raw_format) {
+ qemu_aio_set_bypass_co(s->ctx, false);
+ }
/* Sync vring state back to virtqueue so that non-dataplane request
* processing can continue when we disable the host notifier below.
*/
--
1.7.9.5
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [Qemu-devel] [PATCH RFC 06/14] qemu/obj_pool.h: introduce object allocation pool
2014-07-16 16:31 [Qemu-devel] [PATCH RFC 00/14] dataplane: performance optimization and multi virtqueue Ming Lei
` (4 preceding siblings ...)
2014-07-16 16:31 ` [Qemu-devel] [PATCH RFC 05/14] dataplane: enable selective bypassing coroutine Ming Lei
@ 2014-07-16 16:31 ` Ming Lei
2014-07-16 16:31 ` [Qemu-devel] [PATCH RFC 07/14] dataplane: use object pool to speed up allocation for virtio blk request Ming Lei
` (7 subsequent siblings)
13 siblings, 0 replies; 15+ messages in thread
From: Ming Lei @ 2014-07-16 16:31 UTC (permalink / raw)
To: qemu-devel, Paolo Bonzini, Stefan Hajnoczi
Cc: Kevin Wolf, Ming Lei, Fam Zheng, Michael S. Tsirkin
This patch introduces object allocation pool for speeding up
object allocation in fast path.
Signed-off-by: Ming Lei <ming.lei@canonical.com>
---
include/qemu/obj_pool.h | 64 +++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 64 insertions(+)
create mode 100644 include/qemu/obj_pool.h
diff --git a/include/qemu/obj_pool.h b/include/qemu/obj_pool.h
new file mode 100644
index 0000000..94b5f49
--- /dev/null
+++ b/include/qemu/obj_pool.h
@@ -0,0 +1,64 @@
+#ifndef QEMU_OBJ_POOL_HEAD
+#define QEMU_OBJ_POOL_HEAD
+
+typedef struct {
+ unsigned int size;
+ unsigned int cnt;
+
+ void **free_obj;
+ int free_idx;
+
+ char *objs;
+} ObjPool;
+
+static inline void obj_pool_init(ObjPool *op, void *objs_buf, void **free_objs,
+ unsigned int obj_size, unsigned cnt)
+{
+ int i;
+
+ op->objs = (char *)objs_buf;
+ op->free_obj = free_objs;
+ op->size = obj_size;
+ op->cnt = cnt;
+
+ for (i = 0; i < op->cnt; i++) {
+ op->free_obj[i] = (void *)&op->objs[i * op->size];
+ }
+ op->free_idx = op->cnt;
+}
+
+static inline void *obj_pool_get(ObjPool *op)
+{
+ void *obj;
+
+ if (!op) {
+ return NULL;
+ }
+
+ if (op->free_idx <= 0) {
+ return NULL;
+ }
+
+ obj = op->free_obj[--op->free_idx];
+ return obj;
+}
+
+static inline bool obj_pool_has_obj(ObjPool *op, void *obj)
+{
+ return op && (unsigned long)obj >= (unsigned long)&op->objs[0] &&
+ (unsigned long)obj <=
+ (unsigned long)&op->objs[(op->cnt - 1) * op->size];
+}
+
+static inline void obj_pool_put(ObjPool *op, void *obj)
+{
+ if (!op || !obj_pool_has_obj(op, obj)) {
+ return;
+ }
+
+ assert(op->free_idx < op->cnt);
+
+ op->free_obj[op->free_idx++] = obj;
+}
+
+#endif
--
1.7.9.5
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [Qemu-devel] [PATCH RFC 07/14] dataplane: use object pool to speed up allocation for virtio blk request
2014-07-16 16:31 [Qemu-devel] [PATCH RFC 00/14] dataplane: performance optimization and multi virtqueue Ming Lei
` (5 preceding siblings ...)
2014-07-16 16:31 ` [Qemu-devel] [PATCH RFC 06/14] qemu/obj_pool.h: introduce object allocation pool Ming Lei
@ 2014-07-16 16:31 ` Ming Lei
2014-07-16 16:31 ` [Qemu-devel] [PATCH RFC 08/14] linux-aio: fix submit aio as a batch Ming Lei
` (6 subsequent siblings)
13 siblings, 0 replies; 15+ messages in thread
From: Ming Lei @ 2014-07-16 16:31 UTC (permalink / raw)
To: qemu-devel, Paolo Bonzini, Stefan Hajnoczi
Cc: Kevin Wolf, Ming Lei, Fam Zheng, Michael S. Tsirkin
g_slice_new(VirtIOBlockReq), its free pair, and accessing the
instance are a bit slow since sizeof(VirtIOBlockReq) is more than
40KB, so use an object pool to speed up its allocation and release.
With this patch, a ~20% throughput improvement can
be observed in my laptop-based VM test, and a ~5%
improvement is observed in the server-based VM.
Signed-off-by: Ming Lei <ming.lei@canonical.com>
---
hw/block/dataplane/virtio-blk.c | 12 ++++++++++++
hw/block/virtio-blk.c | 13 +++++++++++--
include/hw/virtio/virtio-blk.h | 2 ++
3 files changed, 25 insertions(+), 2 deletions(-)
diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index 2093e4a..828fe99 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -24,6 +24,8 @@
#include "hw/virtio/virtio-bus.h"
#include "qom/object_interfaces.h"
+#define REQ_POOL_SZ 128
+
struct VirtIOBlockDataPlane {
bool started;
bool starting;
@@ -51,6 +53,10 @@ struct VirtIOBlockDataPlane {
Error *blocker;
void (*saved_complete_request)(struct VirtIOBlockReq *req,
unsigned char status);
+
+ VirtIOBlockReq reqs[REQ_POOL_SZ];
+ void *free_reqs[REQ_POOL_SZ];
+ ObjPool req_pool;
};
/* Raise an interrupt to signal guest, if necessary */
@@ -238,6 +244,10 @@ void virtio_blk_data_plane_start(VirtIOBlockDataPlane *s)
return;
}
+ vblk->obj_pool = &s->req_pool;
+ obj_pool_init(vblk->obj_pool, s->reqs, s->free_reqs,
+ sizeof(VirtIOBlockReq), REQ_POOL_SZ);
+
/* Set up guest notifier (irq) */
if (k->set_guest_notifiers(qbus->parent, 1, true) != 0) {
fprintf(stderr, "virtio-blk failed to set guest notifier, "
@@ -298,6 +308,8 @@ void virtio_blk_data_plane_stop(VirtIOBlockDataPlane *s)
aio_context_release(s->ctx);
+ vblk->obj_pool = NULL;
+
if (s->raw_format) {
qemu_aio_set_bypass_co(s->ctx, false);
}
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index c241c50..2a11bc4 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -31,7 +31,11 @@
VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s)
{
- VirtIOBlockReq *req = g_slice_new(VirtIOBlockReq);
+ VirtIOBlockReq *req = obj_pool_get(s->obj_pool);
+
+ if (!req) {
+ req = g_slice_new(VirtIOBlockReq);
+ }
req->dev = s;
req->qiov.size = 0;
req->next = NULL;
@@ -41,7 +45,11 @@ VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s)
void virtio_blk_free_request(VirtIOBlockReq *req)
{
if (req) {
- g_slice_free(VirtIOBlockReq, req);
+ if (obj_pool_has_obj(req->dev->obj_pool, req)) {
+ obj_pool_put(req->dev->obj_pool, req);
+ } else {
+ g_slice_free(VirtIOBlockReq, req);
+ }
}
}
@@ -801,6 +809,7 @@ static void virtio_blk_instance_init(Object *obj)
{
VirtIOBlock *s = VIRTIO_BLK(obj);
+ s->obj_pool = NULL;
object_property_add_link(obj, "iothread", TYPE_IOTHREAD,
(Object **)&s->blk.iothread,
qdev_prop_allow_set_link_before_realize,
diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h
index afb7b8d..49ac234 100644
--- a/include/hw/virtio/virtio-blk.h
+++ b/include/hw/virtio/virtio-blk.h
@@ -18,6 +18,7 @@
#include "hw/block/block.h"
#include "sysemu/iothread.h"
#include "block/block.h"
+#include "qemu/obj_pool.h"
#define TYPE_VIRTIO_BLK "virtio-blk-device"
#define VIRTIO_BLK(obj) \
@@ -135,6 +136,7 @@ typedef struct VirtIOBlock {
Notifier migration_state_notifier;
struct VirtIOBlockDataPlane *dataplane;
#endif
+ ObjPool *obj_pool;
} VirtIOBlock;
typedef struct MultiReqBuffer {
--
1.7.9.5
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [Qemu-devel] [PATCH RFC 08/14] linux-aio: fix submit aio as a batch
2014-07-16 16:31 [Qemu-devel] [PATCH RFC 00/14] dataplane: performance optimization and multi virtqueue Ming Lei
` (6 preceding siblings ...)
2014-07-16 16:31 ` [Qemu-devel] [PATCH RFC 07/14] dataplane: use object pool to speed up allocation for virtio blk request Ming Lei
@ 2014-07-16 16:31 ` Ming Lei
2014-07-16 16:31 ` [Qemu-devel] [PATCH RFC 09/14] linux-aio: increase max event to 256 Ming Lei
` (5 subsequent siblings)
13 siblings, 0 replies; 15+ messages in thread
From: Ming Lei @ 2014-07-16 16:31 UTC (permalink / raw)
To: qemu-devel, Paolo Bonzini, Stefan Hajnoczi
Cc: Kevin Wolf, Ming Lei, Fam Zheng, Michael S. Tsirkin
In the enqueue path, we can't complete requests, otherwise
"Co-routine re-entered recursively" may be caused, so this
patch fixes the issue with the ideas below:
- for -EAGAIN or partial completion, retry the submission via
an introduced event handler
- for partial completion, also update the io queue
- for any other failure, return the failure if in the enqueue path,
otherwise, abort all queued I/O
Signed-off-by: Ming Lei <ming.lei@canonical.com>
---
block/linux-aio.c | 90 ++++++++++++++++++++++++++++++++++++++++-------------
1 file changed, 68 insertions(+), 22 deletions(-)
diff --git a/block/linux-aio.c b/block/linux-aio.c
index 7ac7e8c..5eb9c92 100644
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -51,6 +51,7 @@ struct qemu_laio_state {
/* io queue for submit at batch */
LaioQueue io_q;
+ EventNotifier retry; /* handle -EAGAIN and partial completion */
};
static inline ssize_t io_event_ret(struct io_event *ev)
@@ -154,45 +155,80 @@ static void ioq_init(LaioQueue *io_q)
io_q->plugged = 0;
}
-static int ioq_submit(struct qemu_laio_state *s)
+static void abort_queue(struct qemu_laio_state *s)
+{
+ int i;
+ for (i = 0; i < s->io_q.idx; i++) {
+ struct qemu_laiocb *laiocb = container_of(s->io_q.iocbs[i],
+ struct qemu_laiocb,
+ iocb);
+ laiocb->ret = -EIO;
+ qemu_laio_process_completion(s, laiocb);
+ }
+}
+
+static int ioq_submit(struct qemu_laio_state *s, bool enqueue)
{
int ret, i = 0;
int len = s->io_q.idx;
+ int j = 0;
- do {
- ret = io_submit(s->ctx, len, s->io_q.iocbs);
- } while (i++ < 3 && ret == -EAGAIN);
+ if (!len) {
+ return 0;
+ }
- /* empty io queue */
- s->io_q.idx = 0;
+ ret = io_submit(s->ctx, len, s->io_q.iocbs);
+ if (ret == -EAGAIN) {
+ event_notifier_set(&s->retry);
+ return 0;
+ } else if (ret < 0) {
+ if (enqueue) {
+ return ret;
+ }
- if (ret < 0) {
- i = 0;
- } else {
- i = ret;
+ /* in non-queue path, all IOs have to be completed */
+ abort_queue(s);
+ ret = len;
+ } else if (ret == 0) {
+ goto out;
}
- for (; i < len; i++) {
- struct qemu_laiocb *laiocb =
- container_of(s->io_q.iocbs[i], struct qemu_laiocb, iocb);
-
- laiocb->ret = (ret < 0) ? ret : -EIO;
- qemu_laio_process_completion(s, laiocb);
+ for (i = ret; i < len; i++) {
+ s->io_q.iocbs[j++] = s->io_q.iocbs[i];
}
+
+ out:
+ /* update io queue */
+ s->io_q.idx -= ret;
+
return ret;
}
-static void ioq_enqueue(struct qemu_laio_state *s, struct iocb *iocb)
+static void ioq_submit_retry(EventNotifier *e)
+{
+ struct qemu_laio_state *s = container_of(e, struct qemu_laio_state, retry);
+
+ event_notifier_test_and_clear(e);
+ ioq_submit(s, false);
+}
+
+static int ioq_enqueue(struct qemu_laio_state *s, struct iocb *iocb)
{
unsigned int idx = s->io_q.idx;
+ if (unlikely(idx == s->io_q.size)) {
+ return -1;
+ }
+
s->io_q.iocbs[idx++] = iocb;
s->io_q.idx = idx;
- /* submit immediately if queue is full */
- if (idx == s->io_q.size) {
- ioq_submit(s);
+ /* submit immediately if queue depth is above 2/3 */
+ if (idx > s->io_q.size * 2 / 3) {
+ return ioq_submit(s, true);
}
+
+ return 0;
}
void laio_io_plug(BlockDriverState *bs, void *aio_ctx)
@@ -214,7 +250,7 @@ int laio_io_unplug(BlockDriverState *bs, void *aio_ctx, bool unplug)
}
if (s->io_q.idx > 0) {
- ret = ioq_submit(s);
+ ret = ioq_submit(s, false);
}
return ret;
@@ -258,7 +294,9 @@ BlockDriverAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
goto out_free_aiocb;
}
} else {
- ioq_enqueue(s, iocbs);
+ if (ioq_enqueue(s, iocbs) < 0) {
+ goto out_free_aiocb;
+ }
}
return &laiocb->common;
@@ -272,6 +310,7 @@ void laio_detach_aio_context(void *s_, AioContext *old_context)
struct qemu_laio_state *s = s_;
aio_set_event_notifier(old_context, &s->e, NULL);
+ aio_set_event_notifier(old_context, &s->retry, NULL);
}
void laio_attach_aio_context(void *s_, AioContext *new_context)
@@ -279,6 +318,7 @@ void laio_attach_aio_context(void *s_, AioContext *new_context)
struct qemu_laio_state *s = s_;
aio_set_event_notifier(new_context, &s->e, qemu_laio_completion_cb);
+ aio_set_event_notifier(new_context, &s->retry, ioq_submit_retry);
}
void *laio_init(void)
@@ -295,9 +335,14 @@ void *laio_init(void)
}
ioq_init(&s->io_q);
+ if (event_notifier_init(&s->retry, false) < 0) {
+ goto out_notifer_init;
+ }
return s;
+out_notifer_init:
+ io_destroy(s->ctx);
out_close_efd:
event_notifier_cleanup(&s->e);
out_free_state:
@@ -310,6 +355,7 @@ void laio_cleanup(void *s_)
struct qemu_laio_state *s = s_;
event_notifier_cleanup(&s->e);
+ event_notifier_cleanup(&s->retry);
if (io_destroy(s->ctx) != 0) {
fprintf(stderr, "%s: destroy AIO context %p failed\n",
--
1.7.9.5
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [Qemu-devel] [PATCH RFC 09/14] linux-aio: increase max event to 256
2014-07-16 16:31 [Qemu-devel] [PATCH RFC 00/14] dataplane: performance optimization and multi virtqueue Ming Lei
` (7 preceding siblings ...)
2014-07-16 16:31 ` [Qemu-devel] [PATCH RFC 08/14] linux-aio: fix submit aio as a batch Ming Lei
@ 2014-07-16 16:31 ` Ming Lei
2014-07-16 16:31 ` [Qemu-devel] [PATCH RFC 10/14] linux-aio: remove 'node' from 'struct qemu_laiocb' Ming Lei
` (4 subsequent siblings)
13 siblings, 0 replies; 15+ messages in thread
From: Ming Lei @ 2014-07-16 16:31 UTC (permalink / raw)
To: qemu-devel, Paolo Bonzini, Stefan Hajnoczi
Cc: Kevin Wolf, Ming Lei, Fam Zheng, Michael S. Tsirkin
This patch increases the maximum number of events to 256 for the
coming virtio-blk multi-virtqueue support.
Signed-off-by: Ming Lei <ming.lei@canonical.com>
---
block/linux-aio.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/block/linux-aio.c b/block/linux-aio.c
index 5eb9c92..c06a57d 100644
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -23,7 +23,7 @@
* than this we will get EAGAIN from io_submit which is communicated to
* the guest as an I/O error.
*/
-#define MAX_EVENTS 128
+#define MAX_EVENTS 256
#define MAX_QUEUED_IO 128
--
1.7.9.5
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [Qemu-devel] [PATCH RFC 10/14] linux-aio: remove 'node' from 'struct qemu_laiocb'
2014-07-16 16:31 [Qemu-devel] [PATCH RFC 00/14] dataplane: performance optimization and multi virtqueue Ming Lei
` (8 preceding siblings ...)
2014-07-16 16:31 ` [Qemu-devel] [PATCH RFC 09/14] linux-aio: increase max event to 256 Ming Lei
@ 2014-07-16 16:31 ` Ming Lei
2014-07-16 16:31 ` [Qemu-devel] [PATCH RFC 11/14] hw/virtio-pci: introduce num_queues property Ming Lei
` (3 subsequent siblings)
13 siblings, 0 replies; 15+ messages in thread
From: Ming Lei @ 2014-07-16 16:31 UTC (permalink / raw)
To: qemu-devel, Paolo Bonzini, Stefan Hajnoczi
Cc: Kevin Wolf, Ming Lei, Fam Zheng, Michael S. Tsirkin
No one uses the 'node' field any more, so remove it
from 'struct qemu_laiocb'; this saves 16 bytes
in the struct on a 64-bit arch.
Signed-off-by: Ming Lei <ming.lei@canonical.com>
---
block/linux-aio.c | 1 -
1 file changed, 1 deletion(-)
diff --git a/block/linux-aio.c b/block/linux-aio.c
index c06a57d..337f879 100644
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -35,7 +35,6 @@ struct qemu_laiocb {
size_t nbytes;
QEMUIOVector *qiov;
bool is_read;
- QLIST_ENTRY(qemu_laiocb) node;
};
typedef struct {
--
1.7.9.5
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [Qemu-devel] [PATCH RFC 11/14] hw/virtio-pci: introduce num_queues property
2014-07-16 16:31 [Qemu-devel] [PATCH RFC 00/14] dataplane: performance optimization and multi virtqueue Ming Lei
` (9 preceding siblings ...)
2014-07-16 16:31 ` [Qemu-devel] [PATCH RFC 10/14] linux-aio: remove 'node' from 'struct qemu_laiocb' Ming Lei
@ 2014-07-16 16:31 ` Ming Lei
2014-07-16 16:31 ` [Qemu-devel] [PATCH RFC 12/14] hw/virtio/virtio-blk.h: introduce VIRTIO_BLK_F_MQ Ming Lei
` (2 subsequent siblings)
13 siblings, 0 replies; 15+ messages in thread
From: Ming Lei @ 2014-07-16 16:31 UTC (permalink / raw)
To: qemu-devel, Paolo Bonzini, Stefan Hajnoczi
Cc: Kevin Wolf, Ming Lei, Fam Zheng, Michael S. Tsirkin
This patch introduces the 'num_queues' parameter and
prepares for supporting multiple vqs in virtio-blk.
Signed-off-by: Ming Lei <ming.lei@canonical.com>
---
hw/block/virtio-blk.c | 1 +
include/hw/virtio/virtio-blk.h | 1 +
2 files changed, 2 insertions(+)
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index 2a11bc4..ab99156 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -826,6 +826,7 @@ static Property virtio_blk_properties[] = {
#endif
#ifdef CONFIG_VIRTIO_BLK_DATA_PLANE
DEFINE_PROP_BIT("x-data-plane", VirtIOBlock, blk.data_plane, 0, false),
+ DEFINE_PROP_UINT32("num_queues", VirtIOBlock, blk.num_queues, 1),
#endif
DEFINE_PROP_END_OF_LIST(),
};
diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h
index 49ac234..45f8894 100644
--- a/include/hw/virtio/virtio-blk.h
+++ b/include/hw/virtio/virtio-blk.h
@@ -114,6 +114,7 @@ struct VirtIOBlkConf
uint32_t scsi;
uint32_t config_wce;
uint32_t data_plane;
+ uint32_t num_queues;
};
struct VirtIOBlockDataPlane;
--
1.7.9.5
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [Qemu-devel] [PATCH RFC 12/14] hw/virtio/virtio-blk.h: introduce VIRTIO_BLK_F_MQ
2014-07-16 16:31 [Qemu-devel] [PATCH RFC 00/14] dataplane: performance optimization and multi virtqueue Ming Lei
` (10 preceding siblings ...)
2014-07-16 16:31 ` [Qemu-devel] [PATCH RFC 11/14] hw/virtio-pci: introduce num_queues property Ming Lei
@ 2014-07-16 16:31 ` Ming Lei
2014-07-16 16:31 ` [Qemu-devel] [PATCH RFC 13/14] hw/block/virtio-blk: create num_queues vqs if dataplane is enabled Ming Lei
2014-07-16 16:31 ` [Qemu-devel] [PATCH RFC 14/14] dataplane: virtio-blk: support multi virtqueue Ming Lei
13 siblings, 0 replies; 15+ messages in thread
From: Ming Lei @ 2014-07-16 16:31 UTC (permalink / raw)
To: qemu-devel, Paolo Bonzini, Stefan Hajnoczi
Cc: Kevin Wolf, Ming Lei, Fam Zheng, Michael S. Tsirkin
Prepare for supporting multiple vqs per virtio-blk device.
Signed-off-by: Ming Lei <ming.lei@canonical.com>
---
include/hw/virtio/virtio-blk.h | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h
index 45f8894..ad70c9a 100644
--- a/include/hw/virtio/virtio-blk.h
+++ b/include/hw/virtio/virtio-blk.h
@@ -42,6 +42,12 @@
#define VIRTIO_BLK_F_TOPOLOGY 10 /* Topology information is available */
#define VIRTIO_BLK_F_CONFIG_WCE 11 /* write cache configurable */
+/*
+ * support multi vqs, and virtio_blk_config.num_queues is only
+ * available when this feature is enabled
+ */
+#define VIRTIO_BLK_F_MQ 12
+
#define VIRTIO_BLK_ID_BYTES 20 /* ID string length */
struct virtio_blk_config
@@ -58,6 +64,8 @@ struct virtio_blk_config
uint16_t min_io_size;
uint32_t opt_io_size;
uint8_t wce;
+ uint8_t unused;
+ uint16_t num_queues; /* must be at the end */
} QEMU_PACKED;
/* These two define direction. */
--
1.7.9.5
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [Qemu-devel] [PATCH RFC 13/14] hw/block/virtio-blk: create num_queues vqs if dataplane is enabled
2014-07-16 16:31 [Qemu-devel] [PATCH RFC 00/14] dataplane: performance optimization and multi virtqueue Ming Lei
` (11 preceding siblings ...)
2014-07-16 16:31 ` [Qemu-devel] [PATCH RFC 12/14] hw/virtio/virtio-blk.h: introduce VIRTIO_BLK_F_MQ Ming Lei
@ 2014-07-16 16:31 ` Ming Lei
2014-07-16 16:31 ` [Qemu-devel] [PATCH RFC 14/14] dataplane: virtio-blk: support multi virtqueue Ming Lei
13 siblings, 0 replies; 15+ messages in thread
From: Ming Lei @ 2014-07-16 16:31 UTC (permalink / raw)
To: qemu-devel, Paolo Bonzini, Stefan Hajnoczi
Cc: Kevin Wolf, Ming Lei, Fam Zheng, Michael S. Tsirkin
Now we only support multi vqs for dataplane case.
Signed-off-by: Ming Lei <ming.lei@canonical.com>
---
hw/block/virtio-blk.c | 16 +++++++++++++++-
include/hw/virtio/virtio-blk.h | 3 +++
2 files changed, 18 insertions(+), 1 deletion(-)
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index ab99156..160b021 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -556,6 +556,7 @@ static void virtio_blk_update_config(VirtIODevice *vdev, uint8_t *config)
blkcfg.physical_block_exp = get_physical_block_exp(s->conf);
blkcfg.alignment_offset = 0;
blkcfg.wce = bdrv_enable_write_cache(s->bs);
+ stw_p(&blkcfg.num_queues, s->blk.num_queues);
memcpy(config, &blkcfg, sizeof(struct virtio_blk_config));
}
@@ -590,6 +591,12 @@ static uint32_t virtio_blk_get_features(VirtIODevice *vdev, uint32_t features)
if (bdrv_is_read_only(s->bs))
features |= 1 << VIRTIO_BLK_F_RO;
+#ifdef CONFIG_VIRTIO_BLK_DATA_PLANE
+ if (s->blk.num_queues > 1) {
+ features |= 1 << VIRTIO_BLK_F_MQ;
+ }
+#endif
+
return features;
}
@@ -739,8 +746,13 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp)
#ifdef CONFIG_VIRTIO_BLK_DATA_PLANE
Error *err = NULL;
#endif
+ int i;
static int virtio_blk_id;
+#ifndef CONFIG_VIRTIO_BLK_DATA_PLANE
+ blk->num_queues = 1;
+#endif
+
if (!blk->conf.bs) {
error_setg(errp, "drive property not set");
return;
@@ -765,7 +777,9 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp)
s->rq = NULL;
s->sector_mask = (s->conf->logical_block_size / BDRV_SECTOR_SIZE) - 1;
- s->vq = virtio_add_queue(vdev, 128, virtio_blk_handle_output);
+ for (i = 0; i < blk->num_queues; i++)
+ s->vqs[i] = virtio_add_queue(vdev, 128, virtio_blk_handle_output);
+ s->vq = s->vqs[0];
s->complete_request = virtio_blk_complete_request;
#ifdef CONFIG_VIRTIO_BLK_DATA_PLANE
virtio_blk_data_plane_create(vdev, blk, &s->dataplane, &err);
diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h
index ad70c9a..91489b0 100644
--- a/include/hw/virtio/virtio-blk.h
+++ b/include/hw/virtio/virtio-blk.h
@@ -50,6 +50,8 @@
#define VIRTIO_BLK_ID_BYTES 20 /* ID string length */
+#define VIRTIO_BLK_MAX_VQS 16 /* max virtio queues supported now */
+
struct virtio_blk_config
{
uint64_t capacity;
@@ -132,6 +134,7 @@ typedef struct VirtIOBlock {
VirtIODevice parent_obj;
BlockDriverState *bs;
VirtQueue *vq;
+ VirtQueue *vqs[VIRTIO_BLK_MAX_VQS];
void *rq;
QEMUBH *bh;
BlockConf *conf;
--
1.7.9.5
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [Qemu-devel] [PATCH RFC 14/14] dataplane: virtio-blk: support multi virtqueue
2014-07-16 16:31 [Qemu-devel] [PATCH RFC 00/14] dataplane: performance optimization and multi virtqueue Ming Lei
` (12 preceding siblings ...)
2014-07-16 16:31 ` [Qemu-devel] [PATCH RFC 13/14] hw/block/virtio-blk: create num_queues vqs if dataplane is enabled Ming Lei
@ 2014-07-16 16:31 ` Ming Lei
13 siblings, 0 replies; 15+ messages in thread
From: Ming Lei @ 2014-07-16 16:31 UTC (permalink / raw)
To: qemu-devel, Paolo Bonzini, Stefan Hajnoczi
Cc: Kevin Wolf, Ming Lei, Fam Zheng, Michael S. Tsirkin
This patch supports handling host notifications from
multiple virtqueues, but still processes/submits I/O
in a single iothread.
Currently this patch brings the improvement below
with two virtqueues (against a single virtqueue):
---------------------------------------------------
| VM in server host | VM in laptop host
---------------------------------------------------
JOBS=2 | +8% | -11%
---------------------------------------------------
JOBS=4 | +64% | +29%
---------------------------------------------------
The reason is that commit 580b6b2aa2 (dataplane: use the QEMU
block layer for I/O) doubles the average submission time for a
single request, so iothread performance decreases.
Based on QEMU 2.0, only this single patch can achieve
very good improvement:
http://marc.info/?l=linux-api&m=140377573830230&w=2
So hope QEMU block layer can get optimized for linux aio,
or maybe a fast path is needed for linux aio.
Signed-off-by: Ming Lei <ming.lei@canonical.com>
---
hw/block/dataplane/virtio-blk.c | 209 ++++++++++++++++++++++++++++-----------
include/hw/virtio/virtio-blk.h | 1 +
2 files changed, 153 insertions(+), 57 deletions(-)
diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index 828fe99..bd66274 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -26,6 +26,11 @@
#define REQ_POOL_SZ 128
+typedef struct {
+ EventNotifier notifier;
+ VirtIOBlockDataPlane *s;
+} VirtIOBlockNotifier;
+
struct VirtIOBlockDataPlane {
bool started;
bool starting;
@@ -35,9 +40,10 @@ struct VirtIOBlockDataPlane {
VirtIOBlkConf *blk;
VirtIODevice *vdev;
- Vring vring; /* virtqueue vring */
- EventNotifier *guest_notifier; /* irq */
- QEMUBH *bh; /* bh for guest notification */
+ Vring *vring; /* virtqueue vring */
+ EventNotifier **guest_notifier; /* irq */
+ uint64_t pending_guest_notifier; /* pending guest notifer for vq */
+ QEMUBH *bh; /* bh for guest notification */
/* Note that these EventNotifiers are assigned by value. This is
* fine as long as you do not call event_notifier_cleanup on them
@@ -47,7 +53,9 @@ struct VirtIOBlockDataPlane {
IOThread *iothread;
IOThread internal_iothread_obj;
AioContext *ctx;
- EventNotifier host_notifier; /* doorbell */
+ VirtIOBlockNotifier *host_notifier; /* doorbell */
+ uint64_t pending_host_notifier; /* pending host notifer for vq */
+ QEMUBH *host_notifier_bh; /* for handle host notifier */
/* Operation blocker on BDS */
Error *blocker;
@@ -60,20 +68,26 @@ struct VirtIOBlockDataPlane {
};
/* Raise an interrupt to signal guest, if necessary */
-static void notify_guest(VirtIOBlockDataPlane *s)
+static void notify_guest(VirtIOBlockDataPlane *s, unsigned int qid)
{
- if (!vring_should_notify(s->vdev, &s->vring)) {
- return;
+ if (vring_should_notify(s->vdev, &s->vring[qid])) {
+ event_notifier_set(s->guest_notifier[qid]);
}
-
- event_notifier_set(s->guest_notifier);
}
static void notify_guest_bh(void *opaque)
{
VirtIOBlockDataPlane *s = opaque;
+ unsigned int qid;
+ uint64_t pending = s->pending_guest_notifier;
+
+ s->pending_guest_notifier = 0;
- notify_guest(s);
+ while ((qid = ffsl(pending))) {
+ qid--;
+ notify_guest(s, qid);
+ pending &= ~(1 << qid);
+ }
}
static void complete_request_vring(VirtIOBlockReq *req, unsigned char status)
@@ -81,7 +95,7 @@ static void complete_request_vring(VirtIOBlockReq *req, unsigned char status)
VirtIOBlockDataPlane *s = req->dev->dataplane;
stb_p(&req->in->status, status);
- vring_push(&req->dev->dataplane->vring, &req->elem,
+ vring_push(&s->vring[req->qid], &req->elem,
req->qiov.size + sizeof(*req->in));
/* Suppress notification to guest by BH and its scheduled
@@ -90,17 +104,15 @@ static void complete_request_vring(VirtIOBlockReq *req, unsigned char status)
* executed in dataplane aio context even after it is
* stopped, so needn't worry about notification loss with BH.
*/
+ assert(req->qid < 64);
+ s->pending_guest_notifier |= (1 << req->qid);
qemu_bh_schedule(s->bh);
}
-static void handle_notify(EventNotifier *e)
+static void process_vq_notify(VirtIOBlockDataPlane *s, unsigned short qid)
{
- VirtIOBlockDataPlane *s = container_of(e, VirtIOBlockDataPlane,
- host_notifier);
VirtIOBlock *vblk = VIRTIO_BLK(s->vdev);
- event_notifier_test_and_clear(&s->host_notifier);
- bdrv_io_plug(s->blk->conf.bs);
for (;;) {
MultiReqBuffer mrb = {
.num_writes = 0,
@@ -108,12 +120,13 @@ static void handle_notify(EventNotifier *e)
int ret;
/* Disable guest->host notifies to avoid unnecessary vmexits */
- vring_disable_notification(s->vdev, &s->vring);
+ vring_disable_notification(s->vdev, &s->vring[qid]);
for (;;) {
VirtIOBlockReq *req = virtio_blk_alloc_request(vblk);
- ret = vring_pop(s->vdev, &s->vring, &req->elem);
+ req->qid = qid;
+ ret = vring_pop(s->vdev, &s->vring[qid], &req->elem);
if (ret < 0) {
virtio_blk_free_request(req);
break; /* no more requests */
@@ -132,16 +145,48 @@ static void handle_notify(EventNotifier *e)
/* Re-enable guest->host notifies and stop processing the vring.
* But if the guest has snuck in more descriptors, keep processing.
*/
- if (vring_enable_notification(s->vdev, &s->vring)) {
+ if (vring_enable_notification(s->vdev, &s->vring[qid])) {
break;
}
} else { /* fatal error */
break;
}
}
+}
+
+static void process_notify(void *opaque)
+{
+ VirtIOBlockDataPlane *s = opaque;
+ unsigned int qid;
+ uint64_t pending = s->pending_host_notifier;
+
+ s->pending_host_notifier = 0;
+
+ bdrv_io_plug(s->blk->conf.bs);
+ while ((qid = ffsl(pending))) {
+ qid--;
+ process_vq_notify(s, qid);
+ pending &= ~(1 << qid);
+ }
bdrv_io_unplug(s->blk->conf.bs);
}
+/* TODO: handle requests from other vqs together */
+static void handle_notify(EventNotifier *e)
+{
+ VirtIOBlockNotifier *n = container_of(e, VirtIOBlockNotifier,
+ notifier);
+ VirtIOBlockDataPlane *s = n->s;
+ unsigned int qid = n - &s->host_notifier[0];
+
+ assert(qid < 64);
+
+ event_notifier_test_and_clear(e);
+
+ s->pending_host_notifier |= (1 << qid);
+ qemu_bh_schedule(s->host_notifier_bh);
+}
+
/* Context: QEMU global mutex held */
void virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *blk,
VirtIOBlockDataPlane **dataplane,
@@ -197,6 +242,11 @@ void virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *blk,
s->ctx = iothread_get_aio_context(s->iothread);
s->bh = aio_bh_new(s->ctx, notify_guest_bh, s);
+ s->vring = g_new0(Vring, blk->num_queues);
+ s->guest_notifier = g_new(EventNotifier *, blk->num_queues);
+ s->host_notifier = g_new(VirtIOBlockNotifier, blk->num_queues);
+ s->host_notifier_bh = aio_bh_new(s->ctx, process_notify, s);
+
error_setg(&s->blocker, "block device is in use by data plane");
bdrv_op_block_all(blk->conf.bs, s->blocker);
@@ -217,16 +267,83 @@ void virtio_blk_data_plane_destroy(VirtIOBlockDataPlane *s)
error_free(s->blocker);
object_unref(OBJECT(s->iothread));
qemu_bh_delete(s->bh);
+ qemu_bh_delete(s->host_notifier_bh);
+ g_free(s->vring);
+ g_free(s->guest_notifier);
+ g_free(s->host_notifier);
g_free(s);
}
+static int pre_start_vq(VirtIOBlockDataPlane *s, BusState *qbus,
+ VirtioBusClass *k)
+{
+ int i;
+ int num = s->blk->num_queues;
+ VirtQueue *vq[num];
+
+ for (i = 0; i < num; i++) {
+ vq[i] = virtio_get_queue(s->vdev, i);
+ if (!vring_setup(&s->vring[i], s->vdev, i)) {
+ return -1;
+ }
+ }
+
+ /* Set up guest notifier (irq) */
+ if (k->set_guest_notifiers(qbus->parent, num, true) != 0) {
+ fprintf(stderr, "virtio-blk failed to set guest notifier, "
+ "ensure -enable-kvm is set\n");
+ exit(1);
+ }
+
+ for (i = 0; i < num; i++)
+ s->guest_notifier[i] = virtio_queue_get_guest_notifier(vq[i]);
+ s->pending_guest_notifier = 0;
+
+ /* Set up virtqueue notify */
+ for (i = 0; i < num; i++) {
+ if (k->set_host_notifier(qbus->parent, i, true) != 0) {
+ fprintf(stderr, "virtio-blk failed to set host notifier\n");
+ exit(1);
+ }
+ s->host_notifier[i].notifier = *virtio_queue_get_host_notifier(vq[i]);
+ s->host_notifier[i].s = s;
+ }
+ s->pending_host_notifier = 0;
+
+ return 0;
+}
+
+static void post_start_vq(VirtIOBlockDataPlane *s)
+{
+ int i;
+ int num = s->blk->num_queues;
+
+ for (i = 0; i < num; i++) {
+ VirtQueue *vq;
+ vq = virtio_get_queue(s->vdev, i);
+
+ /* Kick right away to begin processing requests already in vring */
+ event_notifier_set(virtio_queue_get_host_notifier(vq));
+ }
+
+ if (s->raw_format) {
+ qemu_aio_set_bypass_co(s->ctx, true);
+ }
+
+ /* Get this show started by hooking up our callbacks */
+ aio_context_acquire(s->ctx);
+ for (i = 0; i < num; i++)
+ aio_set_event_notifier(s->ctx, &s->host_notifier[i].notifier,
+ handle_notify);
+ aio_context_release(s->ctx);
+}
+
/* Context: QEMU global mutex held */
void virtio_blk_data_plane_start(VirtIOBlockDataPlane *s)
{
BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(s->vdev)));
VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
VirtIOBlock *vblk = VIRTIO_BLK(s->vdev);
- VirtQueue *vq;
if (s->started) {
return;
@@ -238,51 +355,24 @@ void virtio_blk_data_plane_start(VirtIOBlockDataPlane *s)
s->starting = true;
- vq = virtio_get_queue(s->vdev, 0);
- if (!vring_setup(&s->vring, s->vdev, 0)) {
- s->starting = false;
- return;
- }
-
vblk->obj_pool = &s->req_pool;
obj_pool_init(vblk->obj_pool, s->reqs, s->free_reqs,
sizeof(VirtIOBlockReq), REQ_POOL_SZ);
- /* Set up guest notifier (irq) */
- if (k->set_guest_notifiers(qbus->parent, 1, true) != 0) {
- fprintf(stderr, "virtio-blk failed to set guest notifier, "
- "ensure -enable-kvm is set\n");
- exit(1);
- }
- s->guest_notifier = virtio_queue_get_guest_notifier(vq);
-
- /* Set up virtqueue notify */
- if (k->set_host_notifier(qbus->parent, 0, true) != 0) {
- fprintf(stderr, "virtio-blk failed to set host notifier\n");
- exit(1);
- }
- s->host_notifier = *virtio_queue_get_host_notifier(vq);
-
s->saved_complete_request = vblk->complete_request;
vblk->complete_request = complete_request_vring;
+ if (pre_start_vq(s, qbus, k)) {
+ s->starting = false;
+ return;
+ }
+
s->starting = false;
s->started = true;
trace_virtio_blk_data_plane_start(s);
bdrv_set_aio_context(s->blk->conf.bs, s->ctx);
-
- /* Kick right away to begin processing requests already in vring */
- event_notifier_set(virtio_queue_get_host_notifier(vq));
-
- if (s->raw_format) {
- qemu_aio_set_bypass_co(s->ctx, true);
- }
-
- /* Get this show started by hooking up our callbacks */
- aio_context_acquire(s->ctx);
- aio_set_event_notifier(s->ctx, &s->host_notifier, handle_notify);
- aio_context_release(s->ctx);
+ post_start_vq(s);
}
/* Context: QEMU global mutex held */
@@ -291,6 +381,8 @@ void virtio_blk_data_plane_stop(VirtIOBlockDataPlane *s)
BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(s->vdev)));
VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
VirtIOBlock *vblk = VIRTIO_BLK(s->vdev);
+ int i;
+ int num = s->blk->num_queues;
if (!s->started || s->stopping) {
return;
}
@@ -301,7 +393,8 @@ void virtio_blk_data_plane_stop(VirtIOBlockDataPlane *s)
aio_context_acquire(s->ctx);
/* Stop notifications for new requests from guest */
- aio_set_event_notifier(s->ctx, &s->host_notifier, NULL);
+ for (i = 0; i < num; i++)
+ aio_set_event_notifier(s->ctx, &s->host_notifier[i].notifier, NULL);
/* Drain and switch bs back to the QEMU main loop */
bdrv_set_aio_context(s->blk->conf.bs, qemu_get_aio_context());
@@ -316,12 +409,14 @@ void virtio_blk_data_plane_stop(VirtIOBlockDataPlane *s)
/* Sync vring state back to virtqueue so that non-dataplane request
* processing can continue when we disable the host notifier below.
*/
- vring_teardown(&s->vring, s->vdev, 0);
+ for (i = 0; i < num; i++)
+ vring_teardown(&s->vring[i], s->vdev, 0);
- k->set_host_notifier(qbus->parent, 0, false);
+ for (i = 0; i < num; i++)
+ k->set_host_notifier(qbus->parent, i, false);
/* Clean up guest notifier (irq) */
- k->set_guest_notifiers(qbus->parent, 1, false);
+ k->set_guest_notifiers(qbus->parent, num, false);
s->started = false;
s->stopping = false;
diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h
index 91489b0..e7795e4 100644
--- a/include/hw/virtio/virtio-blk.h
+++ b/include/hw/virtio/virtio-blk.h
@@ -164,6 +164,7 @@ typedef struct VirtIOBlockReq {
QEMUIOVector qiov;
struct VirtIOBlockReq *next;
BlockAcctCookie acct;
+ unsigned qid;
} VirtIOBlockReq;
VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s);
--
1.7.9.5
^ permalink raw reply related [flat|nested] 15+ messages in thread