* [Qemu-devel] [PATCH 1/3] block: introduce IO queue APIs
2014-06-30 9:49 [Qemu-devel] [PATCH 0/3] block: introduce submit I/O at batch Ming Lei
@ 2014-06-30 9:49 ` Ming Lei
2014-06-30 9:49 ` [Qemu-devel] [PATCH 2/3] block: linux-aio: support submit io_queue Ming Lei
` (2 subsequent siblings)
3 siblings, 0 replies; 8+ messages in thread
From: Ming Lei @ 2014-06-30 9:49 UTC (permalink / raw)
To: Peter Maydell, qemu-devel, Paolo Bonzini, Stefan Hajnoczi
Cc: Kevin Wolf, Ming Lei, Fam Zheng, Michael S. Tsirkin
This patch introduces IO queue related APIs so that following
patches can support queuing I/O requests and submitting them
at batch for improving I/O performance.
Signed-off-by: Ming Lei <ming.lei@canonical.com>
---
aio-posix.c | 13 ++++++++
block.c | 78 +++++++++++++++++++++++++++++++++++++++++++++
include/block/aio.h | 12 +++++++
include/block/block.h | 6 ++++
include/block/block_int.h | 3 ++
5 files changed, 112 insertions(+)
diff --git a/aio-posix.c b/aio-posix.c
index f921d4f..3e065c1 100644
--- a/aio-posix.c
+++ b/aio-posix.c
@@ -240,3 +240,16 @@ bool aio_poll(AioContext *ctx, bool blocking)
return progress;
}
+
+/* IO queue support */
+void aio_init_io_queue(AioContext *ctx, unsigned int size)
+{
+ ctx->io_queue.iocbs = g_new0(void *, size);
+ ctx->io_queue.size = size;
+ ctx->io_queue.idx = 0;
+}
+
+void aio_exit_io_queue(AioContext *ctx)
+{
+ g_free(ctx->io_queue.iocbs);
+}
diff --git a/block.c b/block.c
index 217f523..c4a6f8b 100644
--- a/block.c
+++ b/block.c
@@ -1910,6 +1910,7 @@ void bdrv_drain_all(void)
bool bs_busy;
aio_context_acquire(aio_context);
+ bdrv_io_unplug(bs);
bdrv_start_throttled_reqs(bs);
bs_busy = bdrv_requests_pending(bs);
bs_busy |= aio_poll(aio_context, bs_busy);
@@ -5774,3 +5775,80 @@ bool bdrv_is_first_non_filter(BlockDriverState *candidate)
return false;
}
+
+/* IO queue APIs */
+void bdrv_init_io_queue(BlockDriverState *bs, unsigned int size)
+{
+ aio_init_io_queue(bs->aio_context, size);
+}
+
+void bdrv_exit_io_queue(BlockDriverState *bs)
+{
+ aio_exit_io_queue(bs->aio_context);
+}
+
+/* We think one block driver supports feature of io queue if
+ * they implement callback of .bdrv_submit_io_queue
+ */
+static bool __bdrv_support_io_queue(BlockDriverState *bs)
+{
+ if (bs && bs->drv && bs->drv->bdrv_submit_io_queue)
+ return true;
+ return false;
+}
+bool bdrv_support_io_queue(BlockDriverState *bs)
+{
+ if (__bdrv_support_io_queue(bs->file))
+ return true;
+ if (__bdrv_support_io_queue(bs->backing_hd))
+ return true;
+ return false;
+}
+
+static void __bdrv_io_unplug(BlockDriverState *bs)
+{
+ if (!bdrv_support_io_queue(bs))
+ return;
+
+ if (!bs->aio_context->io_queue.idx)
+ return;
+
+ if (__bdrv_support_io_queue(bs->file))
+ bs->file->drv->bdrv_submit_io_queue(bs->file);
+ if (__bdrv_support_io_queue(bs->backing_hd))
+ bs->backing_hd->drv->bdrv_submit_io_queue(bs->backing_hd);
+ bs->aio_context->io_queue.idx = 0;
+}
+
+/* BlockDriver may call this function for queuing current IO request
+ * represented by iocb to io_queue, and will submit them at batch.
+ */
+void bdrv_queue_io(BlockDriverState *bs, void *iocb)
+{
+ unsigned int idx = bs->aio_context->io_queue.idx;
+
+ bs->aio_context->io_queue.iocbs[idx++] = iocb;
+ bs->aio_context->io_queue.idx = idx;
+
+ /* unplug immediately if queue is full */
+ if (idx == bs->aio_context->io_queue.size)
+ __bdrv_io_unplug(bs);
+}
+
+/* Block device calls this function to let driver queue IO request
+ * and submit them at batch later, but it is just a hint because
+ * block driver may not support the feature.
+ */
+void bdrv_io_plug(BlockDriverState *bs)
+{
+ bs->aio_context->io_plugged = true;
+}
+
+/* Block device calls this function to ask driver to submit
+ * all requests in io queue immediatelly.
+ */
+void bdrv_io_unplug(BlockDriverState *bs)
+{
+ __bdrv_io_unplug(bs);
+ bs->aio_context->io_plugged = false;
+}
diff --git a/include/block/aio.h b/include/block/aio.h
index a92511b..b7fdcfb 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -45,6 +45,12 @@ typedef struct AioHandler AioHandler;
typedef void QEMUBHFunc(void *opaque);
typedef void IOHandler(void *opaque);
+typedef struct AioIOQueue {
+ void **iocbs;
+ unsigned int size;
+ unsigned int idx;
+} AioIOQueue;
+
struct AioContext {
GSource source;
@@ -81,6 +87,10 @@ struct AioContext {
/* TimerLists for calling timers - one per clock type */
QEMUTimerListGroup tlg;
+
+ /* IOQueue support */
+ AioIOQueue io_queue;
+ bool io_plugged;
};
/**
@@ -307,4 +317,6 @@ static inline void aio_timer_init(AioContext *ctx,
timer_init(ts, ctx->tlg.tl[type], scale, cb, opaque);
}
+void aio_init_io_queue(AioContext *ctx, unsigned int size);
+void aio_exit_io_queue(AioContext *ctx);
#endif
diff --git a/include/block/block.h b/include/block/block.h
index d0baf4f..702b79a 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -578,4 +578,10 @@ AioContext *bdrv_get_aio_context(BlockDriverState *bs);
*/
void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context);
+void bdrv_init_io_queue(BlockDriverState *bs, unsigned int size);
+void bdrv_exit_io_queue(BlockDriverState *bs);
+bool bdrv_support_io_queue(BlockDriverState *bs);
+void bdrv_queue_io(BlockDriverState *bs, void *iocb);
+void bdrv_io_plug(BlockDriverState *bs);
+void bdrv_io_unplug(BlockDriverState *bs);
#endif
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 715c761..a24fe2d 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -257,6 +257,9 @@ struct BlockDriver {
void (*bdrv_attach_aio_context)(BlockDriverState *bs,
AioContext *new_context);
+ /* io_queue */
+ int (*bdrv_submit_io_queue)(BlockDriverState *bs);
+
QLIST_ENTRY(BlockDriver) list;
};
--
1.7.9.5
^ permalink raw reply related [flat|nested] 8+ messages in thread* [Qemu-devel] [PATCH 2/3] block: linux-aio: support submit io_queue
2014-06-30 9:49 [Qemu-devel] [PATCH 0/3] block: introduce submit I/O at batch Ming Lei
2014-06-30 9:49 ` [Qemu-devel] [PATCH 1/3] block: introduce IO queue APIs Ming Lei
@ 2014-06-30 9:49 ` Ming Lei
2014-06-30 9:49 ` [Qemu-devel] [PATCH 3/3] dataplane: submit I/O at batch Ming Lei
2014-06-30 11:10 ` [Qemu-devel] [PATCH 0/3] block: introduce " Paolo Bonzini
3 siblings, 0 replies; 8+ messages in thread
From: Ming Lei @ 2014-06-30 9:49 UTC (permalink / raw)
To: Peter Maydell, qemu-devel, Paolo Bonzini, Stefan Hajnoczi
Cc: Kevin Wolf, Ming Lei, Fam Zheng, Michael S. Tsirkin
This patch implements .bdrv_submit_io_queue callback
for linux-aio Block Drivers, so that submitting I/O
at batch can be supported on linux-aio.
Signed-off-by: Ming Lei <ming.lei@canonical.com>
---
block/linux-aio.c | 32 ++++++++++++++++++++++++++++++--
block/raw-aio.h | 1 +
block/raw-posix.c | 16 ++++++++++++++++
3 files changed, 47 insertions(+), 2 deletions(-)
diff --git a/block/linux-aio.c b/block/linux-aio.c
index f0a2c08..12f56d8 100644
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -11,6 +11,7 @@
#include "block/aio.h"
#include "qemu/queue.h"
#include "block/raw-aio.h"
+#include "block/block_int.h"
#include "qemu/event_notifier.h"
#include <libaio.h>
@@ -135,6 +136,29 @@ static const AIOCBInfo laio_aiocb_info = {
.cancel = laio_cancel,
};
+int laio_submit_io_queue(BlockDriverState *bs, void *aio_ctx)
+{
+ struct qemu_laio_state *s = aio_ctx;
+ int ret, i;
+
+ while (1) {
+ ret = io_submit(s->ctx, bs->aio_context->io_queue.idx,
+ (struct iocb **)bs->aio_context->io_queue.iocbs);
+ if (ret != -EAGAIN)
+ break;
+ }
+
+ if (ret >= 0)
+ return 0;
+
+ for (i = 0; i < bs->aio_context->io_queue.idx; i++) {
+ struct qemu_laiocb *laiocb =
+ container_of(bs->aio_context->io_queue.iocbs[i], struct qemu_laiocb, iocb);
+ qemu_aio_release(laiocb);
+ }
+ return ret;
+}
+
BlockDriverAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque, int type)
@@ -168,8 +192,12 @@ BlockDriverAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
}
io_set_eventfd(&laiocb->iocb, event_notifier_get_fd(&s->e));
- if (io_submit(s->ctx, 1, &iocbs) < 0)
- goto out_free_aiocb;
+ if (!bs->aio_context->io_plugged) {
+ if (io_submit(s->ctx, 1, &iocbs) < 0)
+ goto out_free_aiocb;
+ } else {
+ bdrv_queue_io(bs, iocbs);
+ }
return &laiocb->common;
out_free_aiocb:
diff --git a/block/raw-aio.h b/block/raw-aio.h
index 8cf084e..89fa775 100644
--- a/block/raw-aio.h
+++ b/block/raw-aio.h
@@ -40,6 +40,7 @@ BlockDriverAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
BlockDriverCompletionFunc *cb, void *opaque, int type);
void laio_detach_aio_context(void *s, AioContext *old_context);
void laio_attach_aio_context(void *s, AioContext *new_context);
+int laio_submit_io_queue(BlockDriverState *bs, void *aio_ctx);
#endif
#ifdef _WIN32
diff --git a/block/raw-posix.c b/block/raw-posix.c
index dacf4fb..5869e8e 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -1054,6 +1054,17 @@ static BlockDriverAIOCB *raw_aio_submit(BlockDriverState *bs,
cb, opaque, type);
}
+static int raw_aio_submit_io_queue(BlockDriverState *bs)
+{
+ BDRVRawState *s = bs->opaque;
+
+#ifdef CONFIG_LINUX_AIO
+ if (s->use_aio)
+ return laio_submit_io_queue(bs, s->aio_ctx);
+#endif
+ return 0;
+}
+
static BlockDriverAIOCB *raw_aio_readv(BlockDriverState *bs,
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque)
@@ -1503,6 +1514,7 @@ static BlockDriver bdrv_file = {
.bdrv_aio_flush = raw_aio_flush,
.bdrv_aio_discard = raw_aio_discard,
.bdrv_refresh_limits = raw_refresh_limits,
+ .bdrv_submit_io_queue = raw_aio_submit_io_queue,
.bdrv_truncate = raw_truncate,
.bdrv_getlength = raw_getlength,
@@ -1902,6 +1914,7 @@ static BlockDriver bdrv_host_device = {
.bdrv_aio_flush = raw_aio_flush,
.bdrv_aio_discard = hdev_aio_discard,
.bdrv_refresh_limits = raw_refresh_limits,
+ .bdrv_submit_io_queue = raw_aio_submit_io_queue,
.bdrv_truncate = raw_truncate,
.bdrv_getlength = raw_getlength,
@@ -2047,6 +2060,7 @@ static BlockDriver bdrv_host_floppy = {
.bdrv_aio_writev = raw_aio_writev,
.bdrv_aio_flush = raw_aio_flush,
.bdrv_refresh_limits = raw_refresh_limits,
+ .bdrv_submit_io_queue = raw_aio_submit_io_queue,
.bdrv_truncate = raw_truncate,
.bdrv_getlength = raw_getlength,
@@ -2175,6 +2189,7 @@ static BlockDriver bdrv_host_cdrom = {
.bdrv_aio_writev = raw_aio_writev,
.bdrv_aio_flush = raw_aio_flush,
.bdrv_refresh_limits = raw_refresh_limits,
+ .bdrv_submit_io_queue = raw_aio_submit_io_queue,
.bdrv_truncate = raw_truncate,
.bdrv_getlength = raw_getlength,
@@ -2309,6 +2324,7 @@ static BlockDriver bdrv_host_cdrom = {
.bdrv_aio_writev = raw_aio_writev,
.bdrv_aio_flush = raw_aio_flush,
.bdrv_refresh_limits = raw_refresh_limits,
+ .bdrv_submit_io_queue = raw_aio_submit_io_queue,
.bdrv_truncate = raw_truncate,
.bdrv_getlength = raw_getlength,
--
1.7.9.5
^ permalink raw reply related [flat|nested] 8+ messages in thread* [Qemu-devel] [PATCH 3/3] dataplane: submit I/O at batch
2014-06-30 9:49 [Qemu-devel] [PATCH 0/3] block: introduce submit I/O at batch Ming Lei
2014-06-30 9:49 ` [Qemu-devel] [PATCH 1/3] block: introduce IO queue APIs Ming Lei
2014-06-30 9:49 ` [Qemu-devel] [PATCH 2/3] block: linux-aio: support submit io_queue Ming Lei
@ 2014-06-30 9:49 ` Ming Lei
2014-06-30 11:10 ` [Qemu-devel] [PATCH 0/3] block: introduce " Paolo Bonzini
3 siblings, 0 replies; 8+ messages in thread
From: Ming Lei @ 2014-06-30 9:49 UTC (permalink / raw)
To: Peter Maydell, qemu-devel, Paolo Bonzini, Stefan Hajnoczi
Cc: Kevin Wolf, Ming Lei, Fam Zheng, Michael S. Tsirkin
Before commit 580b6b2aa2(dataplane: use the Qemu block
layer for I/O), dataplane for virtio-blk submits block
I/O at batch.
This commit 580b6b2aa2 replaces the custom linux AIO
implementation(including I/O batch) with Qemu block
layer, but this commit causes ~40% throughput regression
on virtio-blk performance, and removing submitting I/O
at batch is one of the cause.
This patch applys the new introduced bdrv_io_plug() and
bdrv_io_unplug() interfaces to support submitting I/O
at batch for Qemu block layer, and in my test, the change
can improve thoughput by ~30% with 'aio=native'.
Following my fio test script:
[global]
direct=1
size=4G
bsrange=4k-4k
timeout=40
numjobs=4
ioengine=libaio
iodepth=64
filename=/dev/vdc
group_reporting=1
[f]
rw=randread
Result on one of my small machine(host: x86_64, 2cores, 4thread, guest: 4cores):
- qemu master: 62K IOPS
- qemu master with these patches: 84K IOPS
- 2.0.0 release(dataplane using custom linux aio): 97K IOPS
Signed-off-by: Ming Lei <ming.lei@canonical.com>
---
hw/block/dataplane/virtio-blk.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index c10b7b7..5deaddc 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -289,6 +289,7 @@ static void handle_notify(EventNotifier *e)
int ret;
event_notifier_test_and_clear(&s->host_notifier);
+ bdrv_io_plug(s->blk->conf.bs);
for (;;) {
/* Disable guest->host notifies to avoid unnecessary vmexits */
vring_disable_notification(s->vdev, &s->vring);
@@ -322,6 +323,7 @@ static void handle_notify(EventNotifier *e)
break;
}
}
+ bdrv_io_unplug(s->blk->conf.bs);
}
/* Context: QEMU global mutex held */
@@ -431,6 +433,7 @@ void virtio_blk_data_plane_start(VirtIOBlockDataPlane *s)
trace_virtio_blk_data_plane_start(s);
bdrv_set_aio_context(s->blk->conf.bs, s->ctx);
+ bdrv_init_io_queue(s->blk->conf.bs, 128);
/* Kick right away to begin processing requests already in vring */
event_notifier_set(virtio_queue_get_host_notifier(vq));
@@ -462,6 +465,8 @@ void virtio_blk_data_plane_stop(VirtIOBlockDataPlane *s)
aio_context_release(s->ctx);
+ bdrv_exit_io_queue(s->blk->conf.bs);
+
/* Sync vring state back to virtqueue so that non-dataplane request
* processing can continue when we disable the host notifier below.
*/
--
1.7.9.5
^ permalink raw reply related [flat|nested] 8+ messages in thread