* [Qemu-devel] [PATCH 0/1] The intro for QEMU disk I/O limits
@ 2011-07-26 8:57 Zhi Yong Wu
2011-07-26 8:57 ` [Qemu-devel] [PATCH 1/1] The codes V2 " Zhi Yong Wu
0 siblings, 1 reply; 2+ messages in thread
From: Zhi Yong Wu @ 2011-07-26 8:57 UTC (permalink / raw)
To: qemu-devel
Cc: kwolf, aliguori, stefanha, kvm, Zhi Yong Wu, zwu.kernel, ryanh,
luowenj, vgoyal
The main goal of this patch is to effectively cap the disk I/O rate (bytes per second) or I/O count (operations per second) of a single VM. It is only a draft, so it unavoidably has some drawbacks; if you spot any, please let me know.
The patch mainly introduces one block I/O throttling algorithm, plus one timer and one block queue for each drive that has I/O limits enabled.
When a block request comes in, the throttling algorithm checks whether it would exceed the I/O rate or count limits; if so, the request is enqueued on the drive's block queue, and the timer later handles the I/O requests waiting in that queue.
The available features are as follows:
(1) global bps limit.
-drive bps=xxx in bytes/s
(2) only read bps limit
-drive bps_rd=xxx in bytes/s
(3) only write bps limit
-drive bps_wr=xxx in bytes/s
(4) global iops limit
-drive iops=xxx in ios/s
(5) only read iops limit
-drive iops_rd=xxx in ios/s
(6) only write iops limit
-drive iops_wr=xxx in ios/s
(7) the combination of some limits.
-drive bps=xxx,iops=xxx
Known Limitations:
(1) Feature #1 cannot be combined with #2 or #3.
(2) Feature #4 cannot be combined with #5 or #6.
Zhi Yong Wu (1):
v2: The codes V2 for QEMU disk I/O limits.
Modified the code, mainly based on Stefan's comments.
v1: Submitted the code for QEMU disk I/O limits.
Only a code draft.
Makefile.objs | 2 +-
block.c | 288 +++++++++++++++++++++++++++++++++++++++++++++++++++--
block.h | 1 -
block/blk-queue.c | 116 +++++++++++++++++++++
block/blk-queue.h | 70 +++++++++++++
block_int.h | 28 +++++
blockdev.c | 21 ++++
qemu-config.c | 24 +++++
qemu-option.c | 17 +++
qemu-option.h | 1 +
qemu-options.hx | 1 +
11 files changed, 559 insertions(+), 10 deletions(-)
create mode 100644 block/blk-queue.c
create mode 100644 block/blk-queue.h
--
1.7.2.3
^ permalink raw reply [flat|nested] 2+ messages in thread
* [Qemu-devel] [PATCH 1/1] The codes V2 for QEMU disk I/O limits.
2011-07-26 8:57 [Qemu-devel] [PATCH 0/1] The intro for QEMU disk I/O limits Zhi Yong Wu
@ 2011-07-26 8:57 ` Zhi Yong Wu
0 siblings, 0 replies; 2+ messages in thread
From: Zhi Yong Wu @ 2011-07-26 8:57 UTC (permalink / raw)
To: qemu-devel
Cc: kwolf, aliguori, stefanha, kvm, Zhi Yong Wu, zwu.kernel, ryanh,
luowenj, vgoyal
Comments are welcome, thanks.
Signed-off-by: Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>
---
Makefile.objs | 2 +-
block.c | 288 +++++++++++++++++++++++++++++++++++++++++++++++++++--
block.h | 1 -
block/blk-queue.c | 116 +++++++++++++++++++++
block/blk-queue.h | 70 +++++++++++++
block_int.h | 28 +++++
blockdev.c | 21 ++++
qemu-config.c | 24 +++++
qemu-option.c | 17 +++
qemu-option.h | 1 +
qemu-options.hx | 1 +
11 files changed, 559 insertions(+), 10 deletions(-)
create mode 100644 block/blk-queue.c
create mode 100644 block/blk-queue.h
diff --git a/Makefile.objs b/Makefile.objs
index 9f99ed4..06f2033 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -23,7 +23,7 @@ block-nested-y += raw.o cow.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o vpc.o vv
block-nested-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o
block-nested-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
block-nested-y += qed-check.o
-block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o
+block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o blk-queue.o
block-nested-$(CONFIG_WIN32) += raw-win32.o
block-nested-$(CONFIG_POSIX) += raw-posix.o
block-nested-$(CONFIG_CURL) += curl.o
diff --git a/block.c b/block.c
index 24a25d5..e54e59c 100644
--- a/block.c
+++ b/block.c
@@ -29,6 +29,9 @@
#include "module.h"
#include "qemu-objects.h"
+#include "qemu-timer.h"
+#include "block/blk-queue.h"
+
#ifdef CONFIG_BSD
#include <sys/types.h>
#include <sys/stat.h>
@@ -58,6 +61,13 @@ static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
const uint8_t *buf, int nb_sectors);
+static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
+ bool is_write, double elapsed_time, uint64_t *wait);
+static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
+ double elapsed_time, uint64_t *wait);
+static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
+ bool is_write, uint64_t *wait);
+
static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
QTAILQ_HEAD_INITIALIZER(bdrv_states);
@@ -90,6 +100,20 @@ int is_windows_drive(const char *filename)
}
#endif
+/*
+ * Report whether any throttling limit is configured.
+ *
+ * Returns 1 if at least one entry of io_limits->bps[] or io_limits->iops[]
+ * is non-zero, and 0 if every entry is zero.
+ */
+static int bdrv_io_limits_enable(BlockIOLimit *io_limits)
+{
+    size_t slot;
+    const size_t nb_slots = sizeof(io_limits->bps) / sizeof(io_limits->bps[0]);
+
+    /* bps[] and iops[] have the same number of slots. */
+    for (slot = 0; slot < nb_slots; slot++) {
+        if (io_limits->bps[slot] != 0 || io_limits->iops[slot] != 0) {
+            return 1;
+        }
+    }
+
+    return 0;
+}
+
/* check if the path starts with "<protocol>:" */
static int path_has_protocol(const char *path)
{
@@ -167,6 +191,28 @@ void path_combine(char *dest, int dest_size,
}
}
+/*
+ * Timer callback that resubmits the throttled requests of one drive.
+ *
+ * @opaque: the BlockDriverState whose block_queue is to be processed.
+ *
+ * Each request is unlinked and resubmitted via qemu_block_queue_handler().
+ * A request that is throttled again gets re-enqueued at the tail of the same
+ * queue by its handler, which also re-arms the timer.  Therefore only the
+ * requests that were queued when the timer fired are handled in this pass:
+ * looping until the queue is empty would keep resubmitting the re-enqueued
+ * copies for as long as the limit stays exceeded.
+ */
+static void bdrv_block_timer(void *opaque)
+{
+    BlockDriverState *bs = opaque;
+    BlockQueue *queue = bs->block_queue;
+    BlockIORequest *request;
+    int pending = 0;
+
+    /* Snapshot the queue length before any request can be re-enqueued. */
+    QTAILQ_FOREACH(request, &queue->requests, entry) {
+        pending++;
+    }
+
+    while (pending-- > 0 && !QTAILQ_EMPTY(&queue->requests)) {
+        request = QTAILQ_FIRST(&queue->requests);
+        QTAILQ_REMOVE(&queue->requests, request, entry);
+
+        if (qemu_block_queue_handler(request) == 0) {
+            /*
+             * The handler returned NULL: keep the request at the head and
+             * stop this pass.
+             * NOTE(review): nothing re-arms the timer on this path, so the
+             * request waits for the next throttled submission -- confirm
+             * whether it should be completed with an error instead.
+             */
+            QTAILQ_INSERT_HEAD(&queue->requests, request, entry);
+            break;
+        }
+
+        qemu_free(request);
+    }
+}
+
void bdrv_register(BlockDriver *bdrv)
{
if (!bdrv->bdrv_aio_readv) {
@@ -642,6 +688,15 @@ int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
bs->change_cb(bs->change_opaque, CHANGE_MEDIA);
}
+ /* throttling disk I/O limits */
+ if (bdrv_io_limits_enable(&bs->io_limits)) {
+ bs->block_queue = qemu_new_block_queue();
+ bs->block_timer = qemu_new_timer_ns(vm_clock, bdrv_block_timer, bs);
+
+ bs->slice_start[0] = qemu_get_clock_ns(vm_clock);
+ bs->slice_start[1] = qemu_get_clock_ns(vm_clock);
+ }
+
return 0;
unlink_and_fail:
@@ -680,6 +735,16 @@ void bdrv_close(BlockDriverState *bs)
if (bs->change_cb)
bs->change_cb(bs->change_opaque, CHANGE_MEDIA);
}
+
+ /* throttling disk I/O limits */
+ if (bs->block_queue) {
+ qemu_del_block_queue(bs->block_queue);
+ }
+
+ if (bs->block_timer) {
+ qemu_del_timer(bs->block_timer);
+ qemu_free_timer(bs->block_timer);
+ }
}
void bdrv_close_all(void)
@@ -1312,6 +1377,14 @@ void bdrv_get_geometry_hint(BlockDriverState *bs,
*psecs = bs->secs;
}
+/*
+ * Install the disk I/O throttling limits of a drive.
+ *
+ * @bs:        drive whose limits are replaced
+ * @io_limits: limits to copy; the pointer is not retained
+ */
+void bdrv_set_io_limits(BlockDriverState *bs,
+                        BlockIOLimit *io_limits)
+{
+    /* The struct assignment overwrites every member, so no memset first. */
+    bs->io_limits = *io_limits;
+}
+
/* Recognize floppy formats */
typedef struct FDFormat {
FDriveType drive;
@@ -2111,6 +2184,155 @@ char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
return buf;
}
+/*
+ * Check one request against the bytes-per-second limit.
+ *
+ * @bs:           drive being accessed
+ * @nb_sectors:   size of the request in sectors
+ * @is_write:     true for a write, false for a read
+ * @elapsed_time: seconds elapsed since the start of the current slice
+ * @wait:         if non-NULL, receives the suggested delay (0 when the
+ *                request is within the limit); the caller adds it to a
+ *                nanosecond clock value
+ *
+ * The total limit takes precedence over the per-direction limit; with a
+ * total limit the bytes dispatched in both directions are counted.
+ *
+ * Returns true if dispatching the request now would exceed the limit.
+ *
+ * NOTE(review): a request larger than the per-slice budget
+ * (bps_limit * slice_time) fails the check even in an empty slice --
+ * confirm how such requests are meant to get through.
+ */
+static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
+                                   bool is_write, double elapsed_time,
+                                   uint64_t *wait)
+{
+    const bool use_total = bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL] != 0;
+    uint64_t bps_limit;
+    double slice_time = 0.1;
+    double budget, wanted, delay;
+
+    if (use_total) {
+        bps_limit = bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
+    } else {
+        bps_limit = bs->io_limits.bps[is_write];
+    }
+
+    /* Default answer; overwritten below only when the limit is exceeded. */
+    if (wait) {
+        *wait = 0;
+    }
+
+    if (bps_limit == 0) {
+        return false;
+    }
+
+    /* Bytes allowed per slice versus bytes dispatched plus this request. */
+    budget = bps_limit * slice_time;
+    wanted = bs->io_disps.bytes[is_write];
+    if (use_total) {
+        wanted += bs->io_disps.bytes[!is_write];
+    }
+    wanted += (unsigned) nb_sectors * BDRV_SECTOR_SIZE;
+
+    if (wanted <= budget) {
+        return false;
+    }
+
+    /* Calc approx time to dispatch */
+    delay = (wanted - budget) / bps_limit;
+    if (!delay) {
+        delay = 1;
+    }
+    delay += slice_time - elapsed_time;
+
+    if (wait) {
+        *wait = delay * BLOCK_IO_SLICE_TIME * 10 + 1;
+    }
+
+    return true;
+}
+
+/*
+ * Check one request against the I/O-operations-per-second limit.
+ *
+ * @bs:           drive being accessed
+ * @is_write:     true for a write, false for a read
+ * @elapsed_time: seconds elapsed since the start of the current slice
+ * @wait:         if non-NULL, receives the suggested delay (0 when the
+ *                request is within the limit); the caller adds it to a
+ *                nanosecond clock value
+ *
+ * The total limit takes precedence over the per-direction limit; with a
+ * total limit the operations dispatched in both directions are counted.
+ *
+ * Returns true if dispatching one more request now would exceed the limit.
+ *
+ * NOTE(review): with a limit below 10 the per-slice budget
+ * (iops_limit * slice_time) is below 1, so the check fails even in an empty
+ * slice -- confirm how such limits are meant to work.
+ */
+static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
+                                    double elapsed_time, uint64_t *wait)
+{
+    const bool use_total = bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL] != 0;
+    uint64_t iops_limit;
+    double slice_time = 0.1;
+    double budget, wanted, delay;
+
+    if (use_total) {
+        iops_limit = bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
+    } else {
+        iops_limit = bs->io_limits.iops[is_write];
+    }
+
+    /* Default answer; overwritten below only when the limit is exceeded. */
+    if (wait) {
+        *wait = 0;
+    }
+
+    if (iops_limit == 0) {
+        return false;
+    }
+
+    /* Operations allowed per slice versus dispatched plus this request. */
+    budget = iops_limit * slice_time;
+    wanted = bs->io_disps.ios[is_write];
+    if (use_total) {
+        wanted += bs->io_disps.ios[!is_write];
+    }
+    wanted += 1;
+
+    if (wanted <= budget) {
+        return false;
+    }
+
+    /* Calc approx time to dispatch */
+    delay = wanted / iops_limit;
+    delay = (delay > elapsed_time) ? delay - elapsed_time : 0;
+
+    if (wait) {
+        *wait = delay * BLOCK_IO_SLICE_TIME * 10 + 1;
+    }
+
+    return true;
+}
+
+/*
+ * Decide whether a request has to be throttled.
+ *
+ * @bs:         drive being accessed
+ * @nb_sectors: size of the request in sectors
+ * @is_write:   true for a write, false for a read
+ * @wait:       if non-NULL, receives the delay before the request should be
+ *              retried (0 if it may go ahead or must simply queue up)
+ *
+ * Starts a new accounting slice when the current one for this direction is
+ * at least BLOCK_IO_SLICE_TIME old, then applies both the bps and the iops
+ * check and reports the longer of the two delays.
+ *
+ * Returns true if the request must be queued, false if it may be submitted.
+ *
+ * NOTE(review): the slice start is tracked per direction, yet a new slice
+ * clears the dispatch counters of both directions -- confirm this is
+ * intended.
+ */
+static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
+                                  bool is_write, uint64_t *wait)
+{
+    const int64_t now = qemu_get_clock_ns(vm_clock);
+    uint64_t bps_wait = 0, iops_wait = 0;
+    double elapsed_time;
+    bool bps_over, iops_over;
+
+    if (bs->slice_start[is_write] + BLOCK_IO_SLICE_TIME <= now) {
+        /* New slice: forget everything dispatched so far. */
+        bs->slice_start[is_write] = now;
+        memset(&bs->io_disps, 0, sizeof(bs->io_disps));
+    }
+
+    /*
+     * A new request (one not resubmitted from the queue) queues up behind
+     * the requests already waiting if any limit applies to its direction.
+     */
+    if (!bs->req_from_queue
+        && !QTAILQ_EMPTY(&bs->block_queue->requests)
+        && (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]
+            || bs->io_limits.bps[is_write]
+            || bs->io_limits.iops[is_write]
+            || bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL])) {
+        if (wait) {
+            *wait = 0;
+        }
+        return true;
+    }
+
+    /* Nanoseconds since slice start, converted to seconds. */
+    elapsed_time = (now - bs->slice_start[is_write])
+                   / (BLOCK_IO_SLICE_TIME * 10.0);
+
+    /* Both checks always run so that both wait values are filled in. */
+    bps_over = bdrv_exceed_bps_limits(bs, nb_sectors, is_write,
+                                      elapsed_time, &bps_wait);
+    iops_over = bdrv_exceed_iops_limits(bs, is_write,
+                                        elapsed_time, &iops_wait);
+
+    if (!bps_over && !iops_over) {
+        if (wait) {
+            *wait = 0;
+        }
+        return false;
+    }
+
+    if (wait) {
+        *wait = (bps_wait > iops_wait) ? bps_wait : iops_wait;
+    }
+
+    return true;
+}
/**************************************************************/
/* async I/Os */
@@ -2121,13 +2343,28 @@ BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
{
BlockDriver *drv = bs->drv;
BlockDriverAIOCB *ret;
+ uint64_t wait_time = 0;
trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
- if (!drv)
- return NULL;
- if (bdrv_check_request(bs, sector_num, nb_sectors))
+ if (!drv || bdrv_check_request(bs, sector_num, nb_sectors)) {
+ if (bdrv_io_limits_enable(&bs->io_limits)) {
+ bs->req_from_queue = false;
+ }
return NULL;
+ }
+
+ /* throttling disk read I/O */
+ if (bdrv_io_limits_enable(&bs->io_limits)) {
+ if (bdrv_exceed_io_limits(bs, nb_sectors, false, &wait_time)) {
+ ret = qemu_block_queue_enqueue(bs->block_queue, bs, bdrv_aio_readv,
+ sector_num, qiov, nb_sectors, cb, opaque);
+ qemu_mod_timer(bs->block_timer,
+ wait_time + qemu_get_clock_ns(vm_clock));
+ bs->req_from_queue = false;
+ return ret;
+ }
+ }
ret = drv->bdrv_aio_readv(bs, sector_num, qiov, nb_sectors,
cb, opaque);
@@ -2136,6 +2373,16 @@ BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
/* Update stats even though technically transfer has not happened. */
bs->rd_bytes += (unsigned) nb_sectors * BDRV_SECTOR_SIZE;
bs->rd_ops ++;
+
+ if (bdrv_io_limits_enable(&bs->io_limits)) {
+ bs->io_disps.bytes[BLOCK_IO_LIMIT_READ] +=
+ (unsigned) nb_sectors * BDRV_SECTOR_SIZE;
+ bs->io_disps.ios[BLOCK_IO_LIMIT_READ]++;
+ }
+ }
+
+ if (bdrv_io_limits_enable(&bs->io_limits)) {
+ bs->req_from_queue = false;
}
return ret;
@@ -2184,15 +2431,18 @@ BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
BlockDriver *drv = bs->drv;
BlockDriverAIOCB *ret;
BlockCompleteData *blk_cb_data;
+ uint64_t wait_time = 0;
trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
- if (!drv)
- return NULL;
- if (bs->read_only)
- return NULL;
- if (bdrv_check_request(bs, sector_num, nb_sectors))
+ if (!drv || bs->read_only
+ || bdrv_check_request(bs, sector_num, nb_sectors)) {
+ if (bdrv_io_limits_enable(&bs->io_limits)) {
+ bs->req_from_queue = false;
+ }
+
return NULL;
+ }
if (bs->dirty_bitmap) {
blk_cb_data = blk_dirty_cb_alloc(bs, sector_num, nb_sectors, cb,
@@ -2201,6 +2451,18 @@ BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
opaque = blk_cb_data;
}
+ /* throttling disk write I/O */
+ if (bdrv_io_limits_enable(&bs->io_limits)) {
+ if (bdrv_exceed_io_limits(bs, nb_sectors, true, &wait_time)) {
+ ret = qemu_block_queue_enqueue(bs->block_queue, bs, bdrv_aio_writev,
+ sector_num, qiov, nb_sectors, cb, opaque);
+ qemu_mod_timer(bs->block_timer,
+ wait_time + qemu_get_clock_ns(vm_clock));
+ bs->req_from_queue = false;
+ return ret;
+ }
+ }
+
ret = drv->bdrv_aio_writev(bs, sector_num, qiov, nb_sectors,
cb, opaque);
@@ -2211,6 +2473,16 @@ BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
bs->wr_highest_sector = sector_num + nb_sectors - 1;
}
+
+ if (bdrv_io_limits_enable(&bs->io_limits)) {
+ bs->io_disps.bytes[BLOCK_IO_LIMIT_WRITE] +=
+ (unsigned) nb_sectors * BDRV_SECTOR_SIZE;
+ bs->io_disps.ios[BLOCK_IO_LIMIT_WRITE]++;
+ }
+ }
+
+ if (bdrv_io_limits_enable(&bs->io_limits)) {
+ bs->req_from_queue = false;
}
return ret;
diff --git a/block.h b/block.h
index 859d1d9..f0dac62 100644
--- a/block.h
+++ b/block.h
@@ -97,7 +97,6 @@ int bdrv_change_backing_file(BlockDriverState *bs,
const char *backing_file, const char *backing_fmt);
void bdrv_register(BlockDriver *bdrv);
-
typedef struct BdrvCheckResult {
int corruptions;
int leaks;
diff --git a/block/blk-queue.c b/block/blk-queue.c
new file mode 100644
index 0000000..09fcfe9
--- /dev/null
+++ b/block/blk-queue.c
@@ -0,0 +1,116 @@
+/*
+ * QEMU System Emulator queue definition for block layer
+ *
+ * Copyright (c) 2011 Zhi Yong Wu <zwu.kernel@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "block_int.h"
+#include "qemu-queue.h"
+#include "block/blk-queue.h"
+
+/* The APIs for block request queue on qemu block layer.
+ */
+
+/*
+ * Cancel hook of block_queue_pool: releases @acb with qemu_aio_release().
+ * NOTE(review): the BlockIORequest queued together with this AIOCB is not
+ * looked up or unlinked here, so it presumably is still dispatched by the
+ * timer later -- confirm whether cancel must also drop the request.
+ */
+static void qemu_block_queue_cancel(BlockDriverAIOCB *acb)
+{
+ qemu_aio_release(acb);
+}
+
+/* AIOCB pool from which qemu_block_queue_enqueue() takes the AIOCB it returns. */
+static AIOPool block_queue_pool = {
+ .aiocb_size = sizeof(struct BlockDriverAIOCB),
+ .cancel = qemu_block_queue_cancel,
+};
+
+/*
+ * Completion callback set on the AIOCBs created by
+ * qemu_block_queue_enqueue(); it treats @opaque as the AIOCB and releases
+ * it.  @ret is ignored.
+ * NOTE(review): qemu_block_queue_enqueue() registers the caller's opaque
+ * pointer, not the AIOCB, as this callback's argument -- confirm that the
+ * cast below is valid if this callback is ever invoked.
+ */
+static void qemu_block_queue_callback(void *opaque, int ret)
+{
+ BlockDriverAIOCB *acb = opaque;
+
+ qemu_aio_release(acb);
+}
+
+/*
+ * Allocate an empty, zero-initialised request queue.
+ *
+ * Returns the new queue; it is freed with qemu_del_block_queue().
+ */
+BlockQueue *qemu_new_block_queue(void)
+{
+    BlockQueue *fresh = qemu_mallocz(sizeof(*fresh));
+
+    QTAILQ_INIT(&fresh->requests);
+    return fresh;
+}
+
+/*
+ * Free @queue together with every request still linked on it.
+ *
+ * NOTE(review): the pending requests are freed without being submitted or
+ * having their completion callbacks invoked -- confirm callers expect that.
+ */
+void qemu_del_block_queue(BlockQueue *queue)
+{
+    while (!QTAILQ_EMPTY(&queue->requests)) {
+        BlockIORequest *victim = QTAILQ_FIRST(&queue->requests);
+
+        QTAILQ_REMOVE(&queue->requests, victim, entry);
+        qemu_free(victim);
+    }
+
+    qemu_free(queue);
+}
+
+/*
+ * Record a request at the tail of @queue for later submission.
+ *
+ * @queue:      queue to append to
+ * @bs:         drive the request targets
+ * @handler:    function used to resubmit the request
+ * @sector_num, @qiov, @nb_sectors, @cb, @opaque:
+ *              arguments stored and later passed to @handler unchanged
+ *
+ * Returns an AIOCB obtained from block_queue_pool.
+ * NOTE(review): the returned AIOCB is not linked to the stored request --
+ * confirm how completion and cancellation are meant to reach the caller.
+ */
+BlockDriverAIOCB *qemu_block_queue_enqueue(BlockQueue *queue,
+                                           BlockDriverState *bs,
+                                           BlockRequestHandler *handler,
+                                           int64_t sector_num,
+                                           QEMUIOVector *qiov,
+                                           int nb_sectors,
+                                           BlockDriverCompletionFunc *cb,
+                                           void *opaque)
+{
+    BlockIORequest *pending = qemu_malloc(sizeof(*pending));
+
+    /* The list linkage (entry) is filled in by QTAILQ_INSERT_TAIL below. */
+    *pending = (BlockIORequest) {
+        .bs         = bs,
+        .handler    = handler,
+        .sector_num = sector_num,
+        .qiov       = qiov,
+        .nb_sectors = nb_sectors,
+        .cb         = cb,
+        .opaque     = opaque,
+    };
+
+    QTAILQ_INSERT_TAIL(&queue->requests, pending, entry);
+
+    return qemu_aio_get(&block_queue_pool, bs,
+                        qemu_block_queue_callback, opaque);
+}
+
+/*
+ * Resubmit a queued request through its stored handler.
+ *
+ * Sets bs->req_from_queue first so that the handler can tell the request
+ * comes from the block queue.
+ *
+ * Returns 1 if the handler returned an AIOCB, 0 if it returned NULL.
+ */
+int qemu_block_queue_handler(BlockIORequest *request)
+{
+    BlockDriverState *bs = request->bs;
+
+    /* indicate this req is from block queue */
+    bs->req_from_queue = true;
+
+    if (request->handler(bs, request->sector_num, request->qiov,
+                         request->nb_sectors, request->cb,
+                         request->opaque) == NULL) {
+        return 0;
+    }
+
+    return 1;
+}
diff --git a/block/blk-queue.h b/block/blk-queue.h
new file mode 100644
index 0000000..47f8a36
--- /dev/null
+++ b/block/blk-queue.h
@@ -0,0 +1,70 @@
+/*
+ * QEMU System Emulator queue declaration for block layer
+ *
+ * Copyright (c) 2011 Zhi Yong Wu <zwu.kernel@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef QEMU_BLOCK_QUEUE_H
+#define QEMU_BLOCK_QUEUE_H
+
+#include "block.h"
+#include "qemu-queue.h"
+#include "qemu-common.h"
+
+/*
+ * Type of the function that submits a queued request; the arguments are the
+ * ones stored in the BlockIORequest.  Returns the AIOCB of the submitted
+ * request, or NULL.
+ */
+typedef BlockDriverAIOCB* (BlockRequestHandler) (BlockDriverState *bs,
+ int64_t sector_num, QEMUIOVector *qiov,
+ int nb_sectors, BlockDriverCompletionFunc *cb,
+ void *opaque);
+
+/* One queued I/O request: the handler to call plus its saved arguments. */
+struct BlockIORequest {
+ QTAILQ_ENTRY(BlockIORequest) entry; /* linkage in BlockQueue.requests */
+ BlockDriverState *bs; /* drive the request targets */
+ BlockRequestHandler *handler; /* function that submits the request */
+ int64_t sector_num; /* saved handler argument */
+ QEMUIOVector *qiov; /* saved handler argument */
+ int nb_sectors; /* saved handler argument */
+ BlockDriverCompletionFunc *cb; /* saved handler argument */
+ void *opaque; /* saved handler argument */
+};
+
+typedef struct BlockIORequest BlockIORequest;
+
+/* Queue of BlockIORequest entries, linked through their entry member. */
+struct BlockQueue {
+ QTAILQ_HEAD(requests, BlockIORequest) requests;
+};
+
+typedef struct BlockQueue BlockQueue;
+
+BlockQueue *qemu_new_block_queue(void);
+
+void qemu_del_block_queue(BlockQueue *queue);
+
+BlockDriverAIOCB *qemu_block_queue_enqueue(BlockQueue *queue,
+ BlockDriverState *bs,
+ BlockRequestHandler *handler,
+ int64_t sector_num,
+ QEMUIOVector *qiov,
+ int nb_sectors,
+ BlockDriverCompletionFunc *cb,
+ void *opaque);
+
+int qemu_block_queue_handler(BlockIORequest *request);
+#endif /* QEMU_BLOCK_QUEUE_H */
diff --git a/block_int.h b/block_int.h
index 1e265d2..1587171 100644
--- a/block_int.h
+++ b/block_int.h
@@ -27,10 +27,17 @@
#include "block.h"
#include "qemu-option.h"
#include "qemu-queue.h"
+#include "block/blk-queue.h"
#define BLOCK_FLAG_ENCRYPT 1
#define BLOCK_FLAG_COMPAT6 4
+/* Indexes into the bps[] and iops[] arrays of BlockIOLimit. */
+#define BLOCK_IO_LIMIT_READ 0
+#define BLOCK_IO_LIMIT_WRITE 1
+#define BLOCK_IO_LIMIT_TOTAL 2
+
+/* Length of one throttling slice; compared against nanosecond clock values. */
+#define BLOCK_IO_SLICE_TIME 100000000
+
#define BLOCK_OPT_SIZE "size"
#define BLOCK_OPT_ENCRYPT "encryption"
#define BLOCK_OPT_COMPAT6 "compat6"
@@ -46,6 +53,16 @@ typedef struct AIOPool {
BlockDriverAIOCB *free_aiocb;
} AIOPool;
+/*
+ * Throttling limits of one drive, indexed by BLOCK_IO_LIMIT_READ,
+ * BLOCK_IO_LIMIT_WRITE and BLOCK_IO_LIMIT_TOTAL.  A value of 0 means that
+ * no limit is set for that slot.
+ */
+typedef struct BlockIOLimit {
+ uint64_t bps[3]; /* bytes per second */
+ uint64_t iops[3]; /* I/O operations per second */
+} BlockIOLimit;
+
+/*
+ * I/O dispatched since the counters were last reset, indexed by
+ * BLOCK_IO_LIMIT_READ and BLOCK_IO_LIMIT_WRITE.
+ */
+typedef struct BlockIODisp {
+ uint64_t bytes[2]; /* bytes dispatched */
+ uint64_t ios[2]; /* requests dispatched */
+} BlockIODisp;
+
struct BlockDriver {
const char *format_name;
int instance_size;
@@ -175,6 +192,14 @@ struct BlockDriverState {
void *sync_aiocb;
+ /* start of the current throttling slice (vm_clock ns), per direction */
+ int64_t slice_start[2];
+ BlockIOLimit io_limits;
+ BlockIODisp io_disps;
+ BlockQueue *block_queue;
+ QEMUTimer *block_timer;
+ bool req_from_queue;
+
/* I/O stats (display with "info blockstats"). */
uint64_t rd_bytes;
uint64_t wr_bytes;
@@ -222,6 +247,9 @@ void qemu_aio_release(void *p);
void *qemu_blockalign(BlockDriverState *bs, size_t size);
+void bdrv_set_io_limits(BlockDriverState *bs,
+ BlockIOLimit *io_limits);
+
#ifdef _WIN32
int is_windows_drive(const char *filename);
#endif
diff --git a/blockdev.c b/blockdev.c
index c263663..45602f4 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -238,6 +238,9 @@ DriveInfo *drive_init(QemuOpts *opts, int default_to_scsi)
int on_read_error, on_write_error;
const char *devaddr;
DriveInfo *dinfo;
+ BlockIOLimit io_limits;
+ bool iol_flag = false;
+ const char *iol_opts[7] = {"bps", "bps_rd", "bps_wr", "iops", "iops_rd", "iops_wr"};
int is_extboot = 0;
int snapshot = 0;
int ret;
@@ -372,6 +375,19 @@ DriveInfo *drive_init(QemuOpts *opts, int default_to_scsi)
return NULL;
}
+ /* disk io limits */
+ iol_flag = qemu_opt_io_limits_enable_flag(opts, iol_opts);
+ if (iol_flag) {
+ memset(&io_limits, 0, sizeof(BlockIOLimit));
+
+ io_limits.bps[2] = qemu_opt_get_number(opts, "bps", 0);
+ io_limits.bps[0] = qemu_opt_get_number(opts, "bps_rd", 0);
+ io_limits.bps[1] = qemu_opt_get_number(opts, "bps_wr", 0);
+ io_limits.iops[2] = qemu_opt_get_number(opts, "iops", 0);
+ io_limits.iops[0] = qemu_opt_get_number(opts, "iops_rd", 0);
+ io_limits.iops[1] = qemu_opt_get_number(opts, "iops_wr", 0);
+ }
+
on_write_error = BLOCK_ERR_STOP_ENOSPC;
if ((buf = qemu_opt_get(opts, "werror")) != NULL) {
if (type != IF_IDE && type != IF_SCSI && type != IF_VIRTIO && type != IF_NONE) {
@@ -483,6 +499,11 @@ DriveInfo *drive_init(QemuOpts *opts, int default_to_scsi)
bdrv_set_on_error(dinfo->bdrv, on_read_error, on_write_error);
+ /* throttling disk io limits */
+ if (iol_flag) {
+ bdrv_set_io_limits(dinfo->bdrv, &io_limits);
+ }
+
switch(type) {
case IF_IDE:
case IF_SCSI:
diff --git a/qemu-config.c b/qemu-config.c
index efa892c..9232bbb 100644
--- a/qemu-config.c
+++ b/qemu-config.c
@@ -82,6 +82,30 @@ static QemuOptsList qemu_drive_opts = {
.name = "boot",
.type = QEMU_OPT_BOOL,
.help = "make this a boot drive",
+ },{
+ .name = "iops",
+ .type = QEMU_OPT_NUMBER,
+ .help = "limit total I/O operations per second",
+ },{
+ .name = "iops_rd",
+ .type = QEMU_OPT_NUMBER,
+ .help = "limit read operations per second",
+ },{
+ .name = "iops_wr",
+ .type = QEMU_OPT_NUMBER,
+ .help = "limit write operations per second",
+ },{
+ .name = "bps",
+ .type = QEMU_OPT_NUMBER,
+ .help = "limit total bytes per second",
+ },{
+ .name = "bps_rd",
+ .type = QEMU_OPT_NUMBER,
+ .help = "limit read bytes per second",
+ },{
+ .name = "bps_wr",
+ .type = QEMU_OPT_NUMBER,
+ .help = "limit write bytes per second",
},
{ /* end of list */ }
},
diff --git a/qemu-option.c b/qemu-option.c
index 65db542..9fe234d 100644
--- a/qemu-option.c
+++ b/qemu-option.c
@@ -562,6 +562,23 @@ uint64_t qemu_opt_get_number(QemuOpts *opts, const char *name, uint64_t defval)
return opt->value.uint;
}
+/*
+ * Tell whether any of the named numeric options is set to a non-zero value.
+ *
+ * @opts:     option set to inspect
+ * @iol_opts: NULL-terminated array of option names
+ *
+ * Returns true as soon as one option reads as non-zero, false otherwise.
+ */
+bool qemu_opt_io_limits_enable_flag(QemuOpts *opts, const char **iol_opts)
+{
+    const char **name;
+
+    for (name = iol_opts; *name; name++) {
+        if (qemu_opt_get_number(opts, *name, 0) != 0) {
+            return true;
+        }
+    }
+
+    return false;
+}
+
uint64_t qemu_opt_get_size(QemuOpts *opts, const char *name, uint64_t defval)
{
QemuOpt *opt = qemu_opt_find(opts, name);
diff --git a/qemu-option.h b/qemu-option.h
index b515813..fc909f9 100644
--- a/qemu-option.h
+++ b/qemu-option.h
@@ -107,6 +107,7 @@ struct QemuOptsList {
const char *qemu_opt_get(QemuOpts *opts, const char *name);
int qemu_opt_get_bool(QemuOpts *opts, const char *name, int defval);
uint64_t qemu_opt_get_number(QemuOpts *opts, const char *name, uint64_t defval);
+bool qemu_opt_io_limits_enable_flag(QemuOpts *opts, const char **iol_opts);
uint64_t qemu_opt_get_size(QemuOpts *opts, const char *name, uint64_t defval);
int qemu_opt_set(QemuOpts *opts, const char *name, const char *value);
typedef int (*qemu_opt_loopfunc)(const char *name, const char *value, void *opaque);
diff --git a/qemu-options.hx b/qemu-options.hx
index cb3347e..ae219f5 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -121,6 +121,7 @@ DEF("drive", HAS_ARG, QEMU_OPTION_drive,
" [,cache=writethrough|writeback|none|unsafe][,format=f]\n"
" [,serial=s][,addr=A][,id=name][,aio=threads|native]\n"
" [,readonly=on|off][,boot=on|off]\n"
+ " [[,bps=b]|[[,bps_rd=r][,bps_wr=w]]][[,iops=i]|[[,iops_rd=r][,iops_wr=w]]\n"
" use 'file' as a drive image\n", QEMU_ARCH_ALL)
STEXI
@item -drive @var{option}[,@var{option}[,@var{option}[,...]]]
--
1.7.2.3
^ permalink raw reply related [flat|nested] 2+ messages in thread
end of thread, other threads:[~2011-07-26 9:01 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2011-07-26 8:57 [Qemu-devel] [PATCH 0/1] The intro for QEMU disk I/O limits Zhi Yong Wu
2011-07-26 8:57 ` [Qemu-devel] [PATCH 1/1] The codes V2 " Zhi Yong Wu
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).