From: Ram Pai <linuxram@us.ibm.com>
To: Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>
Cc: kwolf@redhat.com, pair@us.ibm.com, stefanha@linux.vnet.ibm.com,
kvm@vger.kernel.org, mtosatti@redhat.com, qemu-devel@nongnu.org,
zwu.kernel@gmail.com, ryanh@us.ibm.com, luowenj@cn.ibm.com
Subject: Re: [Qemu-devel] [PATCH v5 3/4] block: add block timer and block throttling algorithm
Date: Tue, 9 Aug 2011 16:57:28 +0800 [thread overview]
Message-ID: <20110809085728.GB17510@ram-ThinkPad-T61> (raw)
In-Reply-To: <1312863472-6901-4-git-send-email-wuzhy@linux.vnet.ibm.com>
On Tue, Aug 09, 2011 at 12:17:51PM +0800, Zhi Yong Wu wrote:
> Note:
> 1.) When bps/iops limits are set to a small value such as 511 bytes/s, the VM will hang up. We are considering how to handle this scenario.
> 2.) When a "dd" command is issued in the guest with its bs option set to a large value such as "bs=1024K", the resulting speed will be slightly higher than the limits.
>
> For these problems, if you have nice thought, pls let us know.:)
>
> Signed-off-by: Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>
> ---
> block.c | 347 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
> block.h | 6 +-
> block_int.h | 30 +++++
> 3 files changed, 372 insertions(+), 11 deletions(-)
>
> diff --git a/block.c b/block.c
> index 24a25d5..8fd6643 100644
> --- a/block.c
> +++ b/block.c
> @@ -29,6 +29,9 @@
> #include "module.h"
> #include "qemu-objects.h"
>
> +#include "qemu-timer.h"
> +#include "block/blk-queue.h"
> +
> #ifdef CONFIG_BSD
> #include <sys/types.h>
> #include <sys/stat.h>
> @@ -58,6 +61,13 @@ static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
> static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
> const uint8_t *buf, int nb_sectors);
>
> +static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
> + bool is_write, double elapsed_time, uint64_t *wait);
> +static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
> + double elapsed_time, uint64_t *wait);
> +static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
> + bool is_write, uint64_t *wait);
> +
> static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
> QTAILQ_HEAD_INITIALIZER(bdrv_states);
>
> @@ -90,6 +100,68 @@ int is_windows_drive(const char *filename)
> }
> #endif
>
> +/* throttling disk I/O limits */
> +void bdrv_io_limits_disable(BlockDriverState *bs)
> +{
> + bs->io_limits_enabled = false;
> + bs->req_from_queue = false;
> +
> + if (bs->block_queue) {
> + qemu_block_queue_flush(bs->block_queue);
> + qemu_del_block_queue(bs->block_queue);
> + }
> +
> + if (bs->block_timer) {
> + qemu_del_timer(bs->block_timer);
> + qemu_free_timer(bs->block_timer);
> + }
> +
> + bs->slice_start[0] = 0;
> + bs->slice_start[1] = 0;
> +
> + bs->slice_end[0] = 0;
> + bs->slice_end[1] = 0;
> +}
> +
> +static void bdrv_block_timer(void *opaque)
> +{
> + BlockDriverState *bs = opaque;
> + BlockQueue *queue = bs->block_queue;
> +
> + qemu_block_queue_flush(queue);
> +}
> +
> +void bdrv_io_limits_enable(BlockDriverState *bs)
> +{
> + bs->req_from_queue = false;
> +
> + bs->block_queue = qemu_new_block_queue();
> + bs->block_timer = qemu_new_timer_ns(vm_clock, bdrv_block_timer, bs);
> +
> + bs->slice_start[BLOCK_IO_LIMIT_READ] = qemu_get_clock_ns(vm_clock);
> + bs->slice_start[BLOCK_IO_LIMIT_WRITE] = qemu_get_clock_ns(vm_clock);
A minor comment: better to keep the slice_start of both the READ and WRITE
sides the same.
bs->slice_start[BLOCK_IO_LIMIT_WRITE] = bs->slice_start[BLOCK_IO_LIMIT_READ];
This saves a call to qemu_get_clock_ns().
> +
> + bs->slice_end[BLOCK_IO_LIMIT_READ] =
> + qemu_get_clock_ns(vm_clock) + BLOCK_IO_SLICE_TIME;
bs->slice_end[BLOCK_IO_LIMIT_READ] = bs->slice_start[BLOCK_IO_LIMIT_READ] +
BLOCK_IO_SLICE_TIME;
This saves one more call to qemu_get_clock_ns().
> + bs->slice_end[BLOCK_IO_LIMIT_WRITE] =
> + qemu_get_clock_ns(vm_clock) + BLOCK_IO_SLICE_TIME;
bs->slice_end[BLOCK_IO_LIMIT_WRITE] = bs->slice_start[BLOCK_IO_LIMIT_WRITE] +
BLOCK_IO_SLICE_TIME;
This saves yet another call.
> +}
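Putting these together, bdrv_io_limits_enable() could look roughly like this
(untested sketch, assuming the slice_start/slice_end fields hold int64_t
nanosecond values, as the rest of the patch suggests):

void bdrv_io_limits_enable(BlockDriverState *bs)
{
    int64_t now = qemu_get_clock_ns(vm_clock);

    bs->req_from_queue = false;

    bs->block_queue = qemu_new_block_queue();
    bs->block_timer = qemu_new_timer_ns(vm_clock, bdrv_block_timer, bs);

    /* one clock read shared by both directions */
    bs->slice_start[BLOCK_IO_LIMIT_READ]  = now;
    bs->slice_start[BLOCK_IO_LIMIT_WRITE] = now;

    bs->slice_end[BLOCK_IO_LIMIT_READ]  = now + BLOCK_IO_SLICE_TIME;
    bs->slice_end[BLOCK_IO_LIMIT_WRITE] = now + BLOCK_IO_SLICE_TIME;
}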
> +
> +bool bdrv_io_limits_enabled(BlockDriverState *bs)
> +{
> + BlockIOLimit *io_limits = &bs->io_limits;
> + if ((io_limits->bps[BLOCK_IO_LIMIT_READ] == 0)
> + && (io_limits->bps[BLOCK_IO_LIMIT_WRITE] == 0)
> + && (io_limits->bps[BLOCK_IO_LIMIT_TOTAL] == 0)
> + && (io_limits->iops[BLOCK_IO_LIMIT_READ] == 0)
> + && (io_limits->iops[BLOCK_IO_LIMIT_WRITE] == 0)
> + && (io_limits->iops[BLOCK_IO_LIMIT_TOTAL] == 0)) {
> + return false;
> + }
> +
> + return true;
> +}
This can be simplified to:
return (io_limits->bps[BLOCK_IO_LIMIT_READ]
|| io_limits->bps[BLOCK_IO_LIMIT_WRITE]
|| io_limits->bps[BLOCK_IO_LIMIT_TOTAL]
|| io_limits->iops[BLOCK_IO_LIMIT_READ]
|| io_limits->iops[BLOCK_IO_LIMIT_WRITE]
|| io_limits->iops[BLOCK_IO_LIMIT_TOTAL]);
> +
> /* check if the path starts with "<protocol>:" */
> static int path_has_protocol(const char *path)
> {
> @@ -642,6 +714,11 @@ int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
> bs->change_cb(bs->change_opaque, CHANGE_MEDIA);
> }
>
> + /* throttling disk I/O limits */
> + if (bs->io_limits_enabled) {
> + bdrv_io_limits_enable(bs);
> + }
> +
> return 0;
>
> unlink_and_fail:
> @@ -680,6 +757,16 @@ void bdrv_close(BlockDriverState *bs)
> if (bs->change_cb)
> bs->change_cb(bs->change_opaque, CHANGE_MEDIA);
> }
> +
> + /* throttling disk I/O limits */
> + if (bs->block_queue) {
> + qemu_del_block_queue(bs->block_queue);
> + }
> +
> + if (bs->block_timer) {
> + qemu_del_timer(bs->block_timer);
> + qemu_free_timer(bs->block_timer);
> + }
> }
>
> void bdrv_close_all(void)
> @@ -1312,6 +1399,15 @@ void bdrv_get_geometry_hint(BlockDriverState *bs,
> *psecs = bs->secs;
> }
>
> +/* throttling disk io limits */
> +void bdrv_set_io_limits(BlockDriverState *bs,
> + BlockIOLimit *io_limits)
> +{
> + memset(&bs->io_limits, 0, sizeof(BlockIOLimit));
> + bs->io_limits = *io_limits;
> + bs->io_limits_enabled = bdrv_io_limits_enabled(bs);
> +}
> +
> /* Recognize floppy formats */
> typedef struct FDFormat {
> FDriveType drive;
> @@ -1707,6 +1803,16 @@ static void bdrv_print_dict(QObject *obj, void *opaque)
> qdict_get_bool(qdict, "ro"),
> qdict_get_str(qdict, "drv"),
> qdict_get_bool(qdict, "encrypted"));
> +
> + monitor_printf(mon, " bps=%" PRId64 " bps_rd=%" PRId64
> + " bps_wr=%" PRId64 " iops=%" PRId64
> + " iops_rd=%" PRId64 " iops_wr=%" PRId64,
> + qdict_get_int(qdict, "bps"),
> + qdict_get_int(qdict, "bps_rd"),
> + qdict_get_int(qdict, "bps_wr"),
> + qdict_get_int(qdict, "iops"),
> + qdict_get_int(qdict, "iops_rd"),
> + qdict_get_int(qdict, "iops_wr"));
> } else {
> monitor_printf(mon, " [not inserted]");
> }
> @@ -1739,10 +1845,22 @@ void bdrv_info(Monitor *mon, QObject **ret_data)
> QDict *bs_dict = qobject_to_qdict(bs_obj);
>
> obj = qobject_from_jsonf("{ 'file': %s, 'ro': %i, 'drv': %s, "
> - "'encrypted': %i }",
> + "'encrypted': %i, "
> + "'bps': %" PRId64 ","
> + "'bps_rd': %" PRId64 ","
> + "'bps_wr': %" PRId64 ","
> + "'iops': %" PRId64 ","
> + "'iops_rd': %" PRId64 ","
> + "'iops_wr': %" PRId64 "}",
> bs->filename, bs->read_only,
> bs->drv->format_name,
> - bdrv_is_encrypted(bs));
> + bdrv_is_encrypted(bs),
> + bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL],
> + bs->io_limits.bps[BLOCK_IO_LIMIT_READ],
> + bs->io_limits.bps[BLOCK_IO_LIMIT_WRITE],
> + bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL],
> + bs->io_limits.iops[BLOCK_IO_LIMIT_READ],
> + bs->io_limits.iops[BLOCK_IO_LIMIT_WRITE]);
> if (bs->backing_file[0] != '\0') {
> QDict *qdict = qobject_to_qdict(obj);
> qdict_put(qdict, "backing_file",
> @@ -2111,6 +2229,165 @@ char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
> return buf;
> }
>
> +static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
> + bool is_write, double elapsed_time, uint64_t *wait) {
> + uint64_t bps_limit = 0;
> + double bytes_limit, bytes_disp, bytes_res;
> + double slice_time, wait_time;
> +
> + if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
> + bps_limit = bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
> + } else if (bs->io_limits.bps[is_write]) {
> + bps_limit = bs->io_limits.bps[is_write];
> + } else {
> + if (wait) {
> + *wait = 0;
> + }
> +
> + return false;
> + }
> +
> + slice_time = bs->slice_end[is_write] - bs->slice_start[is_write];
> + slice_time /= (BLOCK_IO_SLICE_TIME * 10.0);
> + bytes_limit = bps_limit * slice_time;
> + bytes_disp = bs->io_disps.bytes[is_write];
> + if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
> + bytes_disp += bs->io_disps.bytes[!is_write];
> + }
> +
> + bytes_res = (unsigned) nb_sectors * BDRV_SECTOR_SIZE;
> +
> + if (bytes_disp + bytes_res <= bytes_limit) {
> + if (wait) {
> + *wait = 0;
> + }
> +
> + return false;
> + }
> +
> + /* Calc approx time to dispatch */
> + wait_time = (bytes_disp + bytes_res) / bps_limit - elapsed_time;
> +
> + if (wait) {
> + *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
> + }
> +
> + return true;
> +}
> +
> +static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
> + double elapsed_time, uint64_t *wait) {
> + uint64_t iops_limit = 0;
> + double ios_limit, ios_disp;
> + double slice_time, wait_time;
> +
> + if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
> + iops_limit = bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
> + } else if (bs->io_limits.iops[is_write]) {
> + iops_limit = bs->io_limits.iops[is_write];
> + } else {
> + if (wait) {
> + *wait = 0;
> + }
> +
> + return false;
> + }
> +
> + slice_time = bs->slice_end[is_write] - bs->slice_start[is_write];
> + slice_time /= (BLOCK_IO_SLICE_TIME * 10.0);
> + ios_limit = iops_limit * slice_time;
> + ios_disp = bs->io_disps.ios[is_write];
> + if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
> + ios_disp += bs->io_disps.ios[!is_write];
> + }
> +
> + if (ios_disp + 1 <= ios_limit) {
> + if (wait) {
> + *wait = 0;
> + }
> +
> + return false;
> + }
> +
> + /* Calc approx time to dispatch */
> + wait_time = (ios_disp + 1) / iops_limit;
> + if (wait_time > elapsed_time) {
> + wait_time = wait_time - elapsed_time;
> + } else {
> + wait_time = 0;
> + }
> +
> + if (wait) {
> + *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
> + }
> +
> + return true;
> +}
bdrv_exceed_bps_limits() and bdrv_exceed_iops_limits() have almost identical logic.
They could probably be abstracted into a single function.
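Something along these lines, perhaps (untested sketch; bdrv_exceed_limits and
its parameters are made-up names, and the field types are guesses). The bps
wrapper would pass io_limits.bps, io_disps.bytes and the request size in bytes
as 'amount'; the iops wrapper would pass io_limits.iops, io_disps.ios and 1:

static bool bdrv_exceed_limits(BlockDriverState *bs, const uint64_t *limits,
                               const uint64_t *disps, double amount,
                               bool is_write, double elapsed_time,
                               uint64_t *wait)
{
    uint64_t limit;
    double quota, dispatched, slice_time, wait_time;

    /* pick the total limit if set, otherwise the per-direction limit */
    if (limits[BLOCK_IO_LIMIT_TOTAL]) {
        limit = limits[BLOCK_IO_LIMIT_TOTAL];
    } else if (limits[is_write]) {
        limit = limits[is_write];
    } else {
        if (wait) {
            *wait = 0;
        }
        return false;
    }

    slice_time = bs->slice_end[is_write] - bs->slice_start[is_write];
    slice_time /= (BLOCK_IO_SLICE_TIME * 10.0);
    quota = limit * slice_time;

    dispatched = disps[is_write];
    if (limits[BLOCK_IO_LIMIT_TOTAL]) {
        dispatched += disps[!is_write];
    }

    if (dispatched + amount <= quota) {
        if (wait) {
            *wait = 0;
        }
        return false;
    }

    /* approximate time until the request can be dispatched; clamped at
     * zero as in the iops path (the bps path in the patch does not clamp) */
    wait_time = (dispatched + amount) / limit;
    if (wait_time > elapsed_time) {
        wait_time -= elapsed_time;
    } else {
        wait_time = 0;
    }

    if (wait) {
        *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
    }
    return true;
}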
RP