From: Stefan Hajnoczi <stefanha@redhat.com>
To: qemu-devel@nongnu.org
Cc: "Kevin Wolf" <kwolf@redhat.com>,
"Benoît Canet" <benoit@irqsave.net>,
"Stefan Hajnoczi" <stefanha@redhat.com>,
"Zhi Yong Wu" <wuzhy@cn.ibm.com>
Subject: [Qemu-devel] [RFC 1/4] block: fix I/O throttling accounting blind spot
Date: Thu, 21 Mar 2013 15:49:56 +0100 [thread overview]
Message-ID: <1363877399-16339-2-git-send-email-stefanha@redhat.com> (raw)
In-Reply-To: <1363877399-16339-1-git-send-email-stefanha@redhat.com>
I/O throttling relies on bdrv_acct_done() which is called when a request
completes. This leaves a blind spot since we only charge for completed
requests, not submitted requests.
For example, if there is 1 operation remaining in this time slice the
guest could submit 3 operations and they will all be submitted
successfully since they don't actually get accounted for until they
complete.
Originally we probably thought this is okay since the requests will be
accounted when the time slice is extended. In practice it causes
fluctuations since the guest can exceed its I/O limit and it will be
punished for this later on.
Account for I/O upon submission so that I/O limits are enforced
properly.
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
block.c | 19 ++++++++-----------
include/block/block_int.h | 2 +-
2 files changed, 9 insertions(+), 12 deletions(-)
diff --git a/block.c b/block.c
index 0a062c9..31fb0b0 100644
--- a/block.c
+++ b/block.c
@@ -141,7 +141,6 @@ void bdrv_io_limits_disable(BlockDriverState *bs)
bs->slice_start = 0;
bs->slice_end = 0;
bs->slice_time = 0;
- memset(&bs->io_base, 0, sizeof(bs->io_base));
}
static void bdrv_block_timer(void *opaque)
@@ -1329,8 +1328,8 @@ static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
bs_dest->slice_time = bs_src->slice_time;
bs_dest->slice_start = bs_src->slice_start;
bs_dest->slice_end = bs_src->slice_end;
+ bs_dest->slice_submitted = bs_src->slice_submitted;
bs_dest->io_limits = bs_src->io_limits;
- bs_dest->io_base = bs_src->io_base;
bs_dest->throttled_reqs = bs_src->throttled_reqs;
bs_dest->block_timer = bs_src->block_timer;
bs_dest->io_limits_enabled = bs_src->io_limits_enabled;
@@ -3665,9 +3664,9 @@ static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
slice_time = bs->slice_end - bs->slice_start;
slice_time /= (NANOSECONDS_PER_SECOND);
bytes_limit = bps_limit * slice_time;
- bytes_base = bs->nr_bytes[is_write] - bs->io_base.bytes[is_write];
+ bytes_base = bs->slice_submitted.bytes[is_write];
if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
- bytes_base += bs->nr_bytes[!is_write] - bs->io_base.bytes[!is_write];
+ bytes_base += bs->slice_submitted.bytes[!is_write];
}
/* bytes_base: the bytes of data which have been read/written; and
@@ -3683,6 +3682,7 @@ static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
*wait = 0;
}
+ bs->slice_submitted.bytes[is_write] += bytes_res;
return false;
}
@@ -3725,9 +3725,9 @@ static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
slice_time = bs->slice_end - bs->slice_start;
slice_time /= (NANOSECONDS_PER_SECOND);
ios_limit = iops_limit * slice_time;
- ios_base = bs->nr_ops[is_write] - bs->io_base.ios[is_write];
+ ios_base = bs->slice_submitted.ios[is_write];
if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
- ios_base += bs->nr_ops[!is_write] - bs->io_base.ios[!is_write];
+ ios_base += bs->slice_submitted.ios[!is_write];
}
if (ios_base + 1 <= ios_limit) {
@@ -3735,6 +3735,7 @@ static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
*wait = 0;
}
+ bs->slice_submitted.ios[is_write]++;
return false;
}
@@ -3772,11 +3773,7 @@ static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
bs->slice_start = now;
bs->slice_end = now + bs->slice_time;
- bs->io_base.bytes[is_write] = bs->nr_bytes[is_write];
- bs->io_base.bytes[!is_write] = bs->nr_bytes[!is_write];
-
- bs->io_base.ios[is_write] = bs->nr_ops[is_write];
- bs->io_base.ios[!is_write] = bs->nr_ops[!is_write];
+ memset(&bs->slice_submitted, 0, sizeof(bs->slice_submitted));
}
elapsed_time = now - bs->slice_start;
diff --git a/include/block/block_int.h b/include/block/block_int.h
index ce0aa26..b461764 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -251,7 +251,7 @@ struct BlockDriverState {
int64_t slice_start;
int64_t slice_end;
BlockIOLimit io_limits;
- BlockIOBaseValue io_base;
+ BlockIOBaseValue slice_submitted;
CoQueue throttled_reqs;
QEMUTimer *block_timer;
bool io_limits_enabled;
--
1.8.1.4
next prev parent reply other threads:[~2013-03-21 14:50 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-03-21 14:49 [Qemu-devel] [RFC 0/4] block: fix I/O throttling oscillations Stefan Hajnoczi
2013-03-21 14:49 ` Stefan Hajnoczi [this message]
2013-03-27 8:50 ` [Qemu-devel] [RFC 1/4] block: fix I/O throttling accounting blind spot Zhi Yong Wu
2013-03-27 9:14 ` Stefan Hajnoczi
2013-03-27 12:49 ` Zhi Yong Wu
2013-03-21 14:49 ` [Qemu-devel] [RFC 2/4] block: keep I/O throttling slice time constant Stefan Hajnoczi
2013-03-21 14:49 ` [Qemu-devel] [RFC 3/4] block: drop duplicated slice extension code Stefan Hajnoczi
2013-03-21 14:49 ` [Qemu-devel] [RFC 4/4] block: clean up I/O throttling wait_time code Stefan Hajnoczi
2013-03-21 15:27 ` [Qemu-devel] [RFC 0/4] block: fix I/O throttling oscillations Benoît Canet
2013-03-21 16:34 ` Stefan Hajnoczi
2013-03-27 12:55 ` Zhi Yong Wu
2013-04-04 12:15 ` Benoît Canet
2013-04-04 16:24 ` Kevin Wolf
2013-04-04 16:30 ` Stefan Hajnoczi
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1363877399-16339-2-git-send-email-stefanha@redhat.com \
--to=stefanha@redhat.com \
--cc=benoit@irqsave.net \
--cc=kwolf@redhat.com \
--cc=qemu-devel@nongnu.org \
--cc=wuzhy@cn.ibm.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).