From: Fam Zheng <famz@redhat.com>
To: "Benoît Canet" <benoit@irqsave.net>
Cc: kwolf@redhat.com, pbonzini@redhat.com, qemu-devel@nongnu.org,
stefanha@redhat.com
Subject: Re: [Qemu-devel] [PATCH V5 3/5] block: Enable the new throttling code in the block layer.
Date: Wed, 14 Aug 2013 17:31:25 +0800 [thread overview]
Message-ID: <20130814093125.GC11081@localhost.localdomain> (raw)
In-Reply-To: <1376326396-7676-4-git-send-email-benoit@irqsave.net>
On Mon, 08/12 18:53, Benoît Canet wrote:
> Signed-off-by: Benoit Canet <benoit@irqsave.net>
> ---
> block.c | 349 ++++++++++++++-------------------------------
> block/qapi.c | 21 ++-
> blockdev.c | 100 +++++++------
> include/block/block.h | 1 -
> include/block/block_int.h | 32 +----
> 5 files changed, 173 insertions(+), 330 deletions(-)
>
> diff --git a/block.c b/block.c
> index 01b66d8..d49bc98 100644
> --- a/block.c
> +++ b/block.c
> @@ -86,13 +86,6 @@ static void coroutine_fn bdrv_co_do_rw(void *opaque);
> static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
> int64_t sector_num, int nb_sectors);
>
> -static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
> - bool is_write, double elapsed_time, uint64_t *wait);
> -static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
> - double elapsed_time, uint64_t *wait);
> -static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
> - bool is_write, int64_t *wait);
> -
> static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
> QTAILQ_HEAD_INITIALIZER(bdrv_states);
>
> @@ -123,70 +116,110 @@ int is_windows_drive(const char *filename)
> #endif
>
> /* throttling disk I/O limits */
> -void bdrv_io_limits_disable(BlockDriverState *bs)
> +void bdrv_set_io_limits(BlockDriverState *bs,
> + ThrottleConfig *cfg)
> {
> - bs->io_limits_enabled = false;
> + int i;
> +
> + throttle_config(&bs->throttle_state, cfg);
> +
> + for (i = 0; i < 2; i++) {
> + qemu_co_enter_next(&bs->throttled_reqs[i]);
> + }
> +}
> +
> +/* this function drain all the throttled IOs */
> +static bool bdrv_drain_throttled(BlockDriverState *bs)
> +{
> + bool drained = false;
> + bool enabled = bs->io_limits_enabled;
> + int i;
>
> - do {} while (qemu_co_enter_next(&bs->throttled_reqs));
> + bs->io_limits_enabled = false;
>
> - if (bs->block_timer) {
> - qemu_del_timer(bs->block_timer);
> - qemu_free_timer(bs->block_timer);
> - bs->block_timer = NULL;
> + for (i = 0; i < 2; i++) {
> + while (qemu_co_enter_next(&bs->throttled_reqs[i])) {
> + drained = true;
OK, this should be right. Throttling is disabled here, so there can't be
inserting to throttled_reqs[0] when you drain throttled_reqs[1].
> + }
> }
>
> - bs->slice_start = 0;
> - bs->slice_end = 0;
> + bs->io_limits_enabled = enabled;
> +
> + return drained;
> }
>
> -static void bdrv_block_timer(void *opaque)
> +void bdrv_io_limits_disable(BlockDriverState *bs)
> +{
> + bs->io_limits_enabled = false;
> +
> + bdrv_drain_throttled(bs);
> +
> + throttle_destroy(&bs->throttle_state);
> +}
> +
> +static void bdrv_throttle_read_timer_cb(void *opaque)
> {
> BlockDriverState *bs = opaque;
> + qemu_co_enter_next(&bs->throttled_reqs[0]);
> +}
>
> - qemu_co_enter_next(&bs->throttled_reqs);
> +static void bdrv_throttle_write_timer_cb(void *opaque)
> +{
> + BlockDriverState *bs = opaque;
> + qemu_co_enter_next(&bs->throttled_reqs[1]);
> }
>
> +/* should be called before bdrv_set_io_limits if a limit is set */
> void bdrv_io_limits_enable(BlockDriverState *bs)
> {
> - qemu_co_queue_init(&bs->throttled_reqs);
> - bs->block_timer = qemu_new_timer_ns(vm_clock, bdrv_block_timer, bs);
> + throttle_init(&bs->throttle_state,
> + vm_clock,
> + bdrv_throttle_read_timer_cb,
> + bdrv_throttle_write_timer_cb,
> + bs);
> + qemu_co_queue_init(&bs->throttled_reqs[0]);
> + qemu_co_queue_init(&bs->throttled_reqs[1]);
> bs->io_limits_enabled = true;
> }
>
> -bool bdrv_io_limits_enabled(BlockDriverState *bs)
> +/* This function make an IO wait if needed
s/make/makes/
> + *
> + * @is_write: is the IO a write
> + * @nb_sectors: the number of sectors of the IO
> + */
> +static void bdrv_io_limits_intercept(BlockDriverState *bs,
> + bool is_write,
> + int nb_sectors)
> {
> - BlockIOLimit *io_limits = &bs->io_limits;
> - return io_limits->bps[BLOCK_IO_LIMIT_READ]
> - || io_limits->bps[BLOCK_IO_LIMIT_WRITE]
> - || io_limits->bps[BLOCK_IO_LIMIT_TOTAL]
> - || io_limits->iops[BLOCK_IO_LIMIT_READ]
> - || io_limits->iops[BLOCK_IO_LIMIT_WRITE]
> - || io_limits->iops[BLOCK_IO_LIMIT_TOTAL];
> + /* does this io must wait */
> + bool must_wait = !throttle_allowed(&bs->throttle_state, is_write);
> +
> + /* if must wait or any request of this type throttled queue the IO */
> + if (must_wait ||
> + !qemu_co_queue_empty(&bs->throttled_reqs[is_write])) {
> + qemu_co_queue_wait(&bs->throttled_reqs[is_write]);
> + }
> +
> + /* the IO will be executed do the accounting */
s/executed/executed,/
> + throttle_account(&bs->throttle_state, is_write, nb_sectors);
> }
>
> -static void bdrv_io_limits_intercept(BlockDriverState *bs,
> - bool is_write, int nb_sectors)
> +/* This function will schedule the next IO throttled IO of this type if needed
> + *
> + * @is_write: is the IO a write
> + */
> +static void bdrv_io_limits_resched(BlockDriverState *bs, bool is_write)
> {
> - int64_t wait_time = -1;
>
> - if (!qemu_co_queue_empty(&bs->throttled_reqs)) {
> - qemu_co_queue_wait(&bs->throttled_reqs);
> + if (qemu_co_queue_empty(&bs->throttled_reqs[is_write])) {
> + return;
> }
>
> - /* In fact, we hope to keep each request's timing, in FIFO mode. The next
> - * throttled requests will not be dequeued until the current request is
> - * allowed to be serviced. So if the current request still exceeds the
> - * limits, it will be inserted to the head. All requests followed it will
> - * be still in throttled_reqs queue.
> - */
> -
> - while (bdrv_exceed_io_limits(bs, nb_sectors, is_write, &wait_time)) {
> - qemu_mod_timer(bs->block_timer,
> - wait_time + qemu_get_clock_ns(vm_clock));
> - qemu_co_queue_wait_insert_head(&bs->throttled_reqs);
> + if (!throttle_allowed(&bs->throttle_state, is_write)) {
> + return;
> }
>
> - qemu_co_queue_next(&bs->throttled_reqs);
> + qemu_co_queue_next(&bs->throttled_reqs[is_write]);
> }
>
> /* check if the path starts with "<protocol>:" */
> @@ -1112,11 +1145,6 @@ int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options,
> bdrv_dev_change_media_cb(bs, true);
> }
>
> - /* throttling disk I/O limits */
> - if (bs->io_limits_enabled) {
> - bdrv_io_limits_enable(bs);
> - }
> -
> return 0;
>
> unlink_and_fail:
> @@ -1452,7 +1480,7 @@ void bdrv_drain_all(void)
> * a busy wait.
> */
> QTAILQ_FOREACH(bs, &bdrv_states, list) {
> - while (qemu_co_enter_next(&bs->throttled_reqs)) {
> + if (bdrv_drain_throttled(bs)) {
> busy = true;
> }
> }
> @@ -1461,7 +1489,8 @@ void bdrv_drain_all(void)
> /* If requests are still pending there is a bug somewhere */
> QTAILQ_FOREACH(bs, &bdrv_states, list) {
> assert(QLIST_EMPTY(&bs->tracked_requests));
> - assert(qemu_co_queue_empty(&bs->throttled_reqs));
> + assert(qemu_co_queue_empty(&bs->throttled_reqs[0]));
> + assert(qemu_co_queue_empty(&bs->throttled_reqs[1]));
> }
> }
>
> @@ -1497,13 +1526,12 @@ static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
>
> bs_dest->enable_write_cache = bs_src->enable_write_cache;
>
> - /* i/o timing parameters */
> - bs_dest->slice_start = bs_src->slice_start;
> - bs_dest->slice_end = bs_src->slice_end;
> - bs_dest->slice_submitted = bs_src->slice_submitted;
> - bs_dest->io_limits = bs_src->io_limits;
> - bs_dest->throttled_reqs = bs_src->throttled_reqs;
> - bs_dest->block_timer = bs_src->block_timer;
> + /* i/o throttled req */
> + memcpy(&bs_dest->throttle_state,
> + &bs_src->throttle_state,
> + sizeof(ThrottleState));
> + bs_dest->throttled_reqs[0] = bs_src->throttled_reqs[0];
> + bs_dest->throttled_reqs[1] = bs_src->throttled_reqs[1];
> bs_dest->io_limits_enabled = bs_src->io_limits_enabled;
>
> /* r/w error */
> @@ -1550,7 +1578,7 @@ void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old)
> assert(bs_new->dev == NULL);
> assert(bs_new->in_use == 0);
> assert(bs_new->io_limits_enabled == false);
> - assert(bs_new->block_timer == NULL);
> + assert(!throttle_have_timer(&bs_new->throttle_state));
>
> tmp = *bs_new;
> *bs_new = *bs_old;
> @@ -1569,7 +1597,7 @@ void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old)
> assert(bs_new->job == NULL);
> assert(bs_new->in_use == 0);
> assert(bs_new->io_limits_enabled == false);
> - assert(bs_new->block_timer == NULL);
> + assert(!throttle_have_timer(&bs_new->throttle_state));
>
> bdrv_rebind(bs_new);
> bdrv_rebind(bs_old);
> @@ -1860,8 +1888,15 @@ int bdrv_commit_all(void)
> *
> * This function should be called when a tracked request is completing.
> */
> -static void tracked_request_end(BdrvTrackedRequest *req)
> +static void tracked_request_end(BlockDriverState *bs,
> + BdrvTrackedRequest *req,
> + bool is_write)
> {
> + /* throttling disk I/O */
> + if (bs->io_limits_enabled) {
> + bdrv_io_limits_resched(bs, is_write);
> + }
> +
> QLIST_REMOVE(req, list);
> qemu_co_queue_restart_all(&req->wait_queue);
> }
> @@ -1874,6 +1909,11 @@ static void tracked_request_begin(BdrvTrackedRequest *req,
> int64_t sector_num,
> int nb_sectors, bool is_write)
> {
> + /* throttling disk I/O */
> + if (bs->io_limits_enabled) {
> + bdrv_io_limits_intercept(bs, is_write, nb_sectors);
> + }
> +
> *req = (BdrvTrackedRequest){
> .bs = bs,
> .sector_num = sector_num,
> @@ -2512,11 +2552,6 @@ static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
> return -EIO;
> }
>
> - /* throttling disk read I/O */
> - if (bs->io_limits_enabled) {
> - bdrv_io_limits_intercept(bs, false, nb_sectors);
> - }
> -
> if (bs->copy_on_read) {
> flags |= BDRV_REQ_COPY_ON_READ;
> }
> @@ -2547,7 +2582,7 @@ static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
> ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
>
> out:
> - tracked_request_end(&req);
> + tracked_request_end(bs, &req, false);
>
> if (flags & BDRV_REQ_COPY_ON_READ) {
> bs->copy_on_read_in_flight--;
> @@ -2625,11 +2660,6 @@ static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
> return -EIO;
> }
>
> - /* throttling disk write I/O */
> - if (bs->io_limits_enabled) {
> - bdrv_io_limits_intercept(bs, true, nb_sectors);
> - }
> -
> if (bs->copy_on_read_in_flight) {
> wait_for_overlapping_requests(bs, sector_num, nb_sectors);
> }
> @@ -2658,7 +2688,7 @@ static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
> bs->wr_highest_sector = sector_num + nb_sectors - 1;
> }
>
> - tracked_request_end(&req);
> + tracked_request_end(bs, &req, true);
>
> return ret;
> }
> @@ -2751,14 +2781,6 @@ void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
> *nb_sectors_ptr = length;
> }
>
> -/* throttling disk io limits */
> -void bdrv_set_io_limits(BlockDriverState *bs,
> - BlockIOLimit *io_limits)
> -{
> - bs->io_limits = *io_limits;
> - bs->io_limits_enabled = bdrv_io_limits_enabled(bs);
> -}
> -
> void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
> BlockdevOnError on_write_error)
> {
> @@ -3568,169 +3590,6 @@ void bdrv_aio_cancel(BlockDriverAIOCB *acb)
> acb->aiocb_info->cancel(acb);
> }
>
> -/* block I/O throttling */
> -static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
> - bool is_write, double elapsed_time, uint64_t *wait)
> -{
> - uint64_t bps_limit = 0;
> - uint64_t extension;
> - double bytes_limit, bytes_base, bytes_res;
> - double slice_time, wait_time;
> -
> - if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
> - bps_limit = bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
> - } else if (bs->io_limits.bps[is_write]) {
> - bps_limit = bs->io_limits.bps[is_write];
> - } else {
> - if (wait) {
> - *wait = 0;
> - }
> -
> - return false;
> - }
> -
> - slice_time = bs->slice_end - bs->slice_start;
> - slice_time /= (NANOSECONDS_PER_SECOND);
> - bytes_limit = bps_limit * slice_time;
> - bytes_base = bs->slice_submitted.bytes[is_write];
> - if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
> - bytes_base += bs->slice_submitted.bytes[!is_write];
> - }
> -
> - /* bytes_base: the bytes of data which have been read/written; and
> - * it is obtained from the history statistic info.
> - * bytes_res: the remaining bytes of data which need to be read/written.
> - * (bytes_base + bytes_res) / bps_limit: used to calcuate
> - * the total time for completing reading/writting all data.
> - */
> - bytes_res = (unsigned) nb_sectors * BDRV_SECTOR_SIZE;
> -
> - if (bytes_base + bytes_res <= bytes_limit) {
> - if (wait) {
> - *wait = 0;
> - }
> -
> - return false;
> - }
> -
> - /* Calc approx time to dispatch */
> - wait_time = (bytes_base + bytes_res) / bps_limit - elapsed_time;
> -
> - /* When the I/O rate at runtime exceeds the limits,
> - * bs->slice_end need to be extended in order that the current statistic
> - * info can be kept until the timer fire, so it is increased and tuned
> - * based on the result of experiment.
> - */
> - extension = wait_time * NANOSECONDS_PER_SECOND;
> - extension = DIV_ROUND_UP(extension, BLOCK_IO_SLICE_TIME) *
> - BLOCK_IO_SLICE_TIME;
> - bs->slice_end += extension;
> - if (wait) {
> - *wait = wait_time * NANOSECONDS_PER_SECOND;
> - }
> -
> - return true;
> -}
> -
> -static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
> - double elapsed_time, uint64_t *wait)
> -{
> - uint64_t iops_limit = 0;
> - double ios_limit, ios_base;
> - double slice_time, wait_time;
> -
> - if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
> - iops_limit = bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
> - } else if (bs->io_limits.iops[is_write]) {
> - iops_limit = bs->io_limits.iops[is_write];
> - } else {
> - if (wait) {
> - *wait = 0;
> - }
> -
> - return false;
> - }
> -
> - slice_time = bs->slice_end - bs->slice_start;
> - slice_time /= (NANOSECONDS_PER_SECOND);
> - ios_limit = iops_limit * slice_time;
> - ios_base = bs->slice_submitted.ios[is_write];
> - if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
> - ios_base += bs->slice_submitted.ios[!is_write];
> - }
> -
> - if (ios_base + 1 <= ios_limit) {
> - if (wait) {
> - *wait = 0;
> - }
> -
> - return false;
> - }
> -
> - /* Calc approx time to dispatch, in seconds */
> - wait_time = (ios_base + 1) / iops_limit;
> - if (wait_time > elapsed_time) {
> - wait_time = wait_time - elapsed_time;
> - } else {
> - wait_time = 0;
> - }
> -
> - /* Exceeded current slice, extend it by another slice time */
> - bs->slice_end += BLOCK_IO_SLICE_TIME;
> - if (wait) {
> - *wait = wait_time * NANOSECONDS_PER_SECOND;
> - }
> -
> - return true;
> -}
> -
> -static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
> - bool is_write, int64_t *wait)
> -{
> - int64_t now, max_wait;
> - uint64_t bps_wait = 0, iops_wait = 0;
> - double elapsed_time;
> - int bps_ret, iops_ret;
> -
> - now = qemu_get_clock_ns(vm_clock);
> - if (now > bs->slice_end) {
> - bs->slice_start = now;
> - bs->slice_end = now + BLOCK_IO_SLICE_TIME;
> - memset(&bs->slice_submitted, 0, sizeof(bs->slice_submitted));
> - }
> -
> - elapsed_time = now - bs->slice_start;
> - elapsed_time /= (NANOSECONDS_PER_SECOND);
> -
> - bps_ret = bdrv_exceed_bps_limits(bs, nb_sectors,
> - is_write, elapsed_time, &bps_wait);
> - iops_ret = bdrv_exceed_iops_limits(bs, is_write,
> - elapsed_time, &iops_wait);
> - if (bps_ret || iops_ret) {
> - max_wait = bps_wait > iops_wait ? bps_wait : iops_wait;
> - if (wait) {
> - *wait = max_wait;
> - }
> -
> - now = qemu_get_clock_ns(vm_clock);
> - if (bs->slice_end < now + max_wait) {
> - bs->slice_end = now + max_wait;
> - }
> -
> - return true;
> - }
> -
> - if (wait) {
> - *wait = 0;
> - }
> -
> - bs->slice_submitted.bytes[is_write] += (int64_t)nb_sectors *
> - BDRV_SECTOR_SIZE;
> - bs->slice_submitted.ios[is_write]++;
> -
> - return false;
> -}
> -
> /**************************************************************/
> /* async block device emulation */
>
> diff --git a/block/qapi.c b/block/qapi.c
> index a4bc411..45f806b 100644
> --- a/block/qapi.c
> +++ b/block/qapi.c
> @@ -223,18 +223,15 @@ void bdrv_query_info(BlockDriverState *bs,
> info->inserted->backing_file_depth = bdrv_get_backing_file_depth(bs);
>
> if (bs->io_limits_enabled) {
> - info->inserted->bps =
> - bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
> - info->inserted->bps_rd =
> - bs->io_limits.bps[BLOCK_IO_LIMIT_READ];
> - info->inserted->bps_wr =
> - bs->io_limits.bps[BLOCK_IO_LIMIT_WRITE];
> - info->inserted->iops =
> - bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
> - info->inserted->iops_rd =
> - bs->io_limits.iops[BLOCK_IO_LIMIT_READ];
> - info->inserted->iops_wr =
> - bs->io_limits.iops[BLOCK_IO_LIMIT_WRITE];
> + ThrottleConfig cfg;
> + throttle_get_config(&bs->throttle_state, &cfg);
> + info->inserted->bps = cfg.buckets[THROTTLE_BPS_TOTAL].ups;
> + info->inserted->bps_rd = cfg.buckets[THROTTLE_BPS_READ].ups;
> + info->inserted->bps_wr = cfg.buckets[THROTTLE_BPS_WRITE].ups;
> +
> + info->inserted->iops = cfg.buckets[THROTTLE_OPS_TOTAL].ups;
> + info->inserted->iops_rd = cfg.buckets[THROTTLE_OPS_READ].ups;
> + info->inserted->iops_wr = cfg.buckets[THROTTLE_OPS_WRITE].ups;
> }
>
> bs0 = bs;
> diff --git a/blockdev.c b/blockdev.c
> index 41b0a49..7559ea5 100644
> --- a/blockdev.c
> +++ b/blockdev.c
> @@ -281,32 +281,16 @@ static int parse_block_error_action(const char *buf, bool is_read)
> }
> }
>
> -static bool do_check_io_limits(BlockIOLimit *io_limits, Error **errp)
> +static bool check_throttle_config(ThrottleConfig *cfg, Error **errp)
> {
> - bool bps_flag;
> - bool iops_flag;
> -
> - assert(io_limits);
> -
> - bps_flag = (io_limits->bps[BLOCK_IO_LIMIT_TOTAL] != 0)
> - && ((io_limits->bps[BLOCK_IO_LIMIT_READ] != 0)
> - || (io_limits->bps[BLOCK_IO_LIMIT_WRITE] != 0));
> - iops_flag = (io_limits->iops[BLOCK_IO_LIMIT_TOTAL] != 0)
> - && ((io_limits->iops[BLOCK_IO_LIMIT_READ] != 0)
> - || (io_limits->iops[BLOCK_IO_LIMIT_WRITE] != 0));
> - if (bps_flag || iops_flag) {
> - error_setg(errp, "bps(iops) and bps_rd/bps_wr(iops_rd/iops_wr) "
> + if (throttle_conflicting(cfg)) {
> + error_setg(errp, "bps/iops/max total values and read/write values"
> "cannot be used at the same time");
> return false;
> }
>
> - if (io_limits->bps[BLOCK_IO_LIMIT_TOTAL] < 0 ||
> - io_limits->bps[BLOCK_IO_LIMIT_WRITE] < 0 ||
> - io_limits->bps[BLOCK_IO_LIMIT_READ] < 0 ||
> - io_limits->iops[BLOCK_IO_LIMIT_TOTAL] < 0 ||
> - io_limits->iops[BLOCK_IO_LIMIT_WRITE] < 0 ||
> - io_limits->iops[BLOCK_IO_LIMIT_READ] < 0) {
> - error_setg(errp, "bps and iops values must be 0 or greater");
> + if (!throttle_is_valid(cfg)) {
> + error_setg(errp, "bps/iops/maxs values must be 0 or greater");
> return false;
> }
>
> @@ -331,7 +315,7 @@ static DriveInfo *blockdev_init(QemuOpts *all_opts,
> int on_read_error, on_write_error;
> const char *devaddr;
> DriveInfo *dinfo;
> - BlockIOLimit io_limits;
> + ThrottleConfig cfg;
> int snapshot = 0;
> bool copy_on_read;
> int ret;
> @@ -489,20 +473,31 @@ static DriveInfo *blockdev_init(QemuOpts *all_opts,
> }
>
> /* disk I/O throttling */
> - io_limits.bps[BLOCK_IO_LIMIT_TOTAL] =
> + cfg.buckets[THROTTLE_BPS_TOTAL].ups =
> qemu_opt_get_number(opts, "throttling.bps-total", 0);
> - io_limits.bps[BLOCK_IO_LIMIT_READ] =
> + cfg.buckets[THROTTLE_BPS_READ].ups =
> qemu_opt_get_number(opts, "throttling.bps-read", 0);
> - io_limits.bps[BLOCK_IO_LIMIT_WRITE] =
> + cfg.buckets[THROTTLE_BPS_WRITE].ups =
> qemu_opt_get_number(opts, "throttling.bps-write", 0);
> - io_limits.iops[BLOCK_IO_LIMIT_TOTAL] =
> + cfg.buckets[THROTTLE_OPS_TOTAL].ups =
> qemu_opt_get_number(opts, "throttling.iops-total", 0);
> - io_limits.iops[BLOCK_IO_LIMIT_READ] =
> + cfg.buckets[THROTTLE_OPS_READ].ups =
> qemu_opt_get_number(opts, "throttling.iops-read", 0);
> - io_limits.iops[BLOCK_IO_LIMIT_WRITE] =
> + cfg.buckets[THROTTLE_OPS_WRITE].ups =
> qemu_opt_get_number(opts, "throttling.iops-write", 0);
>
> - if (!do_check_io_limits(&io_limits, &error)) {
> + cfg.buckets[THROTTLE_BPS_TOTAL].max = 0;
> + cfg.buckets[THROTTLE_BPS_READ].max = 0;
> + cfg.buckets[THROTTLE_BPS_WRITE].max = 0;
> +
> + cfg.buckets[THROTTLE_OPS_TOTAL].max = 0;
> + cfg.buckets[THROTTLE_OPS_READ].max = 0;
> + cfg.buckets[THROTTLE_OPS_WRITE].max = 0;
> +
> + cfg.unit_size = BDRV_SECTOR_SIZE;
> + cfg.op_size = 0;
> +
> + if (!check_throttle_config(&cfg, &error)) {
> error_report("%s", error_get_pretty(error));
> error_free(error);
> return NULL;
> @@ -629,7 +624,10 @@ static DriveInfo *blockdev_init(QemuOpts *all_opts,
> bdrv_set_on_error(dinfo->bdrv, on_read_error, on_write_error);
>
> /* disk I/O throttling */
> - bdrv_set_io_limits(dinfo->bdrv, &io_limits);
> + if (throttle_enabled(&cfg)) {
> + bdrv_io_limits_enable(dinfo->bdrv);
> + bdrv_set_io_limits(dinfo->bdrv, &cfg);
> + }
>
> switch(type) {
> case IF_IDE:
> @@ -1262,7 +1260,7 @@ void qmp_block_set_io_throttle(const char *device, int64_t bps, int64_t bps_rd,
> int64_t bps_wr, int64_t iops, int64_t iops_rd,
> int64_t iops_wr, Error **errp)
> {
> - BlockIOLimit io_limits;
> + ThrottleConfig cfg;
> BlockDriverState *bs;
>
> bs = bdrv_find(device);
> @@ -1271,27 +1269,37 @@ void qmp_block_set_io_throttle(const char *device, int64_t bps, int64_t bps_rd,
> return;
> }
>
> - io_limits.bps[BLOCK_IO_LIMIT_TOTAL] = bps;
> - io_limits.bps[BLOCK_IO_LIMIT_READ] = bps_rd;
> - io_limits.bps[BLOCK_IO_LIMIT_WRITE] = bps_wr;
> - io_limits.iops[BLOCK_IO_LIMIT_TOTAL]= iops;
> - io_limits.iops[BLOCK_IO_LIMIT_READ] = iops_rd;
> - io_limits.iops[BLOCK_IO_LIMIT_WRITE]= iops_wr;
> + cfg.buckets[THROTTLE_BPS_TOTAL].ups = bps;
> + cfg.buckets[THROTTLE_BPS_READ].ups = bps_rd;
> + cfg.buckets[THROTTLE_BPS_WRITE].ups = bps_wr;
> +
> + cfg.buckets[THROTTLE_OPS_TOTAL].ups = iops;
> + cfg.buckets[THROTTLE_OPS_READ].ups = iops_rd;
> + cfg.buckets[THROTTLE_OPS_WRITE].ups = iops_wr;
> +
> + cfg.buckets[THROTTLE_BPS_TOTAL].max = 0;
> + cfg.buckets[THROTTLE_BPS_READ].max = 0;
> + cfg.buckets[THROTTLE_BPS_WRITE].max = 0;
> +
> + cfg.buckets[THROTTLE_OPS_TOTAL].max = 0;
> + cfg.buckets[THROTTLE_OPS_READ].max = 0;
> + cfg.buckets[THROTTLE_OPS_WRITE].max = 0;
>
> - if (!do_check_io_limits(&io_limits, errp)) {
> + cfg.unit_size = BDRV_SECTOR_SIZE;
Does this mean user set bps limit in sector now? I think we should be
consistent to existing parameter unit.
> + cfg.op_size = 0;
Why not set op_size to 1?
> +
> + if (!check_throttle_config(&cfg, errp)) {
> return;
> }
>
> - bs->io_limits = io_limits;
> -
> - if (!bs->io_limits_enabled && bdrv_io_limits_enabled(bs)) {
> + if (!bs->io_limits_enabled && throttle_enabled(&cfg)) {
> bdrv_io_limits_enable(bs);
> - } else if (bs->io_limits_enabled && !bdrv_io_limits_enabled(bs)) {
> + } else if (bs->io_limits_enabled && !throttle_enabled(&cfg)) {
> bdrv_io_limits_disable(bs);
> - } else {
> - if (bs->block_timer) {
> - qemu_mod_timer(bs->block_timer, qemu_get_clock_ns(vm_clock));
> - }
> + }
> +
> + if (bs->io_limits_enabled) {
> + bdrv_set_io_limits(bs, &cfg);
> }
> }
>
> diff --git a/include/block/block.h b/include/block/block.h
> index 742fce5..b16d579 100644
> --- a/include/block/block.h
> +++ b/include/block/block.h
> @@ -107,7 +107,6 @@ void bdrv_info_stats(Monitor *mon, QObject **ret_data);
> /* disk I/O throttling */
> void bdrv_io_limits_enable(BlockDriverState *bs);
> void bdrv_io_limits_disable(BlockDriverState *bs);
> -bool bdrv_io_limits_enabled(BlockDriverState *bs);
>
> void bdrv_init(void);
> void bdrv_init_with_whitelist(void);
> diff --git a/include/block/block_int.h b/include/block/block_int.h
> index e45f2a0..a0b6bc1 100644
> --- a/include/block/block_int.h
> +++ b/include/block/block_int.h
> @@ -34,18 +34,12 @@
> #include "monitor/monitor.h"
> #include "qemu/hbitmap.h"
> #include "block/snapshot.h"
> +#include "qemu/throttle.h"
>
> #define BLOCK_FLAG_ENCRYPT 1
> #define BLOCK_FLAG_COMPAT6 4
> #define BLOCK_FLAG_LAZY_REFCOUNTS 8
>
> -#define BLOCK_IO_LIMIT_READ 0
> -#define BLOCK_IO_LIMIT_WRITE 1
> -#define BLOCK_IO_LIMIT_TOTAL 2
> -
> -#define BLOCK_IO_SLICE_TIME 100000000
> -#define NANOSECONDS_PER_SECOND 1000000000.0
> -
> #define BLOCK_OPT_SIZE "size"
> #define BLOCK_OPT_ENCRYPT "encryption"
> #define BLOCK_OPT_COMPAT6 "compat6"
> @@ -69,17 +63,6 @@ typedef struct BdrvTrackedRequest {
> CoQueue wait_queue; /* coroutines blocked on this request */
> } BdrvTrackedRequest;
>
> -
> -typedef struct BlockIOLimit {
> - int64_t bps[3];
> - int64_t iops[3];
> -} BlockIOLimit;
> -
> -typedef struct BlockIOBaseValue {
> - uint64_t bytes[2];
> - uint64_t ios[2];
> -} BlockIOBaseValue;
> -
> struct BlockDriver {
> const char *format_name;
> int instance_size;
> @@ -263,13 +246,9 @@ struct BlockDriverState {
> /* number of in-flight copy-on-read requests */
> unsigned int copy_on_read_in_flight;
>
> - /* the time for latest disk I/O */
> - int64_t slice_start;
> - int64_t slice_end;
> - BlockIOLimit io_limits;
> - BlockIOBaseValue slice_submitted;
> - CoQueue throttled_reqs;
> - QEMUTimer *block_timer;
> + /* I/O throttling */
> + ThrottleState throttle_state;
> + CoQueue throttled_reqs[2];
> bool io_limits_enabled;
>
> /* I/O stats (display with "info blockstats"). */
> @@ -308,7 +287,8 @@ struct BlockDriverState {
> int get_tmp_filename(char *filename, int size);
>
> void bdrv_set_io_limits(BlockDriverState *bs,
> - BlockIOLimit *io_limits);
> + ThrottleConfig *cfg);
> +
>
> /**
> * bdrv_add_before_write_notifier:
> --
> 1.7.10.4
>
>
next prev parent reply other threads:[~2013-08-14 9:31 UTC|newest]
Thread overview: 17+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-08-12 16:53 [Qemu-devel] [PATCH V5 0/5] Continuous Leaky Bucket Throttling Benoît Canet
2013-08-12 16:53 ` [Qemu-devel] [PATCH V5 1/5] throttle: Add a new throttling API implementing continuous leaky bucket Benoît Canet
2013-08-14 8:52 ` Fam Zheng
2013-08-16 11:45 ` Stefan Hajnoczi
2013-08-19 11:27 ` Paolo Bonzini
2013-08-12 16:53 ` [Qemu-devel] [PATCH V5 2/5] throttle: Add units tests Benoît Canet
2013-08-12 16:53 ` [Qemu-devel] [PATCH V5 3/5] block: Enable the new throttling code in the block layer Benoît Canet
2013-08-14 9:31 ` Fam Zheng [this message]
2013-08-14 9:50 ` Fam Zheng
2013-08-16 12:02 ` Stefan Hajnoczi
2013-08-12 16:53 ` [Qemu-devel] [PATCH V5 4/5] block: Add support for throttling burst max in QMP and the command line Benoît Canet
2013-08-16 12:07 ` Stefan Hajnoczi
2013-08-12 16:53 ` [Qemu-devel] [PATCH V5 5/5] block: Add iops_sector_count to do the iops accounting for a given io size Benoît Canet
2013-08-14 9:48 ` Fam Zheng
2013-08-14 18:31 ` Benoît Canet
2013-08-16 12:10 ` Stefan Hajnoczi
2013-08-16 12:14 ` [Qemu-devel] [PATCH V5 0/5] Continuous Leaky Bucket Throttling Stefan Hajnoczi
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20130814093125.GC11081@localhost.localdomain \
--to=famz@redhat.com \
--cc=benoit@irqsave.net \
--cc=kwolf@redhat.com \
--cc=pbonzini@redhat.com \
--cc=qemu-devel@nongnu.org \
--cc=stefanha@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.