From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([140.186.70.92]:41691) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1QmLpa-0004DF-Nc for qemu-devel@nongnu.org; Thu, 28 Jul 2011 04:20:36 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1QmLpZ-000478-8p for qemu-devel@nongnu.org; Thu, 28 Jul 2011 04:20:34 -0400 Received: from mail-yx0-f173.google.com ([209.85.213.173]:58701) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1QmLpZ-00046w-3X for qemu-devel@nongnu.org; Thu, 28 Jul 2011 04:20:33 -0400 Received: by yxt3 with SMTP id 3so1655136yxt.4 for ; Thu, 28 Jul 2011 01:20:31 -0700 (PDT) MIME-Version: 1.0 In-Reply-To: References: <1311670746-20498-1-git-send-email-wuzhy@linux.vnet.ibm.com> <1311670746-20498-2-git-send-email-wuzhy@linux.vnet.ibm.com> <20110726192618.GA8126@amt.cnet> Date: Thu, 28 Jul 2011 09:20:31 +0100 Message-ID: From: Stefan Hajnoczi Content-Type: text/plain; charset=ISO-8859-1 Content-Transfer-Encoding: quoted-printable Subject: Re: [Qemu-devel] [PATCH v2 1/1] The codes V2 for QEMU disk I/O limits. List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: Zhi Yong Wu Cc: kwolf@redhat.com, aliguori@us.ibm.com, stefanha@linux.vnet.ibm.com, kvm@vger.kernel.org, Marcelo Tosatti , qemu-devel@nongnu.org, ryanh@us.ibm.com, Zhi Yong Wu , vgoyal@redhat.com On Thu, Jul 28, 2011 at 6:43 AM, Zhi Yong Wu wrote: > On Wed, Jul 27, 2011 at 8:58 PM, Stefan Hajnoczi wro= te: >> On Wed, Jul 27, 2011 at 11:17 AM, Zhi Yong Wu wro= te: >>> On Wed, Jul 27, 2011 at 3:26 AM, Marcelo Tosatti = wrote: >>>> On Tue, Jul 26, 2011 at 04:59:06PM +0800, Zhi Yong Wu wrote: >>>>> Welcome to give me your comments, thanks. >>>>> >>>>> Signed-off-by: Zhi Yong Wu >>>>> --- >>>>> =A0Makefile.objs =A0 =A0 | =A0 =A02 +- >>>>> =A0block.c =A0 =A0 =A0 =A0 =A0 | =A0288 +++++++++++++++++++++++++++++= ++++++++++++++++++++++-- >>>>> =A0block.h =A0 =A0 =A0 =A0 =A0 | =A0 =A01 - >>>>> =A0block/blk-queue.c | =A0116 +++++++++++++++++++++ >>>>> =A0block/blk-queue.h | =A0 70 +++++++++++++ >>>>> =A0block_int.h =A0 =A0 =A0 | =A0 28 +++++ >>>>> =A0blockdev.c =A0 =A0 =A0 =A0| =A0 21 ++++ >>>>> =A0qemu-config.c =A0 =A0 | =A0 24 +++++ >>>>> =A0qemu-option.c =A0 =A0 | =A0 17 +++ >>>>> =A0qemu-option.h =A0 =A0 | =A0 =A01 + >>>>> =A0qemu-options.hx =A0 | =A0 =A01 + >>>>> =A011 files changed, 559 insertions(+), 10 deletions(-) >>>>> =A0create mode 100644 block/blk-queue.c >>>>> =A0create mode 100644 block/blk-queue.h >>>>> >>>>> diff --git a/Makefile.objs b/Makefile.objs >>>>> index 9f99ed4..06f2033 100644 >>>>> --- a/Makefile.objs >>>>> +++ b/Makefile.objs >>>>> @@ -23,7 +23,7 @@ block-nested-y +=3D raw.o cow.o qcow.o vdi.o vmdk.o= cloop.o dmg.o bochs.o vpc.o vv >>>>> =A0block-nested-y +=3D qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2= -snapshot.o qcow2-cache.o >>>>> =A0block-nested-y +=3D qed.o qed-gencb.o qed-l2-cache.o qed-table.o q= ed-cluster.o >>>>> =A0block-nested-y +=3D qed-check.o >>>>> -block-nested-y +=3D parallels.o nbd.o blkdebug.o sheepdog.o blkverif= y.o >>>>> +block-nested-y +=3D parallels.o nbd.o blkdebug.o sheepdog.o blkverif= y.o blk-queue.o >>>>> =A0block-nested-$(CONFIG_WIN32) +=3D raw-win32.o >>>>> =A0block-nested-$(CONFIG_POSIX) +=3D raw-posix.o >>>>> =A0block-nested-$(CONFIG_CURL) +=3D curl.o >>>>> diff --git a/block.c b/block.c >>>>> index 24a25d5..e54e59c 100644 >>>>> --- a/block.c >>>>> +++ b/block.c >>>>> @@ -29,6 +29,9 @@ >>>>> =A0#include "module.h" >>>>> =A0#include "qemu-objects.h" >>>>> >>>>> +#include "qemu-timer.h" >>>>> +#include "block/blk-queue.h" >>>>> + >>>>> =A0#ifdef CONFIG_BSD >>>>> =A0#include >>>>> =A0#include >>>>> @@ -58,6 +61,13 @@ static int bdrv_read_em(BlockDriverState *bs, int6= 4_t sector_num, >>>>> =A0static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num, >>>>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 const uint8_t *bu= f, int nb_sectors); >>>>> >>>>> +static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sect= ors, >>>>> + =A0 =A0 =A0 =A0bool is_write, double elapsed_time, uint64_t *wait); >>>>> +static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_wr= ite, >>>>> + =A0 =A0 =A0 =A0double elapsed_time, uint64_t *wait); >>>>> +static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_secto= rs, >>>>> + =A0 =A0 =A0 =A0bool is_write, uint64_t *wait); >>>>> + >>>>> =A0static QTAILQ_HEAD(, BlockDriverState) bdrv_states =3D >>>>> =A0 =A0 =A0QTAILQ_HEAD_INITIALIZER(bdrv_states); >>>>> >>>>> @@ -90,6 +100,20 @@ int is_windows_drive(const char *filename) >>>>> =A0} >>>>> =A0#endif >>>>> >>>>> +static int bdrv_io_limits_enable(BlockIOLimit *io_limits) >>>>> +{ >>>>> + =A0 =A0if ((io_limits->bps[0] =3D=3D 0) >>>>> + =A0 =A0 =A0 =A0 && (io_limits->bps[1] =3D=3D 0) >>>>> + =A0 =A0 =A0 =A0 && (io_limits->bps[2] =3D=3D 0) >>>>> + =A0 =A0 =A0 =A0 && (io_limits->iops[0] =3D=3D 0) >>>>> + =A0 =A0 =A0 =A0 && (io_limits->iops[1] =3D=3D 0) >>>>> + =A0 =A0 =A0 =A0 && (io_limits->iops[2] =3D=3D 0)) { >>>>> + =A0 =A0 =A0 =A0return 0; >>>>> + =A0 =A0} >>>>> + >>>>> + =A0 =A0return 1; >>>>> +} >>>>> + >>>>> =A0/* check if the path starts with ":" */ >>>>> =A0static int path_has_protocol(const char *path) >>>>> =A0{ >>>>> @@ -167,6 +191,28 @@ void path_combine(char *dest, int dest_size, >>>>> =A0 =A0 =A0} >>>>> =A0} >>>>> >>>>> +static void bdrv_block_timer(void *opaque) >>>>> +{ >>>>> + =A0 =A0BlockDriverState *bs =3D opaque; >>>>> + =A0 =A0BlockQueue *queue =3D bs->block_queue; >>>>> + >>>>> + =A0 =A0while (!QTAILQ_EMPTY(&queue->requests)) { >>>>> + =A0 =A0 =A0 =A0BlockIORequest *request; >>>>> + =A0 =A0 =A0 =A0int ret; >>>>> + >>>>> + =A0 =A0 =A0 =A0request =3D QTAILQ_FIRST(&queue->requests); >>>>> + =A0 =A0 =A0 =A0QTAILQ_REMOVE(&queue->requests, request, entry); >>>>> + >>>>> + =A0 =A0 =A0 =A0ret =3D qemu_block_queue_handler(request); >>>>> + =A0 =A0 =A0 =A0if (ret =3D=3D 0) { >>>>> + =A0 =A0 =A0 =A0 =A0 =A0QTAILQ_INSERT_HEAD(&queue->requests, request= , entry); >>>>> + =A0 =A0 =A0 =A0 =A0 =A0break; >>>>> + =A0 =A0 =A0 =A0} >>>>> + >>>>> + =A0 =A0 =A0 =A0qemu_free(request); >>>>> + =A0 =A0} >>>>> +} >>>>> + >>>>> =A0void bdrv_register(BlockDriver *bdrv) >>>>> =A0{ >>>>> =A0 =A0 =A0if (!bdrv->bdrv_aio_readv) { >>>>> @@ -642,6 +688,15 @@ int bdrv_open(BlockDriverState *bs, const char *= filename, int flags, >>>>> =A0 =A0 =A0 =A0 =A0 =A0 =A0bs->change_cb(bs->change_opaque, CHANGE_ME= DIA); >>>>> =A0 =A0 =A0} >>>>> >>>>> + =A0 =A0/* throttling disk I/O limits */ >>>>> + =A0 =A0if (bdrv_io_limits_enable(&bs->io_limits)) { >>>>> + =A0 =A0 =A0 =A0bs->block_queue =3D qemu_new_block_queue(); >>>>> + =A0 =A0 =A0 =A0bs->block_timer =3D qemu_new_timer_ns(vm_clock, bdrv= _block_timer, bs); >>>>> + >>>>> + =A0 =A0 =A0 =A0bs->slice_start[0] =3D qemu_get_clock_ns(vm_clock); >>>>> + =A0 =A0 =A0 =A0bs->slice_start[1] =3D qemu_get_clock_ns(vm_clock); >>>>> + =A0 =A0} >>>>> + >>>> >>>> It should be possible to tune the limits on the flight, please introdu= ce >>>> QMP commands for that. >>> Yeah, I am working on this. >> >> It's worth mentioning that the I/O limits commands can use Supriya's >> new block_set command for changing block device parameters at runtime. >> =A0So I think the runtime limits changing can be a separate patch. > Since I/O limits commands will depend on Supriya's block_set patch, > when will her patch be merged into qemu.git? The block_set patch is on the mailing list with an ongoing discussion. Until agreement is reached it will not be merged - hard to tell when exactly that will be, but hopefully over the next few days. You can always add temporary QMP/HMP commands in order to test setting limits at runtime. It should be easy to switch to a different QMP/HMP command later, if necessary. Stefan