From: Konrad Rzeszutek Wilk <konrad@kernel.org>
To: Vasiliy Tolstov <v.tolstov@selfip.ru>
Cc: Wei Liu <wei.liu2@citrix.com>,
"xen-devel@lists.xen.org" <xen-devel@lists.xen.org>
Subject: Re: blkback disk I/O limit patch
Date: Fri, 1 Feb 2013 09:42:56 -0500 [thread overview]
Message-ID: <20130201144255.GA7461@konrad-lan.dumpdata.com> (raw)
In-Reply-To: <CACaajQvp5p_77CtB9tTohM4iD0sqeJ3Bwm=dbBwRKRitXr9p7g@mail.gmail.com>
On Fri, Feb 01, 2013 at 10:53:46AM +0400, Vasiliy Tolstov wrote:
> Sorry,
Ugh, you didn't inline it - you just copied and pasted it.
Also you are missing an SoB and a description of what this patch does
and why is it better than existing device mapper I/O limiting work?
>
> diff -NruabBEp xen_blkback_limit.orig/blkback.c xen_blkback_limit.new//blkback.c
> --- xen_blkback_limit.orig/blkback.c 2012-12-04 13:03:58.000000000 +0400
> +++ xen_blkback_limit.new//blkback.c 2013-01-28 08:11:30.000000000 +0400
> @@ -211,10 +211,18 @@ static void print_stats(blkif_t *blkif)
> blkif->st_pk_req = 0;
> }
>
> +static void refill_iops(blkif_t *blkif)
> +{
> + blkif->reqtime = jiffies + msecs_to_jiffies(1000);
> + blkif->reqcount = 0;
> +}
> +
> int blkif_schedule(void *arg)
> {
> blkif_t *blkif = arg;
> struct vbd *vbd = &blkif->vbd;
> + int ret = 0;
> + struct timeval cur_time;
>
> blkif_get(blkif);
>
> @@ -237,10 +245,22 @@ int blkif_schedule(void *arg)
> blkif->waiting_reqs = 0;
> smp_mb(); /* clear flag *before* checking for work */
>
> - if (do_block_io_op(blkif))
> + ret = do_block_io_op(blkif);
> + if (ret)
> blkif->waiting_reqs = 1;
> unplug_queue(blkif);
>
> + if (blkif->reqrate) {
> + if (2 == ret && (blkif->reqtime > jiffies)) {
> + jiffies_to_timeval(jiffies, &cur_time);
> +
> + set_current_state(TASK_INTERRUPTIBLE);
> + schedule_timeout(blkif->reqtime - jiffies);
> + }
> + if (time_after(jiffies, blkif->reqtime))
> + refill_iops(blkif);
> + }
> +
> if (log_stats && time_after(jiffies, blkif->st_print))
> print_stats(blkif);
> }
> @@ -394,10 +414,19 @@ static int _do_block_io_op(blkif_t *blki
> rp = blk_rings->common.sring->req_prod;
> rmb(); /* Ensure we see queued requests up to 'rp'. */
>
> + if (blkif->reqrate && (blkif->reqcount >= blkif->reqrate)) {
> + return (rc != rp) ? 2 : 0;
> + }
> +
> while (rc != rp) {
> if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc))
> break;
>
> + if (blkif->reqrate) {
> + if (blkif->reqcount >= blkif->reqrate)
> + return 2;
> + }
> +
> if (kthread_should_stop())
> return 1;
>
> @@ -434,8 +463,8 @@ static int _do_block_io_op(blkif_t *blki
>
> /* Apply all sanity checks to /private copy/ of request. */
> barrier();
> -
> dispatch_rw_block_io(blkif, &req, pending_req);
> + blkif->reqcount++;
> break;
> case BLKIF_OP_DISCARD:
> blk_rings->common.req_cons = rc;
> @@ -452,7 +481,7 @@ static int _do_block_io_op(blkif_t *blki
> break;
> default:
> /* A good sign something is wrong: sleep for a while to
> - * avoid excessive CPU consumption by a bad guest. */
> + * avoid excessive CPU consumption by a bad guest.*/
> msleep(1);
> blk_rings->common.req_cons = rc;
> barrier();
> @@ -501,6 +530,7 @@ static void dispatch_rw_block_io(blkif_t
> uint32_t flags;
> int ret, i;
> int operation;
> + struct timeval cur_time;
>
> switch (req->operation) {
> case BLKIF_OP_READ:
> @@ -658,6 +688,7 @@ static void dispatch_rw_block_io(blkif_t
> else
> blkif->st_wr_sect += preq.nr_sects;
>
> + jiffies_to_timeval(jiffies, &cur_time);
> return;
>
> fail_flush:
> diff -NruabBEp xen_blkback_limit.orig/common.h xen_blkback_limit.new//common.h
> --- xen_blkback_limit.orig/common.h 2012-12-04 13:03:58.000000000 +0400
> +++ xen_blkback_limit.new//common.h 2013-01-28 08:09:35.000000000 +0400
> @@ -82,6 +82,11 @@ typedef struct blkif_st {
> unsigned int waiting_reqs;
> struct request_queue *plug;
>
> + /* qos information */
> + unsigned long reqtime;
> + int reqcount;
> + int reqrate;
> +
> /* statistics */
> unsigned long st_print;
> int st_rd_req;
> @@ -106,6 +111,8 @@ struct backend_info
> unsigned major;
> unsigned minor;
> char *mode;
> + /* qos information */
> + struct xenbus_watch reqrate_watch;
> };
>
> blkif_t *blkif_alloc(domid_t domid);
> diff -NruabBEp xen_blkback_limit.orig/xenbus.c xen_blkback_limit.new//xenbus.c
> --- xen_blkback_limit.orig/xenbus.c 2012-12-04 13:03:58.000000000 +0400
> +++ xen_blkback_limit.new//xenbus.c 2013-01-28 08:22:26.000000000 +0400
> @@ -120,6 +120,79 @@ static void update_blkif_status(blkif_t
> } \
> static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
>
> +static ssize_t
> +show_reqrate(struct device *_dev, struct device_attribute *attr, char *buf)
> +{
> + ssize_t ret = -ENODEV;
> + struct xenbus_device *dev;
> + struct backend_info *be;
> +
> + if (!get_device(_dev))
> + return ret;
> +
> + dev = to_xenbus_device(_dev);
> + be = dev_get_drvdata(&dev->dev);
> +
> + if (be != NULL)
> + ret = sprintf(buf, "%d\n", be->blkif->reqrate);
> +
> + put_device(_dev);
> +
> + return ret;
> +}
> +
> +static ssize_t
> +store_reqrate(struct device *_dev, struct device_attribute *attr,
> + const char *buf, size_t size)
> +{
> + int value;
> + struct xenbus_device *dev;
> + struct backend_info *be;
> +
> + if (!capable(CAP_SYS_ADMIN))
> + return -EPERM;
> +
> + if (!get_device(_dev))
> + return -ENODEV;
> +
> + if (sscanf(buf, "%d", &value) != 1)
> + return -EINVAL;
> +
> + dev = to_xenbus_device(_dev);
> + be = dev_get_drvdata(&dev->dev);
> +
> + if (be != NULL)
> + be->blkif->reqrate = value;
> +
> + put_device(_dev);
> +
> + return size;
> +}
> +static DEVICE_ATTR(reqrate, S_IRUGO | S_IWUSR, show_reqrate,
> + store_reqrate);
> +
> +static ssize_t
> +show_reqcount(struct device *_dev, struct device_attribute *attr, char *buf)
> +{
> + ssize_t ret = -ENODEV;
> + struct xenbus_device *dev;
> + struct backend_info *be;
> +
> + if (!get_device(_dev))
> + return ret;
> +
> + dev = to_xenbus_device(_dev);
> + be = dev_get_drvdata(&dev->dev);
> +
> + if (be != NULL)
> + ret = sprintf(buf, "%d\n", be->blkif->reqcount);
> +
> + put_device(_dev);
> +
> + return ret;
> +}
> +static DEVICE_ATTR(reqcount, S_IRUGO | S_IWUSR, show_reqcount, NULL);
> +
> VBD_SHOW(oo_req, "%d\n", be->blkif->st_oo_req);
> VBD_SHOW(rd_req, "%d\n", be->blkif->st_rd_req);
> VBD_SHOW(wr_req, "%d\n", be->blkif->st_wr_req);
> @@ -146,6 +219,17 @@ static const struct attribute_group vbds
> .attrs = vbdstat_attrs,
> };
>
> +static struct attribute *vbdreq_attrs[] = {
> + &dev_attr_reqrate.attr,
> + &dev_attr_reqcount.attr,
> + NULL
> +};
> +
> +static const struct attribute_group vbdreq_group = {
> + .name = "qos",
> + .attrs = vbdreq_attrs,
> +};
> +
> VBD_SHOW(physical_device, "%x:%x\n", be->major, be->minor);
> VBD_SHOW(mode, "%s\n", be->mode);
>
> @@ -165,8 +249,13 @@ int xenvbd_sysfs_addif(struct xenbus_dev
> if (error)
> goto fail3;
>
> + error = sysfs_create_group(&dev->dev.kobj, &vbdreq_group);
> + if (error)
> + goto fail4;
> +
> return 0;
>
> +fail4: sysfs_remove_group(&dev->dev.kobj, &vbdreq_group);
> fail3: sysfs_remove_group(&dev->dev.kobj, &vbdstat_group);
> fail2: device_remove_file(&dev->dev, &dev_attr_mode);
> fail1: device_remove_file(&dev->dev, &dev_attr_physical_device);
> @@ -175,6 +264,7 @@ fail1: device_remove_file(&dev->dev, &de
>
> void xenvbd_sysfs_delif(struct xenbus_device *dev)
> {
> + sysfs_remove_group(&dev->dev.kobj, &vbdreq_group);
> sysfs_remove_group(&dev->dev.kobj, &vbdstat_group);
> device_remove_file(&dev->dev, &dev_attr_mode);
> device_remove_file(&dev->dev, &dev_attr_physical_device);
> @@ -201,6 +291,12 @@ static int blkback_remove(struct xenbus_
> be->cdrom_watch.node = NULL;
> }
>
> + if (be->reqrate_watch.node) {
> + unregister_xenbus_watch(&be->reqrate_watch);
> + kfree(be->reqrate_watch.node);
> + be->reqrate_watch.node = NULL;
> + }
> +
> if (be->blkif) {
> blkif_disconnect(be->blkif);
> vbd_free(&be->blkif->vbd);
> @@ -338,6 +434,7 @@ static void backend_changed(struct xenbu
> struct xenbus_device *dev = be->dev;
> int cdrom = 0;
> char *device_type;
> + char name[TASK_COMM_LEN];
>
> DPRINTK("");
>
> @@ -376,6 +473,21 @@ static void backend_changed(struct xenbu
> kfree(device_type);
> }
>
> + /* gather information about QoS policy for this device. */
> + err = blkback_name(be->blkif, name);
> + if (err) {
> + xenbus_dev_error(be->dev, err, "get blkback dev name");
> + return;
> + }
> +
> + err = xenbus_gather(XBT_NIL, dev->otherend,
> + "reqrate", "%d", &be->blkif->reqrate,
> + NULL);
> + if (err)
> + DPRINTK("%s xenbus_gather(reqrate) error", name);
> +
> + be->blkif->reqtime = jiffies;
> +
> if (be->major == 0 && be->minor == 0) {
> /* Front end dir is a number, which is used as the handle. */
>
> @@ -482,6 +594,30 @@ static void frontend_changed(struct xenb
>
> /* ** Connection ** */
>
> +static void reqrate_changed(struct xenbus_watch *watch,
> + const char **vec, unsigned int len)
> +{
> + struct backend_info *be = container_of(watch, struct backend_info,
> + reqrate_watch);
> + int err;
> + char name[TASK_COMM_LEN];
> +
> + err = blkback_name(be->blkif, name);
> + if (err) {
> + xenbus_dev_error(be->dev, err, "get blkback dev name");
> + return;
> + }
> +
> + err = xenbus_gather(XBT_NIL, be->dev->otherend,
> + "reqrate", "%d",
> + &be->blkif->reqrate, NULL);
> + if (err) {
> + DPRINTK("%s xenbus_gather(reqrate) error", name);
> + } else {
> + if (be->blkif->reqrate <= 0)
> + be->blkif->reqrate = 0;
> + }
> +}
>
> /**
> * Write the physical details regarding the block device to the store, and
> @@ -542,6 +678,21 @@ again:
> xenbus_dev_fatal(dev, err, "%s: switching to Connected state",
> dev->nodename);
>
> + if (be->reqrate_watch.node) {
> + unregister_xenbus_watch(&be->reqrate_watch);
> + kfree(be->reqrate_watch.node);
> + be->reqrate_watch.node = NULL;
> + }
> +
> + err = xenbus_watch_path2(dev, dev->otherend, "reqrate",
> + &be->reqrate_watch,
> + reqrate_changed);
> + if (err) {
> + xenbus_dev_fatal(dev, err, "%s: watching reqrate",
> + dev->nodename);
> + goto abort;
> + }
> +
> return;
> abort:
> xenbus_transaction_end(xbt, 1);
>
> 2013/1/31 Wei Liu <wei.liu2@citrix.com>:
> > On Thu, 2013-01-31 at 05:14 +0000, Vasiliy Tolstov wrote:
> >> Sorry forget to send patch
> >> https://bitbucket.org/go2clouds/patches/raw/master/xen_blkback_limit/3.6.9-1.patch
> >> Patch for kernel 3.6.9, but if that needed i can rebase it to current
> >> git Linus tree.
> >
> > Can you inline your patch in your email so that developer can comment on
> > it.
> >
> >
> > Wei.
> >
>
>
>
> --
> Vasiliy Tolstov,
> Clodo.ru
> e-mail: v.tolstov@selfip.ru
> jabber: vase@selfip.ru
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> http://lists.xen.org/xen-devel
>
next prev parent reply other threads:[~2013-02-01 14:42 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-01-31 5:12 blkback disk I/O limit patch Vasiliy Tolstov
2013-01-31 5:14 ` Vasiliy Tolstov
2013-01-31 18:05 ` Wei Liu
2013-02-01 6:53 ` Vasiliy Tolstov
2013-02-01 14:42 ` Konrad Rzeszutek Wilk [this message]
2013-02-05 13:14 ` [PATCH 1/1] drivers/block/xen-blkback: Limit blkback i/o Vasiliy Tolstov
2013-02-05 13:17 ` blkback disk I/O limit patch Vasiliy Tolstov
2013-02-01 10:59 ` Vasiliy Tolstov
2013-02-05 15:37 ` Alex Bligh
2013-02-05 16:36 ` Vasiliy Tolstov
2013-02-05 18:01 ` Alex Bligh
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20130201144255.GA7461@konrad-lan.dumpdata.com \
--to=konrad@kernel.org \
--cc=v.tolstov@selfip.ru \
--cc=wei.liu2@citrix.com \
--cc=xen-devel@lists.xen.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.