All of lore.kernel.org
 help / color / mirror / Atom feed
From: Bob Liu <bob.liu@oracle.com>
To: "Roger Pau Monné" <roger.pau@citrix.com>
Cc: linux-kernel@vger.kernel.org, xen-devel@lists.xenproject.org,
	konrad.wilk@oracle.com, jgross@suse.com
Subject: Re: [PATCH 3/3] xen-blkfront: dynamic configuration of per-vbd resources
Date: Thu, 21 Jul 2016 18:08:05 +0800	[thread overview]
Message-ID: <57909F05.9030809@oracle.com> (raw)
In-Reply-To: <20160721085756.ps4rtdns4xh35yii@mac>


On 07/21/2016 04:57 PM, Roger Pau Monné wrote:
> On Fri, Jul 15, 2016 at 05:31:49PM +0800, Bob Liu wrote:
>> The current VBD layer reserves buffer space for each attached device based on
>> three statically configured settings which are read at boot time.
>>  * max_indirect_segs: Maximum amount of segments.
>>  * max_ring_page_order: Maximum order of pages to be used for the shared ring.
>>  * max_queues: Maximum of queues(rings) to be used.
>>
>> But the storage backend, workload, and guest memory result in very different
>> tuning requirements. It's impossible to centrally predict application
>> characteristics so it's best to leave allow the settings can be dynamiclly
>> adjusted based on workload inside the Guest.
>>
>> Usage:
>> Show current values:
>> cat /sys/devices/vbd-xxx/max_indirect_segs
>> cat /sys/devices/vbd-xxx/max_ring_page_order
>> cat /sys/devices/vbd-xxx/max_queues
>>
>> Write new values:
>> echo <new value> > /sys/devices/vbd-xxx/max_indirect_segs
>> echo <new value> > /sys/devices/vbd-xxx/max_ring_page_order
>> echo <new value> > /sys/devices/vbd-xxx/max_queues
>>
>> Signed-off-by: Bob Liu <bob.liu@oracle.com>
>> --
>> v2: Add device lock and other comments from Konrad.
>> ---
>>  drivers/block/xen-blkfront.c | 285 ++++++++++++++++++++++++++++++++++++++++++-
>>  1 file changed, 283 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
>> index 10f46a8..9a5ed22 100644
>> --- a/drivers/block/xen-blkfront.c
>> +++ b/drivers/block/xen-blkfront.c
>> @@ -46,6 +46,7 @@
>>  #include <linux/scatterlist.h>
>>  #include <linux/bitmap.h>
>>  #include <linux/list.h>
>> +#include <linux/delay.h>
>>  
>>  #include <xen/xen.h>
>>  #include <xen/xenbus.h>
>> @@ -212,6 +213,11 @@ struct blkfront_info
>>  	/* Save uncomplete reqs and bios for migration. */
>>  	struct list_head requests;
>>  	struct bio_list bio_list;
>> +	/* For dynamic configuration. */
>> +	unsigned int reconfiguring:1;
>> +	int new_max_indirect_segments;
>> +	int new_max_ring_page_order;
>> +	int new_max_queues;
>>  };
>>  
>>  static unsigned int nr_minors;
>> @@ -1350,6 +1356,31 @@ static void blkif_free(struct blkfront_info *info, int suspend)
>>  	for (i = 0; i < info->nr_rings; i++)
>>  		blkif_free_ring(&info->rinfo[i]);
>>  
>> +	/* Remove old xenstore nodes. */
>> +	if (info->nr_ring_pages > 1)
>> +		xenbus_rm(XBT_NIL, info->xbdev->nodename, "ring-page-order");
>> +
>> +	if (info->nr_rings == 1) {
>> +		if (info->nr_ring_pages == 1) {
>> +			xenbus_rm(XBT_NIL, info->xbdev->nodename, "ring-ref");
>> +		} else {
>> +			for (i = 0; i < info->nr_ring_pages; i++) {
>> +				char ring_ref_name[RINGREF_NAME_LEN];
>> +
>> +				snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref%u", i);
>> +				xenbus_rm(XBT_NIL, info->xbdev->nodename, ring_ref_name);
>> +			}
>> +		}
>> +	} else {
>> +		xenbus_rm(XBT_NIL, info->xbdev->nodename, "multi-queue-num-queues");
>> +
>> +		for (i = 0; i < info->nr_rings; i++) {
>> +			char queuename[QUEUE_NAME_LEN];
>> +
>> +			snprintf(queuename, QUEUE_NAME_LEN, "queue-%u", i);
>> +			xenbus_rm(XBT_NIL, info->xbdev->nodename, queuename);
>> +		}
>> +	}
>>  	kfree(info->rinfo);
>>  	info->rinfo = NULL;
>>  	info->nr_rings = 0;
>> @@ -1772,6 +1803,10 @@ static int talk_to_blkback(struct xenbus_device *dev,
>>  		info->nr_ring_pages = 1;
>>  	else {
>>  		ring_page_order = min(xen_blkif_max_ring_order, max_page_order);
>> +		if (info->new_max_ring_page_order) {
> 
> Instead of calling this "new_max_ring_page_order", could you just call it 
> max_ring_page_order, iniitalize it to xen_blkif_max_ring_order by default 


Sure, I can do that.


> and use it everywhere instead of xen_blkif_max_ring_order?


But "xen_blkif_max_ring_order" still have to be used here, this is the only place "xen_blkif_max_ring_order" is used(except checking the value of it in xlblk_init()).


> 
>> +			BUG_ON(info->new_max_ring_page_order > max_page_order);
>> +			ring_page_order = info->new_max_ring_page_order;
>> +		}
>>  		info->nr_ring_pages = 1 << ring_page_order;
>>  	}
>>  
>> @@ -1895,6 +1930,10 @@ static int negotiate_mq(struct blkfront_info *info)
>>  		backend_max_queues = 1;
>>  
>>  	info->nr_rings = min(backend_max_queues, xen_blkif_max_queues);
>> +	if (info->new_max_queues) {
> 
> Same here IMHO, this is going to make the code flow slightly easier to 
> understand.
> 
>> +		BUG_ON(info->new_max_queues > backend_max_queues);
>> +		info->nr_rings = info->new_max_queues;
>> +	}
>>  	/* We need at least one ring. */
>>  	if (!info->nr_rings)
>>  		info->nr_rings = 1;
>> @@ -2352,11 +2391,227 @@ static void blkfront_gather_backend_features(struct blkfront_info *info)
>>  			    NULL);
>>  	if (err)
>>  		info->max_indirect_segments = 0;
>> -	else
>> +	else {
>>  		info->max_indirect_segments = min(indirect_segments,
>>  						  xen_blkif_max_segments);
>> +		if (info->new_max_indirect_segments) {
>> +			BUG_ON(info->new_max_indirect_segments > indirect_segments);
>> +			info->max_indirect_segments = info->new_max_indirect_segments;
>> +		}
>> +	}
>> +}
>> +
>> +static ssize_t max_ring_page_order_show(struct device *dev,
>> +					struct device_attribute *attr, char *page)
>> +{
>> +	struct blkfront_info *info = dev_get_drvdata(dev);
>> +
>> +	return sprintf(page, "%u\n", get_order(info->nr_ring_pages * XEN_PAGE_SIZE));
>> +}
>> +
>> +static ssize_t max_indirect_segs_show(struct device *dev,
>> +				      struct device_attribute *attr, char *page)
>> +{
>> +	struct blkfront_info *info = dev_get_drvdata(dev);
>> +
>> +	return sprintf(page, "%u\n", info->max_indirect_segments);
>> +}
>> +
>> +static ssize_t max_queues_show(struct device *dev,
>> +			       struct device_attribute *attr, char *page)
>> +{
>> +	struct blkfront_info *info = dev_get_drvdata(dev);
>> +
>> +	return sprintf(page, "%u\n", info->nr_rings);
>> +}
>> +
>> +static ssize_t dynamic_reconfig_device(struct blkfront_info *info, ssize_t count)
>> +{
>> +	unsigned int i;
>> +	int err = -EBUSY;
>> +
>> +	/*
>> +	 * Make sure no migration in parallel, device lock is actually a
>> +	 * mutex.
>> +	 */
>> +	if (!device_trylock(&info->xbdev->dev)) {
>> +		pr_err("Fail to acquire dev:%s lock, may be in migration.\n",
>> +			dev_name(&info->xbdev->dev));
>> +		return err;
>> +	}
>> +
>> +	/*
>> +	 * Prevent new requests and guarantee no uncompleted reqs.
>> +	 */
>> +	blk_mq_freeze_queue(info->rq);
>> +	if (part_in_flight(&info->gd->part0))
>> +		goto out;
>> +
>> +	/*
>> +	 * Front 				Backend
>> +	 * Switch to XenbusStateClosed
>> +	 *					frontend_changed():
>> +	 *					 case XenbusStateClosed:
>> +	 *						xen_blkif_disconnect()
>> +	 *						Switch to XenbusStateClosed
>> +	 * blkfront_resume():
>> +	 *					frontend_changed():
>> +	 *						reconnect
>> +	 * Wait until XenbusStateConnected
>> +	 */
>> +	info->reconfiguring = true;
>> +	xenbus_switch_state(info->xbdev, XenbusStateClosed);
>> +
>> +	/* Poll every 100ms, 1 minute timeout. */
>> +	for (i = 0; i < 600; i++) {
>> +		/*
>> +		 * Wait backend enter XenbusStateClosed, blkback_changed()
>> +		 * will clear reconfiguring.
>> +		 */
>> +		if (!info->reconfiguring)
>> +			goto resume;
>> +		schedule_timeout_interruptible(msecs_to_jiffies(100));
>> +	}
> 
> Instead of having this wait, could you just set info->reconfiguring = 1, set 
> the frontend state to XenbusStateClosed and mimic exactly what a resume from 
> suspension does? blkback_changed would have to set the frontend state to 
> InitWait when it detects that the backend has switched to Closed, and call 
> blkfront_resume.


I think that won't work.
In the real "resume" case, the power management system will trigger all ->resume() path.
But there is no place for dynamic configuration.

Thanks,
Bob Liu

  reply	other threads:[~2016-07-21 10:09 UTC|newest]

Thread overview: 40+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-07-15  9:31 [PATCH 1/3] xen-blkfront: fix places not updated after introducing 64KB page granularity Bob Liu
2016-07-15  9:31 ` Bob Liu
2016-07-15  9:31 ` [PATCH 2/3] xen-blkfront: introduce blkif_set_queue_limits() Bob Liu
2016-07-15  9:31 ` Bob Liu
2016-07-21  8:29   ` Roger Pau Monné
2016-07-21  9:44     ` Bob Liu
2016-07-21  9:44       ` Bob Liu
2016-07-21  8:29   ` Roger Pau Monné
2016-07-15  9:31 ` [PATCH 3/3] xen-blkfront: dynamic configuration of per-vbd resources Bob Liu
2016-07-21  8:57   ` Roger Pau Monné
2016-07-21  8:57   ` Roger Pau Monné
2016-07-21 10:08     ` Bob Liu [this message]
2016-07-22  7:45       ` Roger Pau Monné
2016-07-22  8:17         ` Bob Liu
2016-07-22  8:17         ` Bob Liu
2016-07-22  9:34           ` Roger Pau Monné
2016-07-22  9:43             ` Bob Liu
2016-07-22 11:45               ` Roger Pau Monné
2016-07-22 11:45               ` Roger Pau Monné
2016-07-22 22:18                 ` Bob Liu
2016-07-22 22:18                 ` Bob Liu
2016-07-25  9:20                   ` Roger Pau Monné
2016-07-25  9:20                   ` Roger Pau Monné
2016-07-25 10:29                     ` Bob Liu
2016-07-25 10:53                       ` Roger Pau Monné
2016-07-25 10:53                       ` Roger Pau Monné
2016-07-25 11:08                         ` Bob Liu
2016-07-25 12:11                           ` Roger Pau Monné
2016-07-25 12:11                           ` Roger Pau Monné
2016-07-25 12:25                             ` Bob Liu
2016-07-25 12:25                             ` Bob Liu
2016-07-25 11:08                         ` Bob Liu
2016-07-25 10:29                     ` Bob Liu
2016-07-22  9:43             ` Bob Liu
2016-07-22  9:34           ` Roger Pau Monné
2016-07-22  7:45       ` Roger Pau Monné
2016-07-21 10:08     ` Bob Liu
2016-07-15  9:31 ` Bob Liu
2016-07-21  8:06 ` [PATCH 1/3] xen-blkfront: fix places not updated after introducing 64KB page granularity Roger Pau Monné
2016-07-21  8:06 ` Roger Pau Monné

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=57909F05.9030809@oracle.com \
    --to=bob.liu@oracle.com \
    --cc=jgross@suse.com \
    --cc=konrad.wilk@oracle.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=roger.pau@citrix.com \
    --cc=xen-devel@lists.xenproject.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.