From: Bob Liu <bob.liu@oracle.com>
To: "Roger Pau Monné" <roger.pau@citrix.com>
Cc: linux-kernel@vger.kernel.org, xen-devel@lists.xenproject.org,
	konrad.wilk@oracle.com, jgross@suse.com
Subject: Re: [PATCH 3/3] xen-blkfront: dynamic configuration of per-vbd resources
Date: Thu, 21 Jul 2016 18:08:05 +0800	[thread overview]
Message-ID: <57909F05.9030809@oracle.com> (raw)
In-Reply-To: <20160721085756.ps4rtdns4xh35yii@mac>


On 07/21/2016 04:57 PM, Roger Pau Monné wrote:
> On Fri, Jul 15, 2016 at 05:31:49PM +0800, Bob Liu wrote:
>> The current VBD layer reserves buffer space for each attached device based on
>> three statically configured settings which are read at boot time.
>>  * max_indirect_segs: Maximum number of indirect segments.
>>  * max_ring_page_order: Maximum order of pages to be used for the shared ring.
>>  * max_queues: Maximum number of queues (rings) to be used.
>>
>> But the storage backend, workload, and guest memory result in very different
>> tuning requirements. It's impossible to centrally predict application
>> characteristics, so it's best to allow these settings to be dynamically
>> adjusted based on the workload inside the guest.
>>
>> Usage:
>> Show current values:
>> cat /sys/devices/vbd-xxx/max_indirect_segs
>> cat /sys/devices/vbd-xxx/max_ring_page_order
>> cat /sys/devices/vbd-xxx/max_queues
>>
>> Write new values:
>> echo <new value> > /sys/devices/vbd-xxx/max_indirect_segs
>> echo <new value> > /sys/devices/vbd-xxx/max_ring_page_order
>> echo <new value> > /sys/devices/vbd-xxx/max_queues
>>
>> Signed-off-by: Bob Liu <bob.liu@oracle.com>
>> --
>> v2: Add the device lock and address other comments from Konrad.
>> ---
>>  drivers/block/xen-blkfront.c | 285 ++++++++++++++++++++++++++++++++++++++++++-
>>  1 file changed, 283 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
>> index 10f46a8..9a5ed22 100644
>> --- a/drivers/block/xen-blkfront.c
>> +++ b/drivers/block/xen-blkfront.c
>> @@ -46,6 +46,7 @@
>>  #include <linux/scatterlist.h>
>>  #include <linux/bitmap.h>
>>  #include <linux/list.h>
>> +#include <linux/delay.h>
>>  
>>  #include <xen/xen.h>
>>  #include <xen/xenbus.h>
>> @@ -212,6 +213,11 @@ struct blkfront_info
>>  	/* Save uncomplete reqs and bios for migration. */
>>  	struct list_head requests;
>>  	struct bio_list bio_list;
>> +	/* For dynamic configuration. */
>> +	unsigned int reconfiguring:1;
>> +	int new_max_indirect_segments;
>> +	int new_max_ring_page_order;
>> +	int new_max_queues;
>>  };
>>  
>>  static unsigned int nr_minors;
>> @@ -1350,6 +1356,31 @@ static void blkif_free(struct blkfront_info *info, int suspend)
>>  	for (i = 0; i < info->nr_rings; i++)
>>  		blkif_free_ring(&info->rinfo[i]);
>>  
>> +	/* Remove old xenstore nodes. */
>> +	if (info->nr_ring_pages > 1)
>> +		xenbus_rm(XBT_NIL, info->xbdev->nodename, "ring-page-order");
>> +
>> +	if (info->nr_rings == 1) {
>> +		if (info->nr_ring_pages == 1) {
>> +			xenbus_rm(XBT_NIL, info->xbdev->nodename, "ring-ref");
>> +		} else {
>> +			for (i = 0; i < info->nr_ring_pages; i++) {
>> +				char ring_ref_name[RINGREF_NAME_LEN];
>> +
>> +				snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref%u", i);
>> +				xenbus_rm(XBT_NIL, info->xbdev->nodename, ring_ref_name);
>> +			}
>> +		}
>> +	} else {
>> +		xenbus_rm(XBT_NIL, info->xbdev->nodename, "multi-queue-num-queues");
>> +
>> +		for (i = 0; i < info->nr_rings; i++) {
>> +			char queuename[QUEUE_NAME_LEN];
>> +
>> +			snprintf(queuename, QUEUE_NAME_LEN, "queue-%u", i);
>> +			xenbus_rm(XBT_NIL, info->xbdev->nodename, queuename);
>> +		}
>> +	}
>>  	kfree(info->rinfo);
>>  	info->rinfo = NULL;
>>  	info->nr_rings = 0;
>> @@ -1772,6 +1803,10 @@ static int talk_to_blkback(struct xenbus_device *dev,
>>  		info->nr_ring_pages = 1;
>>  	else {
>>  		ring_page_order = min(xen_blkif_max_ring_order, max_page_order);
>> +		if (info->new_max_ring_page_order) {
> 
> Instead of calling this "new_max_ring_page_order", could you just call it 
> max_ring_page_order, initialize it to xen_blkif_max_ring_order by default 


Sure, I can do that.


> and use it everywhere instead of xen_blkif_max_ring_order?


But "xen_blkif_max_ring_order" still have to be used here, this is the only place "xen_blkif_max_ring_order" is used(except checking the value of it in xlblk_init()).


> 
>> +			BUG_ON(info->new_max_ring_page_order > max_page_order);
>> +			ring_page_order = info->new_max_ring_page_order;
>> +		}
>>  		info->nr_ring_pages = 1 << ring_page_order;
>>  	}
>>  
>> @@ -1895,6 +1930,10 @@ static int negotiate_mq(struct blkfront_info *info)
>>  		backend_max_queues = 1;
>>  
>>  	info->nr_rings = min(backend_max_queues, xen_blkif_max_queues);
>> +	if (info->new_max_queues) {
> 
> Same here IMHO, this is going to make the code flow slightly easier to 
> understand.
> 
>> +		BUG_ON(info->new_max_queues > backend_max_queues);
>> +		info->nr_rings = info->new_max_queues;
>> +	}
>>  	/* We need at least one ring. */
>>  	if (!info->nr_rings)
>>  		info->nr_rings = 1;
>> @@ -2352,11 +2391,227 @@ static void blkfront_gather_backend_features(struct blkfront_info *info)
>>  			    NULL);
>>  	if (err)
>>  		info->max_indirect_segments = 0;
>> -	else
>> +	else {
>>  		info->max_indirect_segments = min(indirect_segments,
>>  						  xen_blkif_max_segments);
>> +		if (info->new_max_indirect_segments) {
>> +			BUG_ON(info->new_max_indirect_segments > indirect_segments);
>> +			info->max_indirect_segments = info->new_max_indirect_segments;
>> +		}
>> +	}
>> +}
>> +
>> +static ssize_t max_ring_page_order_show(struct device *dev,
>> +					struct device_attribute *attr, char *page)
>> +{
>> +	struct blkfront_info *info = dev_get_drvdata(dev);
>> +
>> +	return sprintf(page, "%u\n", get_order(info->nr_ring_pages * XEN_PAGE_SIZE));
>> +}
>> +
>> +static ssize_t max_indirect_segs_show(struct device *dev,
>> +				      struct device_attribute *attr, char *page)
>> +{
>> +	struct blkfront_info *info = dev_get_drvdata(dev);
>> +
>> +	return sprintf(page, "%u\n", info->max_indirect_segments);
>> +}
>> +
>> +static ssize_t max_queues_show(struct device *dev,
>> +			       struct device_attribute *attr, char *page)
>> +{
>> +	struct blkfront_info *info = dev_get_drvdata(dev);
>> +
>> +	return sprintf(page, "%u\n", info->nr_rings);
>> +}
>> +
>> +static ssize_t dynamic_reconfig_device(struct blkfront_info *info, ssize_t count)
>> +{
>> +	unsigned int i;
>> +	int err = -EBUSY;
>> +
>> +	/*
>> +	 * Make sure no migration in parallel, device lock is actually a
>> +	 * mutex.
>> +	 */
>> +	if (!device_trylock(&info->xbdev->dev)) {
>> +		pr_err("Failed to acquire dev:%s lock, may be in migration.\n",
>> +			dev_name(&info->xbdev->dev));
>> +		return err;
>> +	}
>> +
>> +	/*
>> +	 * Prevent new requests and guarantee no uncompleted reqs.
>> +	 */
>> +	blk_mq_freeze_queue(info->rq);
>> +	if (part_in_flight(&info->gd->part0))
>> +		goto out;
>> +
>> +	/*
>> +	 * Front 				Backend
>> +	 * Switch to XenbusStateClosed
>> +	 *					frontend_changed():
>> +	 *					 case XenbusStateClosed:
>> +	 *						xen_blkif_disconnect()
>> +	 *						Switch to XenbusStateClosed
>> +	 * blkfront_resume():
>> +	 *					frontend_changed():
>> +	 *						reconnect
>> +	 * Wait until XenbusStateConnected
>> +	 */
>> +	info->reconfiguring = true;
>> +	xenbus_switch_state(info->xbdev, XenbusStateClosed);
>> +
>> +	/* Poll every 100ms, 1 minute timeout. */
>> +	for (i = 0; i < 600; i++) {
>> +		/*
>> +		 * Wait for the backend to enter XenbusStateClosed;
>> +		 * blkback_changed() will clear reconfiguring.
>> +		 */
>> +		if (!info->reconfiguring)
>> +			goto resume;
>> +		schedule_timeout_interruptible(msecs_to_jiffies(100));
>> +	}
> 
> Instead of having this wait, could you just set info->reconfiguring = 1, set 
> the frontend state to XenbusStateClosed and mimic exactly what a resume from 
> suspension does? blkback_changed would have to set the frontend state to 
> InitWait when it detects that the backend has switched to Closed, and call 
> blkfront_resume.


I think that won't work.
In the real "resume" case, the power management system will trigger all ->resume() path.
But there is no place for dynamic configuration.
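
For context, the only caller of blkfront_resume() today is the xenbus PM
hook (a sketch from memory, not part of this patch):

	static struct xenbus_driver blkfront_driver = {
		.ids  = blkfront_ids,
		.probe = blkfront_probe,
		.remove = blkfront_remove,
		.resume = blkfront_resume,	/* only reached via suspend/resume */
		.otherend_changed = blkback_changed,
		.is_ready = blkfront_is_ready,
	};

So for a sysfs-triggered reconfiguration something still has to drive the
Closed -> reconnect handshake explicitly, which is what the wait loop in
dynamic_reconfig_device() does.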

Thanks,
Bob Liu

Thread overview: 20+ messages
2016-07-15  9:31 [PATCH 1/3] xen-blkfront: fix places not updated after introducing 64KB page granularity Bob Liu
2016-07-15  9:31 ` [PATCH 2/3] xen-blkfront: introduce blkif_set_queue_limits() Bob Liu
2016-07-21  8:29   ` Roger Pau Monné
2016-07-21  9:44     ` Bob Liu
2016-07-15  9:31 ` [PATCH 3/3] xen-blkfront: dynamic configuration of per-vbd resources Bob Liu
2016-07-21  8:57   ` Roger Pau Monné
2016-07-21 10:08     ` Bob Liu [this message]
2016-07-22  7:45       ` Roger Pau Monné
2016-07-22  8:17         ` Bob Liu
2016-07-22  9:34           ` Roger Pau Monné
2016-07-22  9:43             ` Bob Liu
2016-07-22 11:45               ` Roger Pau Monné
2016-07-22 22:18                 ` Bob Liu
2016-07-25  9:20                   ` Roger Pau Monné
2016-07-25 10:29                     ` Bob Liu
2016-07-25 10:53                       ` Roger Pau Monné
2016-07-25 11:08                         ` Bob Liu
2016-07-25 12:11                           ` Roger Pau Monné
2016-07-25 12:25                             ` Bob Liu
2016-07-21  8:06 ` [PATCH 1/3] xen-blkfront: fix places not updated after introducing 64KB page granularity Roger Pau Monné
