* [PATCH v3] xen-blkfront: dynamic configuration of per-vbd resources
@ 2016-07-27 3:21 Bob Liu
2016-07-27 8:07 ` Roger Pau Monné
` (2 more replies)
0 siblings, 3 replies; 11+ messages in thread
From: Bob Liu @ 2016-07-27 3:21 UTC (permalink / raw)
To: linux-kernel; +Cc: xen-devel, konrad.wilk, roger.pau, Bob Liu
The current VBD layer reserves buffer space for each attached device based on
three statically configured settings which are read at boot time.
* max_indirect_segs: Maximum number of segments.
* max_ring_page_order: Maximum order of pages to be used for the shared ring.
* max_queues: Maximum number of queues (rings) to be used.
But the storage backend, workload, and guest memory result in very different
tuning requirements. It's impossible to centrally predict application
characteristics, so it's best to allow the settings to be dynamically
adjusted based on the workload inside the guest.
Usage:
Show current values:
cat /sys/devices/vbd-xxx/max_indirect_segs
cat /sys/devices/vbd-xxx/max_ring_page_order
cat /sys/devices/vbd-xxx/max_queues
Write new values:
echo <new value> > /sys/devices/vbd-xxx/max_indirect_segs
echo <new value> > /sys/devices/vbd-xxx/max_ring_page_order
echo <new value> > /sys/devices/vbd-xxx/max_queues
Signed-off-by: Bob Liu <bob.liu@oracle.com>
--
v3:
* Remove new_max_indirect_segments.
* Fix BUG_ON().
v2:
* Rename to max_ring_page_order.
* Remove the waiting code suggested by Roger.
---
drivers/block/xen-blkfront.c | 277 ++++++++++++++++++++++++++++++++++++++++--
1 file changed, 269 insertions(+), 8 deletions(-)
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 1b4c380..57baa54 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -212,6 +212,10 @@ struct blkfront_info
/* Save uncomplete reqs and bios for migration. */
struct list_head requests;
struct bio_list bio_list;
+ /* For dynamic configuration. */
+ unsigned int reconfiguring:1;
+ unsigned int max_ring_page_order;
+ unsigned int max_queues;
};
static unsigned int nr_minors;
@@ -1350,6 +1354,31 @@ static void blkif_free(struct blkfront_info *info, int suspend)
for (i = 0; i < info->nr_rings; i++)
blkif_free_ring(&info->rinfo[i]);
+ /* Remove old xenstore nodes. */
+ if (info->nr_ring_pages > 1)
+ xenbus_rm(XBT_NIL, info->xbdev->nodename, "ring-page-order");
+
+ if (info->nr_rings == 1) {
+ if (info->nr_ring_pages == 1) {
+ xenbus_rm(XBT_NIL, info->xbdev->nodename, "ring-ref");
+ } else {
+ for (i = 0; i < info->nr_ring_pages; i++) {
+ char ring_ref_name[RINGREF_NAME_LEN];
+
+ snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref%u", i);
+ xenbus_rm(XBT_NIL, info->xbdev->nodename, ring_ref_name);
+ }
+ }
+ } else {
+ xenbus_rm(XBT_NIL, info->xbdev->nodename, "multi-queue-num-queues");
+
+ for (i = 0; i < info->nr_rings; i++) {
+ char queuename[QUEUE_NAME_LEN];
+
+ snprintf(queuename, QUEUE_NAME_LEN, "queue-%u", i);
+ xenbus_rm(XBT_NIL, info->xbdev->nodename, queuename);
+ }
+ }
kfree(info->rinfo);
info->rinfo = NULL;
info->nr_rings = 0;
@@ -1763,15 +1792,20 @@ static int talk_to_blkback(struct xenbus_device *dev,
const char *message = NULL;
struct xenbus_transaction xbt;
int err;
- unsigned int i, max_page_order = 0;
+ unsigned int i, backend_max_order = 0;
unsigned int ring_page_order = 0;
err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
- "max-ring-page-order", "%u", &max_page_order);
+ "max-ring-page-order", "%u", &backend_max_order);
if (err != 1)
info->nr_ring_pages = 1;
else {
- ring_page_order = min(xen_blkif_max_ring_order, max_page_order);
+ if (info->max_ring_page_order)
+ /* Dynamic configured through /sys. */
+ ring_page_order = min(info->max_ring_page_order, backend_max_order);
+ else
+ /* Default. */
+ ring_page_order = min(xen_blkif_max_ring_order, backend_max_order);
info->nr_ring_pages = 1 << ring_page_order;
}
@@ -1894,7 +1928,13 @@ static int negotiate_mq(struct blkfront_info *info)
if (err < 0)
backend_max_queues = 1;
- info->nr_rings = min(backend_max_queues, xen_blkif_max_queues);
+ if (info->max_queues)
+ /* Dynamic configured through /sys */
+ info->nr_rings = min(backend_max_queues, info->max_queues);
+ else
+ /* Default. */
+ info->nr_rings = min(backend_max_queues, xen_blkif_max_queues);
+
/* We need at least one ring. */
if (!info->nr_rings)
info->nr_rings = 1;
@@ -2352,11 +2392,198 @@ static void blkfront_gather_backend_features(struct blkfront_info *info)
NULL);
if (err)
info->max_indirect_segments = 0;
- else
- info->max_indirect_segments = min(indirect_segments,
- xen_blkif_max_segments);
+ else {
+ if (info->max_indirect_segments)
+ /* Dynamic configured through /sys */
+ info->max_indirect_segments = min(indirect_segments,
+ info->max_indirect_segments);
+ else
+ info->max_indirect_segments = min(indirect_segments,
+ xen_blkif_max_segments);
+ }
+}
+
+static ssize_t max_ring_page_order_show(struct device *dev,
+ struct device_attribute *attr, char *page)
+{
+ struct blkfront_info *info = dev_get_drvdata(dev);
+
+ return sprintf(page, "%u\n", get_order(info->nr_ring_pages * XEN_PAGE_SIZE));
+}
+
+static ssize_t max_indirect_segs_show(struct device *dev,
+ struct device_attribute *attr, char *page)
+{
+ struct blkfront_info *info = dev_get_drvdata(dev);
+
+ return sprintf(page, "%u\n", info->max_indirect_segments);
+}
+
+static ssize_t max_queues_show(struct device *dev,
+ struct device_attribute *attr, char *page)
+{
+ struct blkfront_info *info = dev_get_drvdata(dev);
+
+ return sprintf(page, "%u\n", info->nr_rings);
+}
+
+static ssize_t dynamic_reconfig_device(struct blkfront_info *info, ssize_t count)
+{
+ /*
+ * Prevent new requests even to software request queue.
+ */
+ blk_mq_freeze_queue(info->rq);
+
+ /*
+ * Guarantee no uncompleted reqs.
+ */
+ if (part_in_flight(&info->gd->part0) || info->reconfiguring) {
+ blk_mq_unfreeze_queue(info->rq);
+ pr_err("Dev:%s busy, please retry later.\n", dev_name(&info->xbdev->dev));
+ return -EBUSY;
+ }
+
+ /*
+ * Frontend: Backend:
+ * Freeze_queue()
+ * Switch to XenbusStateClosed
+ * frontend_changed(StateClosed)
+ * > xen_blkif_disconnect()
+ * > Switch to XenbusStateClosed
+ * blkback_changed(StateClosed)
+ * > blkfront_resume()
+ * > Switch to StateInitialised
+ * frontend_changed(StateInitialised):
+ * > reconnect
+ * > Switch to StateConnected
+ * blkback_changed(StateConnected)
+ * > blkif_recover()
+ * > Also switch to StateConnected
+ * > Unfreeze_queue()
+ */
+ info->reconfiguring = true;
+ xenbus_switch_state(info->xbdev, XenbusStateClosed);
+
+ return count;
+}
+
+static ssize_t max_indirect_segs_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t count)
+{
+ ssize_t ret;
+ unsigned int max_segs = 0, backend_max_segs = 0;
+ struct blkfront_info *info = dev_get_drvdata(dev);
+ int err;
+
+ ret = kstrtouint(buf, 10, &max_segs);
+ if (ret < 0)
+ return ret;
+
+ if (max_segs == info->max_indirect_segments)
+ return count;
+
+ err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
+ "feature-max-indirect-segments", "%u", &backend_max_segs,
+ NULL);
+ if (err) {
+ pr_err("Backend %s doesn't support feature-indirect-segments.\n",
+ info->xbdev->otherend);
+ return -EOPNOTSUPP;
+ }
+
+ if (max_segs > backend_max_segs) {
+ pr_err("Invalid max indirect segment (%u), backend-max: %u.\n",
+ max_segs, backend_max_segs);
+ return -EINVAL;
+ }
+
+ info->max_indirect_segments = max_segs;
+
+ return dynamic_reconfig_device(info, count);
+}
+
+static ssize_t max_ring_page_order_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ ssize_t ret;
+ unsigned int max_order = 0, backend_max_order = 0;
+ struct blkfront_info *info = dev_get_drvdata(dev);
+ int err;
+
+ ret = kstrtouint(buf, 10, &max_order);
+ if (ret < 0)
+ return ret;
+
+ if ((1 << max_order) == info->nr_ring_pages)
+ return count;
+
+ if (max_order > XENBUS_MAX_RING_GRANT_ORDER) {
+ pr_err("Invalid max_ring_page_order (%u), max: %u.\n",
+ max_order, XENBUS_MAX_RING_GRANT_ORDER);
+ return -EINVAL;
+ }
+
+ err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
+ "max-ring-page-order", "%u", &backend_max_order);
+ if (err != 1) {
+ pr_err("Backend %s doesn't support feature multi-page-ring.\n",
+ info->xbdev->otherend);
+ return -EOPNOTSUPP;
+ }
+ if (max_order > backend_max_order) {
+ pr_err("Invalid max_ring_page_order (%u), backend supports max: %u.\n",
+ max_order, backend_max_order);
+ return -EINVAL;
+ }
+ info->max_ring_page_order = max_order;
+
+ return dynamic_reconfig_device(info, count);
+}
+
+static ssize_t max_queues_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ ssize_t ret;
+ unsigned int max_queues = 0, backend_max_queues = 0;
+ struct blkfront_info *info = dev_get_drvdata(dev);
+ int err;
+
+ ret = kstrtouint(buf, 10, &max_queues);
+ if (ret < 0)
+ return ret;
+
+ if (max_queues == info->nr_rings)
+ return count;
+
+ if (max_queues > num_online_cpus()) {
+ pr_err("Invalid max_queues (%u), can't bigger than online cpus: %u.\n",
+ max_queues, num_online_cpus());
+ return -EINVAL;
+ }
+
+ err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
+ "multi-queue-max-queues", "%u", &backend_max_queues);
+ if (err != 1) {
+ pr_err("Backend %s doesn't support block multi queue.\n",
+ info->xbdev->otherend);
+ return -EOPNOTSUPP;
+ }
+ if (max_queues > backend_max_queues) {
+ pr_err("Invalid max_queues (%u), backend supports max: %u.\n",
+ max_queues, backend_max_queues);
+ return -EINVAL;
+ }
+ info->max_queues = max_queues;
+
+ return dynamic_reconfig_device(info, count);
}
+static DEVICE_ATTR_RW(max_queues);
+static DEVICE_ATTR_RW(max_ring_page_order);
+static DEVICE_ATTR_RW(max_indirect_segs);
+
/*
* Invoked when the backend is finally 'ready' (and has told produced
* the details about the physical device - #sectors, size, etc).
@@ -2393,6 +2620,10 @@ static void blkfront_connect(struct blkfront_info *info)
* supports indirect descriptors, and how many.
*/
blkif_recover(info);
+ if (info->reconfiguring) {
+ blk_mq_unfreeze_queue(info->rq);
+ info->reconfiguring = false;
+ }
return;
default:
@@ -2443,6 +2674,22 @@ static void blkfront_connect(struct blkfront_info *info)
return;
}
+ err = device_create_file(&info->xbdev->dev, &dev_attr_max_ring_page_order);
+ if (err)
+ goto fail;
+
+ err = device_create_file(&info->xbdev->dev, &dev_attr_max_indirect_segs);
+ if (err) {
+ device_remove_file(&info->xbdev->dev, &dev_attr_max_ring_page_order);
+ goto fail;
+ }
+
+ err = device_create_file(&info->xbdev->dev, &dev_attr_max_queues);
+ if (err) {
+ device_remove_file(&info->xbdev->dev, &dev_attr_max_ring_page_order);
+ device_remove_file(&info->xbdev->dev, &dev_attr_max_indirect_segs);
+ goto fail;
+ }
xenbus_switch_state(info->xbdev, XenbusStateConnected);
/* Kick pending requests. */
@@ -2453,6 +2700,12 @@ static void blkfront_connect(struct blkfront_info *info)
add_disk(info->gd);
info->is_ready = 1;
+ return;
+
+fail:
+ blkif_free(info, 0);
+ xlvbd_release_gendisk(info);
+ return;
}
/**
@@ -2500,8 +2753,16 @@ static void blkback_changed(struct xenbus_device *dev,
break;
case XenbusStateClosed:
- if (dev->state == XenbusStateClosed)
+ if (dev->state == XenbusStateClosed) {
+ if (info->reconfiguring)
+ if (blkfront_resume(info->xbdev)) {
+ /* Resume failed. */
+ info->reconfiguring = false;
+ xenbus_switch_state(info->xbdev, XenbusStateClosed);
+ pr_err("Resume from dynamic configuration failed\n");
+ }
break;
+ }
/* Missed the backend's Closing state -- fallthrough */
case XenbusStateClosing:
if (info)
--
1.7.10.4
^ permalink raw reply related [flat|nested] 11+ messages in thread
* Re: [PATCH v3] xen-blkfront: dynamic configuration of per-vbd resources
2016-07-27 3:21 [PATCH v3] xen-blkfront: dynamic configuration of per-vbd resources Bob Liu
@ 2016-07-27 8:07 ` Roger Pau Monné
2016-07-27 9:12 ` Bob Liu
2016-07-27 10:59 ` Roger Pau Monné
2016-07-28 9:31 ` [Xen-devel] " David Vrabel
2 siblings, 1 reply; 11+ messages in thread
From: Roger Pau Monné @ 2016-07-27 8:07 UTC (permalink / raw)
To: Bob Liu; +Cc: linux-kernel, xen-devel, konrad.wilk
Hello,
Thanks for the fixes.
On Wed, Jul 27, 2016 at 11:21:25AM +0800, Bob Liu wrote:
> The current VBD layer reserves buffer space for each attached device based on
> three statically configured settings which are read at boot time.
> * max_indirect_segs: Maximum amount of segments.
> * max_ring_page_order: Maximum order of pages to be used for the shared ring.
> * max_queues: Maximum of queues(rings) to be used.
>
> But the storage backend, workload, and guest memory result in very different
> tuning requirements. It's impossible to centrally predict application
> characteristics so it's best to leave allow the settings can be dynamiclly
> adjusted based on workload inside the Guest.
>
> Usage:
> Show current values:
> cat /sys/devices/vbd-xxx/max_indirect_segs
> cat /sys/devices/vbd-xxx/max_ring_page_order
> cat /sys/devices/vbd-xxx/max_queues
>
> Write new values:
> echo <new value> > /sys/devices/vbd-xxx/max_indirect_segs
> echo <new value> > /sys/devices/vbd-xxx/max_ring_page_order
> echo <new value> > /sys/devices/vbd-xxx/max_queues
>
> Signed-off-by: Bob Liu <bob.liu@oracle.com>
> --
> v3:
> * Remove new_max_indirect_segments.
> * Fix BUG_ON().
> v2:
> * Rename to max_ring_page_order.
> * Remove the waiting code suggested by Roger.
> ---
> drivers/block/xen-blkfront.c | 277 ++++++++++++++++++++++++++++++++++++++++--
> 1 file changed, 269 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
> index 1b4c380..57baa54 100644
> --- a/drivers/block/xen-blkfront.c
> +++ b/drivers/block/xen-blkfront.c
> @@ -212,6 +212,10 @@ struct blkfront_info
> /* Save uncomplete reqs and bios for migration. */
> struct list_head requests;
> struct bio_list bio_list;
> + /* For dynamic configuration. */
> + unsigned int reconfiguring:1;
> + unsigned int max_ring_page_order;
> + unsigned int max_queues;
max_{ring_page_order/queues} should be after max_indirect_segments, and
reconfigurng should be of type bool (moreover when below you assign 'true'
or 'false' to it).
> };
>
> static unsigned int nr_minors;
> @@ -1350,6 +1354,31 @@ static void blkif_free(struct blkfront_info *info, int suspend)
> for (i = 0; i < info->nr_rings; i++)
> blkif_free_ring(&info->rinfo[i]);
>
> + /* Remove old xenstore nodes. */
> + if (info->nr_ring_pages > 1)
> + xenbus_rm(XBT_NIL, info->xbdev->nodename, "ring-page-order");
> +
> + if (info->nr_rings == 1) {
> + if (info->nr_ring_pages == 1) {
> + xenbus_rm(XBT_NIL, info->xbdev->nodename, "ring-ref");
> + } else {
> + for (i = 0; i < info->nr_ring_pages; i++) {
> + char ring_ref_name[RINGREF_NAME_LEN];
> +
> + snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref%u", i);
> + xenbus_rm(XBT_NIL, info->xbdev->nodename, ring_ref_name);
> + }
> + }
> + } else {
> + xenbus_rm(XBT_NIL, info->xbdev->nodename, "multi-queue-num-queues");
> +
> + for (i = 0; i < info->nr_rings; i++) {
> + char queuename[QUEUE_NAME_LEN];
> +
> + snprintf(queuename, QUEUE_NAME_LEN, "queue-%u", i);
> + xenbus_rm(XBT_NIL, info->xbdev->nodename, queuename);
> + }
> + }
> kfree(info->rinfo);
> info->rinfo = NULL;
> info->nr_rings = 0;
> @@ -1763,15 +1792,20 @@ static int talk_to_blkback(struct xenbus_device *dev,
> const char *message = NULL;
> struct xenbus_transaction xbt;
> int err;
> - unsigned int i, max_page_order = 0;
> + unsigned int i, backend_max_order = 0;
> unsigned int ring_page_order = 0;
>
> err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
> - "max-ring-page-order", "%u", &max_page_order);
> + "max-ring-page-order", "%u", &backend_max_order);
> if (err != 1)
> info->nr_ring_pages = 1;
> else {
> - ring_page_order = min(xen_blkif_max_ring_order, max_page_order);
> + if (info->max_ring_page_order)
> + /* Dynamic configured through /sys. */
> + ring_page_order = min(info->max_ring_page_order, backend_max_order);
> + else
> + /* Default. */
> + ring_page_order = min(xen_blkif_max_ring_order, backend_max_order);
> info->nr_ring_pages = 1 << ring_page_order;
In order to avoid this 'if' here (and below), what do you think of assigning
the global values to the blkfront_info structure in blkfront_probe? Then you
would only have to do min(info->max_ring_page_order, backend_max_order) in
order to get the min value.
> }
>
> @@ -1894,7 +1928,13 @@ static int negotiate_mq(struct blkfront_info *info)
> if (err < 0)
> backend_max_queues = 1;
>
> - info->nr_rings = min(backend_max_queues, xen_blkif_max_queues);
> + if (info->max_queues)
> + /* Dynamic configured through /sys */
> + info->nr_rings = min(backend_max_queues, info->max_queues);
> + else
> + /* Default. */
> + info->nr_rings = min(backend_max_queues, xen_blkif_max_queues);
> +
> /* We need at least one ring. */
> if (!info->nr_rings)
> info->nr_rings = 1;
> @@ -2352,11 +2392,198 @@ static void blkfront_gather_backend_features(struct blkfront_info *info)
> NULL);
> if (err)
> info->max_indirect_segments = 0;
> - else
> - info->max_indirect_segments = min(indirect_segments,
> - xen_blkif_max_segments);
> + else {
> + if (info->max_indirect_segments)
> + /* Dynamic configured through /sys */
> + info->max_indirect_segments = min(indirect_segments,
> + info->max_indirect_segments);
> + else
> + info->max_indirect_segments = min(indirect_segments,
> + xen_blkif_max_segments);
> + }
> +}
> +
> +static ssize_t max_ring_page_order_show(struct device *dev,
> + struct device_attribute *attr, char *page)
> +{
> + struct blkfront_info *info = dev_get_drvdata(dev);
> +
> + return sprintf(page, "%u\n", get_order(info->nr_ring_pages * XEN_PAGE_SIZE));
> +}
> +
> +static ssize_t max_indirect_segs_show(struct device *dev,
> + struct device_attribute *attr, char *page)
> +{
> + struct blkfront_info *info = dev_get_drvdata(dev);
> +
> + return sprintf(page, "%u\n", info->max_indirect_segments);
> +}
> +
> +static ssize_t max_queues_show(struct device *dev,
> + struct device_attribute *attr, char *page)
> +{
> + struct blkfront_info *info = dev_get_drvdata(dev);
> +
> + return sprintf(page, "%u\n", info->nr_rings);
> +}
> +
> +static ssize_t dynamic_reconfig_device(struct blkfront_info *info, ssize_t count)
> +{
> + /*
> + * Prevent new requests even to software request queue.
> + */
> + blk_mq_freeze_queue(info->rq);
> +
> + /*
> + * Guarantee no uncompleted reqs.
> + */
> + if (part_in_flight(&info->gd->part0) || info->reconfiguring) {
> + blk_mq_unfreeze_queue(info->rq);
> + pr_err("Dev:%s busy, please retry later.\n", dev_name(&info->xbdev->dev));
> + return -EBUSY;
> + }
> +
> + /*
> + * Frontend: Backend:
> + * Freeze_queue()
> + * Switch to XenbusStateClosed
> + * frontend_changed(StateClosed)
> + * > xen_blkif_disconnect()
> + * > Switch to XenbusStateClosed
> + * blkback_changed(StateClosed)
> + * > blkfront_resume()
> + * > Switch to StateInitialised
> + * frontend_changed(StateInitialised):
> + * > reconnect
> + * > Switch to StateConnected
> + * blkback_changed(StateConnected)
> + * > blkif_recover()
> + * > Also switch to StateConnected
> + * > Unfreeze_queue()
> + */
> + info->reconfiguring = true;
This is racy AFAICT. You should use an atomic test_and_set in the condition
above or lock the blkfront_info mutex before and while setting
reconfiguring. Without this two (or more) concurrent calls to
dynamic_reconfig_device might succeed.
> + xenbus_switch_state(info->xbdev, XenbusStateClosed);
> +
> + return count;
> +}
> +
> +static ssize_t max_indirect_segs_store(struct device *dev,
> + struct device_attribute *attr, const char *buf, size_t count)
> +{
> + ssize_t ret;
> + unsigned int max_segs = 0, backend_max_segs = 0;
> + struct blkfront_info *info = dev_get_drvdata(dev);
> + int err;
> +
> + ret = kstrtouint(buf, 10, &max_segs);
> + if (ret < 0)
> + return ret;
> +
> + if (max_segs == info->max_indirect_segments)
> + return count;
> +
> + err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
> + "feature-max-indirect-segments", "%u", &backend_max_segs,
> + NULL);
> + if (err) {
> + pr_err("Backend %s doesn't support feature-indirect-segments.\n",
> + info->xbdev->otherend);
> + return -EOPNOTSUPP;
> + }
> +
> + if (max_segs > backend_max_segs) {
> + pr_err("Invalid max indirect segment (%u), backend-max: %u.\n",
> + max_segs, backend_max_segs);
> + return -EINVAL;
> + }
> +
> + info->max_indirect_segments = max_segs;
> +
> + return dynamic_reconfig_device(info, count);
> +}
> +
> +static ssize_t max_ring_page_order_store(struct device *dev,
> + struct device_attribute *attr,
> + const char *buf, size_t count)
> +{
> + ssize_t ret;
> + unsigned int max_order = 0, backend_max_order = 0;
> + struct blkfront_info *info = dev_get_drvdata(dev);
> + int err;
> +
> + ret = kstrtouint(buf, 10, &max_order);
> + if (ret < 0)
> + return ret;
> +
> + if ((1 << max_order) == info->nr_ring_pages)
> + return count;
> +
> + if (max_order > XENBUS_MAX_RING_GRANT_ORDER) {
> + pr_err("Invalid max_ring_page_order (%u), max: %u.\n",
> + max_order, XENBUS_MAX_RING_GRANT_ORDER);
> + return -EINVAL;
> + }
> +
> + err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
> + "max-ring-page-order", "%u", &backend_max_order);
> + if (err != 1) {
> + pr_err("Backend %s doesn't support feature multi-page-ring.\n",
> + info->xbdev->otherend);
> + return -EOPNOTSUPP;
> + }
> + if (max_order > backend_max_order) {
> + pr_err("Invalid max_ring_page_order (%u), backend supports max: %u.\n",
> + max_order, backend_max_order);
> + return -EINVAL;
> + }
> + info->max_ring_page_order = max_order;
> +
> + return dynamic_reconfig_device(info, count);
> +}
> +
> +static ssize_t max_queues_store(struct device *dev,
> + struct device_attribute *attr,
> + const char *buf, size_t count)
> +{
> + ssize_t ret;
> + unsigned int max_queues = 0, backend_max_queues = 0;
> + struct blkfront_info *info = dev_get_drvdata(dev);
> + int err;
> +
> + ret = kstrtouint(buf, 10, &max_queues);
> + if (ret < 0)
> + return ret;
> +
> + if (max_queues == info->nr_rings)
> + return count;
> +
> + if (max_queues > num_online_cpus()) {
> + pr_err("Invalid max_queues (%u), can't bigger than online cpus: %u.\n",
> + max_queues, num_online_cpus());
> + return -EINVAL;
> + }
> +
> + err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
> + "multi-queue-max-queues", "%u", &backend_max_queues);
> + if (err != 1) {
> + pr_err("Backend %s doesn't support block multi queue.\n",
> + info->xbdev->otherend);
> + return -EOPNOTSUPP;
> + }
> + if (max_queues > backend_max_queues) {
> + pr_err("Invalid max_queues (%u), backend supports max: %u.\n",
> + max_queues, backend_max_queues);
> + return -EINVAL;
> + }
> + info->max_queues = max_queues;
> +
> + return dynamic_reconfig_device(info, count);
> }
>
> +static DEVICE_ATTR_RW(max_queues);
> +static DEVICE_ATTR_RW(max_ring_page_order);
> +static DEVICE_ATTR_RW(max_indirect_segs);
> +
> /*
> * Invoked when the backend is finally 'ready' (and has told produced
> * the details about the physical device - #sectors, size, etc).
> @@ -2393,6 +2620,10 @@ static void blkfront_connect(struct blkfront_info *info)
> * supports indirect descriptors, and how many.
> */
> blkif_recover(info);
> + if (info->reconfiguring) {
> + blk_mq_unfreeze_queue(info->rq);
> + info->reconfiguring = false;
> + }
> return;
>
> default:
> @@ -2443,6 +2674,22 @@ static void blkfront_connect(struct blkfront_info *info)
> return;
> }
>
> + err = device_create_file(&info->xbdev->dev, &dev_attr_max_ring_page_order);
> + if (err)
> + goto fail;
> +
> + err = device_create_file(&info->xbdev->dev, &dev_attr_max_indirect_segs);
> + if (err) {
> + device_remove_file(&info->xbdev->dev, &dev_attr_max_ring_page_order);
> + goto fail;
> + }
> +
> + err = device_create_file(&info->xbdev->dev, &dev_attr_max_queues);
> + if (err) {
> + device_remove_file(&info->xbdev->dev, &dev_attr_max_ring_page_order);
> + device_remove_file(&info->xbdev->dev, &dev_attr_max_indirect_segs);
> + goto fail;
> + }
> xenbus_switch_state(info->xbdev, XenbusStateConnected);
>
> /* Kick pending requests. */
> @@ -2453,6 +2700,12 @@ static void blkfront_connect(struct blkfront_info *info)
> add_disk(info->gd);
>
> info->is_ready = 1;
> + return;
> +
> +fail:
> + blkif_free(info, 0);
> + xlvbd_release_gendisk(info);
> + return;
Hm, I'm not sure whether this chunk should be in a separate patch, it seems
like blkfront_connect doesn't properly cleanup on error (if
xlvbd_alloc_gendisk fails blkif_free will not be called). Do you think you
could send the addition of the 'fail' label as a separate patch and fix the
error path of xlvbd_alloc_gendisk?
> }
>
> /**
> @@ -2500,8 +2753,16 @@ static void blkback_changed(struct xenbus_device *dev,
> break;
>
> case XenbusStateClosed:
> - if (dev->state == XenbusStateClosed)
> + if (dev->state == XenbusStateClosed) {
> + if (info->reconfiguring)
> + if (blkfront_resume(info->xbdev)) {
Could you please join those two conditions:
if (info->reconfiguring && blkfront_resume(info->xbdev)) { ...
Also, I'm not sure this is correct, if blkfront sees the "Closing" state on
blkback it will try to close the frontend and destroy the block device (see
blkfront_closing), and this should be avoided. You should call
blkfront_resume as soon as you see the backend move to the Closed or Closing
states, without calling blkfront_closing.
> + /* Resume failed. */
> + info->reconfiguring = false;
> + xenbus_switch_state(info->xbdev, XenbusStateClosed);
> + pr_err("Resume from dynamic configuration failed\n");
> + }
> break;
> + }
> /* Missed the backend's Closing state -- fallthrough */
> case XenbusStateClosing:
> if (info)
> --
> 1.7.10.4
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH v3] xen-blkfront: dynamic configuration of per-vbd resources
2016-07-27 8:07 ` Roger Pau Monné
@ 2016-07-27 9:12 ` Bob Liu
2016-07-27 9:29 ` [Xen-devel] " Paul Durrant
2016-07-27 10:09 ` Roger Pau Monné
0 siblings, 2 replies; 11+ messages in thread
From: Bob Liu @ 2016-07-27 9:12 UTC (permalink / raw)
To: Roger Pau Monné; +Cc: linux-kernel, xen-devel, konrad.wilk
On 07/27/2016 04:07 PM, Roger Pau Monné wrote:
..[snip]..
>> @@ -2443,6 +2674,22 @@ static void blkfront_connect(struct blkfront_info *info)
>> return;
>> }
>>
>> + err = device_create_file(&info->xbdev->dev, &dev_attr_max_ring_page_order);
>> + if (err)
>> + goto fail;
>> +
>> + err = device_create_file(&info->xbdev->dev, &dev_attr_max_indirect_segs);
>> + if (err) {
>> + device_remove_file(&info->xbdev->dev, &dev_attr_max_ring_page_order);
>> + goto fail;
>> + }
>> +
>> + err = device_create_file(&info->xbdev->dev, &dev_attr_max_queues);
>> + if (err) {
>> + device_remove_file(&info->xbdev->dev, &dev_attr_max_ring_page_order);
>> + device_remove_file(&info->xbdev->dev, &dev_attr_max_indirect_segs);
>> + goto fail;
>> + }
>> xenbus_switch_state(info->xbdev, XenbusStateConnected);
>>
>> /* Kick pending requests. */
>> @@ -2453,6 +2700,12 @@ static void blkfront_connect(struct blkfront_info *info)
>> add_disk(info->gd);
>>
>> info->is_ready = 1;
>> + return;
>> +
>> +fail:
>> + blkif_free(info, 0);
>> + xlvbd_release_gendisk(info);
>> + return;
>
> Hm, I'm not sure whether this chunk should be in a separate patch, it seems
> like blkfront_connect doesn't properly cleanup on error (if
> xlvbd_alloc_gendisk fails blkif_free will not be called). Do you think you
> could send the addition of the 'fail' label as a separate patch and fix the
> error path of xlvbd_alloc_gendisk?
>
Sure, will fix all of your comments above.
>> }
>>
>> /**
>> @@ -2500,8 +2753,16 @@ static void blkback_changed(struct xenbus_device *dev,
>> break;
>>
>> case XenbusStateClosed:
>> - if (dev->state == XenbusStateClosed)
>> + if (dev->state == XenbusStateClosed) {
>> + if (info->reconfiguring)
>> + if (blkfront_resume(info->xbdev)) {
>
> Could you please join those two conditions:
>
> if (info->reconfiguring && blkfront_resume(info->xbdev)) { ...
>
> Also, I'm not sure this is correct, if blkfront sees the "Closing" state on
> blkback it will try to close the frontend and destroy the block device (see
> blkfront_closing), and this should be avoided. You should call
> blkfront_resume as soon as you see the backend move to the Closed or Closing
> states, without calling blkfront_closing.
>
I didn't get how this can happen, backend state won't be changed to 'Closing' before blkfront_closing() is called.
So I think current logic is fine.
Btw: could you please ack [PATCH v2 2/3] xen-blkfront: introduce blkif_set_queue_limits()?
Thank you!
Bob
^ permalink raw reply [flat|nested] 11+ messages in thread
* RE: [Xen-devel] [PATCH v3] xen-blkfront: dynamic configuration of per-vbd resources
2016-07-27 9:12 ` Bob Liu
@ 2016-07-27 9:29 ` Paul Durrant
2016-07-27 10:09 ` Roger Pau Monné
1 sibling, 0 replies; 11+ messages in thread
From: Paul Durrant @ 2016-07-27 9:29 UTC (permalink / raw)
To: Bob Liu, Roger Pau Monne
Cc: xen-devel@lists.xenproject.org, linux-kernel@vger.kernel.org
> -----Original Message-----
[snip]
> >
> > Also, I'm not sure this is correct, if blkfront sees the "Closing" state on
> > blkback it will try to close the frontend and destroy the block device (see
> > blkfront_closing), and this should be avoided. You should call
> > blkfront_resume as soon as you see the backend move to the Closed or
> Closing
> > states, without calling blkfront_closing.
> >
>
> I didn't get how this can happen, backend state won't be changed to 'Closing'
> before blkfront_closing() is called.
> So I think current logic is fine.
>
Backends can go to closing before frontends, when the PV device is being detached.
Paul
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH v3] xen-blkfront: dynamic configuration of per-vbd resources
2016-07-27 9:12 ` Bob Liu
2016-07-27 9:29 ` [Xen-devel] " Paul Durrant
@ 2016-07-27 10:09 ` Roger Pau Monné
1 sibling, 0 replies; 11+ messages in thread
From: Roger Pau Monné @ 2016-07-27 10:09 UTC (permalink / raw)
To: Bob Liu; +Cc: linux-kernel, xen-devel, konrad.wilk
On Wed, Jul 27, 2016 at 05:12:22PM +0800, Bob Liu wrote:
>
> On 07/27/2016 04:07 PM, Roger Pau Monné wrote:
> ..[snip]..
> >> @@ -2443,6 +2674,22 @@ static void blkfront_connect(struct blkfront_info *info)
> >> return;
> >> }
> >>
> >> + err = device_create_file(&info->xbdev->dev, &dev_attr_max_ring_page_order);
> >> + if (err)
> >> + goto fail;
> >> +
> >> + err = device_create_file(&info->xbdev->dev, &dev_attr_max_indirect_segs);
> >> + if (err) {
> >> + device_remove_file(&info->xbdev->dev, &dev_attr_max_ring_page_order);
> >> + goto fail;
> >> + }
> >> +
> >> + err = device_create_file(&info->xbdev->dev, &dev_attr_max_queues);
> >> + if (err) {
> >> + device_remove_file(&info->xbdev->dev, &dev_attr_max_ring_page_order);
> >> + device_remove_file(&info->xbdev->dev, &dev_attr_max_indirect_segs);
> >> + goto fail;
> >> + }
> >> xenbus_switch_state(info->xbdev, XenbusStateConnected);
> >>
> >> /* Kick pending requests. */
> >> @@ -2453,6 +2700,12 @@ static void blkfront_connect(struct blkfront_info *info)
> >> add_disk(info->gd);
> >>
> >> info->is_ready = 1;
> >> + return;
> >> +
> >> +fail:
> >> + blkif_free(info, 0);
> >> + xlvbd_release_gendisk(info);
> >> + return;
> >
> > Hm, I'm not sure whether this chunk should be in a separate patch, it seems
> > like blkfront_connect doesn't properly cleanup on error (if
> > xlvbd_alloc_gendisk fails blkif_free will not be called). Do you think you
> > could send the addition of the 'fail' label as a separate patch and fix the
> > error path of xlvbd_alloc_gendisk?
> >
>
> Sure, will fix all of your comments above.
>
> >> }
> >>
> >> /**
> >> @@ -2500,8 +2753,16 @@ static void blkback_changed(struct xenbus_device *dev,
> >> break;
> >>
> >> case XenbusStateClosed:
> >> - if (dev->state == XenbusStateClosed)
> >> + if (dev->state == XenbusStateClosed) {
> >> + if (info->reconfiguring)
> >> + if (blkfront_resume(info->xbdev)) {
> >
> > Could you please join those two conditions:
> >
> > if (info->reconfiguring && blkfront_resume(info->xbdev)) { ...
> >
> > Also, I'm not sure this is correct, if blkfront sees the "Closing" state on
> > blkback it will try to close the frontend and destroy the block device (see
> > blkfront_closing), and this should be avoided. You should call
> > blkfront_resume as soon as you see the backend move to the Closed or Closing
> > states, without calling blkfront_closing.
> >
>
> I didn't get how this can happen, backend state won't be changed to 'Closing' before blkfront_closing() is called.
> So I think current logic is fine.
I see, in dynamic_reconfig_device you change the frontend state to "Closed".
Then if the backend switches to state "Closing" blkfront will call
blkfront_closing, but that won't do anything because the frontend state is
already at the "Closed" state, at which point you just wait for the backend
to finally reach state "Closed" in order to perform the reconnection. I
stand corrected, I think the current logic is indeed fine.
Roger.
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH v3] xen-blkfront: dynamic configuration of per-vbd resources
2016-07-27 3:21 [PATCH v3] xen-blkfront: dynamic configuration of per-vbd resources Bob Liu
2016-07-27 8:07 ` Roger Pau Monné
@ 2016-07-27 10:59 ` Roger Pau Monné
2016-07-27 11:21 ` Bob Liu
2016-07-28 9:31 ` [Xen-devel] " David Vrabel
2 siblings, 1 reply; 11+ messages in thread
From: Roger Pau Monné @ 2016-07-27 10:59 UTC (permalink / raw)
To: Bob Liu; +Cc: linux-kernel, xen-devel, konrad.wilk
On Wed, Jul 27, 2016 at 11:21:25AM +0800, Bob Liu wrote:
[...]
> +static ssize_t dynamic_reconfig_device(struct blkfront_info *info, ssize_t count)
> +{
> + /*
> + * Prevent new requests even to software request queue.
> + */
> + blk_mq_freeze_queue(info->rq);
> +
> + /*
> + * Guarantee no uncompleted reqs.
> + */
I'm also wondering, why do you need to guarantee that there are no
uncompleted requests? The resume procedure is going to call blkif_recover
that will take care of requeuing any unfinished requests that are on the
ring.
> + if (part_in_flight(&info->gd->part0) || info->reconfiguring) {
> + blk_mq_unfreeze_queue(info->rq);
> + pr_err("Dev:%s busy, please retry later.\n", dev_name(&info->xbdev->dev));
> + return -EBUSY;
> + }
Roger.
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH v3] xen-blkfront: dynamic configuration of per-vbd resources
2016-07-27 10:59 ` Roger Pau Monné
@ 2016-07-27 11:21 ` Bob Liu
2016-07-27 14:24 ` Roger Pau Monné
0 siblings, 1 reply; 11+ messages in thread
From: Bob Liu @ 2016-07-27 11:21 UTC (permalink / raw)
To: Roger Pau Monné; +Cc: linux-kernel, xen-devel, konrad.wilk
On 07/27/2016 06:59 PM, Roger Pau Monné wrote:
> On Wed, Jul 27, 2016 at 11:21:25AM +0800, Bob Liu wrote:
> [...]
>> +static ssize_t dynamic_reconfig_device(struct blkfront_info *info, ssize_t count)
>> +{
>> + /*
>> + * Prevent new requests even to software request queue.
>> + */
>> + blk_mq_freeze_queue(info->rq);
>> +
>> + /*
>> + * Guarantee no uncompleted reqs.
>> + */
>
> I'm also wondering, why do you need to guarantee that there are no
> uncompleted requests? The resume procedure is going to call blkif_recover
> that will take care of requeuing any unfinished requests that are on the
> ring.
>
Because there may be requests in the software request queue with more segments than
we can handle (if info->max_indirect_segments is reduced).
blkif_recover() can't handle this since blk-mq was introduced,
because there is no way to iterate the sw-request queues (blk_fetch_request() can't be used by blk-mq).
So there is a bug in blkif_recover(); I was thinking of implementing the suspend function of blkfront_driver like:
+static int blkfront_suspend(struct xenbus_device *dev)
+{
+ blk_mq_freeze_queue(info->rq);
+ ..
+}
static struct xenbus_driver blkfront_driver = {
.ids = blkfront_ids,
.probe = blkfront_probe,
.remove = blkfront_remove,
+ .suspend = blkfront_suspend,
.resume = blkfront_resume,
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH v3] xen-blkfront: dynamic configuration of per-vbd resources
2016-07-27 11:21 ` Bob Liu
@ 2016-07-27 14:24 ` Roger Pau Monné
2016-07-27 23:05 ` Bob Liu
0 siblings, 1 reply; 11+ messages in thread
From: Roger Pau Monné @ 2016-07-27 14:24 UTC (permalink / raw)
To: Bob Liu; +Cc: linux-kernel, xen-devel, konrad.wilk
On Wed, Jul 27, 2016 at 07:21:05PM +0800, Bob Liu wrote:
>
> On 07/27/2016 06:59 PM, Roger Pau Monné wrote:
> > On Wed, Jul 27, 2016 at 11:21:25AM +0800, Bob Liu wrote:
> > [...]
> >> +static ssize_t dynamic_reconfig_device(struct blkfront_info *info, ssize_t count)
> >> +{
> >> + /*
> >> + * Prevent new requests even to software request queue.
> >> + */
> >> + blk_mq_freeze_queue(info->rq);
> >> +
> >> + /*
> >> + * Guarantee no uncompleted reqs.
> >> + */
> >
> > I'm also wondering, why do you need to guarantee that there are no
> > uncompleted requests? The resume procedure is going to call blkif_recover
> > that will take care of requeuing any unfinished requests that are on the
> > ring.
> >
>
> Because there may have requests in the software request queue with more segments than
> we can handle(if info->max_indirect_segments is reduced).
>
> The blkif_recover() can't handle this since blk-mq was introduced,
> because there is no way to iterate the sw-request queues(blk_fetch_request() can't be used by blk-mq).
>
> So there is a bug in blkif_recover(), I was thinking implement the suspend function of blkfront_driver like:
Hm, this is a regression and should be fixed ASAP. I'm still not sure I
follow: doesn't blk_queue_max_segments change the number of segments the
requests on the queue are going to have? So that you will only have to
re-queue the requests already on the ring?
Waiting for the whole queue to be flushed before suspending is IMHO not
acceptable, it introduces an unbounded delay during migration if the backend
is slow for some reason.
Roger.
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH v3] xen-blkfront: dynamic configuration of per-vbd resources
2016-07-27 14:24 ` Roger Pau Monné
@ 2016-07-27 23:05 ` Bob Liu
2016-07-28 7:49 ` Roger Pau Monné
0 siblings, 1 reply; 11+ messages in thread
From: Bob Liu @ 2016-07-27 23:05 UTC (permalink / raw)
To: Roger Pau Monné; +Cc: linux-kernel, xen-devel, konrad.wilk
On 07/27/2016 10:24 PM, Roger Pau Monné wrote:
> On Wed, Jul 27, 2016 at 07:21:05PM +0800, Bob Liu wrote:
>>
>> On 07/27/2016 06:59 PM, Roger Pau Monné wrote:
>>> On Wed, Jul 27, 2016 at 11:21:25AM +0800, Bob Liu wrote:
>>> [...]
>>>> +static ssize_t dynamic_reconfig_device(struct blkfront_info *info, ssize_t count)
>>>> +{
>>>> + /*
>>>> + * Prevent new requests even to software request queue.
>>>> + */
>>>> + blk_mq_freeze_queue(info->rq);
>>>> +
>>>> + /*
>>>> + * Guarantee no uncompleted reqs.
>>>> + */
>>>
>>> I'm also wondering, why do you need to guarantee that there are no
>>> uncompleted requests? The resume procedure is going to call blkif_recover
>>> that will take care of requeuing any unfinished requests that are on the
>>> ring.
>>>
>>
>> Because there may have requests in the software request queue with more segments than
>> we can handle(if info->max_indirect_segments is reduced).
>>
>> The blkif_recover() can't handle this since blk-mq was introduced,
>> because there is no way to iterate the sw-request queues(blk_fetch_request() can't be used by blk-mq).
>>
>> So there is a bug in blkif_recover(), I was thinking implement the suspend function of blkfront_driver like:
>
> Hm, this is a regression and should be fixed ASAP. I'm still not sure I
> follow, don't blk_queue_max_segments change the number of segments the
> requests on the queue are going to have? So that you will only have to
> re-queue the requests already on the ring?
>
That's not enough; request queues have been split into software queues and hardware queues since blk-mq was introduced.
We need to consider two more things:
* Stop new requests from being added to software queues before blk_queue_max_segments() is called (still using the old 'max-indirect-segments').
I didn't see any other way except calling blk_mq_freeze_queue().
* Requests already in software queues but with the old 'max-indirect-segments' also have to be re-queued based on the new 'max-indirect-segments'.
No blk-mq API can do this.
> Waiting for the whole queue to be flushed before suspending is IMHO not
> acceptable, it introduces an unbounded delay during migration if the backend
> is slow for some reason.
>
Right, I also hope there is a better solution.
--
Regards,
-Bob
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH v3] xen-blkfront: dynamic configuration of per-vbd resources
2016-07-27 23:05 ` Bob Liu
@ 2016-07-28 7:49 ` Roger Pau Monné
0 siblings, 0 replies; 11+ messages in thread
From: Roger Pau Monné @ 2016-07-28 7:49 UTC (permalink / raw)
To: Bob Liu; +Cc: linux-kernel, xen-devel, konrad.wilk
On Thu, Jul 28, 2016 at 07:05:05AM +0800, Bob Liu wrote:
>
> On 07/27/2016 10:24 PM, Roger Pau Monné wrote:
> > On Wed, Jul 27, 2016 at 07:21:05PM +0800, Bob Liu wrote:
> >>
> >> On 07/27/2016 06:59 PM, Roger Pau Monné wrote:
> >>> On Wed, Jul 27, 2016 at 11:21:25AM +0800, Bob Liu wrote:
> >>> [...]
> >>>> +static ssize_t dynamic_reconfig_device(struct blkfront_info *info, ssize_t count)
> >>>> +{
> >>>> + /*
> >>>> + * Prevent new requests even to software request queue.
> >>>> + */
> >>>> + blk_mq_freeze_queue(info->rq);
> >>>> +
> >>>> + /*
> >>>> + * Guarantee no uncompleted reqs.
> >>>> + */
> >>>
> >>> I'm also wondering, why do you need to guarantee that there are no
> >>> uncompleted requests? The resume procedure is going to call blkif_recover
> >>> that will take care of requeuing any unfinished requests that are on the
> >>> ring.
> >>>
> >>
> >> Because there may have requests in the software request queue with more segments than
> >> we can handle(if info->max_indirect_segments is reduced).
> >>
> >> The blkif_recover() can't handle this since blk-mq was introduced,
> >> because there is no way to iterate the sw-request queues(blk_fetch_request() can't be used by blk-mq).
> >>
> >> So there is a bug in blkif_recover(), I was thinking implement the suspend function of blkfront_driver like:
> >
> > Hm, this is a regression and should be fixed ASAP. I'm still not sure I
> > follow, don't blk_queue_max_segments change the number of segments the
> > requests on the queue are going to have? So that you will only have to
> > re-queue the requests already on the ring?
> >
>
> That's not enough, request queues were split to software queues and hardware queues since blk-mq was introduced.
> We need to consider two more things:
> * Stop new requests be added to software queues before blk_queue_max_segments() is called(still using old 'max-indirect-segments').
> I didn't see other way except call blk_mq_freeze_queue().
Right, stopping the queues doesn't seem to be an issue?
> * Requests already in software queues but with old 'max-indirect-segments' also have to be re-queued based on new 'max-indirect-segments'.
> Neither blk-mq API can do this.
I'm afraid I don't know much about the new blk-mq API, you will have to ask
the blk-mq maintainers for solutions, since this was possible with the
previous API (and I would consider a regression that the new blk-mq API
doesn't allow it).
Roger.
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [Xen-devel] [PATCH v3] xen-blkfront: dynamic configuration of per-vbd resources
2016-07-27 3:21 [PATCH v3] xen-blkfront: dynamic configuration of per-vbd resources Bob Liu
2016-07-27 8:07 ` Roger Pau Monné
2016-07-27 10:59 ` Roger Pau Monné
@ 2016-07-28 9:31 ` David Vrabel
2 siblings, 0 replies; 11+ messages in thread
From: David Vrabel @ 2016-07-28 9:31 UTC (permalink / raw)
To: Bob Liu, linux-kernel; +Cc: xen-devel, roger.pau
On 27/07/16 04:21, Bob Liu wrote:
> The current VBD layer reserves buffer space for each attached device based on
> three statically configured settings which are read at boot time.
> * max_indirect_segs: Maximum amount of segments.
> * max_ring_page_order: Maximum order of pages to be used for the shared ring.
> * max_queues: Maximum of queues(rings) to be used.
I think you want a single knob for "maximum number of pages in flight".
This is much easier for an admin to relate to resource usage.
You could apply this limit without having to reconfigure, which would
simplify this patch quite a bit.
David
^ permalink raw reply [flat|nested] 11+ messages in thread
end of thread, other threads:[~2016-07-28 9:31 UTC | newest]
Thread overview: 11+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2016-07-27 3:21 [PATCH v3] xen-blkfront: dynamic configuration of per-vbd resources Bob Liu
2016-07-27 8:07 ` Roger Pau Monné
2016-07-27 9:12 ` Bob Liu
2016-07-27 9:29 ` [Xen-devel] " Paul Durrant
2016-07-27 10:09 ` Roger Pau Monné
2016-07-27 10:59 ` Roger Pau Monné
2016-07-27 11:21 ` Bob Liu
2016-07-27 14:24 ` Roger Pau Monné
2016-07-27 23:05 ` Bob Liu
2016-07-28 7:49 ` Roger Pau Monné
2016-07-28 9:31 ` [Xen-devel] " David Vrabel
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox