Linux virtualization list
 help / color / mirror / Atom feed
* [PATCH v3 07/11] virtio: net: Move out vq initialization into separate function
From: Amit Shah @ 2011-11-17 11:57 UTC (permalink / raw)
  To: Rusty Russell
  Cc: Amit Shah, linux-kernel, Michael S. Tsirkin, levinsasha928,
	Virtualization List
In-Reply-To: <cover.1321530505.git.amit.shah@redhat.com>

The probe and PM restore functions will share this code.

Signed-off-by: Amit Shah <amit.shah@redhat.com>
---
 drivers/net/virtio_net.c |   47 +++++++++++++++++++++++++++------------------
 1 files changed, 28 insertions(+), 19 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 6ee8410..6baa563 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -954,15 +954,38 @@ static void virtnet_config_changed(struct virtio_device *vdev)
 	virtnet_update_status(vi);
 }
 
+static int init_vqs(struct virtnet_info *vi)
+{
+	struct virtqueue *vqs[3];
+	vq_callback_t *callbacks[] = { skb_recv_done, skb_xmit_done, NULL};
+	const char *names[] = { "input", "output", "control" };
+	int nvqs, err;
+
+	/* We expect two virtqueues, receive then send,
+	 * and optionally control. */
+	nvqs = virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ) ? 3 : 2;
+
+	err = vi->vdev->config->find_vqs(vi->vdev, nvqs, vqs, callbacks, names);
+	if (err)
+		return err;
+
+	vi->rvq = vqs[0];
+	vi->svq = vqs[1];
+
+	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ)) {
+		vi->cvq = vqs[2];
+
+		if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN))
+			vi->dev->features |= NETIF_F_HW_VLAN_FILTER;
+	}
+	return 0;
+}
+
 static int virtnet_probe(struct virtio_device *vdev)
 {
 	int err;
 	struct net_device *dev;
 	struct virtnet_info *vi;
-	struct virtqueue *vqs[3];
-	vq_callback_t *callbacks[] = { skb_recv_done, skb_xmit_done, NULL};
-	const char *names[] = { "input", "output", "control" };
-	int nvqs;
 
 	/* Allocate ourselves a network device with room for our info */
 	dev = alloc_etherdev(sizeof(struct virtnet_info));
@@ -1034,24 +1057,10 @@ static int virtnet_probe(struct virtio_device *vdev)
 	if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF))
 		vi->mergeable_rx_bufs = true;
 
-	/* We expect two virtqueues, receive then send,
-	 * and optionally control. */
-	nvqs = virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ) ? 3 : 2;
-
-	err = vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names);
+	err = init_vqs(vi);
 	if (err)
 		goto free_stats;
 
-	vi->rvq = vqs[0];
-	vi->svq = vqs[1];
-
-	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ)) {
-		vi->cvq = vqs[2];
-
-		if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN))
-			dev->features |= NETIF_F_HW_VLAN_FILTER;
-	}
-
 	err = register_netdev(dev);
 	if (err) {
 		pr_debug("virtio_net: registering device failed\n");
-- 
1.7.7.1

^ permalink raw reply related

* [PATCH v3 06/11] virtio: blk: Add freeze, restore handlers to support S4
From: Amit Shah @ 2011-11-17 11:57 UTC (permalink / raw)
  To: Rusty Russell
  Cc: Amit Shah, linux-kernel, Michael S. Tsirkin, levinsasha928,
	Virtualization List
In-Reply-To: <cover.1321530505.git.amit.shah@redhat.com>

Delete the vq and flush any pending requests from the block queue on the
freeze callback to prepare for hibernation.

Re-create the vq in the restore callback to resume normal function.

Signed-off-by: Amit Shah <amit.shah@redhat.com>
---
 drivers/block/virtio_blk.c |   36 ++++++++++++++++++++++++++++++++++++
 1 files changed, 36 insertions(+), 0 deletions(-)

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 467f218..f73fb2d 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -568,6 +568,38 @@ static void __devexit virtblk_remove(struct virtio_device *vdev)
 	ida_simple_remove(&vd_index_ida, index);
 }
 
+#ifdef CONFIG_PM
+static int virtblk_freeze(struct virtio_device *vdev)
+{
+	struct virtio_blk *vblk = vdev->priv;
+
+	flush_work(&vblk->config_work);
+	spin_lock_irq(vblk->disk->queue->queue_lock);
+	blk_stop_queue(vblk->disk->queue);
+	spin_unlock_irq(vblk->disk->queue->queue_lock);
+	blk_sync_queue(vblk->disk->queue);
+
+	/* Stop all the virtqueues. */
+	vdev->config->reset(vdev);
+	vdev->config->del_vqs(vdev);
+	return 0;
+}
+
+static int virtblk_restore(struct virtio_device *vdev)
+{
+	struct virtio_blk *vblk = vdev->priv;
+	int ret;
+
+	ret = init_vq(vdev->priv);
+	if (!ret) {
+		spin_lock_irq(vblk->disk->queue->queue_lock);
+		blk_start_queue(vblk->disk->queue);
+		spin_unlock_irq(vblk->disk->queue->queue_lock);
+	}
+	return ret;
+}
+#endif
+
 static const struct virtio_device_id id_table[] = {
 	{ VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
 	{ 0 },
@@ -593,6 +625,10 @@ static struct virtio_driver __refdata virtio_blk = {
 	.probe			= virtblk_probe,
 	.remove			= __devexit_p(virtblk_remove),
 	.config_changed		= virtblk_config_changed,
+#ifdef CONFIG_PM
+	.freeze			= virtblk_freeze,
+	.restore		= virtblk_restore,
+#endif
 };
 
 static int __init init(void)
-- 
1.7.7.1

^ permalink raw reply related

* [PATCH v3 05/11] virtio: blk: Move out vq initialization to separate function
From: Amit Shah @ 2011-11-17 11:57 UTC (permalink / raw)
  To: Rusty Russell
  Cc: Amit Shah, linux-kernel, Michael S. Tsirkin, levinsasha928,
	Virtualization List
In-Reply-To: <cover.1321530505.git.amit.shah@redhat.com>

The probe and PM restore functions will share this code.

Signed-off-by: Amit Shah <amit.shah@redhat.com>
---
 drivers/block/virtio_blk.c |   19 ++++++++++++++-----
 1 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 4d0b70a..467f218 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -349,6 +349,18 @@ static void virtblk_config_changed(struct virtio_device *vdev)
 	queue_work(virtblk_wq, &vblk->config_work);
 }
 
+static int init_vq(struct virtio_blk *vblk)
+{
+	int err = 0;
+
+	/* We expect one virtqueue, for output. */
+	vblk->vq = virtio_find_single_vq(vblk->vdev, blk_done, "requests");
+	if (IS_ERR(vblk->vq))
+		err = PTR_ERR(vblk->vq);
+
+	return err;
+}
+
 static int __devinit virtblk_probe(struct virtio_device *vdev)
 {
 	struct virtio_blk *vblk;
@@ -390,12 +402,9 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
 	sg_init_table(vblk->sg, vblk->sg_elems);
 	INIT_WORK(&vblk->config_work, virtblk_config_changed_work);
 
-	/* We expect one virtqueue, for output. */
-	vblk->vq = virtio_find_single_vq(vdev, blk_done, "requests");
-	if (IS_ERR(vblk->vq)) {
-		err = PTR_ERR(vblk->vq);
+	err = init_vq(vblk);
+	if (err)
 		goto out_free_vblk;
-	}
 
 	vblk->pool = mempool_create_kmalloc_pool(1,sizeof(struct virtblk_req));
 	if (!vblk->pool) {
-- 
1.7.7.1

^ permalink raw reply related

* [PATCH v3 04/11] virtio: console: Add freeze and restore handlers to support S4
From: Amit Shah @ 2011-11-17 11:57 UTC (permalink / raw)
  To: Rusty Russell
  Cc: Amit Shah, linux-kernel, Michael S. Tsirkin, levinsasha928,
	Virtualization List
In-Reply-To: <cover.1321530505.git.amit.shah@redhat.com>

Remove all vqs and associated buffers in the freeze callback which
prepares us to go into hibernation state.  On restore, re-create all the
vqs and populate the input vqs with buffers to get to the pre-hibernate
state.

Note: Any outstanding unconsumed buffers are discarded; which means
there's a possibility of data loss in case the host or the guest didn't
consume any data already present in the vqs.  This can be addressed in a
later patch series, perhaps in virtio common code.

Signed-off-by: Amit Shah <amit.shah@redhat.com>
---
 drivers/char/virtio_console.c |   58 +++++++++++++++++++++++++++++++++++++++++
 1 files changed, 58 insertions(+), 0 deletions(-)

diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index e14f5aa..fd2fd6f 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -1844,6 +1844,60 @@ static unsigned int features[] = {
 	VIRTIO_CONSOLE_F_MULTIPORT,
 };
 
+#ifdef CONFIG_PM
+static int virtcons_freeze(struct virtio_device *vdev)
+{
+	struct ports_device *portdev;
+	struct port *port;
+
+	portdev = vdev->priv;
+
+	vdev->config->reset(vdev);
+
+	cancel_work_sync(&portdev->control_work);
+	remove_controlq_data(portdev);
+
+	list_for_each_entry(port, &portdev->ports, list) {
+		/*
+		 * We'll ask the host later if the new invocation has
+		 * the port opened or closed.
+		 */
+		port->host_connected = false;
+		remove_port_data(port);
+	}
+	remove_vqs(portdev);
+
+	return 0;
+}
+
+static int virtcons_restore(struct virtio_device *vdev)
+{
+	struct ports_device *portdev;
+	struct port *port;
+	int ret;
+
+	portdev = vdev->priv;
+
+	ret = init_vqs(portdev);
+	if (ret)
+		return ret;
+
+	if (use_multiport(portdev))
+		fill_queue(portdev->c_ivq, &portdev->cvq_lock);
+
+	list_for_each_entry(port, &portdev->ports, list) {
+		port->in_vq = portdev->in_vqs[port->id];
+		port->out_vq = portdev->out_vqs[port->id];
+
+		fill_queue(port->in_vq, &port->inbuf_lock);
+
+		/* Get port open/close status on the host */
+		send_control_msg(port, VIRTIO_CONSOLE_PORT_READY, 1);
+	}
+	return 0;
+}
+#endif
+
 static struct virtio_driver virtio_console = {
 	.feature_table = features,
 	.feature_table_size = ARRAY_SIZE(features),
@@ -1853,6 +1907,10 @@ static struct virtio_driver virtio_console = {
 	.probe =	virtcons_probe,
 	.remove =	virtcons_remove,
 	.config_changed = config_intr,
+#ifdef CONFIG_PM
+	.freeze =	virtcons_freeze,
+	.restore =	virtcons_restore,
+#endif
 };
 
 static int __init init(void)
-- 
1.7.7.1

^ permalink raw reply related

* [PATCH v3 03/11] virtio: console: Move out vq and vq buf removal into separate functions
From: Amit Shah @ 2011-11-17 11:57 UTC (permalink / raw)
  To: Rusty Russell
  Cc: Amit Shah, linux-kernel, Michael S. Tsirkin, levinsasha928,
	Virtualization List
In-Reply-To: <cover.1321530505.git.amit.shah@redhat.com>

This common code will be shared with the PM freeze function.

Signed-off-by: Amit Shah <amit.shah@redhat.com>
---
 drivers/char/virtio_console.c |   68 ++++++++++++++++++++++++-----------------
 1 files changed, 40 insertions(+), 28 deletions(-)

diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index 8e3c46d..e14f5aa 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -1271,6 +1271,20 @@ static void remove_port(struct kref *kref)
 	kfree(port);
 }
 
+static void remove_port_data(struct port *port)
+{
+	struct port_buffer *buf;
+
+	/* Remove unused data this port might have received. */
+	discard_port_data(port);
+
+	reclaim_consumed_buffers(port);
+
+	/* Remove buffers we queued up for the Host to send us data in. */
+	while ((buf = virtqueue_detach_unused_buf(port->in_vq)))
+		free_buf(buf);
+}
+
 /*
  * Port got unplugged.  Remove port from portdev's list and drop the
  * kref reference.  If no userspace has this port opened, it will
@@ -1278,8 +1292,6 @@ static void remove_port(struct kref *kref)
  */
 static void unplug_port(struct port *port)
 {
-	struct port_buffer *buf;
-
 	spin_lock_irq(&port->portdev->ports_lock);
 	list_del(&port->list);
 	spin_unlock_irq(&port->portdev->ports_lock);
@@ -1300,14 +1312,7 @@ static void unplug_port(struct port *port)
 		hvc_remove(port->cons.hvc);
 	}
 
-	/* Remove unused data this port might have received. */
-	discard_port_data(port);
-
-	reclaim_consumed_buffers(port);
-
-	/* Remove buffers we queued up for the Host to send us data in. */
-	while ((buf = virtqueue_detach_unused_buf(port->in_vq)))
-		free_buf(buf);
+	remove_port_data(port);
 
 	/*
 	 * We should just assume the device itself has gone off --
@@ -1659,6 +1664,28 @@ static const struct file_operations portdev_fops = {
 	.owner = THIS_MODULE,
 };
 
+static void remove_vqs(struct ports_device *portdev)
+{
+	portdev->vdev->config->del_vqs(portdev->vdev);
+	kfree(portdev->in_vqs);
+	kfree(portdev->out_vqs);
+}
+
+static void remove_controlq_data(struct ports_device *portdev)
+{
+	struct port_buffer *buf;
+	unsigned int len;
+
+	if (!use_multiport(portdev))
+		return;
+
+	while ((buf = virtqueue_get_buf(portdev->c_ivq, &len)))
+		free_buf(buf);
+
+	while ((buf = virtqueue_detach_unused_buf(portdev->c_ivq)))
+		free_buf(buf);
+}
+
 /*
  * Once we're further in boot, we get probed like any other virtio
  * device.
@@ -1764,9 +1791,7 @@ free_vqs:
 	/* The host might want to notify mgmt sw about device add failure */
 	__send_control_msg(portdev, VIRTIO_CONSOLE_BAD_ID,
 			   VIRTIO_CONSOLE_DEVICE_READY, 0);
-	vdev->config->del_vqs(vdev);
-	kfree(portdev->in_vqs);
-	kfree(portdev->out_vqs);
+	remove_vqs(portdev);
 free_chrdev:
 	unregister_chrdev(portdev->chr_major, "virtio-portsdev");
 free:
@@ -1804,21 +1829,8 @@ static void virtcons_remove(struct virtio_device *vdev)
 	 * have to just stop using the port, as the vqs are going
 	 * away.
 	 */
-	if (use_multiport(portdev)) {
-		struct port_buffer *buf;
-		unsigned int len;
-
-		while ((buf = virtqueue_get_buf(portdev->c_ivq, &len)))
-			free_buf(buf);
-
-		while ((buf = virtqueue_detach_unused_buf(portdev->c_ivq)))
-			free_buf(buf);
-	}
-
-	vdev->config->del_vqs(vdev);
-	kfree(portdev->in_vqs);
-	kfree(portdev->out_vqs);
-
+	remove_controlq_data(portdev);
+	remove_vqs(portdev);
 	kfree(portdev);
 }
 
-- 
1.7.7.1

^ permalink raw reply related

* [PATCH v3 02/11] virtio: pci: add PM notification handlers for restore, freeze, thaw, poweroff
From: Amit Shah @ 2011-11-17 11:57 UTC (permalink / raw)
  To: Rusty Russell
  Cc: Amit Shah, linux-kernel, Michael S. Tsirkin, levinsasha928,
	Virtualization List
In-Reply-To: <cover.1321530505.git.amit.shah@redhat.com>

Handle restore and freeze notification from the PM core.  Expose these
to individual virtio drivers that can quiesce and resume vq operations.

These functions also save device-specific data so that the device can be
put in pre-suspend state after resume, and disable and enable the PCI
device in the freeze and resume functions, respectively.

Signed-off-by: Amit Shah <amit.shah@redhat.com>
---
 drivers/virtio/virtio_pci.c |   50 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/virtio.h      |    4 +++
 2 files changed, 54 insertions(+), 0 deletions(-)

diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
index 5d36bc0..0db6f98 100644
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -55,6 +55,10 @@ struct virtio_pci_device
 	unsigned msix_vectors;
 	/* Vectors allocated, excluding per-vq vectors if any */
 	unsigned msix_used_vectors;
+
+	/* Status saved during hibernate/restore */
+	u8 saved_status;
+
 	/* Whether we have vector per vq */
 	bool per_vq_vectors;
 };
@@ -708,9 +712,55 @@ static int virtio_pci_resume(struct device *dev)
 	return 0;
 }
 
+static int virtio_pci_freeze(struct device *dev)
+{
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+	struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
+	struct virtio_driver *drv;
+	int ret;
+
+	drv = container_of(vp_dev->vdev.dev.driver,
+			   struct virtio_driver, driver);
+
+	ret = 0;
+	vp_dev->saved_status = vp_get_status(&vp_dev->vdev);
+	if (drv && drv->freeze)
+		ret = drv->freeze(&vp_dev->vdev);
+
+	if (!ret)
+		pci_disable_device(pci_dev);
+	return ret;
+}
+
+static int virtio_pci_restore(struct device *dev)
+{
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+	struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
+	struct virtio_driver *drv;
+	int ret;
+
+	drv = container_of(vp_dev->vdev.dev.driver,
+			   struct virtio_driver, driver);
+
+	ret = pci_enable_device(pci_dev);
+	if (ret)
+		return ret;
+	pci_set_master(pci_dev);
+	vp_set_status(&vp_dev->vdev, vp_dev->saved_status);
+	vp_finalize_features(&vp_dev->vdev);
+	if (drv && drv->restore)
+		ret = drv->restore(&vp_dev->vdev);
+
+	return ret;
+}
+
 static const struct dev_pm_ops virtio_pci_pm_ops = {
 	.suspend = virtio_pci_suspend,
 	.resume  = virtio_pci_resume,
+	.freeze  = virtio_pci_freeze,
+	.thaw    = virtio_pci_restore,
+	.restore = virtio_pci_restore,
+	.poweroff = virtio_pci_suspend,
 };
 #endif
 
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 4c069d8..1c26416 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -146,6 +146,10 @@ struct virtio_driver {
 	int (*probe)(struct virtio_device *dev);
 	void (*remove)(struct virtio_device *dev);
 	void (*config_changed)(struct virtio_device *dev);
+#ifdef CONFIG_PM
+	int (*freeze)(struct virtio_device *dev);
+	int (*restore)(struct virtio_device *dev);
+#endif
 };
 
 int register_virtio_driver(struct virtio_driver *drv);
-- 
1.7.7.1

^ permalink raw reply related

* [PATCH v3 01/11] virtio: pci: switch to new PM API
From: Amit Shah @ 2011-11-17 11:57 UTC (permalink / raw)
  To: Rusty Russell
  Cc: Amit Shah, linux-kernel, Michael S. Tsirkin, levinsasha928,
	Virtualization List
In-Reply-To: <cover.1321530505.git.amit.shah@redhat.com>

The older PM API doesn't have a way to get notifications on hibernate
events.  Switch to the newer one that gives us those notifications.

Signed-off-by: Amit Shah <amit.shah@redhat.com>
---
 drivers/virtio/virtio_pci.c |   16 ++++++++++++----
 1 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
index 3d1bf41..5d36bc0 100644
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -690,19 +690,28 @@ static void __devexit virtio_pci_remove(struct pci_dev *pci_dev)
 }
 
 #ifdef CONFIG_PM
-static int virtio_pci_suspend(struct pci_dev *pci_dev, pm_message_t state)
+static int virtio_pci_suspend(struct device *dev)
 {
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+
 	pci_save_state(pci_dev);
 	pci_set_power_state(pci_dev, PCI_D3hot);
 	return 0;
 }
 
-static int virtio_pci_resume(struct pci_dev *pci_dev)
+static int virtio_pci_resume(struct device *dev)
 {
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+
 	pci_restore_state(pci_dev);
 	pci_set_power_state(pci_dev, PCI_D0);
 	return 0;
 }
+
+static const struct dev_pm_ops virtio_pci_pm_ops = {
+	.suspend = virtio_pci_suspend,
+	.resume  = virtio_pci_resume,
+};
 #endif
 
 static struct pci_driver virtio_pci_driver = {
@@ -711,8 +720,7 @@ static struct pci_driver virtio_pci_driver = {
 	.probe		= virtio_pci_probe,
 	.remove		= __devexit_p(virtio_pci_remove),
 #ifdef CONFIG_PM
-	.suspend	= virtio_pci_suspend,
-	.resume		= virtio_pci_resume,
+	.driver.pm	= &virtio_pci_pm_ops,
 #endif
 };
 
-- 
1.7.7.1

^ permalink raw reply related

* [PATCH v3 00/11] virtio: S4 support
From: Amit Shah @ 2011-11-17 11:57 UTC (permalink / raw)
  To: Rusty Russell
  Cc: Amit Shah, linux-kernel, Michael S. Tsirkin, levinsasha928,
	Virtualization List

Hi,

These patches add support for S4 to virtio (pci) and all drivers.

For each driver, all vqs are removed before hibernation, and then
re-created after restore.  Some driver-specific uninit and init work
is also done in the freeze and restore functions.

All the drivers in testing work fine:

* virtio-blk is used for the only disk in the VM, IO works fine before
  and after.

* virtio-console: port IO keeps working fine before and after.
  * If a port is waiting for data from the host (blocking read(2)
    call), this works fine in both the cases: host-side connection is
    available or unavailable after resume.  In case the host-side
    connection isn't available, the blocking call is terminated.  If
    it is available, the call continues to remain in blocked state
    till further data arrives.

* virtio-net: ping remains active across S4.

* virtio-balloon: Works fine before and after.  Forgets the ballooned
  value across S4.  If it's desirable to maintain the ballooned value,
  a new config option can be created to do this.

All of these tests are run in parallel.

I have some more tests lined up on similar lines above.  I'll reply
here if something breaks.

Please review and apply if appropriate,

v3:
 - Reset vqs before deleting them (Sasha Levin)
 - Flush block queue before freeze (Rusty)
 - Detach netdev before freeze (Michael S. Tsirkin)

v2:
 - fix checkpatch errors/warnings

Amit Shah (11):
  virtio: pci: switch to new PM API
  virtio: pci: add PM notification handlers for restore, freeze, thaw,
    poweroff
  virtio: console: Move out vq and vq buf removal into separate
    functions
  virtio: console: Add freeze and restore handlers to support S4
  virtio: blk: Move out vq initialization to separate function
  virtio: blk: Add freeze, restore handlers to support S4
  virtio: net: Move out vq initialization into separate function
  virtio: net: Move out vq and vq buf removal into separate function
  virtio: net: Add freeze, restore handlers to support S4
  virtio: balloon: Move out vq initialization into separate function
  virtio: balloon: Add freeze, restore handlers to support S4

 drivers/block/virtio_blk.c      |   55 +++++++++++++++--
 drivers/char/virtio_console.c   |  126 ++++++++++++++++++++++++++++++---------
 drivers/net/virtio_net.c        |   99 ++++++++++++++++++++++--------
 drivers/virtio/virtio_balloon.c |   68 +++++++++++++++------
 drivers/virtio/virtio_pci.c     |   66 +++++++++++++++++++-
 include/linux/virtio.h          |    4 +
 6 files changed, 336 insertions(+), 82 deletions(-)

-- 
1.7.7.1

^ permalink raw reply

* Re: [RFC PATCH net-next v2] enable virtio_net to return bus_info in ethtool -i consistent with emulated NICs
From: David Miller @ 2011-11-16 22:27 UTC (permalink / raw)
  To: raj; +Cc: netdev, virtualization, mst
In-Reply-To: <20111115001708.72ED0290054F@tardy>

From: raj@tardy.cup.hp.com (Rick Jones)
Date: Mon, 14 Nov 2011 16:17:08 -0800 (PST)

> From: Rick Jones <rick.jones2@hp.com>
> 
> Add a new .bus_name to virtio_config_ops then modify virtio_net to 
> call through to it in an ethtool .get_drvinfo routine to report
> bus_info in ethtool -i output which is consistent with other
> emulated NICs and the output of lspci.  
> 
> Signed-off-by: Rick Jones <rick.jones2@hp.com>
> 
> ---
> 
> The changes to drivers/lguest/lguest_device.c, drivers/s390/kvm/kvm_virtio.c,
> and drivers/virtio/virtio_mmio.c code inspected only, not compiled.

Applied, thanks Rick.

^ permalink raw reply

* Re: [PATCH] virtio-mmio: Devices parameter parsing
From: Pawel Moll @ 2011-11-16 18:13 UTC (permalink / raw)
  To: Rusty Russell
  Cc: Peter Maydell, linux-kernel@vger.kernel.org, Sasha Levin,
	virtualization@lists.linux-foundation.org
In-Reply-To: <87sjlooq3m.fsf@rustcorp.com.au>

On Wed, 2011-11-16 at 00:42 +0000, Rusty Russell wrote:
> On Tue, 15 Nov 2011 13:53:05 +0000, Pawel Moll <pawel.moll@arm.com> wrote:
> > +static char *virtio_mmio_cmdline_devices;
> > +module_param_named(devices, virtio_mmio_cmdline_devices, charp, 0);
> 
> This is the wrong way to do this.
> 
> Don't put things in a charp and process it later.  It's lazy.  

Definitely not lazy - string parsing is very absorbing, really! ;-)

> You
> should write parsers for it and call it straight from module_param.
> 
> And if you do it that way, multiple devices are simply multiple
> arguments.
>
> module_param_cb(device, &param_ops_virtio_mmio, NULL, 0400);

Hm. Honestly, first time I hear someone suggesting using the param_cb
variant... It doesn't seem to be too popular ;-)

$ git grep ^module_param\(.\*charp.\*\)\; | wc -l
159
$ git grep ^module_param_cb\(.\*\)\; | wc -l
7

But anyway, I tried to do use your suggestion (see below), even if I'm
not convinced it's winning anything. But, in order to use the strsep()
and kstrtoull() I need a non-const version of the string. And as the
slab is not available at the time, I can't simply do kstrdup(), I'd have
to abuse the "const char *val" params_ops.set's argument...
Interestingly charp operations have the same problem:

int param_set_charp(const char *val, const struct kernel_param *kp)
<...>
        /* This is a hack.  We can't kmalloc in early boot, and we
         * don't need to; this mangled commandline is preserved. */
        if (slab_is_available()) {

Also, regarding the fact that one parameter define more than one
"entity" - this is how mtd partitions are defined (all similarities
intended ;-), see "drivers/mtd/cmdlinepart.c". And I quite like this
syntax...

There's one more thing I realize I missed. The platform devices are
registered starting from id 0 (so as "virtio-mmio.0"). Now, if you
happened to have a statically defined virtio-mmio with the same id,
there would be a clash. So I wanted to add a "first_id" parameter, but
with the _cb parameter I can't guarantee ordering (I mean, to have the
"first_id" available _before_ first device is being instantiated). So
I'd have to cache the devices and then create them in one go. Sounds
like the charp parameter for me :-)

So, unless you have really strong feelings about it, I'd stick to the
single-string version, I'll just add the "first_id" parameter tomorrow.

Cheers!

Paweł

8<---------------------------------------

/* Devices list parameter */

#if defined(CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES)

static struct device virtio_mmio_cmdline_parent = {
	.init_name = "virtio-mmio-cmdline",
};

static int virtio_mmio_cmdline_parent_registered;
static int virtio_mmio_cmdline_id;

static int virtio_mmio_register_cmdline_device(const char *device,
		const struct kernel_param *kp)
{
	int err;
	struct resource resources[] = {
		{
			.flags = IORESOURCE_IRQ,
		}, {
			.flags = IORESOURCE_MEM,
		}
	};
	char *__token, *token, *size, *base;
	unsigned long long val;

	token = __token = kstrdup(device, GFP_KERNEL);
	printk("token=%s\n", token);

	/* Split memory and IRQ resources */
	pr_info("base=%s, token=%s\n", base, token);
	if (base == token || !token || !*token)
		goto error;

	/* Get IRQ */
	if (kstrtoull(token, 0, &val) != 0)
		goto error;
	resources[0].start = val;
	resources[0].end = val;

	/* Split base address and size */
	size = strsep(&base, "@");
	if (size == base || !base || !*base)
		pr_err("No base in '%s'!\n", device);

	/* Get base address */
	if (kstrtoull(base, 0, &val) != 0)
		pr_err("Wrong base in '%s'!\n", device);
	resources[1].start = val;
	resources[1].end = val;

	/* Get size */
	resources[1].end += memparse(size, &token) - 1;
	if (size == token || *token)
		goto error;

	kfree(__token);

	pr_info("Registering device virtio-mmio.%d at 0x%x-0x%x, IRQ %u.\n",
			virtio_mmio_cmdline_id, resources[1].start,
			resources[1].end, resources[0].start);

	if (!virtio_mmio_cmdline_parent_registered) {
		err = device_register(&virtio_mmio_cmdline_parent);
		if (err) {
			pr_err("Failed to register parent device!\n");
			return err;
		}
		virtio_mmio_cmdline_parent_registered = 1;
	}

	return platform_device_register_resndata(&virtio_mmio_cmdline_parent,
			"virtio-mmio", virtio_mmio_cmdline_id++,
			resources, ARRAY_SIZE(resources), NULL, 0) != NULL;

error:
	kfree(__token);
	return -EINVAL;
}

static struct kernel_param_ops virtio_mmio_cmdline_param_ops = {
	.set = virtio_mmio_register_cmdline_device,
};

module_param_cb(device, &virtio_mmio_cmdline_param_ops, NULL, 0);



_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply

* Re: [RFC] kvm tools: Add support for virtio-mmio
From: Sasha Levin @ 2011-11-16 13:30 UTC (permalink / raw)
  To: Pawel Moll
  Cc: Peter Maydell, kvm@vger.kernel.org, asias.hejun@gmail.com,
	virtualization@lists.linux-foundation.org, penberg@kernel.org,
	gorcunov@gmail.com, mingo@elte.hu
In-Reply-To: <1321449718.3137.258.camel@hornet.cambridge.arm.com>

On Wed, 2011-11-16 at 13:21 +0000, Pawel Moll wrote:
> On Tue, 2011-11-15 at 17:56 +0000, Sasha Levin wrote:
> > Hmm... If thats the plan, it should probably be a virtio thing (not
> > virtio-mmio specific).
> > 
> > Either way, it could also use some clarification in the spec.
> 
> Well, the spec (p. 2.1) says: "The Subsystem Vendor ID should reflect
> the PCI Vendor ID of the environment (it's currently only used for
> informational purposes by the guest).". The fact is that all the current
> virtio drivers simply ignore this field. So unless this changes I simply
> have no idea how to describe that register. "Put anything there, no one
> cares"? "Write zero now, may change in future"? Any ideas welcomed.
> 
> Cheers!
> 
> Paweł
> 
> PS. Thanks for defending my honour in the delayed-explosive-device
> thread ;-)

We can add an appendix to the virtio spec with known virtio subsystem
vendors, patch QEMU & KVM tool to pass that, and possibly modify the
QEMU related workarounds in the kernel to only do the workaround thing
if QEMU is set as the vendor.

-- 

Sasha.

^ permalink raw reply

* Re: [RFC] kvm tools: Add support for virtio-mmio
From: Pawel Moll @ 2011-11-16 13:21 UTC (permalink / raw)
  To: Sasha Levin
  Cc: Peter Maydell, kvm@vger.kernel.org, asias.hejun@gmail.com,
	virtualization@lists.linux-foundation.org, penberg@kernel.org,
	gorcunov@gmail.com, mingo@elte.hu
In-Reply-To: <1321379773.3200.9.camel@lappy>

On Tue, 2011-11-15 at 17:56 +0000, Sasha Levin wrote:
> Hmm... If thats the plan, it should probably be a virtio thing (not
> virtio-mmio specific).
> 
> Either way, it could also use some clarification in the spec.

Well, the spec (p. 2.1) says: "The Subsystem Vendor ID should reflect
the PCI Vendor ID of the environment (it's currently only used for
informational purposes by the guest).". The fact is that all the current
virtio drivers simply ignore this field. So unless this changes I simply
have no idea how to describe that register. "Put anything there, no one
cares"? "Write zero now, may change in future"? Any ideas welcomed.

Cheers!

Paweł

PS. Thanks for defending my honour in the delayed-explosive-device
thread ;-)


_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply

* Re: [RFC] kvm tools: Implement multiple VQ for virtio-net
From: jason wang @ 2011-11-16 10:05 UTC (permalink / raw)
  To: Krishna Kumar2
  Cc: penberg, kvm, Michael S. Tsirkin, Asias He, virtualization,
	gorcunov, Sasha Levin, netdev, mingo
In-Reply-To: <OF1D2CD2A7.41E9E5B7-ON6525794A.0031AD22-6525794A.00321BC0@in.ibm.com>

On 11/16/2011 05:09 PM, Krishna Kumar2 wrote:
> jason wang <jasowang@redhat.com> wrote on 11/16/2011 11:40:45 AM:
>
> Hi Jason,
>
>> Have any thought in mind to solve the issue of flow handling?
> So far nothing concrete.
>
>> Maybe some performance numbers first is better, it would let us know
>> where we are. During the test of my patchset, I find big regression of
>> small packet transmission, and more retransmissions were noticed. This
>> maybe also the issue of flow affinity. One interesting things is to see
>> whether this happens in your patches :)
> I haven't got any results for small packet, but will run this week
> and send an update. I remember my earlier patches having regression
> for small packets.
>
>> I've played with a basic flow director implementation based on my series
>> which want to make sure the packets of a flow was handled by the same
>> vhost thread/guest vcpu. This is done by:
>>
>> - bind virtqueue to guest cpu
>> - record the hash to queue mapping when guest sending packets and use
>> this mapping to choose the virtqueue when forwarding packets to guest
>>
>> Test shows some help during for receiving packets from external host and
>> packet sending to local host. But it would hurt the performance of
>> sending packets to remote host. This is not the perfect solution as it
>> can not handle guest moving processes among vcpus, I plan to try
>> accelerate RFS and sharing the mapping between host and guest.
>>
>> Anyway this is just for receiving, the small packet sending need more
>> thoughts.
> I don't recollect small packet performance for guest->local host.
> Also, using multiple tuns devices on the bridge (instead of mq-tun)
> balances the rx/tx of a flow to a single vq. Then you can avoid
> mq-tun with it's queue selector function, etc. Have you tried it?

I remember it works when I test your patchset early this year, but don't
measure its performance. If multiple tuns devices were used, the mac
address table would be updated very frequently and packets can not be
forwarded in parallel ( unless we make bridge to support multiqueue ).

>
> I will run my tests this week and get back.
>
> thanks,
>
> - KK
>

^ permalink raw reply

* Re: [PATCHv2 RFC] virtio-spec: flexible configuration layout
From: Michael S. Tsirkin @ 2011-11-16  9:09 UTC (permalink / raw)
  To: Sasha Levin
  Cc: Krishna Kumar, kvm, Pawel Moll, Wang Sheng-Hui,
	Alexey Kardashevskiy, lkml - Kernel Mailing List, virtualization,
	penberg, Christian Borntraeger, avi, Amit Shah
In-Reply-To: <1321431459.3221.4.camel@lappy>

On Wed, Nov 16, 2011 at 10:17:39AM +0200, Sasha Levin wrote:
> On Wed, 2011-11-16 at 09:21 +0200, Michael S. Tsirkin wrote:
> > On Wed, Nov 16, 2011 at 10:28:52AM +1030, Rusty Russell wrote:
> > > On Fri, 11 Nov 2011 09:39:13 +0200, Sasha Levin <levinsasha928@gmail.com> wrote:
> > > > On Fri, Nov 11, 2011 at 6:24 AM, Rusty Russell <rusty@rustcorp.com.au> wrote:
> > > > > (2) There's no huge win in keeping the same layout.  Let's make some
> > > > >    cleanups.  There are more users ahead of us then behind us (I
> > > > >    hope!).
> > > > 
> > > > Actually, if we already do cleanups, here are two more suggestions:
> > > > 
> > > > 1. Make 64bit features a one big 64bit block, instead of having 32bits
> > > > in one place and 32 in another.
> > > > 2. Remove the reserved fields out of the config (the ones that were
> > > > caused by moving the ISR and the notifications out).
> > > 
> > > Yes, those were exactly what I was thinking.  I left it vague because
> > > there might be others you can see if we're prepared to abandon the
> > > current format.
> > > 
> > > Cheers,
> > > Rusty.
> > 
> > Yes but driver code doesn't get any cleaner by moving the fields.
> > And in fact, the legacy support makes the code messier.
> > What are the advantages?
> > 

The advantages question is what should really balance out the overhead.

> What about splitting the parts which handle legacy code and new code?

Well, I considered that. Something along the lines of
#define VIRTIO_NEW_MSI_CONFIG_VECTOR        18
And so on for all registers.

This seems to add a significant maintainance burden because of code
duplication. Note that, for example, vector programming is affected.
Multiply that by the number of guest OSes.


> It'll make it easier playing with the new spec more freely

I'm really worried about maintaing drivers long term.
Ease of experimentation is secondary for me.

> and will also
> make it easier removing legacy code in the future since you'll need to
> simply delete a chunk of code instead of removing legacy bits out of
> working code with a surgical knife.

It's unlikely to be a single chunk: we'd have structures and macros
which are separate. So at least 3 chunks.

Just for fun, here's what's involved in removing legacy map
support on top of my patch. As you see there are 4 chunks:
structure decl, map, unmap, and msix enable/disable.
And finding them was as simple as looking for legacy_map.


---

diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
index d242fcc..6c4d2faf 100644
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -64,9 +64,6 @@ struct virtio_pci_device
 
 	/* Various IO mappings: used for resource tracking only. */
 
-	/* Legacy BAR0: typically PIO. */
-	void __iomem *legacy_map;
-
 	/* Mappings specified by device capabilities: typically in MMIO */
 	void __iomem *isr_map;
 	void __iomem *notify_map;
@@ -81,11 +78,7 @@ struct virtio_pci_device
 static void virtio_pci_set_msix_enabled(struct virtio_pci_device *vp_dev, int enabled)
 {
 	vp_dev->msix_enabled = enabled;
-	if (vp_dev->device_map)
-		vp_dev->ioaddr_device = vp_dev->device_map;
-	else
-		vp_dev->ioaddr_device = vp_dev->legacy_map +
-			VIRTIO_PCI_CONFIG(vp_dev);
+	vp_dev->ioaddr_device = vp_dev->device_map;
 }
 
 static void __iomem *virtio_pci_map_cfg(struct virtio_pci_device *vp_dev, u8 cap_id,
@@ -147,8 +140,6 @@ err:
 
 static void virtio_pci_iounmap(struct virtio_pci_device *vp_dev)
 {
-	if (vp_dev->legacy_map)
-		pci_iounmap(vp_dev->pci_dev, vp_dev->legacy_map);
 	if (vp_dev->isr_map)
 		pci_iounmap(vp_dev->pci_dev, vp_dev->isr_map);
 	if (vp_dev->notify_map)
@@ -176,36 +167,15 @@ static int virtio_pci_iomap(struct virtio_pci_device *vp_dev)
 
 	if (!vp_dev->notify_map || !vp_dev->common_map ||
 	    !vp_dev->device_map) {
-		/*
-		 * If not all capabilities present, map legacy PIO.
-		 * Legacy access is at BAR 0. We never need to map
-		 * more than 256 bytes there, since legacy config space
-		 * used PIO which has this size limit.
-		 * */
-		vp_dev->legacy_map = pci_iomap(vp_dev->pci_dev, 0, 256);
-		if (!vp_dev->legacy_map) {
-			dev_err(&vp_dev->vdev.dev, "Unable to map legacy PIO");
-			goto err;
-		}
+		dev_err(&vp_dev->vdev.dev, "Unable to map IO");
+		goto err;
 	}
 
-	/* Prefer MMIO if available. If not, fallback to legacy PIO. */
-	if (vp_dev->common_map)
-		vp_dev->ioaddr = vp_dev->common_map;
-	else
-		vp_dev->ioaddr = vp_dev->legacy_map;
+	vp_dev->ioaddr = vp_dev->common_map;
 
-	if (vp_dev->device_map)
-		vp_dev->ioaddr_device = vp_dev->device_map;
-	else
-		vp_dev->ioaddr_device = vp_dev->legacy_map +
-			VIRTIO_PCI_CONFIG(vp_dev);
+	vp_dev->ioaddr_device = vp_dev->device_map;
 
-	if (vp_dev->notify_map)
-		vp_dev->ioaddr_notify = vp_dev->notify_map;
-	else
-		vp_dev->ioaddr_notify = vp_dev->legacy_map +
-			VIRTIO_PCI_QUEUE_NOTIFY;
+	vp_dev->ioaddr_notify = vp_dev->notify_map;
 
 	return 0;
 err:

^ permalink raw reply related

* Re: [RFC] kvm tools: Implement multiple VQ for virtio-net
From: Krishna Kumar2 @ 2011-11-16  9:09 UTC (permalink / raw)
  To: jason wang
  Cc: penberg, kvm, Michael S. Tsirkin, Asias He, virtualization,
	gorcunov, Sasha Levin, netdev, mingo
In-Reply-To: <4EC353E5.4070009@redhat.com>

jason wang <jasowang@redhat.com> wrote on 11/16/2011 11:40:45 AM:

Hi Jason,

> Have any thought in mind to solve the issue of flow handling?

So far nothing concrete.

> Maybe some performance numbers first is better, it would let us know
> where we are. During the test of my patchset, I find big regression of
> small packet transmission, and more retransmissions were noticed. This
> maybe also the issue of flow affinity. One interesting things is to see
> whether this happens in your patches :)

I haven't got any results for small packet, but will run this week
and send an update. I remember my earlier patches having regression
for small packets.

> I've played with a basic flow director implementation based on my series
> which want to make sure the packets of a flow was handled by the same
> vhost thread/guest vcpu. This is done by:
>
> - bind virtqueue to guest cpu
> - record the hash to queue mapping when guest sending packets and use
> this mapping to choose the virtqueue when forwarding packets to guest
>
> Test shows some help during for receiving packets from external host and
> packet sending to local host. But it would hurt the performance of
> sending packets to remote host. This is not the perfect solution as it
> can not handle guest moving processes among vcpus, I plan to try
> accelerate RFS and sharing the mapping between host and guest.
>
> Anyway this is just for receiving, the small packet sending need more
> thoughts.

I don't recollect small packet performance for guest->local host.
Also, using multiple tuns devices on the bridge (instead of mq-tun)
balances the rx/tx of a flow to a single vq. Then you can avoid
mq-tun with it's queue selector function, etc. Have you tried it?

I will run my tests this week and get back.

thanks,

- KK

^ permalink raw reply

* Re: [PATCHv2 RFC] virtio-spec: flexible configuration layout
From: Sasha Levin @ 2011-11-16  8:17 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Krishna Kumar, kvm, Pawel Moll, Wang Sheng-Hui,
	Alexey Kardashevskiy, lkml - Kernel Mailing List, virtualization,
	penberg, Christian Borntraeger, avi, Amit Shah
In-Reply-To: <20111116072137.GF5433@redhat.com>

On Wed, 2011-11-16 at 09:21 +0200, Michael S. Tsirkin wrote:
> On Wed, Nov 16, 2011 at 10:28:52AM +1030, Rusty Russell wrote:
> > On Fri, 11 Nov 2011 09:39:13 +0200, Sasha Levin <levinsasha928@gmail.com> wrote:
> > > On Fri, Nov 11, 2011 at 6:24 AM, Rusty Russell <rusty@rustcorp.com.au> wrote:
> > > > (2) There's no huge win in keeping the same layout.  Let's make some
> > > >    cleanups.  There are more users ahead of us then behind us (I
> > > >    hope!).
> > > 
> > > Actually, if we already do cleanups, here are two more suggestions:
> > > 
> > > 1. Make 64bit features a one big 64bit block, instead of having 32bits
> > > in one place and 32 in another.
> > > 2. Remove the reserved fields out of the config (the ones that were
> > > caused by moving the ISR and the notifications out).
> > 
> > Yes, those were exactly what I was thinking.  I left it vague because
> > there might be others you can see if we're prepared to abandon the
> > current format.
> > 
> > Cheers,
> > Rusty.
> 
> Yes but driver code doesn't get any cleaner by moving the fields.
> And in fact, the legacy support makes the code messier.
> What are the advantages?
> 

What about splitting the parts which handle legacy code and new code?
It'll make it easier playing with the new spec more freely and will also
make it easier removing legacy code in the future since you'll need to
simply delete a chunk of code instead of removing legacy bits out of
working code with a surgical knife.

-- 

Sasha.

^ permalink raw reply

* Re: [RFC] kvm tools: Implement multiple VQ for virtio-net
From: Michael S. Tsirkin @ 2011-11-16  7:23 UTC (permalink / raw)
  To: Rusty Russell
  Cc: Krishna Kumar, gorcunov, kvm, Asias He, virtualization,
	Pekka Enberg, Sasha Levin, netdev, mingo, Stephen Hemminger
In-Reply-To: <877h31ortx.fsf@rustcorp.com.au>

On Wed, Nov 16, 2011 at 10:34:42AM +1030, Rusty Russell wrote:
> On Mon, 14 Nov 2011 15:05:07 +0200, "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > On Mon, Nov 14, 2011 at 02:25:17PM +0200, Pekka Enberg wrote:
> > > On Mon, Nov 14, 2011 at 4:04 AM, Asias He <asias.hejun@gmail.com> wrote:
> > > > Why both the bandwidth and latency performance are dropping so dramatically
> > > > with multiple VQ?
> > > 
> > > What's the expected benefit from multiple VQs
> > 
> > Heh, the original patchset didn't mention this :) It really should.
> > They are supposed to speed up networking for high smp guests.
> 
> If we have one queue per guest CPU, does this allow us to run lockless?
> 
> Thanks,
> Rusty.

LLTX? It's supposed to be deprecated, isn't it?

-- 
MST

^ permalink raw reply

* Re: [PATCHv2 RFC] virtio-spec: flexible configuration layout
From: Michael S. Tsirkin @ 2011-11-16  7:21 UTC (permalink / raw)
  To: Rusty Russell
  Cc: Krishna Kumar, kvm, Pawel Moll, Wang Sheng-Hui,
	Alexey Kardashevskiy, lkml - Kernel Mailing List, virtualization,
	penberg, Christian Borntraeger, Sasha Levin, Amit Shah, avi
In-Reply-To: <87aa7xos3n.fsf@rustcorp.com.au>

On Wed, Nov 16, 2011 at 10:28:52AM +1030, Rusty Russell wrote:
> On Fri, 11 Nov 2011 09:39:13 +0200, Sasha Levin <levinsasha928@gmail.com> wrote:
> > On Fri, Nov 11, 2011 at 6:24 AM, Rusty Russell <rusty@rustcorp.com.au> wrote:
> > > (2) There's no huge win in keeping the same layout.  Let's make some
> > >    cleanups.  There are more users ahead of us then behind us (I
> > >    hope!).
> > 
> > Actually, if we already do cleanups, here are two more suggestions:
> > 
> > 1. Make 64bit features a one big 64bit block, instead of having 32bits
> > in one place and 32 in another.
> > 2. Remove the reserved fields out of the config (the ones that were
> > caused by moving the ISR and the notifications out).
> 
> Yes, those were exactly what I was thinking.  I left it vague because
> there might be others you can see if we're prepared to abandon the
> current format.
> 
> Cheers,
> Rusty.

Yes but driver code doesn't get any cleaner by moving the fields.
And in fact, the legacy support makes the code messier.
What are the advantages?

-- 
MST

^ permalink raw reply

* Re: [PATCH 5 of 5] virtio: expose added descriptors immediately
From: Michael S. Tsirkin @ 2011-11-16  7:18 UTC (permalink / raw)
  To: Rusty Russell; +Cc: Christoph Hellwig, linux-kernel, kvm, virtualization
In-Reply-To: <871ut8q5mh.fsf@rustcorp.com.au>

On Wed, Nov 16, 2011 at 10:51:26AM +1030, Rusty Russell wrote:
> On Mon, 14 Nov 2011 08:56:06 +0200, "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > On Sun, Nov 13, 2011 at 11:03:13PM +0200, Michael S. Tsirkin wrote:
> > > On Thu, Nov 03, 2011 at 06:12:53PM +1030, Rusty Russell wrote:
> > > > A virtio driver does virtqueue_add_buf() multiple times before finally
> > > > calling virtqueue_kick(); previously we only exposed the added buffers
> > > > in the virtqueue_kick() call.  This means we don't need a memory
> > > > barrier in virtqueue_add_buf(), but it reduces concurrency as the
> > > > device (ie. host) can't see the buffers until the kick.
> > > > 
> > > > Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
> > > 
> > > In the past I played with a patch like this, but I didn't see a
> > > performance gain either way. Do you see any gain?
> > > 
> > > I'm a bit concerned that with this patch, a buggy driver that
> > > adds more than 2^16 descriptors without a kick
> > > would seem to work sometimes. Let's add WARN_ON(vq->num_added > (1 << 16))?
> > 
> > Thinking about it more - it might be tricky for drivers
> > to ensure this. add used to fail when vq is full, but now
> > driver might do get between add and notify:
> > 	lock
> > 	add_buf * N
> > 	prep
> > 	unlock
> > 	lock
> > 	get_buf * N
> > 	unlock
> > 	lock
> > 	add_buf
> > 	prep
> > 	unlock
> > 	notify
> > 
> > and since add was followed by get, this doesn't fail.
> 
> Right, the driver could, in theory, do:
>         add_buf()
>         if (!get_buf())
>                 notify()
> 
> But we don't allow that at the moment in our API: we insist on a notify
> occasionally.  Noone does this at the moment, so a WARN_ON is correct.
> 
> If you're just add_buf() without the get_buf() then add_buf() will fail
> already.
> 
> Here's my current variant:
> 
> diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> --- a/drivers/virtio/virtio_ring.c
> +++ b/drivers/virtio/virtio_ring.c
> @@ -245,9 +245,19 @@ add_head:
>  
>  	/* Put entry in available array (but don't update avail->idx until they
>  	 * do sync). */
> -	avail = ((vq->vring.avail->idx + vq->num_added++) & (vq->vring.num-1));
> +	avail = (vq->vring.avail->idx & (vq->vring.num-1));
>  	vq->vring.avail->ring[avail] = head;
>  
> +	/* Descriptors and available array need to be set before we expose the
> +	 * new available array entries. */
> +	virtio_wmb();
> +	vq->vring.avail->idx++;
> +	vq->num_added++;
> +
> +	/* If you haven't kicked in this long, you're probably doing something
> +	 * wrong. */
> +	WARN_ON(vq->num_added > vq->vring.num);
> +
>  	pr_debug("Added buffer head %i to %p\n", head, vq);
>  	END_USE(vq);
>  
> It's hard to write a useful WARN_ON() for the "you should kick more
> regularly" case (we could take timestamps if DEBUG is defined, I guess),
> so let's leave this until someone actually trips it.
> 
> Thanks,
> Rusty.

My unlocked kick patches will trip this warning: they make
virtio-net do add + get without kick.

I think block with unlocked kick can trip it too:
add, lock is dropped and then an interrupt can get.

We also don't need a kick each num - each 2^15 is enough.
Why don't we do this at start of add_buf:
if (vq->num_added >= 0x7fff)
	return -ENOSPC;

-- 
MST

^ permalink raw reply

* Re: [PATCHv2 RFC] virtio-spec: flexible configuration layout
From: Michael S. Tsirkin @ 2011-11-16  7:03 UTC (permalink / raw)
  To: Rusty Russell
  Cc: Krishna Kumar, kvm, Pawel Moll, Wang Sheng-Hui,
	Alexey Kardashevskiy, lkml - Kernel Mailing List, virtualization,
	penberg, Christian Borntraeger, Sasha Levin, Amit Shah, avi
In-Reply-To: <87bosdos4y.fsf@rustcorp.com.au>

On Wed, Nov 16, 2011 at 10:28:05AM +1030, Rusty Russell wrote:
> On Sun, 13 Nov 2011 17:14:27 +0200, "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > On Fri, Nov 11, 2011 at 02:54:31PM +1030, Rusty Russell wrote:
> > > Indeed, I'd like to see two changes to your proposal:
> > > 
> > > (1) It should be all or nothing.  If a driver can find the virtio header
> > >     capability, it should only use the capabilties.  Otherwise, it
> > >     should fall back to legacy.
> > 
> > Okay, but going forward, if we add more capabilities, we probably won't
> > want to require them and fail to load if not there.  That's really why I
> > wanted to make the failover ignore any capability separately - to make
> > this future proof.  I'm not terribly fixated on this, it just seemed a
> > bit more symmetrical to treat all capabilities in the same way. Hmm?
> 
> Sure, a future capbility may not exist.  But once a driver finds that
> virtio header structure in the capability, it should *never* fall back
> to the legacy area.  ie. it can expect that Queue Notify, ISR Status and
> Device Header all exist.
> 
> ie. either use legacy mode, or use capabilities.  Never both.
> 
> > 
> > >     Your draft suggests a mix is possible;
> > >     I prefer a clean failure (ie. one day don't present a BAR 0 *at
> > >     all*, so ancient drivers just fail to load.).
> > 
> > Just to clarify, as written in draft this is possible with the current
> > spec proposal.  So I'm guessing there's some other motivation that you
> > had in mind?
> 
> At the moment you give a hybrid model where both are used.  In five
> years' time, that's going to be particularly ugly.
> > 
> > > (2) There's no huge win in keeping the same layout.  Let's make some
> > >     cleanups.
> > 
> > About this last point - what cleanups do you have in mind?  Just move
> > some registers around?  I guess we could put feature bits near each
> > other, and move device status towards the end to avoid wasting 3
> > bytes.
> 
> > The win seems minimal, but the change does make legacy hypervisor
> > support in guests more cumbersome, as we need to spread coditional code
> > around instead of localizing it in the initialization path.
> 
> But the separation between "legacy" and "modern" will be sharper, making
> it easier to excise the legacy portion later.
> 
> And in five years' time, people implementing virtio will really thank us
> that they can completely ignore the legacy header.

OK, I get it I think.

> > >    There are more users ahead of us then behind us (I
> > >     hope!).
> > 
> > In that case isn't it safe to assume we'll find some uses
> > for the reserved registers?
> 
> How would we tell?  If we use a new capability struct for it, it's
> obvious.  Otherwise, you're going to need to steal more feature bits.

Yes, exactly, if as you suggest, we disable legacy header
when there is a capability - we can use reserved registers
for other stuff.

> Cheers,
> Rusty.
> PS.  Sorry, was off sick for a few days.

^ permalink raw reply

* Re: [RFC] kvm tools: Implement multiple VQ for virtio-net
From: jason wang @ 2011-11-16  6:10 UTC (permalink / raw)
  To: Krishna Kumar2
  Cc: penberg, kvm, Michael S. Tsirkin, Asias He, virtualization,
	gorcunov, Sasha Levin, netdev, mingo
In-Reply-To: <OFDA747DDD.8D1C8FD8-ON65257949.001837DA-65257949.0019D25F@in.ibm.com>

On 11/15/2011 12:44 PM, Krishna Kumar2 wrote:
> Sasha Levin <levinsasha928@gmail.com> wrote on 11/14/2011 03:45:40 PM:
>
>>> Why both the bandwidth and latency performance are dropping so
>>> dramatically with multiple VQ?
>> It looks like theres no hash sync between host and guest, which makes
>> the RX VQ change for every packet. This is my guess.
> Yes, I confirmed this happens for macvtap. I am
> using ixgbe - it calls skb_record_rx_queue when
> a skb is allocated, but sets rxhash when a packet
> arrives. Macvtap is relying on record_rx_queue
> first ahead of rxhash (as part of my patch making
> macvtap multiqueue), hence different skbs result
> in macvtap selecting different vq's.
>
> Reordering macvtap to use rxhash first results in
> all packets going to the same VQ. The code snippet
> is:
>
> {
> 	...
> 	if (!numvtaps)
>                 goto out;
>
> 	rxq = skb_get_rxhash(skb);
> 	if (rxq) {
> 		tap = rcu_dereference(vlan->taps[rxq % numvtaps]);
> 		if (tap)
> 			goto out;
> 	}
>
> 	if (likely(skb_rx_queue_recorded(skb))) {
> 		rxq = skb_get_rx_queue(skb);
>
> 		while (unlikely(rxq >= numvtaps))
> 			rxq -= numvtaps;
> 			tap = rcu_dereference(vlan->taps[rxq]);
> 			if (tap)
> 				goto out;
> 	}
> }
>
> I will submit a patch for macvtap separately. I am working
> towards the other issue pointed out - different vhost
> threads handling rx/tx of a single flow.
Hello Krishna:

Have any thought in mind to solve the issue of flow handling?

Maybe some performance numbers first is better, it would let us know
where we are. During the test of my patchset, I find big regression of
small packet transmission, and more retransmissions were noticed. This
maybe also the issue of flow affinity. One interesting things is to see
whether this happens in your patches :)

I've played with a basic flow director implementation based on my series
which want to make sure the packets of a flow was handled by the same
vhost thread/guest vcpu. This is done by:

- bind virtqueue to guest cpu
- record the hash to queue mapping when guest sending packets and use
this mapping to choose the virtqueue when forwarding packets to guest

Test shows some help during for receiving packets from external host and
packet sending to local host. But it would hurt the performance of
sending packets to remote host. This is not the perfect solution as it
can not handle guest moving processes among vcpus, I plan to try
accelerate RFS and sharing the mapping between host and guest.

Anyway this is just for receiving, the small packet sending need more
thoughts.

Thanks

>
> thanks,
>
> - KK
>
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH] virtio-mmio: Correct the name of the guest features selector
From: Rusty Russell @ 2011-11-16  0:45 UTC (permalink / raw)
  To: Pawel Moll, Sasha Levin
  Cc: linux-kernel@vger.kernel.org,
	virtualization@lists.linux-foundation.org
In-Reply-To: <1321367122.3137.142.camel@hornet.cambridge.arm.com>

On Tue, 15 Nov 2011 14:25:22 +0000, Pawel Moll <pawel.moll@arm.com> wrote:
> On Tue, 2011-11-15 at 14:17 +0000, Sasha Levin wrote:
> > Guest features selector spelling mistake.
> >
> > diff --git a/include/linux/virtio_mmio.h b/include/linux/virtio_mmio.h
> > index 27c7ede..5c7b6f0 100644
> > --- a/include/linux/virtio_mmio.h
> > +++ b/include/linux/virtio_mmio.h
> > @@ -63,7 +63,7 @@
> >  #define VIRTIO_MMIO_GUEST_FEATURES	0x020
> >  
> >  /* Activated features set selector - Write Only */
> > -#define VIRTIO_MMIO_GUEST_FEATURES_SET	0x024
> > +#define VIRTIO_MMIO_GUEST_FEATURES_SEL	0x024
> >  
> >  /* Guest's memory page size in bytes - Write Only */
> >  #define VIRTIO_MMIO_GUEST_PAGE_SIZE	0x028
> 
> Damn. Sorry about it. But if you change the header you'll need to change
> the driver:
> 
> diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c
> index 1f25bb9..10bb8b9 100644
> --- a/drivers/virtio/virtio_mmio.c
> +++ b/drivers/virtio/virtio_mmio.c
> @@ -161,7 +161,7 @@ static void vm_finalize_features(struct virtio_device *vdev)
>         vring_transport_features(vdev);
>  
>         for (i = 0; i < ARRAY_SIZE(vdev->features); i++) {
> -               writel(i, vm_dev->base + VIRTIO_MMIO_GUEST_FEATURES_SET);
> +               writel(i, vm_dev->base + VIRTIO_MMIO_GUEST_FEATURES_SEL);
>                 writel(vdev->features[i],
>                                 vm_dev->base + VIRTIO_MMIO_GUEST_FEATURES);
>         }

Rolled together and applied.

Thanks,
Rusty.

^ permalink raw reply

* Re: [PATCH] virtio-mmio: Devices parameter parsing
From: Rusty Russell @ 2011-11-16  0:42 UTC (permalink / raw)
  To: linux-kernel, virtualization; +Cc: Peter Maydell, Sasha Levin, Pawel Moll
In-Reply-To: <1321365185-2928-1-git-send-email-pawel.moll@arm.com>

On Tue, 15 Nov 2011 13:53:05 +0000, Pawel Moll <pawel.moll@arm.com> wrote:
> +static char *virtio_mmio_cmdline_devices;
> +module_param_named(devices, virtio_mmio_cmdline_devices, charp, 0);

This is the wrong way to do this.

Don't put things in a charp and process it later.  It's lazy.  You
should write parsers for it and call it straight from module_param.

And if you do it that way, multiple devices are simply multiple
arguments.

Like so:

static int virtio_mmio_set(const char *val, const struct kernel_param *kp)
{
        if (!virtio_mmio_cmdline_parent_initialized) {
        	err = device_register(&virtio_mmio_cmdline_parent);
        	if (err)
        		return err;
                virtio_mmio_cmdline_parent_initialized = true;
        }
        
        ... parse here, return -errno on fail, 0 on success.
}

static struct kernel_param_ops param_ops_virtio_mmio = {
        .set = virtio_mmio_set,
        .get = virtio_mmio_get,
};

module_param_cb(device, &param_ops_virtio_mmio, NULL, 0400);

Initialization and error handling is now done for you...

Cheers,
Rusty.

^ permalink raw reply

* Re: [PATCH 5 of 5] virtio: expose added descriptors immediately
From: Rusty Russell @ 2011-11-16  0:21 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: Christoph Hellwig, linux-kernel, kvm, virtualization
In-Reply-To: <20111114065606.GA3779@redhat.com>

On Mon, 14 Nov 2011 08:56:06 +0200, "Michael S. Tsirkin" <mst@redhat.com> wrote:
> On Sun, Nov 13, 2011 at 11:03:13PM +0200, Michael S. Tsirkin wrote:
> > On Thu, Nov 03, 2011 at 06:12:53PM +1030, Rusty Russell wrote:
> > > A virtio driver does virtqueue_add_buf() multiple times before finally
> > > calling virtqueue_kick(); previously we only exposed the added buffers
> > > in the virtqueue_kick() call.  This means we don't need a memory
> > > barrier in virtqueue_add_buf(), but it reduces concurrency as the
> > > device (ie. host) can't see the buffers until the kick.
> > > 
> > > Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
> > 
> > In the past I played with a patch like this, but I didn't see a
> > performance gain either way. Do you see any gain?
> > 
> > I'm a bit concerned that with this patch, a buggy driver that
> > adds more than 2^16 descriptors without a kick
> > would seem to work sometimes. Let's add WARN_ON(vq->num_added > (1 << 16))?
> 
> Thinking about it more - it might be tricky for drivers
> to ensure this. add used to fail when vq is full, but now
> driver might do get between add and notify:
> 	lock
> 	add_buf * N
> 	prep
> 	unlock
> 	lock
> 	get_buf * N
> 	unlock
> 	lock
> 	add_buf
> 	prep
> 	unlock
> 	notify
> 
> and since add was followed by get, this doesn't fail.

Right, the driver could, in theory, do:
        add_buf()
        if (!get_buf())
                notify()

But we don't allow that at the moment in our API: we insist on a notify
occasionally.  Noone does this at the moment, so a WARN_ON is correct.

If you're just add_buf() without the get_buf() then add_buf() will fail
already.

Here's my current variant:

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -245,9 +245,19 @@ add_head:
 
 	/* Put entry in available array (but don't update avail->idx until they
 	 * do sync). */
-	avail = ((vq->vring.avail->idx + vq->num_added++) & (vq->vring.num-1));
+	avail = (vq->vring.avail->idx & (vq->vring.num-1));
 	vq->vring.avail->ring[avail] = head;
 
+	/* Descriptors and available array need to be set before we expose the
+	 * new available array entries. */
+	virtio_wmb();
+	vq->vring.avail->idx++;
+	vq->num_added++;
+
+	/* If you haven't kicked in this long, you're probably doing something
+	 * wrong. */
+	WARN_ON(vq->num_added > vq->vring.num);
+
 	pr_debug("Added buffer head %i to %p\n", head, vq);
 	END_USE(vq);
 
It's hard to write a useful WARN_ON() for the "you should kick more
regularly" case (we could take timestamps if DEBUG is defined, I guess),
so let's leave this until someone actually trips it.

Thanks,
Rusty.

^ permalink raw reply

* Re: [RFC] kvm tools: Implement multiple VQ for virtio-net
From: Rusty Russell @ 2011-11-16  0:04 UTC (permalink / raw)
  To: Michael S. Tsirkin, Pekka Enberg
  Cc: Krishna Kumar, kvm, Asias He, virtualization, gorcunov,
	Sasha Levin, netdev, mingo, Stephen Hemminger
In-Reply-To: <20111114130507.GA18288@redhat.com>

On Mon, 14 Nov 2011 15:05:07 +0200, "Michael S. Tsirkin" <mst@redhat.com> wrote:
> On Mon, Nov 14, 2011 at 02:25:17PM +0200, Pekka Enberg wrote:
> > On Mon, Nov 14, 2011 at 4:04 AM, Asias He <asias.hejun@gmail.com> wrote:
> > > Why both the bandwidth and latency performance are dropping so dramatically
> > > with multiple VQ?
> > 
> > What's the expected benefit from multiple VQs
> 
> Heh, the original patchset didn't mention this :) It really should.
> They are supposed to speed up networking for high smp guests.

If we have one queue per guest CPU, does this allow us to run lockless?

Thanks,
Rusty.

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox