Linux virtualization list
 help / color / mirror / Atom feed
* Re: [PATCH v2] virtio_pci: support enabling VFs
From: Tiwei Bie @ 2018-06-01  3:02 UTC (permalink / raw)
  To: stefanha, mst, bhelgaas, virtualization, linux-kernel, virtio-dev,
	linux-pci
  Cc: alexander.h.duyck, mark.d.rustad, zhihong.wang
In-Reply-To: <20180601020921.30957-1-tiwei.bie@intel.com>

On Fri, Jun 01, 2018 at 10:09:21AM +0800, Tiwei Bie wrote:
> There is a new feature bit allocated in virtio spec to
> support SR-IOV (Single Root I/O Virtualization):
> 
> https://github.com/oasis-tcs/virtio-spec/issues/11
> 
> This patch enables the support for this feature bit in
> virtio driver.
> 
> Signed-off-by: Tiwei Bie <tiwei.bie@intel.com>
> Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
> Acked-by: Michael S. Tsirkin <mst@redhat.com>

Hi Stefan,

I'm sorry, I just noticed that I forgot to include you in
the To/Cc list.

I carried your "Acked-by" for the previous version [1]
to this version (v2). Because from my understanding, this
is an ACK for this feature. And the change in this version
isn't big. The 1st change is to disable VFs when unbinding
the driver. And the 2nd one is to drop the use of
pci_sriov_configure_simple. So I guess the "ACK" is still
valid. Please let me know if I'm wrong. Thanks a lot!

Hi Michael,

I also carried your "Acked-by" for the previous version [2]
to this one (v2). Please let me know if I'm wrong. Thanks
a lot!

[1] https://lists.oasis-open.org/archives/virtio-dev/201805/msg00182.html
[2] https://lists.oasis-open.org/archives/virtio-dev/201805/msg00183.html

Best regards,
Tiwei Bie

> ---
> v2:
> - Disable VFs when unbinding the driver (Alex, MST);
> - Don't use pci_sriov_configure_simple (Alex);
> 
>  drivers/virtio/virtio_pci_common.c | 30 ++++++++++++++++++++++++++++++
>  drivers/virtio/virtio_pci_modern.c | 14 ++++++++++++++
>  include/uapi/linux/virtio_config.h |  7 ++++++-
>  3 files changed, 50 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c
> index 48d4d1cf1cb6..1d4467b2dc31 100644
> --- a/drivers/virtio/virtio_pci_common.c
> +++ b/drivers/virtio/virtio_pci_common.c
> @@ -577,6 +577,8 @@ static void virtio_pci_remove(struct pci_dev *pci_dev)
>  	struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
>  	struct device *dev = get_device(&vp_dev->vdev.dev);
>  
> +	pci_disable_sriov(pci_dev);
> +
>  	unregister_virtio_device(&vp_dev->vdev);
>  
>  	if (vp_dev->ioaddr)
> @@ -588,6 +590,33 @@ static void virtio_pci_remove(struct pci_dev *pci_dev)
>  	put_device(dev);
>  }
>  
> +static int virtio_pci_sriov_configure(struct pci_dev *pci_dev, int num_vfs)
> +{
> +	struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
> +	struct virtio_device *vdev = &vp_dev->vdev;
> +	int ret;
> +
> +	if (!(vdev->config->get_status(vdev) & VIRTIO_CONFIG_S_DRIVER_OK))
> +		return -EBUSY;
> +
> +	if (!__virtio_test_bit(vdev, VIRTIO_F_SR_IOV))
> +		return -EINVAL;
> +
> +	if (pci_vfs_assigned(pci_dev))
> +		return -EPERM;
> +
> +	if (num_vfs == 0) {
> +		pci_disable_sriov(pci_dev);
> +		return 0;
> +	}
> +
> +	ret = pci_enable_sriov(pci_dev, num_vfs);
> +	if (ret < 0)
> +		return ret;
> +
> +	return num_vfs;
> +}
> +
>  static struct pci_driver virtio_pci_driver = {
>  	.name		= "virtio-pci",
>  	.id_table	= virtio_pci_id_table,
> @@ -596,6 +625,7 @@ static struct pci_driver virtio_pci_driver = {
>  #ifdef CONFIG_PM_SLEEP
>  	.driver.pm	= &virtio_pci_pm_ops,
>  #endif
> +	.sriov_configure = virtio_pci_sriov_configure,
>  };
>  
>  module_pci_driver(virtio_pci_driver);
> diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c
> index 2555d80f6eec..07571daccfec 100644
> --- a/drivers/virtio/virtio_pci_modern.c
> +++ b/drivers/virtio/virtio_pci_modern.c
> @@ -153,14 +153,28 @@ static u64 vp_get_features(struct virtio_device *vdev)
>  	return features;
>  }
>  
> +static void vp_transport_features(struct virtio_device *vdev, u64 features)
> +{
> +	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
> +	struct pci_dev *pci_dev = vp_dev->pci_dev;
> +
> +	if ((features & BIT_ULL(VIRTIO_F_SR_IOV)) &&
> +			pci_find_ext_capability(pci_dev, PCI_EXT_CAP_ID_SRIOV))
> +		__virtio_set_bit(vdev, VIRTIO_F_SR_IOV);
> +}
> +
>  /* virtio config->finalize_features() implementation */
>  static int vp_finalize_features(struct virtio_device *vdev)
>  {
>  	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
> +	u64 features = vdev->features;
>  
>  	/* Give virtio_ring a chance to accept features. */
>  	vring_transport_features(vdev);
>  
> +	/* Give virtio_pci a chance to accept features. */
> +	vp_transport_features(vdev, features);
> +
>  	if (!__virtio_test_bit(vdev, VIRTIO_F_VERSION_1)) {
>  		dev_err(&vdev->dev, "virtio: device uses modern interface "
>  			"but does not have VIRTIO_F_VERSION_1\n");
> diff --git a/include/uapi/linux/virtio_config.h b/include/uapi/linux/virtio_config.h
> index 308e2096291f..b7c1f4e7d59e 100644
> --- a/include/uapi/linux/virtio_config.h
> +++ b/include/uapi/linux/virtio_config.h
> @@ -49,7 +49,7 @@
>   * transport being used (eg. virtio_ring), the rest are per-device feature
>   * bits. */
>  #define VIRTIO_TRANSPORT_F_START	28
> -#define VIRTIO_TRANSPORT_F_END		34
> +#define VIRTIO_TRANSPORT_F_END		38
>  
>  #ifndef VIRTIO_CONFIG_NO_LEGACY
>  /* Do we get callbacks when the ring is completely used, even if we've
> @@ -71,4 +71,9 @@
>   * this is for compatibility with legacy systems.
>   */
>  #define VIRTIO_F_IOMMU_PLATFORM		33
> +
> +/*
> + * Does the device support Single Root I/O Virtualization?
> + */
> +#define VIRTIO_F_SR_IOV			37
>  #endif /* _UAPI_LINUX_VIRTIO_CONFIG_H */
> -- 
> 2.17.0
> 

^ permalink raw reply

* Re: [PATCH v2] virtio_pci: support enabling VFs
From: Michael S. Tsirkin @ 2018-06-01  3:47 UTC (permalink / raw)
  To: Tiwei Bie
  Cc: alexander.h.duyck, virtio-dev, linux-pci, linux-kernel,
	virtualization, zhihong.wang, bhelgaas, mark.d.rustad
In-Reply-To: <20180601020921.30957-1-tiwei.bie@intel.com>

On Fri, Jun 01, 2018 at 10:09:21AM +0800, Tiwei Bie wrote:
> There is a new feature bit allocated in virtio spec to
> support SR-IOV (Single Root I/O Virtualization):
> 
> https://github.com/oasis-tcs/virtio-spec/issues/11
> 
> This patch enables the support for this feature bit in
> virtio driver.
> 
> Signed-off-by: Tiwei Bie <tiwei.bie@intel.com>
> Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
> Acked-by: Michael S. Tsirkin <mst@redhat.com>
> ---

Generally you aren't supposed to carry forward acks
when you make major changes.

In this case I'm fine with the patch, so never mind.

> v2:
> - Disable VFs when unbinding the driver (Alex, MST);
> - Don't use pci_sriov_configure_simple (Alex);
> 
>  drivers/virtio/virtio_pci_common.c | 30 ++++++++++++++++++++++++++++++
>  drivers/virtio/virtio_pci_modern.c | 14 ++++++++++++++
>  include/uapi/linux/virtio_config.h |  7 ++++++-
>  3 files changed, 50 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c
> index 48d4d1cf1cb6..1d4467b2dc31 100644
> --- a/drivers/virtio/virtio_pci_common.c
> +++ b/drivers/virtio/virtio_pci_common.c
> @@ -577,6 +577,8 @@ static void virtio_pci_remove(struct pci_dev *pci_dev)
>  	struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
>  	struct device *dev = get_device(&vp_dev->vdev.dev);
>  
> +	pci_disable_sriov(pci_dev);
> +
>  	unregister_virtio_device(&vp_dev->vdev);
>  
>  	if (vp_dev->ioaddr)
> @@ -588,6 +590,33 @@ static void virtio_pci_remove(struct pci_dev *pci_dev)
>  	put_device(dev);
>  }
>  
> +static int virtio_pci_sriov_configure(struct pci_dev *pci_dev, int num_vfs)
> +{
> +	struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
> +	struct virtio_device *vdev = &vp_dev->vdev;
> +	int ret;
> +
> +	if (!(vdev->config->get_status(vdev) & VIRTIO_CONFIG_S_DRIVER_OK))
> +		return -EBUSY;
> +
> +	if (!__virtio_test_bit(vdev, VIRTIO_F_SR_IOV))
> +		return -EINVAL;
> +
> +	if (pci_vfs_assigned(pci_dev))
> +		return -EPERM;

Not a comment on this patch - existing code has the
same race - but generally

1. this seems racy since assigning vfs does not seem
   to take device lock
2. does this work correctly for kvm at all?
   pci_set_dev_assigned seems to be only called by xen.

Can you look at addressing this pls?


> +
> +	if (num_vfs == 0) {
> +		pci_disable_sriov(pci_dev);
> +		return 0;
> +	}
> +
> +	ret = pci_enable_sriov(pci_dev, num_vfs);
> +	if (ret < 0)
> +		return ret;
> +
> +	return num_vfs;
> +}
> +
>  static struct pci_driver virtio_pci_driver = {
>  	.name		= "virtio-pci",
>  	.id_table	= virtio_pci_id_table,
> @@ -596,6 +625,7 @@ static struct pci_driver virtio_pci_driver = {
>  #ifdef CONFIG_PM_SLEEP
>  	.driver.pm	= &virtio_pci_pm_ops,
>  #endif
> +	.sriov_configure = virtio_pci_sriov_configure,
>  };
>  
>  module_pci_driver(virtio_pci_driver);
> diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c
> index 2555d80f6eec..07571daccfec 100644
> --- a/drivers/virtio/virtio_pci_modern.c
> +++ b/drivers/virtio/virtio_pci_modern.c
> @@ -153,14 +153,28 @@ static u64 vp_get_features(struct virtio_device *vdev)
>  	return features;
>  }
>  
> +static void vp_transport_features(struct virtio_device *vdev, u64 features)
> +{
> +	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
> +	struct pci_dev *pci_dev = vp_dev->pci_dev;
> +
> +	if ((features & BIT_ULL(VIRTIO_F_SR_IOV)) &&
> +			pci_find_ext_capability(pci_dev, PCI_EXT_CAP_ID_SRIOV))
> +		__virtio_set_bit(vdev, VIRTIO_F_SR_IOV);
> +}
> +
>  /* virtio config->finalize_features() implementation */
>  static int vp_finalize_features(struct virtio_device *vdev)
>  {
>  	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
> +	u64 features = vdev->features;
>  
>  	/* Give virtio_ring a chance to accept features. */
>  	vring_transport_features(vdev);
>  
> +	/* Give virtio_pci a chance to accept features. */
> +	vp_transport_features(vdev, features);
> +
>  	if (!__virtio_test_bit(vdev, VIRTIO_F_VERSION_1)) {
>  		dev_err(&vdev->dev, "virtio: device uses modern interface "
>  			"but does not have VIRTIO_F_VERSION_1\n");
> diff --git a/include/uapi/linux/virtio_config.h b/include/uapi/linux/virtio_config.h
> index 308e2096291f..b7c1f4e7d59e 100644
> --- a/include/uapi/linux/virtio_config.h
> +++ b/include/uapi/linux/virtio_config.h
> @@ -49,7 +49,7 @@
>   * transport being used (eg. virtio_ring), the rest are per-device feature
>   * bits. */
>  #define VIRTIO_TRANSPORT_F_START	28
> -#define VIRTIO_TRANSPORT_F_END		34
> +#define VIRTIO_TRANSPORT_F_END		38
>  
>  #ifndef VIRTIO_CONFIG_NO_LEGACY
>  /* Do we get callbacks when the ring is completely used, even if we've
> @@ -71,4 +71,9 @@
>   * this is for compatibility with legacy systems.
>   */
>  #define VIRTIO_F_IOMMU_PLATFORM		33
> +
> +/*
> + * Does the device support Single Root I/O Virtualization?
> + */
> +#define VIRTIO_F_SR_IOV			37
>  #endif /* _UAPI_LINUX_VIRTIO_CONFIG_H */
> -- 
> 2.17.0

^ permalink raw reply

* Re: [PATCH v2] virtio_pci: support enabling VFs
From: Tiwei Bie @ 2018-06-01  4:00 UTC (permalink / raw)
  To: Michael S. Tsirkin, stefanha
  Cc: alexander.h.duyck, virtio-dev, linux-pci, linux-kernel,
	virtualization, zhihong.wang, bhelgaas, mark.d.rustad
In-Reply-To: <20180601063224-mutt-send-email-mst@kernel.org>

On Fri, Jun 01, 2018 at 06:47:01AM +0300, Michael S. Tsirkin wrote:
> On Fri, Jun 01, 2018 at 10:09:21AM +0800, Tiwei Bie wrote:
> > There is a new feature bit allocated in virtio spec to
> > support SR-IOV (Single Root I/O Virtualization):
> > 
> > https://github.com/oasis-tcs/virtio-spec/issues/11
> > 
> > This patch enables the support for this feature bit in
> > virtio driver.
> > 
> > Signed-off-by: Tiwei Bie <tiwei.bie@intel.com>
> > Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
> > Acked-by: Michael S. Tsirkin <mst@redhat.com>
> > ---
> 
> Generally you aren't supposed to carry forward acks
> when you make major changes.

Got it! Thank you very much!

> 
> In this case I'm fine with the patch, so never mind.

Thank you so much! Anyway, it's my fault.
I'll send a new version dropping all acks.

> 
[...]
> >  
> > +static int virtio_pci_sriov_configure(struct pci_dev *pci_dev, int num_vfs)
> > +{
> > +	struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
> > +	struct virtio_device *vdev = &vp_dev->vdev;
> > +	int ret;
> > +
> > +	if (!(vdev->config->get_status(vdev) & VIRTIO_CONFIG_S_DRIVER_OK))
> > +		return -EBUSY;
> > +
> > +	if (!__virtio_test_bit(vdev, VIRTIO_F_SR_IOV))
> > +		return -EINVAL;
> > +
> > +	if (pci_vfs_assigned(pci_dev))
> > +		return -EPERM;
> 
> Not a comment on this patch - existing code has the
> same race - but generally
> 
> 1. this seems racy since assigning vfs does not seem
>    to take device lock
> 2. does this work correctly for kvm at all?
>    pci_set_dev_assigned seems to be only called by xen.
> 
> Can you look at addressing this pls?

Sure! I'll do it!

Best regards,
Tiwei Bie

^ permalink raw reply

* [PATCH v3] virtio_pci: support enabling VFs
From: Tiwei Bie @ 2018-06-01  4:02 UTC (permalink / raw)
  To: mst, bhelgaas, stefanha, virtualization, linux-kernel, virtio-dev,
	linux-pci
  Cc: alexander.h.duyck, mark.d.rustad, zhihong.wang

There is a new feature bit allocated in virtio spec to
support SR-IOV (Single Root I/O Virtualization):

https://github.com/oasis-tcs/virtio-spec/issues/11

This patch enables the support for this feature bit in
virtio driver.

Signed-off-by: Tiwei Bie <tiwei.bie@intel.com>
---
v3:
- Drop the acks;

v2:
- Disable VFs when unbinding the driver (Alex, MST);
- Don't use pci_sriov_configure_simple (Alex);

 drivers/virtio/virtio_pci_common.c | 30 ++++++++++++++++++++++++++++++
 drivers/virtio/virtio_pci_modern.c | 14 ++++++++++++++
 include/uapi/linux/virtio_config.h |  7 ++++++-
 3 files changed, 50 insertions(+), 1 deletion(-)

diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c
index 48d4d1cf1cb6..1d4467b2dc31 100644
--- a/drivers/virtio/virtio_pci_common.c
+++ b/drivers/virtio/virtio_pci_common.c
@@ -577,6 +577,8 @@ static void virtio_pci_remove(struct pci_dev *pci_dev)
 	struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
 	struct device *dev = get_device(&vp_dev->vdev.dev);
 
+	pci_disable_sriov(pci_dev);
+
 	unregister_virtio_device(&vp_dev->vdev);
 
 	if (vp_dev->ioaddr)
@@ -588,6 +590,33 @@ static void virtio_pci_remove(struct pci_dev *pci_dev)
 	put_device(dev);
 }
 
+static int virtio_pci_sriov_configure(struct pci_dev *pci_dev, int num_vfs)
+{
+	struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
+	struct virtio_device *vdev = &vp_dev->vdev;
+	int ret;
+
+	if (!(vdev->config->get_status(vdev) & VIRTIO_CONFIG_S_DRIVER_OK))
+		return -EBUSY;
+
+	if (!__virtio_test_bit(vdev, VIRTIO_F_SR_IOV))
+		return -EINVAL;
+
+	if (pci_vfs_assigned(pci_dev))
+		return -EPERM;
+
+	if (num_vfs == 0) {
+		pci_disable_sriov(pci_dev);
+		return 0;
+	}
+
+	ret = pci_enable_sriov(pci_dev, num_vfs);
+	if (ret < 0)
+		return ret;
+
+	return num_vfs;
+}
+
 static struct pci_driver virtio_pci_driver = {
 	.name		= "virtio-pci",
 	.id_table	= virtio_pci_id_table,
@@ -596,6 +625,7 @@ static struct pci_driver virtio_pci_driver = {
 #ifdef CONFIG_PM_SLEEP
 	.driver.pm	= &virtio_pci_pm_ops,
 #endif
+	.sriov_configure = virtio_pci_sriov_configure,
 };
 
 module_pci_driver(virtio_pci_driver);
diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c
index 2555d80f6eec..07571daccfec 100644
--- a/drivers/virtio/virtio_pci_modern.c
+++ b/drivers/virtio/virtio_pci_modern.c
@@ -153,14 +153,28 @@ static u64 vp_get_features(struct virtio_device *vdev)
 	return features;
 }
 
+static void vp_transport_features(struct virtio_device *vdev, u64 features)
+{
+	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+	struct pci_dev *pci_dev = vp_dev->pci_dev;
+
+	if ((features & BIT_ULL(VIRTIO_F_SR_IOV)) &&
+			pci_find_ext_capability(pci_dev, PCI_EXT_CAP_ID_SRIOV))
+		__virtio_set_bit(vdev, VIRTIO_F_SR_IOV);
+}
+
 /* virtio config->finalize_features() implementation */
 static int vp_finalize_features(struct virtio_device *vdev)
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+	u64 features = vdev->features;
 
 	/* Give virtio_ring a chance to accept features. */
 	vring_transport_features(vdev);
 
+	/* Give virtio_pci a chance to accept features. */
+	vp_transport_features(vdev, features);
+
 	if (!__virtio_test_bit(vdev, VIRTIO_F_VERSION_1)) {
 		dev_err(&vdev->dev, "virtio: device uses modern interface "
 			"but does not have VIRTIO_F_VERSION_1\n");
diff --git a/include/uapi/linux/virtio_config.h b/include/uapi/linux/virtio_config.h
index 308e2096291f..b7c1f4e7d59e 100644
--- a/include/uapi/linux/virtio_config.h
+++ b/include/uapi/linux/virtio_config.h
@@ -49,7 +49,7 @@
  * transport being used (eg. virtio_ring), the rest are per-device feature
  * bits. */
 #define VIRTIO_TRANSPORT_F_START	28
-#define VIRTIO_TRANSPORT_F_END		34
+#define VIRTIO_TRANSPORT_F_END		38
 
 #ifndef VIRTIO_CONFIG_NO_LEGACY
 /* Do we get callbacks when the ring is completely used, even if we've
@@ -71,4 +71,9 @@
  * this is for compatibility with legacy systems.
  */
 #define VIRTIO_F_IOMMU_PLATFORM		33
+
+/*
+ * Does the device support Single Root I/O Virtualization?
+ */
+#define VIRTIO_F_SR_IOV			37
 #endif /* _UAPI_LINUX_VIRTIO_CONFIG_H */
-- 
2.17.0

^ permalink raw reply related

* Re: [PATCH v4] virtio_blk: add DISCARD and WRIET ZEROES commands support
From: Stefan Hajnoczi @ 2018-06-01  8:59 UTC (permalink / raw)
  To: Liu, Changpeng
  Cc: pbonzini@redhat.com, cavery@redhat.com,
	virtualization@lists.linux-foundation.org
In-Reply-To: <FF7FC980937D6342B9D289F5F3C7C2625B6DCEE5@SHSMSX103.ccr.corp.intel.com>


[-- Attachment #1.1: Type: text/plain, Size: 2364 bytes --]

On Thu, May 31, 2018 at 11:53:26PM +0000, Liu, Changpeng wrote:
> 
> 
> > -----Original Message-----
> > From: Stefan Hajnoczi [mailto:stefanha@redhat.com]
> > Sent: Thursday, May 31, 2018 6:31 PM
> > To: Liu, Changpeng <changpeng.liu@intel.com>
> > Cc: virtualization@lists.linux-foundation.org; cavery@redhat.com;
> > jasowang@redhat.com; pbonzini@redhat.com; Wang, Wei W
> > <wei.w.wang@intel.com>
> > Subject: Re: [PATCH v4] virtio_blk: add DISCARD and WRIET ZEROES commands
> > support
> > 
> > On Tue, May 29, 2018 at 09:42:24AM +0800, Changpeng Liu wrote:
> > >  	num = blk_rq_map_sg(hctx->queue, req, vbr->sg);
> > >  	if (num) {
> > > -		if (rq_data_dir(req) == WRITE)
> > > +		if (rq_data_dir(req) == WRITE || type == VIRTIO_BLK_T_DISCARD
> > ||
> > > +		    type == VIRTIO_BLK_T_WRITE_ZEROES)
> > >  			vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev,
> > VIRTIO_BLK_T_OUT);
> > 
> > The VIRTIO specification says:
> > 
> >   The type of the request is either a read (VIRTIO_BLK_T_IN), a write
> >   (VIRTIO_BLK_T_OUT), a discard (VIRTIO_BLK_T_DISCARD), a write zeroes
> >   (VIRTIO_BLK_T_WRITE_ZEROES) or a flush (VIRTIO_BLK_T_FLUSH).
> > 
> > But this patch actually uses VIRTIO_BLK_T_DISCARD | VIRTIO_BLK_T_OUT or
> > VIRTIO_BLK_T_WRITE_ZEROES | VIRTIO_BLK_T_OUT.  "either A or B" is
> > exclusive, it does not mean "A and B".
> Hi Stefan,
> 
> For the new add DISCARD and WRITE ZEROES commands, I defined the 
> type code to 11 and 13, so the last bit always is 1, there is no difference
> in practice.

Then the code is misleading.  This is clearer:

  if (type == VIRTIO_BLK_T_DISCARD || type == VIRTIO_BLK_T_WRITE_ZEROES)
      /* Do nothing, type already set */
  else if (rq_data_dir(req) == WRITE)
      vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_OUT);
  ...

> But I believe the specification says VIRTIO_BLK_T_OUT means direction, so
> OR the two bits together should compliance with the specification.

I cannot find that in the specification:

  http://docs.oasis-open.org/virtio/virtio/v1.0/cs04/virtio-v1.0-cs04.html#x1-2020002

and it would contradict the "The type of the request is either ... or
..." wording that I quoted from the spec above.

If you do find something in the spec, please let me know and we can
figure out how to make the spec consistent.

[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 455 bytes --]

[-- Attachment #2: Type: text/plain, Size: 183 bytes --]

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply

* [PATCH] virtio: update the comments for transport features
From: Tiwei Bie @ 2018-06-01 10:22 UTC (permalink / raw)
  To: mst, jasowang, stefanha, virtualization, linux-kernel, virtio-dev

Suggested-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Tiwei Bie <tiwei.bie@intel.com>
---
This patch is generated on top of below patch:
https://lists.oasis-open.org/archives/virtio-dev/201805/msg00212.html

 include/uapi/linux/virtio_config.h | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/include/uapi/linux/virtio_config.h b/include/uapi/linux/virtio_config.h
index b7c1f4e7d59e..479affd903e9 100644
--- a/include/uapi/linux/virtio_config.h
+++ b/include/uapi/linux/virtio_config.h
@@ -45,9 +45,12 @@
 /* We've given up on this device. */
 #define VIRTIO_CONFIG_S_FAILED		0x80
 
-/* Some virtio feature bits (currently bits 28 through 32) are reserved for the
- * transport being used (eg. virtio_ring), the rest are per-device feature
- * bits. */
+/*
+ * Some virtio feature bits (currently bits VIRTIO_TRANSPORT_F_START
+ * through VIRTIO_TRANSPORT_F_END) are reserved for the transport being
+ * used (e.g. virtio_ring, virtio_pci etc.), the rest are per-device
+ * feature bits.
+ */
 #define VIRTIO_TRANSPORT_F_START	28
 #define VIRTIO_TRANSPORT_F_END		38
 
-- 
2.17.0

^ permalink raw reply related

* Re: [virtio-dev] [PATCH] virtio: update the comments for transport features
From: Cornelia Huck @ 2018-06-01 10:45 UTC (permalink / raw)
  To: Tiwei Bie; +Cc: virtio-dev, mst, linux-kernel, virtualization, stefanha
In-Reply-To: <20180601102217.21628-1-tiwei.bie@intel.com>

On Fri,  1 Jun 2018 18:22:17 +0800
Tiwei Bie <tiwei.bie@intel.com> wrote:

> Suggested-by: Michael S. Tsirkin <mst@redhat.com>
> Signed-off-by: Tiwei Bie <tiwei.bie@intel.com>
> ---
> This patch is generated on top of below patch:
> https://lists.oasis-open.org/archives/virtio-dev/201805/msg00212.html
> 
>  include/uapi/linux/virtio_config.h | 9 ++++++---
>  1 file changed, 6 insertions(+), 3 deletions(-)
> 
> diff --git a/include/uapi/linux/virtio_config.h b/include/uapi/linux/virtio_config.h
> index b7c1f4e7d59e..479affd903e9 100644
> --- a/include/uapi/linux/virtio_config.h
> +++ b/include/uapi/linux/virtio_config.h
> @@ -45,9 +45,12 @@
>  /* We've given up on this device. */
>  #define VIRTIO_CONFIG_S_FAILED		0x80
>  
> -/* Some virtio feature bits (currently bits 28 through 32) are reserved for the
> - * transport being used (eg. virtio_ring), the rest are per-device feature
> - * bits. */
> +/*
> + * Some virtio feature bits (currently bits VIRTIO_TRANSPORT_F_START
> + * through VIRTIO_TRANSPORT_F_END) are reserved for the transport being

It will always be bits up to VIRTIO_TRANSPORT_F_END, no? So you can drop
the "currently"?

Or reword as "Virtio feature bits VIRTIO_TRANSPORT_F_START through
VIRTIO_TRANSPORT_F_END are reserved..."?

> + * used (e.g. virtio_ring, virtio_pci etc.), the rest are per-device
> + * feature bits.
> + */
>  #define VIRTIO_TRANSPORT_F_START	28
>  #define VIRTIO_TRANSPORT_F_END		38
>  

^ permalink raw reply

* Re: [virtio-dev] [PATCH] virtio: update the comments for transport features
From: Tiwei Bie @ 2018-06-01 10:52 UTC (permalink / raw)
  To: Cornelia Huck; +Cc: virtio-dev, mst, linux-kernel, virtualization, stefanha
In-Reply-To: <20180601124552.6a188454.cohuck@redhat.com>

On Fri, Jun 01, 2018 at 12:45:52PM +0200, Cornelia Huck wrote:
> On Fri,  1 Jun 2018 18:22:17 +0800
> Tiwei Bie <tiwei.bie@intel.com> wrote:
> 
> > Suggested-by: Michael S. Tsirkin <mst@redhat.com>
> > Signed-off-by: Tiwei Bie <tiwei.bie@intel.com>
> > ---
> > This patch is generated on top of below patch:
> > https://lists.oasis-open.org/archives/virtio-dev/201805/msg00212.html
> > 
> >  include/uapi/linux/virtio_config.h | 9 ++++++---
> >  1 file changed, 6 insertions(+), 3 deletions(-)
> > 
> > diff --git a/include/uapi/linux/virtio_config.h b/include/uapi/linux/virtio_config.h
> > index b7c1f4e7d59e..479affd903e9 100644
> > --- a/include/uapi/linux/virtio_config.h
> > +++ b/include/uapi/linux/virtio_config.h
> > @@ -45,9 +45,12 @@
> >  /* We've given up on this device. */
> >  #define VIRTIO_CONFIG_S_FAILED		0x80
> >  
> > -/* Some virtio feature bits (currently bits 28 through 32) are reserved for the
> > - * transport being used (eg. virtio_ring), the rest are per-device feature
> > - * bits. */
> > +/*
> > + * Some virtio feature bits (currently bits VIRTIO_TRANSPORT_F_START
> > + * through VIRTIO_TRANSPORT_F_END) are reserved for the transport being
> 
> It will always be bits up to VIRTIO_TRANSPORT_F_END, no? So you can drop
> the "currently"?
> 
> Or reword as "Virtio feature bits VIRTIO_TRANSPORT_F_START through
> VIRTIO_TRANSPORT_F_END are reserved..."?

I will do it. Thanks for the suggestion!

Best regards,
Tiwei Bie

^ permalink raw reply

* [PATCH v2] virtio: update the comments for transport features
From: Tiwei Bie @ 2018-06-01 10:59 UTC (permalink / raw)
  To: mst, jasowang, stefanha, cohuck, virtualization, linux-kernel,
	virtio-dev

The existing comments for transport features are out dated.
So update them to address the latest changes in the spec.

Suggested-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Tiwei Bie <tiwei.bie@intel.com>
---
This patch is generated on top of below patch:
https://lists.oasis-open.org/archives/virtio-dev/201805/msg00212.html

v2:
- Improve the comments (Cornelia);
- Improve the commit message;

 include/uapi/linux/virtio_config.h | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/include/uapi/linux/virtio_config.h b/include/uapi/linux/virtio_config.h
index b7c1f4e7d59e..449132c76b1c 100644
--- a/include/uapi/linux/virtio_config.h
+++ b/include/uapi/linux/virtio_config.h
@@ -45,9 +45,12 @@
 /* We've given up on this device. */
 #define VIRTIO_CONFIG_S_FAILED		0x80
 
-/* Some virtio feature bits (currently bits 28 through 32) are reserved for the
- * transport being used (eg. virtio_ring), the rest are per-device feature
- * bits. */
+/*
+ * Virtio feature bits VIRTIO_TRANSPORT_F_START through
+ * VIRTIO_TRANSPORT_F_END are reserved for the transport
+ * being used (e.g. virtio_ring, virtio_pci etc.), the
+ * rest are per-device feature bits.
+ */
 #define VIRTIO_TRANSPORT_F_START	28
 #define VIRTIO_TRANSPORT_F_END		38
 
-- 
2.17.0

^ permalink raw reply related

* Re: [PATCH v2] virtio: update the comments for transport features
From: Cornelia Huck @ 2018-06-01 11:16 UTC (permalink / raw)
  To: Tiwei Bie; +Cc: virtio-dev, mst, linux-kernel, virtualization, stefanha
In-Reply-To: <20180601105950.10632-1-tiwei.bie@intel.com>

On Fri,  1 Jun 2018 18:59:50 +0800
Tiwei Bie <tiwei.bie@intel.com> wrote:

> The existing comments for transport features are out dated.

s/out dated/outdated/

> So update them to address the latest changes in the spec.
> 
> Suggested-by: Michael S. Tsirkin <mst@redhat.com>
> Signed-off-by: Tiwei Bie <tiwei.bie@intel.com>
> ---
> This patch is generated on top of below patch:
> https://lists.oasis-open.org/archives/virtio-dev/201805/msg00212.html
> 
> v2:
> - Improve the comments (Cornelia);
> - Improve the commit message;
> 
>  include/uapi/linux/virtio_config.h | 9 ++++++---
>  1 file changed, 6 insertions(+), 3 deletions(-)
> 
> diff --git a/include/uapi/linux/virtio_config.h b/include/uapi/linux/virtio_config.h
> index b7c1f4e7d59e..449132c76b1c 100644
> --- a/include/uapi/linux/virtio_config.h
> +++ b/include/uapi/linux/virtio_config.h
> @@ -45,9 +45,12 @@
>  /* We've given up on this device. */
>  #define VIRTIO_CONFIG_S_FAILED		0x80
>  
> -/* Some virtio feature bits (currently bits 28 through 32) are reserved for the
> - * transport being used (eg. virtio_ring), the rest are per-device feature
> - * bits. */
> +/*
> + * Virtio feature bits VIRTIO_TRANSPORT_F_START through
> + * VIRTIO_TRANSPORT_F_END are reserved for the transport
> + * being used (e.g. virtio_ring, virtio_pci etc.), the
> + * rest are per-device feature bits.
> + */
>  #define VIRTIO_TRANSPORT_F_START	28
>  #define VIRTIO_TRANSPORT_F_END		38
>  

Reviewed-by: Cornelia Huck <cohuck@redhat.com>

^ permalink raw reply

* Re: [PATCH v2] virtio: update the comments for transport features
From: Tiwei Bie @ 2018-06-01 11:22 UTC (permalink / raw)
  To: Cornelia Huck; +Cc: virtio-dev, mst, linux-kernel, virtualization, stefanha
In-Reply-To: <20180601131647.5d02da18.cohuck@redhat.com>

On Fri, Jun 01, 2018 at 01:16:47PM +0200, Cornelia Huck wrote:
> On Fri,  1 Jun 2018 18:59:50 +0800
> Tiwei Bie <tiwei.bie@intel.com> wrote:
> 
> > The existing comments for transport features are out dated.
> 
> s/out dated/outdated/

Thanks for catching this typo!

> 
[...]
> > +/*
> > + * Virtio feature bits VIRTIO_TRANSPORT_F_START through
> > + * VIRTIO_TRANSPORT_F_END are reserved for the transport
> > + * being used (e.g. virtio_ring, virtio_pci etc.), the
> > + * rest are per-device feature bits.
> > + */
> >  #define VIRTIO_TRANSPORT_F_START	28
> >  #define VIRTIO_TRANSPORT_F_END		38
> >  
> 
> Reviewed-by: Cornelia Huck <cohuck@redhat.com>

Thanks for your review!

Best regards,
Tiwei Bie

^ permalink raw reply

* [PATCH v3] virtio: update the comments for transport features
From: Tiwei Bie @ 2018-06-01 11:26 UTC (permalink / raw)
  To: mst, jasowang, stefanha, cohuck, virtualization, linux-kernel,
	virtio-dev

The existing comments for transport features are outdated.
So update them to address the latest changes in the spec.

Suggested-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Tiwei Bie <tiwei.bie@intel.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
---
This patch is generated on top of below patch:
https://lists.oasis-open.org/archives/virtio-dev/201805/msg00212.html

v3:
- Fix a typo in the commit message (Cornelia);

v2:
- Improve the comments (Cornelia);
- Improve the commit message;

 include/uapi/linux/virtio_config.h | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/include/uapi/linux/virtio_config.h b/include/uapi/linux/virtio_config.h
index b7c1f4e7d59e..449132c76b1c 100644
--- a/include/uapi/linux/virtio_config.h
+++ b/include/uapi/linux/virtio_config.h
@@ -45,9 +45,12 @@
 /* We've given up on this device. */
 #define VIRTIO_CONFIG_S_FAILED		0x80
 
-/* Some virtio feature bits (currently bits 28 through 32) are reserved for the
- * transport being used (eg. virtio_ring), the rest are per-device feature
- * bits. */
+/*
+ * Virtio feature bits VIRTIO_TRANSPORT_F_START through
+ * VIRTIO_TRANSPORT_F_END are reserved for the transport
+ * being used (e.g. virtio_ring, virtio_pci etc.), the
+ * rest are per-device feature bits.
+ */
 #define VIRTIO_TRANSPORT_F_START	28
 #define VIRTIO_TRANSPORT_F_END		38
 
-- 
2.17.0

^ permalink raw reply related

* Re: [RFC V5 PATCH 8/8] vhost: event suppression for packed ring
From: Wei Xu @ 2018-06-03 15:40 UTC (permalink / raw)
  To: Jason Wang; +Cc: kvm, mst, netdev, linux-kernel, virtualization
In-Reply-To: <12f2c455-5868-3b07-0eba-d49dcafd10f2@redhat.com>

On Thu, May 31, 2018 at 11:09:07AM +0800, Jason Wang wrote:
> 
> 
> On 2018年05月30日 19:42, Wei Xu wrote:
> >>  /* This actually signals the guest, using eventfd. */
> >>  void vhost_signal(struct vhost_dev *dev, struct vhost_virtqueue *vq)
> >>  {
> >>@@ -2802,10 +2930,34 @@ static bool vhost_enable_notify_packed(struct vhost_dev *dev,
> >>  				       struct vhost_virtqueue *vq)
> >>  {
> >>  	struct vring_desc_packed *d = vq->desc_packed + vq->avail_idx;
> >>-	__virtio16 flags;
> >>+	__virtio16 flags = RING_EVENT_FLAGS_ENABLE;
> >>  	int ret;
> >>-	/* FIXME: disable notification through device area */
> >>+	if (!(vq->used_flags & VRING_USED_F_NO_NOTIFY))
> >>+		return false;
> >>+	vq->used_flags &= ~VRING_USED_F_NO_NOTIFY;
> >'used_flags' was originally designed for 1.0, why should we pay attetion to it here?
> >
> >Wei
> 
> It was used to recored whether or not we've disabled notification. Then we
> can avoid unnecessary userspace writes or memory barriers.

OK, thanks.

> 
> Thanks
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply

* RE: [PATCH v4] virtio_blk: add DISCARD and WRIET ZEROES commands support
From: Liu, Changpeng @ 2018-06-04  4:14 UTC (permalink / raw)
  To: Stefan Hajnoczi
  Cc: pbonzini@redhat.com, cavery@redhat.com,
	virtualization@lists.linux-foundation.org
In-Reply-To: <20180601085935.GA8196@stefanha-x1.localdomain>



> -----Original Message-----
> From: Stefan Hajnoczi [mailto:stefanha@redhat.com]
> Sent: Friday, June 1, 2018 5:00 PM
> To: Liu, Changpeng <changpeng.liu@intel.com>
> Cc: virtualization@lists.linux-foundation.org; cavery@redhat.com;
> jasowang@redhat.com; pbonzini@redhat.com; Wang, Wei W
> <wei.w.wang@intel.com>
> Subject: Re: [PATCH v4] virtio_blk: add DISCARD and WRIET ZEROES commands
> support
> 
> On Thu, May 31, 2018 at 11:53:26PM +0000, Liu, Changpeng wrote:
> >
> >
> > > -----Original Message-----
> > > From: Stefan Hajnoczi [mailto:stefanha@redhat.com]
> > > Sent: Thursday, May 31, 2018 6:31 PM
> > > To: Liu, Changpeng <changpeng.liu@intel.com>
> > > Cc: virtualization@lists.linux-foundation.org; cavery@redhat.com;
> > > jasowang@redhat.com; pbonzini@redhat.com; Wang, Wei W
> > > <wei.w.wang@intel.com>
> > > Subject: Re: [PATCH v4] virtio_blk: add DISCARD and WRIET ZEROES commands
> > > support
> > >
> > > On Tue, May 29, 2018 at 09:42:24AM +0800, Changpeng Liu wrote:
> > > >  	num = blk_rq_map_sg(hctx->queue, req, vbr->sg);
> > > >  	if (num) {
> > > > -		if (rq_data_dir(req) == WRITE)
> > > > +		if (rq_data_dir(req) == WRITE || type == VIRTIO_BLK_T_DISCARD
> > > ||
> > > > +		    type == VIRTIO_BLK_T_WRITE_ZEROES)
> > > >  			vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev,
> > > VIRTIO_BLK_T_OUT);
> > >
> > > The VIRTIO specification says:
> > >
> > >   The type of the request is either a read (VIRTIO_BLK_T_IN), a write
> > >   (VIRTIO_BLK_T_OUT), a discard (VIRTIO_BLK_T_DISCARD), a write zeroes
> > >   (VIRTIO_BLK_T_WRITE_ZEROES) or a flush (VIRTIO_BLK_T_FLUSH).
> > >
> > > But this patch actually uses VIRTIO_BLK_T_DISCARD | VIRTIO_BLK_T_OUT or
> > > VIRTIO_BLK_T_WRITE_ZEROES | VIRTIO_BLK_T_OUT.  "either A or B" is
> > > exclusive, it does not mean "A and B".
> > Hi Stefan,
> >
> > For the new add DISCARD and WRITE ZEROES commands, I defined the
> > type code to 11 and 13, so the last bit always is 1, there is no difference
> > in practice.
> 
> Then the code is misleading.  This is clearer:
> 
>   if (type == VIRTIO_BLK_T_DISCARD || type == VIRTIO_BLK_T_WRITE_ZEROES)
>       /* Do nothing, type already set */
>   else if (rq_data_dir(req) == WRITE)
>       vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_OUT);
>   ...
This change sounds good to me, will change the patch accordingly.
> 
> > But I believe the specification says VIRTIO_BLK_T_OUT means direction, so
> > OR the two bits together should compliance with the specification.
> 
> I cannot find that in the specification:
> 
>   http://docs.oasis-open.org/virtio/virtio/v1.0/cs04/virtio-v1.0-cs04.html#x1-
> 2020002
> 
> and it would contradict the "The type of the request is either ... or
> ..." wording that I quoted from the spec above.
> 
> If you do find something in the spec, please let me know and we can
> figure out how to make the spec consistent.
I saw comments from file linux/usr/include/uapi/linux/virtio_blk.h, which says
VIRTIO_BLK_T_OUT may be combined with other commands and means direction,
the specification does not have such description.

^ permalink raw reply

* Re: [PATCH] drm/qxl: Call qxl_bo_unref outside atomic context
From: Gerd Hoffmann @ 2018-06-04  7:37 UTC (permalink / raw)
  To: Jeremy Cline
  Cc: Ray Strode, linux-kernel, stable, virtualization, dri-devel,
	Dave Airlie
In-Reply-To: <20180601200532.13619-1-jcline@redhat.com>

On Fri, Jun 01, 2018 at 04:05:32PM -0400, Jeremy Cline wrote:
> "qxl_bo_unref" may sleep, but calling "qxl_release_map" causes
> "preempt_disable()" to be called and "preempt_enable()" isn't called
> until "qxl_release_unmap" is used. Move the call to "qxl_bo_unref" out
> from in between the two to avoid sleeping from an atomic context.
> 
> This issue can be demonstrated on a kernel with CONFIG_LOCKDEP=y by
> creating a VM using QXL, using a desktop environment using Xorg, then
> moving the cursor on or off a window.
> 
> Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1571128
> Fixes: 9428088c90b6 ("drm/qxl: reapply cursor after resetting primary")
> Cc: stable@vger.kernel.org
> Signed-off-by: Jeremy Cline <jcline@redhat.com>

Pushed to drm-misc-fixes.

thanks,
  Gerd

^ permalink raw reply

* Re: [RFC V2] virtio: Add platform specific DMA API translation for virito devices
From: David Gibson @ 2018-06-04  8:57 UTC (permalink / raw)
  To: Benjamin Herrenschmidt
  Cc: robh, Michael S. Tsirkin, mpe, linux-kernel, virtualization, hch,
	joe, linuxppc-dev, elfring, Anshuman Khandual
In-Reply-To: <f77638ddef3af52dd71341083707c9e3745dd505.camel@kernel.crashing.org>


[-- Attachment #1.1: Type: text/plain, Size: 1476 bytes --]

On Thu, May 24, 2018 at 08:27:04AM +1000, Benjamin Herrenschmidt wrote:
> On Wed, 2018-05-23 at 21:50 +0300, Michael S. Tsirkin wrote:
> 
> > I re-read that discussion and I'm still unclear on the
> > original question, since I got several apparently
> > conflicting answers.
> > 
> > I asked:
> > 
> > 	Why isn't setting VIRTIO_F_IOMMU_PLATFORM on the
> > 	hypervisor side sufficient?
> 
> I thought I had replied to this...
> 
> There are a couple of reasons:
> 
> - First qemu doesn't know that the guest will switch to "secure mode"
> in advance. There is no difference between a normal and a secure
> partition until the partition does the magic UV call to "enter secure
> mode" and qemu doesn't see any of it. So who can set the flag here ?

This seems weird to me.  As a rule HV calls should go through qemu -
or be allowed to go directly to KVM *by* qemu.  We generally reserve
the latter for hot path things.  Since this isn't a hot path, having
the call handled directly by the kernel seems wrong.

Unless a "UV call" is something different I don't know about.

> - Second, when using VIRTIO_F_IOMMU_PLATFORM, we also make qemu (or
> vhost) go through the emulated MMIO for every access to the guest,
> which adds additional overhead.
> 

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

[-- Attachment #2: Type: text/plain, Size: 183 bytes --]

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply

* Re: [RFC V2] virtio: Add platform specific DMA API translation for virito devices
From: Benjamin Herrenschmidt @ 2018-06-04  9:48 UTC (permalink / raw)
  To: David Gibson
  Cc: robh, Michael S. Tsirkin, mpe, linux-kernel, virtualization, hch,
	joe, linuxppc-dev, elfring, Anshuman Khandual
In-Reply-To: <20180604085742.GQ4251@umbus>

On Mon, 2018-06-04 at 18:57 +1000, David Gibson wrote:
> 
> > - First qemu doesn't know that the guest will switch to "secure mode"
> > in advance. There is no difference between a normal and a secure
> > partition until the partition does the magic UV call to "enter secure
> > mode" and qemu doesn't see any of it. So who can set the flag here ?
> 
> This seems weird to me.  As a rule HV calls should go through qemu -
> or be allowed to go directly to KVM *by* qemu.

It's not an HV call, it's a UV call, qemu won't see it, qemu isn't
trusted. Now the UV *will* reflect that to the HV via some synthetized
HV calls, and we *could* have those do a pass by qemu, however, so far,
our entire design doesn't rely on *any* qemu knowledge whatsoever and
it would be sad to add it just for that purpose.

Additionally, this is rather orthogonal, see my other email, the
problem we are trying to solve is *not* a qemu problem and it doesn't
make sense to leak that into qemu.

>   We generally reserve
> the latter for hot path things.  Since this isn't a hot path, having
> the call handled directly by the kernel seems wrong.
>
> Unless a "UV call" is something different I don't know about.

Yes, a UV call goes to the Ultravisor, not the Hypervisor. The
Hypervisor isn't trusted.

> > - Second, when using VIRTIO_F_IOMMU_PLATFORM, we also make qemu (or
> > vhost) go through the emulated MMIO for every access to the guest,
> > which adds additional overhead.
> 
Ben.

^ permalink raw reply

* Re: [PATCH v4] virtio_blk: add DISCARD and WRIET ZEROES commands support
From: Paolo Bonzini @ 2018-06-04 10:02 UTC (permalink / raw)
  To: Liu, Changpeng, Stefan Hajnoczi
  Cc: cavery@redhat.com, virtualization@lists.linux-foundation.org
In-Reply-To: <FF7FC980937D6342B9D289F5F3C7C2625B6DD85E@SHSMSX103.ccr.corp.intel.com>

On 04/06/2018 06:14, Liu, Changpeng wrote:
>>> But I believe the specification says VIRTIO_BLK_T_OUT means direction, so
>>> OR the two bits together should compliance with the specification.
>> I cannot find that in the specification:
>>
>>   http://docs.oasis-open.org/virtio/virtio/v1.0/cs04/virtio-v1.0-cs04.html#x1-
>> 2020002
>>
>> and it would contradict the "The type of the request is either ... or
>> ..." wording that I quoted from the spec above.
>>
>> If you do find something in the spec, please let me know and we can
>> figure out how to make the spec consistent.
>
> I saw comments from file linux/usr/include/uapi/linux/virtio_blk.h, which says
> VIRTIO_BLK_T_OUT may be combined with other commands and means direction,
> the specification does not have such description.

I don't think it is in the specification indeed (however, 11 and 13 were
chosen so that VIRTIO_BLK_T_OUT can still indicate direction).

Paolo

^ permalink raw reply

* Re: [net] vhost: Use kzalloc() to allocate vhost_msg_node
From: Dmitry Vyukov via Virtualization @ 2018-06-04 12:34 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Kevin Easton, KVM list, netdev, syzkaller-bugs, LKML,
	virtualization, Guenter Roeck
In-Reply-To: <20180530055704-mutt-send-email-mst@kernel.org>

On Wed, May 30, 2018 at 5:01 AM, Michael S. Tsirkin <mst@redhat.com> wrote:
> On Tue, May 29, 2018 at 03:19:08PM -0700, Guenter Roeck wrote:
>> On Fri, Apr 27, 2018 at 11:45:02AM -0400, Kevin Easton wrote:
>> > The struct vhost_msg within struct vhost_msg_node is copied to userspace,
>> > so it should be allocated with kzalloc() to ensure all structure padding
>> > is zeroed.
>> >
>> > Signed-off-by: Kevin Easton <kevin@guarana.org>
>> > Reported-by: syzbot+87cfa083e727a224754b@syzkaller.appspotmail.com
>>
>> Is this patch going anywhere ?
>>
>> The patch fixes CVE-2018-1118. It would be useful to understand if and when
>> this problem is going to be fixed.
>>
>> Thanks,
>> Guenter
>> > ---
>> >  drivers/vhost/vhost.c | 2 +-
>> >  1 file changed, 1 insertion(+), 1 deletion(-)
>> >
>> > diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
>> > index f3bd8e9..1b84dcff 100644
>> > --- a/drivers/vhost/vhost.c
>> > +++ b/drivers/vhost/vhost.c
>> > @@ -2339,7 +2339,7 @@ EXPORT_SYMBOL_GPL(vhost_disable_notify);
>> >  /* Create a new message. */
>> >  struct vhost_msg_node *vhost_new_msg(struct vhost_virtqueue *vq, int type)
>> >  {
>> > -   struct vhost_msg_node *node = kmalloc(sizeof *node, GFP_KERNEL);
>> > +   struct vhost_msg_node *node = kzalloc(sizeof *node, GFP_KERNEL);
>> >     if (!node)
>> >             return NULL;
>> >     node->vq = vq;
>
> As I pointed out, we don't need to init the whole structure. The proper
> fix is thus (I think) below.
>
> Could you use your testing infrastructure to confirm this fixes the issue?

Hi Michael,

syzbot is self-service, see:

https://github.com/google/syzkaller/blob/master/docs/syzbot.md#testing-patches


> Thanks!
>
> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
>
> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> index f3bd8e941224..58d9aec90afb 100644
> --- a/drivers/vhost/vhost.c
> +++ b/drivers/vhost/vhost.c
> @@ -2342,6 +2342,9 @@ struct vhost_msg_node *vhost_new_msg(struct vhost_virtqueue *vq, int type)
>         struct vhost_msg_node *node = kmalloc(sizeof *node, GFP_KERNEL);
>         if (!node)
>                 return NULL;
> +
> +       /* Make sure all padding within the structure is initialized. */
> +       memset(&node->msg, 0, sizeof node->msg);
>         node->vq = vq;
>         node->msg.type = type;
>         return node;
>
> --
> You received this message because you are subscribed to the Google Groups "syzkaller-bugs" group.
> To unsubscribe from this group and stop receiving emails from it, send an email to syzkaller-bugs+unsubscribe@googlegroups.com.
> To view this discussion on the web visit https://groups.google.com/d/msgid/syzkaller-bugs/20180530055704-mutt-send-email-mst%40kernel.org.
> For more options, visit https://groups.google.com/d/optout.

^ permalink raw reply

* Re: [RFC V2] virtio: Add platform specific DMA API translation for virito devices
From: Michael S. Tsirkin @ 2018-06-04 12:43 UTC (permalink / raw)
  To: Benjamin Herrenschmidt
  Cc: robh, mpe, linux-kernel, virtualization, hch, joe, david,
	linuxppc-dev, elfring, Anshuman Khandual
In-Reply-To: <f77638ddef3af52dd71341083707c9e3745dd505.camel@kernel.crashing.org>

On Thu, May 24, 2018 at 08:27:04AM +1000, Benjamin Herrenschmidt wrote:
> On Wed, 2018-05-23 at 21:50 +0300, Michael S. Tsirkin wrote:
> 
> > I re-read that discussion and I'm still unclear on the
> > original question, since I got several apparently
> > conflicting answers.
> > 
> > I asked:
> > 
> > 	Why isn't setting VIRTIO_F_IOMMU_PLATFORM on the
> > 	hypervisor side sufficient?
> 
> I thought I had replied to this...
> 
> There are a couple of reasons:
> 
> - First qemu doesn't know that the guest will switch to "secure mode"
> in advance. There is no difference between a normal and a secure
> partition until the partition does the magic UV call to "enter secure
> mode" and qemu doesn't see any of it. So who can set the flag here ?

The user should set it. You just tell user "to be able to use with
feature X, enable IOMMU".

> - Second, when using VIRTIO_F_IOMMU_PLATFORM, we also make qemu (or
> vhost) go through the emulated MMIO for every access to the guest,
> which adds additional overhead.
> 
> Cheers,
> Ben.

There are several answers to this.  One is that we are working hard to
make overhead small when the mappings are static (which they would be if
there's no actual IOMMU). So maybe especially given you are using
a bounce buffer on top it's not so bad - did you try to
benchmark?

Another is that given the basic functionality is in there, optimizations
can possibly wait until per-device quirks in DMA API are supported.


> > 
> > 
> > >  arch/powerpc/include/asm/dma-mapping.h |  6 ++++++
> > >  arch/powerpc/platforms/pseries/iommu.c | 11 +++++++++++
> > >  drivers/virtio/virtio_ring.c           | 10 ++++++++++
> > >  3 files changed, 27 insertions(+)
> > > 
> > > diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h
> > > index 8fa3945..056e578 100644
> > > --- a/arch/powerpc/include/asm/dma-mapping.h
> > > +++ b/arch/powerpc/include/asm/dma-mapping.h
> > > @@ -115,4 +115,10 @@ extern u64 __dma_get_required_mask(struct device *dev);
> > >  #define ARCH_HAS_DMA_MMAP_COHERENT
> > >  
> > >  #endif /* __KERNEL__ */
> > > +
> > > +#define platform_forces_virtio_dma platform_forces_virtio_dma
> > > +
> > > +struct virtio_device;
> > > +
> > > +extern bool platform_forces_virtio_dma(struct virtio_device *vdev);
> > >  #endif	/* _ASM_DMA_MAPPING_H */
> > > diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
> > > index 06f0296..a2ec15a 100644
> > > --- a/arch/powerpc/platforms/pseries/iommu.c
> > > +++ b/arch/powerpc/platforms/pseries/iommu.c
> > > @@ -38,6 +38,7 @@
> > >  #include <linux/of.h>
> > >  #include <linux/iommu.h>
> > >  #include <linux/rculist.h>
> > > +#include <linux/virtio.h>
> > >  #include <asm/io.h>
> > >  #include <asm/prom.h>
> > >  #include <asm/rtas.h>
> > > @@ -1396,3 +1397,13 @@ static int __init disable_multitce(char *str)
> > >  __setup("multitce=", disable_multitce);
> > >  
> > >  machine_subsys_initcall_sync(pseries, tce_iommu_bus_notifier_init);
> > > +
> > > +bool platform_forces_virtio_dma(struct virtio_device *vdev)
> > > +{
> > > +	/*
> > > +	 * On protected guest platforms, force virtio core to use DMA
> > > +	 * MAP API for all virtio devices. But there can also be some
> > > +	 * exceptions for individual devices like virtio balloon.
> > > +	 */
> > > +	return (of_find_compatible_node(NULL, NULL, "ibm,ultravisor") != NULL);
> > > +}
> > 
> > Isn't this kind of slow?  vring_use_dma_api is on
> > data path and supposed to be very fast.
> > 
> > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > > index 21d464a..47ea6c3 100644
> > > --- a/drivers/virtio/virtio_ring.c
> > > +++ b/drivers/virtio/virtio_ring.c
> > > @@ -141,8 +141,18 @@ struct vring_virtqueue {
> > >   * unconditionally on data path.
> > >   */
> > >  
> > > +#ifndef platform_forces_virtio_dma
> > > +static inline bool platform_forces_virtio_dma(struct virtio_device *vdev)
> > > +{
> > > +	return false;
> > > +}
> > > +#endif
> > > +
> > >  static bool vring_use_dma_api(struct virtio_device *vdev)
> > >  {
> > > +	if (platform_forces_virtio_dma(vdev))
> > > +		return true;
> > > +
> > >  	if (!virtio_has_iommu_quirk(vdev))
> > >  		return true;
> > >  
> > > -- 
> > > 2.9.3

^ permalink raw reply

* Re: [RFC V2] virtio: Add platform specific DMA API translation for virito devices
From: Michael S. Tsirkin @ 2018-06-04 12:50 UTC (permalink / raw)
  To: Benjamin Herrenschmidt
  Cc: robh, mpe, linux-kernel, virtualization, hch, joe, David Gibson,
	linuxppc-dev, elfring, Anshuman Khandual
In-Reply-To: <d5df613d6347fe2f9bb6ea65bc6f6be05650ca6f.camel@kernel.crashing.org>

On Mon, Jun 04, 2018 at 07:48:54PM +1000, Benjamin Herrenschmidt wrote:
> On Mon, 2018-06-04 at 18:57 +1000, David Gibson wrote:
> > 
> > > - First qemu doesn't know that the guest will switch to "secure mode"
> > > in advance. There is no difference between a normal and a secure
> > > partition until the partition does the magic UV call to "enter secure
> > > mode" and qemu doesn't see any of it. So who can set the flag here ?
> > 
> > This seems weird to me.  As a rule HV calls should go through qemu -
> > or be allowed to go directly to KVM *by* qemu.
> 
> It's not an HV call, it's a UV call, qemu won't see it, qemu isn't
> trusted. Now the UV *will* reflect that to the HV via some synthetized
> HV calls, and we *could* have those do a pass by qemu, however, so far,
> our entire design doesn't rely on *any* qemu knowledge whatsoever and
> it would be sad to add it just for that purpose.

It's a temporary work-around. I think that the long-term fix is to
support per-device quirks and have the DMA API DTRT for virtio.

> Additionally, this is rather orthogonal, see my other email, the
> problem we are trying to solve is *not* a qemu problem and it doesn't
> make sense to leak that into qemu.
> 
> >   We generally reserve
> > the latter for hot path things.  Since this isn't a hot path, having
> > the call handled directly by the kernel seems wrong.
> >
> > Unless a "UV call" is something different I don't know about.
> 
> Yes, a UV call goes to the Ultravisor, not the Hypervisor. The
> Hypervisor isn't trusted.
> 
> > > - Second, when using VIRTIO_F_IOMMU_PLATFORM, we also make qemu (or
> > > vhost) go through the emulated MMIO for every access to the guest,
> > > which adds additional overhead.
> > 
> Ben.

^ permalink raw reply

* Re: [RFC V2] virtio: Add platform specific DMA API translation for virito devices
From: Christoph Hellwig @ 2018-06-04 12:55 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: robh, Benjamin Herrenschmidt, linux-kernel, virtualization, hch,
	mpe, joe, david, linuxppc-dev, elfring, Anshuman Khandual
In-Reply-To: <20180604153558-mutt-send-email-mst@kernel.org>

On Mon, Jun 04, 2018 at 03:43:09PM +0300, Michael S. Tsirkin wrote:
> Another is that given the basic functionality is in there, optimizations
> can possibly wait until per-device quirks in DMA API are supported.

We have had per-device dma_ops for quite a while.

^ permalink raw reply

* Re: [RFC V2] virtio: Add platform specific DMA API translation for virito devices
From: Benjamin Herrenschmidt @ 2018-06-04 13:11 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: robh, mpe, linux-kernel, virtualization, hch, joe, david,
	linuxppc-dev, elfring, Anshuman Khandual
In-Reply-To: <20180604153558-mutt-send-email-mst@kernel.org>

On Mon, 2018-06-04 at 15:43 +0300, Michael S. Tsirkin wrote:
> On Thu, May 24, 2018 at 08:27:04AM +1000, Benjamin Herrenschmidt wrote:
> > On Wed, 2018-05-23 at 21:50 +0300, Michael S. Tsirkin wrote:
> > 
> > > I re-read that discussion and I'm still unclear on the
> > > original question, since I got several apparently
> > > conflicting answers.
> > > 
> > > I asked:
> > > 
> > > 	Why isn't setting VIRTIO_F_IOMMU_PLATFORM on the
> > > 	hypervisor side sufficient?
> > 
> > I thought I had replied to this...
> > 
> > There are a couple of reasons:
> > 
> > - First qemu doesn't know that the guest will switch to "secure mode"
> > in advance. There is no difference between a normal and a secure
> > partition until the partition does the magic UV call to "enter secure
> > mode" and qemu doesn't see any of it. So who can set the flag here ?
> 
> The user should set it. You just tell user "to be able to use with
> feature X, enable IOMMU".

That's completely backwards. The user has no idea what that stuff is.
And it would have to percolate all the way up the management stack,
libvirt, kimchi, whatever else ... that's just nonsense.

Especially since, as I explained in my other email, this is *not* a
qemu problem and thus the solution shouldn't be messing around with
qemu.

> 
> > - Second, when using VIRTIO_F_IOMMU_PLATFORM, we also make qemu (or
> > vhost) go through the emulated MMIO for every access to the guest,
> > which adds additional overhead.
> > 
> > Cheers,
> > Ben.
> 
> There are several answers to this.  One is that we are working hard to
> make overhead small when the mappings are static (which they would be if
> there's no actual IOMMU). So maybe especially given you are using
> a bounce buffer on top it's not so bad - did you try to
> benchmark?
> 
> Another is that given the basic functionality is in there, optimizations
> can possibly wait until per-device quirks in DMA API are supported.

The point is that requiring specific qemu command line arguments isn't
going to fly. We have additional problems due to the fact that our
firmware (SLOF) inside qemu doesn't currently deal with iommu's etc...
though those can be fixed.

Overall, however, this seems to be the most convoluted way of achieving
things, require user interventions where none should be needed etc...

Again, what's wrong with a 2 lines hook instead that solves it all and
completely avoids involving qemu ?

Ben.

> 
> > > 
> > > 
> > > >  arch/powerpc/include/asm/dma-mapping.h |  6 ++++++
> > > >  arch/powerpc/platforms/pseries/iommu.c | 11 +++++++++++
> > > >  drivers/virtio/virtio_ring.c           | 10 ++++++++++
> > > >  3 files changed, 27 insertions(+)
> > > > 
> > > > diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h
> > > > index 8fa3945..056e578 100644
> > > > --- a/arch/powerpc/include/asm/dma-mapping.h
> > > > +++ b/arch/powerpc/include/asm/dma-mapping.h
> > > > @@ -115,4 +115,10 @@ extern u64 __dma_get_required_mask(struct device *dev);
> > > >  #define ARCH_HAS_DMA_MMAP_COHERENT
> > > >  
> > > >  #endif /* __KERNEL__ */
> > > > +
> > > > +#define platform_forces_virtio_dma platform_forces_virtio_dma
> > > > +
> > > > +struct virtio_device;
> > > > +
> > > > +extern bool platform_forces_virtio_dma(struct virtio_device *vdev);
> > > >  #endif	/* _ASM_DMA_MAPPING_H */
> > > > diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
> > > > index 06f0296..a2ec15a 100644
> > > > --- a/arch/powerpc/platforms/pseries/iommu.c
> > > > +++ b/arch/powerpc/platforms/pseries/iommu.c
> > > > @@ -38,6 +38,7 @@
> > > >  #include <linux/of.h>
> > > >  #include <linux/iommu.h>
> > > >  #include <linux/rculist.h>
> > > > +#include <linux/virtio.h>
> > > >  #include <asm/io.h>
> > > >  #include <asm/prom.h>
> > > >  #include <asm/rtas.h>
> > > > @@ -1396,3 +1397,13 @@ static int __init disable_multitce(char *str)
> > > >  __setup("multitce=", disable_multitce);
> > > >  
> > > >  machine_subsys_initcall_sync(pseries, tce_iommu_bus_notifier_init);
> > > > +
> > > > +bool platform_forces_virtio_dma(struct virtio_device *vdev)
> > > > +{
> > > > +	/*
> > > > +	 * On protected guest platforms, force virtio core to use DMA
> > > > +	 * MAP API for all virtio devices. But there can also be some
> > > > +	 * exceptions for individual devices like virtio balloon.
> > > > +	 */
> > > > +	return (of_find_compatible_node(NULL, NULL, "ibm,ultravisor") != NULL);
> > > > +}
> > > 
> > > Isn't this kind of slow?  vring_use_dma_api is on
> > > data path and supposed to be very fast.
> > > 
> > > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > > > index 21d464a..47ea6c3 100644
> > > > --- a/drivers/virtio/virtio_ring.c
> > > > +++ b/drivers/virtio/virtio_ring.c
> > > > @@ -141,8 +141,18 @@ struct vring_virtqueue {
> > > >   * unconditionally on data path.
> > > >   */
> > > >  
> > > > +#ifndef platform_forces_virtio_dma
> > > > +static inline bool platform_forces_virtio_dma(struct virtio_device *vdev)
> > > > +{
> > > > +	return false;
> > > > +}
> > > > +#endif
> > > > +
> > > >  static bool vring_use_dma_api(struct virtio_device *vdev)
> > > >  {
> > > > +	if (platform_forces_virtio_dma(vdev))
> > > > +		return true;
> > > > +
> > > >  	if (!virtio_has_iommu_quirk(vdev))
> > > >  		return true;
> > > >  
> > > > -- 
> > > > 2.9.3

^ permalink raw reply

* Re: [RFC V2] virtio: Add platform specific DMA API translation for virito devices
From: Benjamin Herrenschmidt @ 2018-06-04 13:14 UTC (permalink / raw)
  To: Christoph Hellwig, Michael S. Tsirkin
  Cc: robh, mpe, linux-kernel, virtualization, joe, david, linuxppc-dev,
	elfring, Anshuman Khandual
In-Reply-To: <20180604125530.GA16378@infradead.org>

On Mon, 2018-06-04 at 05:55 -0700, Christoph Hellwig wrote:
> On Mon, Jun 04, 2018 at 03:43:09PM +0300, Michael S. Tsirkin wrote:
> > Another is that given the basic functionality is in there, optimizations
> > can possibly wait until per-device quirks in DMA API are supported.
> 
> We have had per-device dma_ops for quite a while.

I've asked Ansuman to start with a patch that converts virtio to use
DMA ops always, along with an init quirk to hookup "direct" ops when
the IOMMU flag isn't set.

This will at least remove that horrid duplication of code path we have
in there.

Then we can just involve the arch in that init quirk so we can chose an
alternate set of ops when running a secure VM.

This is completely orthogonal to whether an iommu exist qemu side or
not, and should be entirely solved on the Linux side.

Cheers,
Ben.

^ permalink raw reply

* Re: [RFC V2] virtio: Add platform specific DMA API translation for virito devices
From: Michael S. Tsirkin @ 2018-06-04 16:21 UTC (permalink / raw)
  To: Benjamin Herrenschmidt
  Cc: robh, mpe, linux-kernel, virtualization, hch, joe, david,
	linuxppc-dev, elfring, Anshuman Khandual
In-Reply-To: <e7ceddbec11711a89282e9b70b7fd3c8af10b030.camel@kernel.crashing.org>

On Mon, Jun 04, 2018 at 11:11:52PM +1000, Benjamin Herrenschmidt wrote:
> On Mon, 2018-06-04 at 15:43 +0300, Michael S. Tsirkin wrote:
> > On Thu, May 24, 2018 at 08:27:04AM +1000, Benjamin Herrenschmidt wrote:
> > > On Wed, 2018-05-23 at 21:50 +0300, Michael S. Tsirkin wrote:
> > > 
> > > > I re-read that discussion and I'm still unclear on the
> > > > original question, since I got several apparently
> > > > conflicting answers.
> > > > 
> > > > I asked:
> > > > 
> > > > 	Why isn't setting VIRTIO_F_IOMMU_PLATFORM on the
> > > > 	hypervisor side sufficient?
> > > 
> > > I thought I had replied to this...
> > > 
> > > There are a couple of reasons:
> > > 
> > > - First qemu doesn't know that the guest will switch to "secure mode"
> > > in advance. There is no difference between a normal and a secure
> > > partition until the partition does the magic UV call to "enter secure
> > > mode" and qemu doesn't see any of it. So who can set the flag here ?
> > 
> > The user should set it. You just tell user "to be able to use with
> > feature X, enable IOMMU".
> 
> That's completely backwards. The user has no idea what that stuff is.
> And it would have to percolate all the way up the management stack,
> libvirt, kimchi, whatever else ... that's just nonsense.
> 
> Especially since, as I explained in my other email, this is *not* a
> qemu problem and thus the solution shouldn't be messing around with
> qemu.

virtio is implemented in qemu though. If you prefer to stick
all your code in either guest or the UV that's your decision
but it looks like qemu could be helpful here.

For example what if you have a guest that passes physical addresses
to qemu bypassing swiotlb? Don't you want to detect
that and fail gracefully rather than crash the guest?
That's what VIRTIO_F_IOMMU_PLATFORM will do for you.

Still that's hypervisor's decision. What isn't up to the hypervisor is
the way we structure code. We made an early decision to merge a hack
with xen, among discussion about how with time DMA API will learn to
support per-device quirks and we'll be able to switch to that.
So let's do that now?

> > 
> > > - Second, when using VIRTIO_F_IOMMU_PLATFORM, we also make qemu (or
> > > vhost) go through the emulated MMIO for every access to the guest,
> > > which adds additional overhead.
> > > 
> > > Cheers,
> > > Ben.
> > 
> > There are several answers to this.  One is that we are working hard to
> > make overhead small when the mappings are static (which they would be if
> > there's no actual IOMMU). So maybe especially given you are using
> > a bounce buffer on top it's not so bad - did you try to
> > benchmark?
> > 
> > Another is that given the basic functionality is in there, optimizations
> > can possibly wait until per-device quirks in DMA API are supported.
> 
> The point is that requiring specific qemu command line arguments isn't
> going to fly. We have additional problems due to the fact that our
> firmware (SLOF) inside qemu doesn't currently deal with iommu's etc...
> though those can be fixed.
> 
> Overall, however, this seems to be the most convoluted way of achieving
> things, require user interventions where none should be needed etc...
> 
> Again, what's wrong with a 2 lines hook instead that solves it all and
> completely avoids involving qemu ?
> 
> Ben.

That each platform wants to add hacks in this data path function.

> > 
> > > > 
> > > > 
> > > > >  arch/powerpc/include/asm/dma-mapping.h |  6 ++++++
> > > > >  arch/powerpc/platforms/pseries/iommu.c | 11 +++++++++++
> > > > >  drivers/virtio/virtio_ring.c           | 10 ++++++++++
> > > > >  3 files changed, 27 insertions(+)
> > > > > 
> > > > > diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h
> > > > > index 8fa3945..056e578 100644
> > > > > --- a/arch/powerpc/include/asm/dma-mapping.h
> > > > > +++ b/arch/powerpc/include/asm/dma-mapping.h
> > > > > @@ -115,4 +115,10 @@ extern u64 __dma_get_required_mask(struct device *dev);
> > > > >  #define ARCH_HAS_DMA_MMAP_COHERENT
> > > > >  
> > > > >  #endif /* __KERNEL__ */
> > > > > +
> > > > > +#define platform_forces_virtio_dma platform_forces_virtio_dma
> > > > > +
> > > > > +struct virtio_device;
> > > > > +
> > > > > +extern bool platform_forces_virtio_dma(struct virtio_device *vdev);
> > > > >  #endif	/* _ASM_DMA_MAPPING_H */
> > > > > diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
> > > > > index 06f0296..a2ec15a 100644
> > > > > --- a/arch/powerpc/platforms/pseries/iommu.c
> > > > > +++ b/arch/powerpc/platforms/pseries/iommu.c
> > > > > @@ -38,6 +38,7 @@
> > > > >  #include <linux/of.h>
> > > > >  #include <linux/iommu.h>
> > > > >  #include <linux/rculist.h>
> > > > > +#include <linux/virtio.h>
> > > > >  #include <asm/io.h>
> > > > >  #include <asm/prom.h>
> > > > >  #include <asm/rtas.h>
> > > > > @@ -1396,3 +1397,13 @@ static int __init disable_multitce(char *str)
> > > > >  __setup("multitce=", disable_multitce);
> > > > >  
> > > > >  machine_subsys_initcall_sync(pseries, tce_iommu_bus_notifier_init);
> > > > > +
> > > > > +bool platform_forces_virtio_dma(struct virtio_device *vdev)
> > > > > +{
> > > > > +	/*
> > > > > +	 * On protected guest platforms, force virtio core to use DMA
> > > > > +	 * MAP API for all virtio devices. But there can also be some
> > > > > +	 * exceptions for individual devices like virtio balloon.
> > > > > +	 */
> > > > > +	return (of_find_compatible_node(NULL, NULL, "ibm,ultravisor") != NULL);
> > > > > +}
> > > > 
> > > > Isn't this kind of slow?  vring_use_dma_api is on
> > > > data path and supposed to be very fast.
> > > > 
> > > > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > > > > index 21d464a..47ea6c3 100644
> > > > > --- a/drivers/virtio/virtio_ring.c
> > > > > +++ b/drivers/virtio/virtio_ring.c
> > > > > @@ -141,8 +141,18 @@ struct vring_virtqueue {
> > > > >   * unconditionally on data path.
> > > > >   */
> > > > >  
> > > > > +#ifndef platform_forces_virtio_dma
> > > > > +static inline bool platform_forces_virtio_dma(struct virtio_device *vdev)
> > > > > +{
> > > > > +	return false;
> > > > > +}
> > > > > +#endif
> > > > > +
> > > > >  static bool vring_use_dma_api(struct virtio_device *vdev)
> > > > >  {
> > > > > +	if (platform_forces_virtio_dma(vdev))
> > > > > +		return true;
> > > > > +
> > > > >  	if (!virtio_has_iommu_quirk(vdev))
> > > > >  		return true;
> > > > >  
> > > > > -- 
> > > > > 2.9.3

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox