* [PATCH for 7.2? V2] vhost: fix vq dirty bitmap syncing when vIOMMU is enabled
@ 2022-11-29 4:02 Jason Wang
2022-11-29 9:52 ` Eric Auger
0 siblings, 1 reply; 5+ messages in thread
From: Jason Wang @ 2022-11-29 4:02 UTC (permalink / raw)
To: mst; +Cc: qemu-devel, eric.auger, Jason Wang, qemu-stable, Lei Yang,
Yalan Zhang
When vIOMMU is enabled, the vq->used_phys is actually the IOVA not
GPA. So we need to translate it to GPA before the syncing otherwise we
may hit the following crash since IOVA could be out of the scope of
the GPA log size. This could be noted when using virtio-IOMMU with
vhost using 1G memory.
Fixes: c471ad0e9bd46 ("vhost_net: device IOTLB support")
Cc: qemu-stable@nongnu.org
Tested-by: Lei Yang <leiyang@redhat.com>
Reported-by: Yalan Zhang <yalzhang@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
Changes since V1:
- Fix the address calculation when used ring is not page aligned
- Fix the length for each round of dirty bitmap syncing
- Use LOG_GUEST_ERROR to log wrong used address
- Various other tweaks
---
hw/virtio/vhost.c | 76 ++++++++++++++++++++++++++++++++++-------------
1 file changed, 56 insertions(+), 20 deletions(-)
diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index d1c4c20b8c..0cd5f25fcb 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -20,6 +20,7 @@
#include "qemu/range.h"
#include "qemu/error-report.h"
#include "qemu/memfd.h"
+#include "qemu/log.h"
#include "standard-headers/linux/vhost_types.h"
#include "hw/virtio/virtio-bus.h"
#include "hw/virtio/virtio-access.h"
@@ -106,6 +107,24 @@ static void vhost_dev_sync_region(struct vhost_dev *dev,
}
}
+static bool vhost_dev_has_iommu(struct vhost_dev *dev)
+{
+ VirtIODevice *vdev = dev->vdev;
+
+ /*
+ * For vhost, VIRTIO_F_IOMMU_PLATFORM means the backend support
+ * incremental memory mapping API via IOTLB API. For platform that
+ * does not have IOMMU, there's no need to enable this feature
+ * which may cause unnecessary IOTLB miss/update transactions.
+ */
+ if (vdev) {
+ return virtio_bus_device_iommu_enabled(vdev) &&
+ virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM);
+ } else {
+ return false;
+ }
+}
+
static int vhost_sync_dirty_bitmap(struct vhost_dev *dev,
MemoryRegionSection *section,
hwaddr first,
@@ -137,8 +156,43 @@ static int vhost_sync_dirty_bitmap(struct vhost_dev *dev,
continue;
}
- vhost_dev_sync_region(dev, section, start_addr, end_addr, vq->used_phys,
- range_get_last(vq->used_phys, vq->used_size));
+ if (vhost_dev_has_iommu(dev)) {
+ IOMMUTLBEntry iotlb;
+ hwaddr used_phys = vq->used_phys, used_size = vq->used_size;
+ hwaddr phys, s;
+
+ while (used_size) {
+ rcu_read_lock();
+ iotlb = address_space_get_iotlb_entry(dev->vdev->dma_as,
+ used_phys,
+ true, MEMTXATTRS_UNSPECIFIED);
+ rcu_read_unlock();
+
+ if (!iotlb.target_as) {
+ qemu_log_mask(LOG_GUEST_ERROR, "translation "
+ "failure for used_phys %"PRIx64"\n", used_phys);
+ return -EINVAL;
+ }
+
+ phys = iotlb.translated_addr + (used_phys & iotlb.addr_mask);
+
+ /* Distance from start of used ring until last byte of
+ IOMMU page */
+ s = iotlb.addr_mask - (used_phys & iotlb.addr_mask);
+ /* Size of used ring, or of the part of it until end
+ of IOMMU page */
+ s = MIN(s, used_size - 1) + 1;
+
+ vhost_dev_sync_region(dev, section, start_addr, end_addr, phys,
+ range_get_last(phys, s));
+ used_size -= s;
+ used_phys += s;
+ }
+ } else {
+ vhost_dev_sync_region(dev, section, start_addr,
+ end_addr, vq->used_phys,
+ range_get_last(vq->used_phys, vq->used_size));
+ }
}
return 0;
}
@@ -306,24 +360,6 @@ static inline void vhost_dev_log_resize(struct vhost_dev *dev, uint64_t size)
dev->log_size = size;
}
-static bool vhost_dev_has_iommu(struct vhost_dev *dev)
-{
- VirtIODevice *vdev = dev->vdev;
-
- /*
- * For vhost, VIRTIO_F_IOMMU_PLATFORM means the backend support
- * incremental memory mapping API via IOTLB API. For platform that
- * does not have IOMMU, there's no need to enable this feature
- * which may cause unnecessary IOTLB miss/update transactions.
- */
- if (vdev) {
- return virtio_bus_device_iommu_enabled(vdev) &&
- virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM);
- } else {
- return false;
- }
-}
-
static void *vhost_memory_map(struct vhost_dev *dev, hwaddr addr,
hwaddr *plen, bool is_write)
{
--
2.25.1
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH for 7.2? V2] vhost: fix vq dirty bitmap syncing when vIOMMU is enabled
2022-11-29 4:02 [PATCH for 7.2? V2] vhost: fix vq dirty bitmap syncing when vIOMMU is enabled Jason Wang
@ 2022-11-29 9:52 ` Eric Auger
2022-11-29 15:44 ` Michael S. Tsirkin
0 siblings, 1 reply; 5+ messages in thread
From: Eric Auger @ 2022-11-29 9:52 UTC (permalink / raw)
To: Jason Wang, mst; +Cc: qemu-devel, qemu-stable, Lei Yang, Yalan Zhang
Hi Jason,
On 11/29/22 05:02, Jason Wang wrote:
> When vIOMMU is enabled, the vq->used_phys is actually the IOVA not
> GPA. So we need to translate it to GPA before the syncing otherwise we
> may hit the following crash since IOVA could be out of the scope of
> the GPA log size. This could be noted when using virtio-IOMMU with
> vhost using 1G memory.
>
> Fixes: c471ad0e9bd46 ("vhost_net: device IOTLB support")
> Cc: qemu-stable@nongnu.org
> Tested-by: Lei Yang <leiyang@redhat.com>
> Reported-by: Yalan Zhang <yalzhang@redhat.com>
> Signed-off-by: Jason Wang <jasowang@redhat.com>
> ---
> Changes since V1:
> - Fix the address calculation when used ring is not page aligned
> - Fix the length for each round of dirty bitmap syncing
> - Use LOG_GUEST_ERROR to log wrong used address
> - Various other tweaks
> ---
> hw/virtio/vhost.c | 76 ++++++++++++++++++++++++++++++++++-------------
> 1 file changed, 56 insertions(+), 20 deletions(-)
>
> diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
> index d1c4c20b8c..0cd5f25fcb 100644
> --- a/hw/virtio/vhost.c
> +++ b/hw/virtio/vhost.c
> @@ -20,6 +20,7 @@
> #include "qemu/range.h"
> #include "qemu/error-report.h"
> #include "qemu/memfd.h"
> +#include "qemu/log.h"
> #include "standard-headers/linux/vhost_types.h"
> #include "hw/virtio/virtio-bus.h"
> #include "hw/virtio/virtio-access.h"
> @@ -106,6 +107,24 @@ static void vhost_dev_sync_region(struct vhost_dev *dev,
> }
> }
>
> +static bool vhost_dev_has_iommu(struct vhost_dev *dev)
> +{
> + VirtIODevice *vdev = dev->vdev;
> +
> + /*
> + * For vhost, VIRTIO_F_IOMMU_PLATFORM means the backend support
> + * incremental memory mapping API via IOTLB API. For platform that
> + * does not have IOMMU, there's no need to enable this feature
> + * which may cause unnecessary IOTLB miss/update transactions.
> + */
> + if (vdev) {
> + return virtio_bus_device_iommu_enabled(vdev) &&
> + virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM);
> + } else {
> + return false;
> + }
> +}
> +
> static int vhost_sync_dirty_bitmap(struct vhost_dev *dev,
> MemoryRegionSection *section,
> hwaddr first,
> @@ -137,8 +156,43 @@ static int vhost_sync_dirty_bitmap(struct vhost_dev *dev,
> continue;
> }
>
> - vhost_dev_sync_region(dev, section, start_addr, end_addr, vq->used_phys,
> - range_get_last(vq->used_phys, vq->used_size));
> + if (vhost_dev_has_iommu(dev)) {
> + IOMMUTLBEntry iotlb;
> + hwaddr used_phys = vq->used_phys, used_size = vq->used_size;
> + hwaddr phys, s;
> +
> + while (used_size) {
> + rcu_read_lock();
> + iotlb = address_space_get_iotlb_entry(dev->vdev->dma_as,
> + used_phys,
> + true, MEMTXATTRS_UNSPECIFIED);
> + rcu_read_unlock();
> +
> + if (!iotlb.target_as) {
> + qemu_log_mask(LOG_GUEST_ERROR, "translation "
> + "failure for used_phys %"PRIx64"\n", used_phys);
looks weird to see translation of "used_phys" whereas it is an iova. At
least I would reword the msg
> + return -EINVAL;
> + }
> +
> + phys = iotlb.translated_addr + (used_phys & iotlb.addr_mask);
you may use a local variable storing this offset =
used_phys & iotlb.addr_mask
> +
> + /* Distance from start of used ring until last byte of
> + IOMMU page */
you can avoid checkpatch warnings here
> + s = iotlb.addr_mask - (used_phys & iotlb.addr_mask);
> + /* Size of used ring, or of the part of it until end
> + of IOMMU page */
and here
I would suggest to rewrite this into
s = iotlb.addr_mask - (used_phys & iotlb.addr_mask) + 1
s = MIN(s, used_size);
> + s = MIN(s, used_size - 1) + 1;
> +
> + vhost_dev_sync_region(dev, section, start_addr, end_addr, phys,
> + range_get_last(phys, s));
> + used_size -= s;
> + used_phys += s;
> + }
> + } else {
> + vhost_dev_sync_region(dev, section, start_addr,
> + end_addr, vq->used_phys,
> + range_get_last(vq->used_phys, vq->used_size));
> + }
> }
> return 0;
> }
> @@ -306,24 +360,6 @@ static inline void vhost_dev_log_resize(struct vhost_dev *dev, uint64_t size)
> dev->log_size = size;
> }
>
> -static bool vhost_dev_has_iommu(struct vhost_dev *dev)
> -{
> - VirtIODevice *vdev = dev->vdev;
> -
> - /*
> - * For vhost, VIRTIO_F_IOMMU_PLATFORM means the backend support
> - * incremental memory mapping API via IOTLB API. For platform that
> - * does not have IOMMU, there's no need to enable this feature
> - * which may cause unnecessary IOTLB miss/update transactions.
> - */
> - if (vdev) {
> - return virtio_bus_device_iommu_enabled(vdev) &&
> - virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM);
> - } else {
> - return false;
> - }
> -}
> -
> static void *vhost_memory_map(struct vhost_dev *dev, hwaddr addr,
> hwaddr *plen, bool is_write)
> {
Besides,
Tested-by: Eric Auger <eric.auger@redhat.com>
Eric
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH for 7.2? V2] vhost: fix vq dirty bitmap syncing when vIOMMU is enabled
2022-11-29 9:52 ` Eric Auger
@ 2022-11-29 15:44 ` Michael S. Tsirkin
2022-11-29 16:08 ` Eric Auger
0 siblings, 1 reply; 5+ messages in thread
From: Michael S. Tsirkin @ 2022-11-29 15:44 UTC (permalink / raw)
To: Eric Auger; +Cc: Jason Wang, qemu-devel, qemu-stable, Lei Yang, Yalan Zhang
On Tue, Nov 29, 2022 at 10:52:29AM +0100, Eric Auger wrote:
> Hi Jason,
>
> On 11/29/22 05:02, Jason Wang wrote:
> > When vIOMMU is enabled, the vq->used_phys is actually the IOVA not
> > GPA. So we need to translate it to GPA before the syncing otherwise we
> > may hit the following crash since IOVA could be out of the scope of
> > the GPA log size. This could be noted when using virtio-IOMMU with
> > vhost using 1G memory.
> >
> > Fixes: c471ad0e9bd46 ("vhost_net: device IOTLB support")
> > Cc: qemu-stable@nongnu.org
> > Tested-by: Lei Yang <leiyang@redhat.com>
> > Reported-by: Yalan Zhang <yalzhang@redhat.com>
> > Signed-off-by: Jason Wang <jasowang@redhat.com>
> > ---
> > Changes since V1:
> > - Fix the address calculation when used ring is not page aligned
> > - Fix the length for each round of dirty bitmap syncing
> > - Use LOG_GUEST_ERROR to log wrong used address
> > - Various other tweaks
> > ---
> > hw/virtio/vhost.c | 76 ++++++++++++++++++++++++++++++++++-------------
> > 1 file changed, 56 insertions(+), 20 deletions(-)
> >
> > diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
> > index d1c4c20b8c..0cd5f25fcb 100644
> > --- a/hw/virtio/vhost.c
> > +++ b/hw/virtio/vhost.c
> > @@ -20,6 +20,7 @@
> > #include "qemu/range.h"
> > #include "qemu/error-report.h"
> > #include "qemu/memfd.h"
> > +#include "qemu/log.h"
> > #include "standard-headers/linux/vhost_types.h"
> > #include "hw/virtio/virtio-bus.h"
> > #include "hw/virtio/virtio-access.h"
> > @@ -106,6 +107,24 @@ static void vhost_dev_sync_region(struct vhost_dev *dev,
> > }
> > }
> >
> > +static bool vhost_dev_has_iommu(struct vhost_dev *dev)
> > +{
> > + VirtIODevice *vdev = dev->vdev;
> > +
> > + /*
> > + * For vhost, VIRTIO_F_IOMMU_PLATFORM means the backend support
> > + * incremental memory mapping API via IOTLB API. For platform that
> > + * does not have IOMMU, there's no need to enable this feature
> > + * which may cause unnecessary IOTLB miss/update transactions.
> > + */
> > + if (vdev) {
> > + return virtio_bus_device_iommu_enabled(vdev) &&
> > + virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM);
> > + } else {
> > + return false;
> > + }
> > +}
> > +
> > static int vhost_sync_dirty_bitmap(struct vhost_dev *dev,
> > MemoryRegionSection *section,
> > hwaddr first,
> > @@ -137,8 +156,43 @@ static int vhost_sync_dirty_bitmap(struct vhost_dev *dev,
> > continue;
> > }
> >
> > - vhost_dev_sync_region(dev, section, start_addr, end_addr, vq->used_phys,
> > - range_get_last(vq->used_phys, vq->used_size));
> > + if (vhost_dev_has_iommu(dev)) {
> > + IOMMUTLBEntry iotlb;
> > + hwaddr used_phys = vq->used_phys, used_size = vq->used_size;
> > + hwaddr phys, s;
> > +
> > + while (used_size) {
> > + rcu_read_lock();
> > + iotlb = address_space_get_iotlb_entry(dev->vdev->dma_as,
> > + used_phys,
> > + true, MEMTXATTRS_UNSPECIFIED);
> > + rcu_read_unlock();
> > +
> > + if (!iotlb.target_as) {
> > + qemu_log_mask(LOG_GUEST_ERROR, "translation "
> > + "failure for used_phys %"PRIx64"\n", used_phys);
> looks weird to see translation of "used_phys" whereas it is an iova. At
> least I would reword the msg
> > + return -EINVAL;
> > + }
> > +
> > + phys = iotlb.translated_addr + (used_phys & iotlb.addr_mask);
> you may use a local variable storing this offset =
>
> used_phys & iotlb.addr_mask
>
> > +
> > + /* Distance from start of used ring until last byte of
> > + IOMMU page */
> you can avoid checkpatch warnings here
> > + s = iotlb.addr_mask - (used_phys & iotlb.addr_mask);
> > + /* Size of used ring, or of the part of it until end
> > + of IOMMU page */
> and here
>
> I would suggest to rewrite this into
> s =iotlb.addr_mask - (used_phys & iotlb.addr_mask) + 1
> s = MIN(s, used_size);
This does not work - if iotlb.addr_mask - (used_phys & iotlb.addr_mask)
is all-ones then + 1 gives you 0 and MIN gives you 0.
Theoretical but worth being safe here IMHO.
> > + s = MIN(s, used_size - 1) + 1;
> > +
> > + vhost_dev_sync_region(dev, section, start_addr, end_addr, phys,
> > + range_get_last(phys, s));
> > + used_size -= s;
> > + used_phys += s;
> > + }
> > + } else {
> > + vhost_dev_sync_region(dev, section, start_addr,
> > + end_addr, vq->used_phys,
> > + range_get_last(vq->used_phys, vq->used_size));
> > + }
> > }
> > return 0;
> > }
> > @@ -306,24 +360,6 @@ static inline void vhost_dev_log_resize(struct vhost_dev *dev, uint64_t size)
> > dev->log_size = size;
> > }
> >
> > -static bool vhost_dev_has_iommu(struct vhost_dev *dev)
> > -{
> > - VirtIODevice *vdev = dev->vdev;
> > -
> > - /*
> > - * For vhost, VIRTIO_F_IOMMU_PLATFORM means the backend support
> > - * incremental memory mapping API via IOTLB API. For platform that
> > - * does not have IOMMU, there's no need to enable this feature
> > - * which may cause unnecessary IOTLB miss/update transactions.
> > - */
> > - if (vdev) {
> > - return virtio_bus_device_iommu_enabled(vdev) &&
> > - virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM);
> > - } else {
> > - return false;
> > - }
> > -}
> > -
> > static void *vhost_memory_map(struct vhost_dev *dev, hwaddr addr,
> > hwaddr *plen, bool is_write)
> > {
> Besides,
>
> Tested-by: Eric Auger <eric.auger@redhat.com>
>
> Eric
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH for 7.2? V2] vhost: fix vq dirty bitmap syncing when vIOMMU is enabled
2022-11-29 15:44 ` Michael S. Tsirkin
@ 2022-11-29 16:08 ` Eric Auger
2022-12-01 8:47 ` Jason Wang
0 siblings, 1 reply; 5+ messages in thread
From: Eric Auger @ 2022-11-29 16:08 UTC (permalink / raw)
To: Michael S. Tsirkin
Cc: Jason Wang, qemu-devel, qemu-stable, Lei Yang, Yalan Zhang
Hi Michael,
On 11/29/22 16:44, Michael S. Tsirkin wrote:
> On Tue, Nov 29, 2022 at 10:52:29AM +0100, Eric Auger wrote:
>> Hi Jason,
>>
>> On 11/29/22 05:02, Jason Wang wrote:
>>> When vIOMMU is enabled, the vq->used_phys is actually the IOVA not
>>> GPA. So we need to translate it to GPA before the syncing otherwise we
>>> may hit the following crash since IOVA could be out of the scope of
>>> the GPA log size. This could be noted when using virtio-IOMMU with
>>> vhost using 1G memory.
>>>
>>> Fixes: c471ad0e9bd46 ("vhost_net: device IOTLB support")
>>> Cc: qemu-stable@nongnu.org
>>> Tested-by: Lei Yang <leiyang@redhat.com>
>>> Reported-by: Yalan Zhang <yalzhang@redhat.com>
>>> Signed-off-by: Jason Wang <jasowang@redhat.com>
>>> ---
>>> Changes since V1:
>>> - Fix the address calculation when used ring is not page aligned
>>> - Fix the length for each round of dirty bitmap syncing
>>> - Use LOG_GUEST_ERROR to log wrong used address
>>> - Various other tweaks
>>> ---
>>> hw/virtio/vhost.c | 76 ++++++++++++++++++++++++++++++++++-------------
>>> 1 file changed, 56 insertions(+), 20 deletions(-)
>>>
>>> diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
>>> index d1c4c20b8c..0cd5f25fcb 100644
>>> --- a/hw/virtio/vhost.c
>>> +++ b/hw/virtio/vhost.c
>>> @@ -20,6 +20,7 @@
>>> #include "qemu/range.h"
>>> #include "qemu/error-report.h"
>>> #include "qemu/memfd.h"
>>> +#include "qemu/log.h"
>>> #include "standard-headers/linux/vhost_types.h"
>>> #include "hw/virtio/virtio-bus.h"
>>> #include "hw/virtio/virtio-access.h"
>>> @@ -106,6 +107,24 @@ static void vhost_dev_sync_region(struct vhost_dev *dev,
>>> }
>>> }
>>>
>>> +static bool vhost_dev_has_iommu(struct vhost_dev *dev)
>>> +{
>>> + VirtIODevice *vdev = dev->vdev;
>>> +
>>> + /*
>>> + * For vhost, VIRTIO_F_IOMMU_PLATFORM means the backend support
>>> + * incremental memory mapping API via IOTLB API. For platform that
>>> + * does not have IOMMU, there's no need to enable this feature
>>> + * which may cause unnecessary IOTLB miss/update transactions.
>>> + */
>>> + if (vdev) {
>>> + return virtio_bus_device_iommu_enabled(vdev) &&
>>> + virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM);
>>> + } else {
>>> + return false;
>>> + }
>>> +}
>>> +
>>> static int vhost_sync_dirty_bitmap(struct vhost_dev *dev,
>>> MemoryRegionSection *section,
>>> hwaddr first,
>>> @@ -137,8 +156,43 @@ static int vhost_sync_dirty_bitmap(struct vhost_dev *dev,
>>> continue;
>>> }
>>>
>>> - vhost_dev_sync_region(dev, section, start_addr, end_addr, vq->used_phys,
>>> - range_get_last(vq->used_phys, vq->used_size));
>>> + if (vhost_dev_has_iommu(dev)) {
>>> + IOMMUTLBEntry iotlb;
>>> + hwaddr used_phys = vq->used_phys, used_size = vq->used_size;
>>> + hwaddr phys, s;
>>> +
>>> + while (used_size) {
>>> + rcu_read_lock();
>>> + iotlb = address_space_get_iotlb_entry(dev->vdev->dma_as,
>>> + used_phys,
>>> + true, MEMTXATTRS_UNSPECIFIED);
>>> + rcu_read_unlock();
>>> +
>>> + if (!iotlb.target_as) {
>>> + qemu_log_mask(LOG_GUEST_ERROR, "translation "
>>> + "failure for used_phys %"PRIx64"\n", used_phys);
>> looks weird to see translation of "used_phys" whereas it is an iova. At
>> least I would reword the msg
>>> + return -EINVAL;
>>> + }
>>> +
>>> + phys = iotlb.translated_addr + (used_phys & iotlb.addr_mask);
>> you may use a local variable storing this offset =
>>
>> used_phys & iotlb.addr_mask
>>
>>> +
>>> + /* Distance from start of used ring until last byte of
>>> + IOMMU page */
>> you can avoid checkpatch warnings here
>>> + s = iotlb.addr_mask - (used_phys & iotlb.addr_mask);
>>> + /* Size of used ring, or of the part of it until end
>>> + of IOMMU page */
>> and here
>>
>> I would suggest to rewrite this into
>> s =iotlb.addr_mask - (used_phys & iotlb.addr_mask) + 1
>> s = MIN(s, used_size);
> This does not work - if iotlb.addr_mask - (used_phys & iotlb.addr_mask)
> is all-ones then + 1 gives you 0 and MIN gives you 0.
> Theoretical but worth being safe here IMHO.
Ah OK, I should have read your previous discussion more thoroughly ...
Maybe just add a short comment then to justify the gymnastics below and avoid
tempting somebody else to rewrite it in a more common but wrong way.
Thanks
Eric
>
>
>>> + s = MIN(s, used_size - 1) + 1;
>>> +
>>> + vhost_dev_sync_region(dev, section, start_addr, end_addr, phys,
>>> + range_get_last(phys, s));
>>> + used_size -= s;
>>> + used_phys += s;
>>> + }
>>> + } else {
>>> + vhost_dev_sync_region(dev, section, start_addr,
>>> + end_addr, vq->used_phys,
>>> + range_get_last(vq->used_phys, vq->used_size));
>>> + }
>>> }
>>> return 0;
>>> }
>>> @@ -306,24 +360,6 @@ static inline void vhost_dev_log_resize(struct vhost_dev *dev, uint64_t size)
>>> dev->log_size = size;
>>> }
>>>
>>> -static bool vhost_dev_has_iommu(struct vhost_dev *dev)
>>> -{
>>> - VirtIODevice *vdev = dev->vdev;
>>> -
>>> - /*
>>> - * For vhost, VIRTIO_F_IOMMU_PLATFORM means the backend support
>>> - * incremental memory mapping API via IOTLB API. For platform that
>>> - * does not have IOMMU, there's no need to enable this feature
>>> - * which may cause unnecessary IOTLB miss/update transactions.
>>> - */
>>> - if (vdev) {
>>> - return virtio_bus_device_iommu_enabled(vdev) &&
>>> - virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM);
>>> - } else {
>>> - return false;
>>> - }
>>> -}
>>> -
>>> static void *vhost_memory_map(struct vhost_dev *dev, hwaddr addr,
>>> hwaddr *plen, bool is_write)
>>> {
>> Besides,
>>
>> Tested-by: Eric Auger <eric.auger@redhat.com>
>>
>> Eric
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH for 7.2? V2] vhost: fix vq dirty bitmap syncing when vIOMMU is enabled
2022-11-29 16:08 ` Eric Auger
@ 2022-12-01 8:47 ` Jason Wang
0 siblings, 0 replies; 5+ messages in thread
From: Jason Wang @ 2022-12-01 8:47 UTC (permalink / raw)
To: eric.auger
Cc: Michael S. Tsirkin, qemu-devel, qemu-stable, Lei Yang,
Yalan Zhang
On Wed, Nov 30, 2022 at 12:08 AM Eric Auger <eric.auger@redhat.com> wrote:
>
> Hi Michael,
>
> On 11/29/22 16:44, Michael S. Tsirkin wrote:
> > On Tue, Nov 29, 2022 at 10:52:29AM +0100, Eric Auger wrote:
> >> Hi Jason,
> >>
> >> On 11/29/22 05:02, Jason Wang wrote:
> >>> When vIOMMU is enabled, the vq->used_phys is actually the IOVA not
> >>> GPA. So we need to translate it to GPA before the syncing otherwise we
> >>> may hit the following crash since IOVA could be out of the scope of
> >>> the GPA log size. This could be noted when using virtio-IOMMU with
> >>> vhost using 1G memory.
> >>>
> >>> Fixes: c471ad0e9bd46 ("vhost_net: device IOTLB support")
> >>> Cc: qemu-stable@nongnu.org
> >>> Tested-by: Lei Yang <leiyang@redhat.com>
> >>> Reported-by: Yalan Zhang <yalzhang@redhat.com>
> >>> Signed-off-by: Jason Wang <jasowang@redhat.com>
> >>> ---
> >>> Changes since V1:
> >>> - Fix the address calculation when used ring is not page aligned
> >>> - Fix the length for each round of dirty bitmap syncing
> >>> - Use LOG_GUEST_ERROR to log wrong used address
> >>> - Various other tweaks
> >>> ---
> >>> hw/virtio/vhost.c | 76 ++++++++++++++++++++++++++++++++++-------------
> >>> 1 file changed, 56 insertions(+), 20 deletions(-)
> >>>
> >>> diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
> >>> index d1c4c20b8c..0cd5f25fcb 100644
> >>> --- a/hw/virtio/vhost.c
> >>> +++ b/hw/virtio/vhost.c
> >>> @@ -20,6 +20,7 @@
> >>> #include "qemu/range.h"
> >>> #include "qemu/error-report.h"
> >>> #include "qemu/memfd.h"
> >>> +#include "qemu/log.h"
> >>> #include "standard-headers/linux/vhost_types.h"
> >>> #include "hw/virtio/virtio-bus.h"
> >>> #include "hw/virtio/virtio-access.h"
> >>> @@ -106,6 +107,24 @@ static void vhost_dev_sync_region(struct vhost_dev *dev,
> >>> }
> >>> }
> >>>
> >>> +static bool vhost_dev_has_iommu(struct vhost_dev *dev)
> >>> +{
> >>> + VirtIODevice *vdev = dev->vdev;
> >>> +
> >>> + /*
> >>> + * For vhost, VIRTIO_F_IOMMU_PLATFORM means the backend support
> >>> + * incremental memory mapping API via IOTLB API. For platform that
> >>> + * does not have IOMMU, there's no need to enable this feature
> >>> + * which may cause unnecessary IOTLB miss/update transactions.
> >>> + */
> >>> + if (vdev) {
> >>> + return virtio_bus_device_iommu_enabled(vdev) &&
> >>> + virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM);
> >>> + } else {
> >>> + return false;
> >>> + }
> >>> +}
> >>> +
> >>> static int vhost_sync_dirty_bitmap(struct vhost_dev *dev,
> >>> MemoryRegionSection *section,
> >>> hwaddr first,
> >>> @@ -137,8 +156,43 @@ static int vhost_sync_dirty_bitmap(struct vhost_dev *dev,
> >>> continue;
> >>> }
> >>>
> >>> - vhost_dev_sync_region(dev, section, start_addr, end_addr, vq->used_phys,
> >>> - range_get_last(vq->used_phys, vq->used_size));
> >>> + if (vhost_dev_has_iommu(dev)) {
> >>> + IOMMUTLBEntry iotlb;
> >>> + hwaddr used_phys = vq->used_phys, used_size = vq->used_size;
> >>> + hwaddr phys, s;
> >>> +
> >>> + while (used_size) {
> >>> + rcu_read_lock();
> >>> + iotlb = address_space_get_iotlb_entry(dev->vdev->dma_as,
> >>> + used_phys,
> >>> + true, MEMTXATTRS_UNSPECIFIED);
> >>> + rcu_read_unlock();
> >>> +
> >>> + if (!iotlb.target_as) {
> >>> + qemu_log_mask(LOG_GUEST_ERROR, "translation "
> >>> + "failure for used_phys %"PRIx64"\n", used_phys);
> >> looks weird to see translation of "used_phys" whereas it is an iova. At
> >> least I would reword the msg
Let me tweak this in the next version.
> >>> + return -EINVAL;
> >>> + }
> >>> +
> >>> + phys = iotlb.translated_addr + (used_phys & iotlb.addr_mask);
> >> you may use a local variable storing this offset =
> >>
> >> used_phys & iotlb.addr_mask
Ok.
> >>
> >>> +
> >>> + /* Distance from start of used ring until last byte of
> >>> + IOMMU page */
> >> you can avoid checkpatch warnings here
> >>> + s = iotlb.addr_mask - (used_phys & iotlb.addr_mask);
> >>> + /* Size of used ring, or of the part of it until end
> >>> + of IOMMU page */
> >> and here
Will fix.
> >>
> >> I would suggest to rewrite this into
> >> s =iotlb.addr_mask - (used_phys & iotlb.addr_mask) + 1
> >> s = MIN(s, used_size);
> > This does not work - if iotlb.addr_mask - (used_phys & iotlb.addr_mask)
> > is all-ones then + 1 gives you 0 and MIN gives you 0.
> > Theoretical but worth being safe here IMHO.
> Ah OK, I should have read your previous discussion more thoroughly ...
> Maybe just add a short comment then to justify the gym below and avoid
> tempting sbdy else to rewrite it in a more common but wrong way.
That's fine.
Thanks
>
> Thanks
>
> Eric
> >
> >
> >>> + s = MIN(s, used_size - 1) + 1;
> >>> +
> >>> + vhost_dev_sync_region(dev, section, start_addr, end_addr, phys,
> >>> + range_get_last(phys, s));
> >>> + used_size -= s;
> >>> + used_phys += s;
> >>> + }
> >>> + } else {
> >>> + vhost_dev_sync_region(dev, section, start_addr,
> >>> + end_addr, vq->used_phys,
> >>> + range_get_last(vq->used_phys, vq->used_size));
> >>> + }
> >>> }
> >>> return 0;
> >>> }
> >>> @@ -306,24 +360,6 @@ static inline void vhost_dev_log_resize(struct vhost_dev *dev, uint64_t size)
> >>> dev->log_size = size;
> >>> }
> >>>
> >>> -static bool vhost_dev_has_iommu(struct vhost_dev *dev)
> >>> -{
> >>> - VirtIODevice *vdev = dev->vdev;
> >>> -
> >>> - /*
> >>> - * For vhost, VIRTIO_F_IOMMU_PLATFORM means the backend support
> >>> - * incremental memory mapping API via IOTLB API. For platform that
> >>> - * does not have IOMMU, there's no need to enable this feature
> >>> - * which may cause unnecessary IOTLB miss/update transactions.
> >>> - */
> >>> - if (vdev) {
> >>> - return virtio_bus_device_iommu_enabled(vdev) &&
> >>> - virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM);
> >>> - } else {
> >>> - return false;
> >>> - }
> >>> -}
> >>> -
> >>> static void *vhost_memory_map(struct vhost_dev *dev, hwaddr addr,
> >>> hwaddr *plen, bool is_write)
> >>> {
> >> Besides,
> >>
> >> Tested-by: Eric Auger <eric.auger@redhat.com>
> >>
> >> Eric
>
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2022-12-01 8:48 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2022-11-29 4:02 [PATCH for 7.2? V2] vhost: fix vq dirty bitmap syncing when vIOMMU is enabled Jason Wang
2022-11-29 9:52 ` Eric Auger
2022-11-29 15:44 ` Michael S. Tsirkin
2022-11-29 16:08 ` Eric Auger
2022-12-01 8:47 ` Jason Wang
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).