All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
To: Kirti Wankhede <kwankhede@nvidia.com>
Cc: cohuck@redhat.com, cjia@nvidia.com, aik@ozlabs.ru,
	Zhengxiao.zx@alibaba-inc.com, shuangtai.tst@alibaba-inc.com,
	qemu-devel@nongnu.org, peterx@redhat.com, eauger@redhat.com,
	yi.l.liu@intel.com, quintela@redhat.com, ziye.yang@intel.com,
	armbru@redhat.com, mlevitsk@redhat.com, pasic@linux.ibm.com,
	felipe@nutanix.com, zhi.a.wang@intel.com, kevin.tian@intel.com,
	yan.y.zhao@intel.com, alex.williamson@redhat.com,
	changpeng.liu@intel.com, eskultet@redhat.com, Ken.Xue@amd.com,
	jonathan.davies@nutanix.com, pbonzini@redhat.com
Subject: Re: [PATCH QEMU v23 16/18] vfio: Add ioctl to get dirty pages bitmap during dma unmap.
Date: Thu, 21 May 2020 20:05:41 +0100	[thread overview]
Message-ID: <20200521190541.GN2752@work-vm> (raw)
In-Reply-To: <1589999088-31477-17-git-send-email-kwankhede@nvidia.com>

* Kirti Wankhede (kwankhede@nvidia.com) wrote:
> With vIOMMU, IO virtual address range can get unmapped while in pre-copy
> phase of migration. In that case, unmap ioctl should return pages pinned
> in that range and QEMU should find its correcponding guest physical
> addresses and report those dirty.
> 
> Suggested-by: Alex Williamson <alex.williamson@redhat.com>
> Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
> Reviewed-by: Neo Jia <cjia@nvidia.com>
> ---
>  hw/vfio/common.c | 84 +++++++++++++++++++++++++++++++++++++++++++++++++++++---
>  1 file changed, 80 insertions(+), 4 deletions(-)
> 
> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> index 3f7049fbd1b0..2dd9e8b24788 100644
> --- a/hw/vfio/common.c
> +++ b/hw/vfio/common.c
> @@ -311,11 +311,82 @@ static bool vfio_devices_are_stopped_and_saving(void)
>      return true;
>  }
>  
> +static bool vfio_devices_are_running_and_saving(void)
> +{
> +    VFIOGroup *group;
> +    VFIODevice *vbasedev;
> +
> +    QLIST_FOREACH(group, &vfio_group_list, next) {
> +        QLIST_FOREACH(vbasedev, &group->device_list, next) {
> +            if ((vbasedev->device_state & VFIO_DEVICE_STATE_SAVING) &&
> +                (vbasedev->device_state & VFIO_DEVICE_STATE_RUNNING)) {
> +                continue;
> +            } else {
> +                return false;
> +            }
> +        }
> +    }
> +    return true;
> +}
> +
> +static int vfio_dma_unmap_bitmap(VFIOContainer *container,
> +                                 hwaddr iova, ram_addr_t size,
> +                                 IOMMUTLBEntry *iotlb)
> +{
> +    struct vfio_iommu_type1_dma_unmap *unmap;
> +    struct vfio_bitmap *bitmap;
> +    uint64_t pages = TARGET_PAGE_ALIGN(size) >> TARGET_PAGE_BITS;
> +    int ret;
> +
> +    unmap = g_malloc0(sizeof(*unmap) + sizeof(*bitmap));
> +    if (!unmap) {
> +        return -ENOMEM;
> +    }

Again that's a fixed size thing so you don't really need to malloc it;
g_malloc0 won't return NULL; it'll just assert (which it shouldn't
do anyway because those structures are small).  Personally I'd just put
it on the stack.

> +    unmap->argsz = sizeof(*unmap) + sizeof(*bitmap);
> +    unmap->iova = iova;
> +    unmap->size = size;
> +    unmap->flags |= VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP;
> +    bitmap = (struct vfio_bitmap *)&unmap->data;
> +
> +    /*
> +     * cpu_physical_memory_set_dirty_lebitmap() expects pages in bitmap of
> +     * TARGET_PAGE_SIZE to mark those dirty. Hence set bitmap_pgsize to
> +     * TARGET_PAGE_SIZE.
> +     */
> +
> +    bitmap->pgsize = TARGET_PAGE_SIZE;
> +    bitmap->size = ROUND_UP(pages, sizeof(__u64) * BITS_PER_BYTE) /
> +                   BITS_PER_BYTE;
> +
> +    if (bitmap->size > container->max_dirty_bitmap_size) {
> +        error_report("UNMAP: Size of bitmap too big 0x%llx", bitmap->size);
> +        ret = -E2BIG;
> +        goto unmap_exit;
> +    }
> +
> +    bitmap->data = g_malloc0(bitmap->size);

That one should be a g_try_malloc0 and check it's non-null.

> +
> +    ret = ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, unmap);
> +    if (!ret) {
> +        cpu_physical_memory_set_dirty_lebitmap((uint64_t *)bitmap->data,
> +                iotlb->translated_addr, pages);
> +    } else {
> +        error_report("VFIO_UNMAP_DMA with DIRTY_BITMAP : %d", -errno);

You're allowed touse %m in hw/vfio for printing the error nicely.

> +    }
> +
> +    g_free(bitmap->data);
> +unmap_exit:
> +    g_free(unmap);
> +    return ret;
> +}
> +
>  /*
>   * DMA - Mapping and unmapping for the "type1" IOMMU interface used on x86
>   */
>  static int vfio_dma_unmap(VFIOContainer *container,
> -                          hwaddr iova, ram_addr_t size)
> +                          hwaddr iova, ram_addr_t size,
> +                          IOMMUTLBEntry *iotlb)
>  {
>      struct vfio_iommu_type1_dma_unmap unmap = {
>          .argsz = sizeof(unmap),
> @@ -324,6 +395,11 @@ static int vfio_dma_unmap(VFIOContainer *container,
>          .size = size,
>      };
>  
> +    if (iotlb && container->dirty_pages_supported &&
> +        vfio_devices_are_running_and_saving()) {
> +        return vfio_dma_unmap_bitmap(container, iova, size, iotlb);
> +    }
> +
>      while (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, &unmap)) {
>          /*
>           * The type1 backend has an off-by-one bug in the kernel (71a7d3d78e3c
> @@ -371,7 +447,7 @@ static int vfio_dma_map(VFIOContainer *container, hwaddr iova,
>       * the VGA ROM space.
>       */
>      if (ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0 ||
> -        (errno == EBUSY && vfio_dma_unmap(container, iova, size) == 0 &&
> +        (errno == EBUSY && vfio_dma_unmap(container, iova, size, NULL) == 0 &&
>           ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0)) {
>          return 0;
>      }
> @@ -519,7 +595,7 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
>                           iotlb->addr_mask + 1, vaddr, ret);
>          }
>      } else {
> -        ret = vfio_dma_unmap(container, iova, iotlb->addr_mask + 1);
> +        ret = vfio_dma_unmap(container, iova, iotlb->addr_mask + 1, iotlb);
>          if (ret) {
>              error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
>                           "0x%"HWADDR_PRIx") = %d (%m)",
> @@ -822,7 +898,7 @@ static void vfio_listener_region_del(MemoryListener *listener,
>      }
>  
>      if (try_unmap) {
> -        ret = vfio_dma_unmap(container, iova, int128_get64(llsize));
> +        ret = vfio_dma_unmap(container, iova, int128_get64(llsize), NULL);
>          if (ret) {
>              error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
>                           "0x%"HWADDR_PRIx") = %d (%m)",
> -- 
> 2.7.0
> 
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK



  reply	other threads:[~2020-05-21 19:06 UTC|newest]

Thread overview: 39+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-05-20 18:24 [PATCH QEMU v23 00/18] Add migration support for VFIO devices Kirti Wankhede
2020-05-20 18:24 ` [PATCH QEMU v23 01/18] vfio: KABI for migration interface - Kernel header placeholder Kirti Wankhede
2020-05-20 18:24 ` [PATCH QEMU v23 02/18] vfio: Add function to unmap VFIO region Kirti Wankhede
2020-05-20 18:24 ` [PATCH QEMU v23 03/18] vfio: Add vfio_get_object callback to VFIODeviceOps Kirti Wankhede
2020-05-20 18:24 ` [PATCH QEMU v23 04/18] vfio: Add save and load functions for VFIO PCI devices Kirti Wankhede
2020-05-21  9:50   ` Dr. David Alan Gilbert
2020-05-21 12:12     ` Kirti Wankhede
2020-05-21 19:28       ` Dr. David Alan Gilbert
2020-05-20 18:24 ` [PATCH QEMU v23 05/18] vfio: Add migration region initialization and finalize function Kirti Wankhede
2020-05-21  9:59   ` Dr. David Alan Gilbert
2020-05-20 18:24 ` [PATCH QEMU v23 06/18] vfio: Add VM state change handler to know state of VM Kirti Wankhede
2020-05-21 11:19   ` Dr. David Alan Gilbert
2020-05-20 18:24 ` [PATCH QEMU v23 07/18] vfio: Add migration state change notifier Kirti Wankhede
2020-05-21 11:31   ` Dr. David Alan Gilbert
2020-05-20 18:24 ` [PATCH QEMU v23 08/18] vfio: Register SaveVMHandlers for VFIO device Kirti Wankhede
2020-05-21 14:18   ` Dr. David Alan Gilbert
2020-05-21 18:00     ` Kirti Wankhede
2020-05-21 19:35       ` Dr. David Alan Gilbert
2020-05-20 18:24 ` [PATCH QEMU v23 09/18] vfio: Add save state functions to SaveVMHandlers Kirti Wankhede
2020-05-21 15:37   ` Dr. David Alan Gilbert
2020-05-21 20:43   ` Peter Xu
2020-05-20 18:24 ` [PATCH QEMU v23 10/18] vfio: Add load " Kirti Wankhede
2020-05-21 15:53   ` Dr. David Alan Gilbert
2020-05-20 18:24 ` [PATCH QEMU v23 11/18] iommu: add callback to get address limit IOMMU supports Kirti Wankhede
2020-05-21 16:13   ` Peter Xu
2020-05-20 18:24 ` [PATCH QEMU v23 12/18] memory: Set DIRTY_MEMORY_MIGRATION when IOMMU is enabled Kirti Wankhede
2020-05-21 16:20   ` Dr. David Alan Gilbert
2020-05-20 18:24 ` [PATCH QEMU v23 13/18] vfio: Get migration capability flags for container Kirti Wankhede
2020-05-20 18:24 ` [PATCH QEMU v23 14/18] vfio: Add function to start and stop dirty pages tracking Kirti Wankhede
2020-05-21 16:50   ` Dr. David Alan Gilbert
2020-05-20 18:24 ` [PATCH QEMU v23 15/18] vfio: Add vfio_listener_log_sync to mark dirty pages Kirti Wankhede
2020-05-21 18:52   ` Dr. David Alan Gilbert
2020-05-20 18:24 ` [PATCH QEMU v23 16/18] vfio: Add ioctl to get dirty pages bitmap during dma unmap Kirti Wankhede
2020-05-21 19:05   ` Dr. David Alan Gilbert [this message]
2020-05-20 18:24 ` [PATCH QEMU v23 17/18] vfio: Make vfio-pci device migration capable Kirti Wankhede
2020-05-21 19:16   ` Dr. David Alan Gilbert
2020-05-20 18:24 ` [PATCH QEMU v23 18/18] qapi: Add VFIO devices migration stats in Migration stats Kirti Wankhede
2020-05-21 19:23   ` Dr. David Alan Gilbert
2020-05-25 14:34   ` Markus Armbruster

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200521190541.GN2752@work-vm \
    --to=dgilbert@redhat.com \
    --cc=Ken.Xue@amd.com \
    --cc=Zhengxiao.zx@alibaba-inc.com \
    --cc=aik@ozlabs.ru \
    --cc=alex.williamson@redhat.com \
    --cc=armbru@redhat.com \
    --cc=changpeng.liu@intel.com \
    --cc=cjia@nvidia.com \
    --cc=cohuck@redhat.com \
    --cc=eauger@redhat.com \
    --cc=eskultet@redhat.com \
    --cc=felipe@nutanix.com \
    --cc=jonathan.davies@nutanix.com \
    --cc=kevin.tian@intel.com \
    --cc=kwankhede@nvidia.com \
    --cc=mlevitsk@redhat.com \
    --cc=pasic@linux.ibm.com \
    --cc=pbonzini@redhat.com \
    --cc=peterx@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=quintela@redhat.com \
    --cc=shuangtai.tst@alibaba-inc.com \
    --cc=yan.y.zhao@intel.com \
    --cc=yi.l.liu@intel.com \
    --cc=zhi.a.wang@intel.com \
    --cc=ziye.yang@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.