From: "Cédric Le Goater" <clg@redhat.com>
To: John Johnson <john.g.johnson@oracle.com>, qemu-devel@nongnu.org
Subject: Re: [PATCH v1 03/24] vfio-user: add container IO ops vector
Date: Fri, 9 Dec 2022 17:10:55 +0100
Message-ID: <f3cda64b-ed4b-5e4d-5a0d-82c3b3861d41@redhat.com>
In-Reply-To: <d357c8c243ef839cc8e41fc1ae5c8db2d98fc2cc.1667542066.git.john.g.johnson@oracle.com>

Hello John,

On 11/9/22 00:13, John Johnson wrote:
> Used for communication with the VFIO driver
> (prep work for vfio-user, which will communicate over a socket)
> 
> Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
> ---
>   hw/vfio/common.c              | 126 ++++++++++++++++++++++++++++--------------
>   include/hw/vfio/vfio-common.h |  33 +++++++++++
>   2 files changed, 117 insertions(+), 42 deletions(-)
> 
> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> index ace9562..83d69b9 100644
> --- a/hw/vfio/common.c
> +++ b/hw/vfio/common.c
> @@ -432,12 +432,12 @@ static int vfio_dma_unmap_bitmap(VFIOContainer *container,
>           goto unmap_exit;
>       }
>   
> -    ret = ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, unmap);
> +    ret = CONT_DMA_UNMAP(container, unmap, bitmap);

I am not sure these macros are very useful compared to:

     container->ops->dma_unmap(container, unmap, bitmap);
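
For instance, the three call sites would then read (only a sketch,
assuming the io_ops field is also renamed to plain "ops" as suggested
below; the argument lists are the ones this patch already passes to
the macros):

     ret = container->ops->dma_map(container, &map);
     ret = container->ops->dma_unmap(container, &unmap, NULL);
     ret = container->ops->dirty_bitmap(container, &dirty, NULL);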

>       if (!ret) {
>           cpu_physical_memory_set_dirty_lebitmap((unsigned long *)bitmap->data,
>                   iotlb->translated_addr, pages);
>       } else {
> -        error_report("VFIO_UNMAP_DMA with DIRTY_BITMAP : %m");
> +        error_report("VFIO_UNMAP_DMA with DIRTY_BITMAP : %s", strerror(-ret));
>       }
>   
>       g_free(bitmap->data);
> @@ -465,30 +465,7 @@ static int vfio_dma_unmap(VFIOContainer *container,
>           return vfio_dma_unmap_bitmap(container, iova, size, iotlb);
>       }
>   
> -    while (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, &unmap)) {
> -        /*
> -         * The type1 backend has an off-by-one bug in the kernel (71a7d3d78e3c
> -         * v4.15) where an overflow in its wrap-around check prevents us from
> -         * unmapping the last page of the address space.  Test for the error
> -         * condition and re-try the unmap excluding the last page.  The
> -         * expectation is that we've never mapped the last page anyway and this
> -         * unmap request comes via vIOMMU support which also makes it unlikely
> -         * that this page is used.  This bug was introduced well after type1 v2
> -         * support was introduced, so we shouldn't need to test for v1.  A fix
> -         * is queued for kernel v5.0 so this workaround can be removed once
> -         * affected kernels are sufficiently deprecated.
> -         */
> -        if (errno == EINVAL && unmap.size && !(unmap.iova + unmap.size) &&
> -            container->iommu_type == VFIO_TYPE1v2_IOMMU) {
> -            trace_vfio_dma_unmap_overflow_workaround();
> -            unmap.size -= 1ULL << ctz64(container->pgsizes);
> -            continue;
> -        }
> -        error_report("VFIO_UNMAP_DMA failed: %s", strerror(errno));
> -        return -errno;
> -    }
> -
> -    return 0;
> +    return CONT_DMA_UNMAP(container, &unmap, NULL);
>   }
>   
>   static int vfio_dma_map(VFIOContainer *container, hwaddr iova,
> @@ -501,24 +478,18 @@ static int vfio_dma_map(VFIOContainer *container, hwaddr iova,
>           .iova = iova,
>           .size = size,
>       };
> +    int ret;
>   
>       if (!readonly) {
>           map.flags |= VFIO_DMA_MAP_FLAG_WRITE;
>       }
>   
> -    /*
> -     * Try the mapping, if it fails with EBUSY, unmap the region and try
> -     * again.  This shouldn't be necessary, but we sometimes see it in
> -     * the VGA ROM space.
> -     */
> -    if (ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0 ||
> -        (errno == EBUSY && vfio_dma_unmap(container, iova, size, NULL) == 0 &&
> -         ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0)) {
> -        return 0;
> -    }
> +    ret = CONT_DMA_MAP(container, &map);
>   
> -    error_report("VFIO_MAP_DMA failed: %s", strerror(errno));
> -    return -errno;
> +    if (ret < 0) {
> +        error_report("VFIO_MAP_DMA failed: %s", strerror(-ret));
> +    }
> +    return ret;
>   }
>   
>   static void vfio_host_win_add(VFIOContainer *container,
> @@ -1263,10 +1234,10 @@ static void vfio_set_dirty_page_tracking(VFIOContainer *container, bool start)
>           dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP;
>       }
>   
> -    ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, &dirty);
> +    ret = CONT_DIRTY_BITMAP(container, &dirty, NULL);
>       if (ret) {
>           error_report("Failed to set dirty tracking flag 0x%x errno: %d",
> -                     dirty.flags, errno);
> +                     dirty.flags, -ret);
>       }
>   }
>   
> @@ -1316,11 +1287,11 @@ static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova,
>           goto err_out;
>       }
>   
> -    ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, dbitmap);
> +    ret = CONT_DIRTY_BITMAP(container, dbitmap, range);
>       if (ret) {
>           error_report("Failed to get dirty bitmap for iova: 0x%"PRIx64
>                   " size: 0x%"PRIx64" err: %d", (uint64_t)range->iova,
> -                (uint64_t)range->size, errno);
> +                (uint64_t)range->size, -ret);
>           goto err_out;
>       }
>   
> @@ -2090,6 +2061,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
>       container->error = NULL;
>       container->dirty_pages_supported = false;
>       container->dma_max_mappings = 0;
> +    container->io_ops = &vfio_cont_io_ioctl;
>       QLIST_INIT(&container->giommu_list);
>       QLIST_INIT(&container->hostwin_list);
>       QLIST_INIT(&container->vrdl_list);
> @@ -2626,3 +2598,73 @@ int vfio_eeh_as_op(AddressSpace *as, uint32_t op)
>       }
>       return vfio_eeh_container_op(container, op);
>   }
> +
> +/*
> + * Traditional ioctl() based io_ops
> + */
> +
> +static int vfio_io_dma_map(VFIOContainer *container,
> +                           struct vfio_iommu_type1_dma_map *map)
> +{
> +
> +    /*
> +     * Try the mapping, if it fails with EBUSY, unmap the region and try
> +     * again.  This shouldn't be necessary, but we sometimes see it in
> +     * the VGA ROM space.
> +     */
> +    if (ioctl(container->fd, VFIO_IOMMU_MAP_DMA, map) == 0 ||
> +        (errno == EBUSY &&
> +         vfio_dma_unmap(container, map->iova, map->size, NULL) == 0 &&
> +         ioctl(container->fd, VFIO_IOMMU_MAP_DMA, map) == 0)) {
> +        return 0;
> +    }
> +    return -errno;
> +}
> +
> +static int vfio_io_dma_unmap(VFIOContainer *container,
> +                             struct vfio_iommu_type1_dma_unmap *unmap,
> +                             struct vfio_bitmap *bitmap)
> +{
> +
> +    while (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, unmap)) {
> +        /*
> +         * The type1 backend has an off-by-one bug in the kernel (71a7d3d78e3c
> +         * v4.15) where an overflow in its wrap-around check prevents us from
> +         * unmapping the last page of the address space.  Test for the error
> +         * condition and re-try the unmap excluding the last page.  The
> +         * expectation is that we've never mapped the last page anyway and this
> +         * unmap request comes via vIOMMU support which also makes it unlikely
> +         * that this page is used.  This bug was introduced well after type1 v2
> +         * support was introduced, so we shouldn't need to test for v1.  A fix
> +         * is queued for kernel v5.0 so this workaround can be removed once
> +         * affected kernels are sufficiently deprecated.
> +         */
> +        if (errno == EINVAL && unmap->size && !(unmap->iova + unmap->size) &&
> +            container->iommu_type == VFIO_TYPE1v2_IOMMU) {
> +            trace_vfio_dma_unmap_overflow_workaround();
> +            unmap->size -= 1ULL << ctz64(container->pgsizes);
> +            continue;
> +        }
> +        error_report("VFIO_UNMAP_DMA failed: %s", strerror(errno));
> +        return -errno;
> +    }
> +
> +    return 0;
> +}
> +
> +static int vfio_io_dirty_bitmap(VFIOContainer *container,
> +                                struct vfio_iommu_type1_dirty_bitmap *bitmap,
> +                                struct vfio_iommu_type1_dirty_bitmap_get *range)
> +{
> +    int ret;
> +
> +    ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, bitmap);
> +
> +    return ret < 0 ? -errno : ret;
> +}
> +
> +VFIOContIO vfio_cont_io_ioctl = {
> +    .dma_map = vfio_io_dma_map,
> +    .dma_unmap = vfio_io_dma_unmap,
> +    .dirty_bitmap = vfio_io_dirty_bitmap,
> +};
> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
> index e573f5a..6fd40f1 100644
> --- a/include/hw/vfio/vfio-common.h
> +++ b/include/hw/vfio/vfio-common.h
> @@ -75,6 +75,7 @@ typedef struct VFIOAddressSpace {
>   } VFIOAddressSpace;
>   
>   struct VFIOGroup;
> +typedef struct VFIOContIO VFIOContIO;
>   
>   typedef struct VFIOContainer {
>       VFIOAddressSpace *space;
> @@ -83,6 +84,7 @@ typedef struct VFIOContainer {
>       MemoryListener prereg_listener;
>       unsigned iommu_type;
>       Error *error;
> +    VFIOContIO *io_ops;

A simple "ops" should be enough as a field name.
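
i.e. (a sketch of the field declaration only):

     VFIOContIO *ops;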

>       bool initialized;
>       bool dirty_pages_supported;
>       uint64_t dirty_pgsizes;
> @@ -154,6 +156,37 @@ struct VFIODeviceOps {
>       int (*vfio_load_config)(VFIODevice *vdev, QEMUFile *f);
>   };
>   
> +#ifdef CONFIG_LINUX
> +
> +/*
> + * The next 2 ops vectors are how Devices and Containers
> + * communicate with the server.  The default option is
> + * through ioctl() to the kernel VFIO driver, but vfio-user
> + * can use a socket to a remote process.
> + */
> +
> +struct VFIOContIO {

VFIOContainerOps would fit better with the current VFIO terminology
in QEMU.
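
Something like this (a sketch only: the three function pointers are
copied verbatim from this patch, while the struct name and the
variable name are just suggestions, not existing QEMU API):

     typedef struct VFIOContainerOps {
         int (*dma_map)(VFIOContainer *container,
                        struct vfio_iommu_type1_dma_map *map);
         int (*dma_unmap)(VFIOContainer *container,
                          struct vfio_iommu_type1_dma_unmap *unmap,
                          struct vfio_bitmap *bitmap);
         int (*dirty_bitmap)(VFIOContainer *container,
                             struct vfio_iommu_type1_dirty_bitmap *bitmap,
                             struct vfio_iommu_type1_dirty_bitmap_get *range);
     } VFIOContainerOps;

     extern VFIOContainerOps vfio_container_io_ioctl;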

Thanks,

C.

> +    int (*dma_map)(VFIOContainer *container,
> +                   struct vfio_iommu_type1_dma_map *map);
> +    int (*dma_unmap)(VFIOContainer *container,
> +                     struct vfio_iommu_type1_dma_unmap *unmap,
> +                     struct vfio_bitmap *bitmap);
> +    int (*dirty_bitmap)(VFIOContainer *container,
> +                        struct vfio_iommu_type1_dirty_bitmap *bitmap,
> +                        struct vfio_iommu_type1_dirty_bitmap_get *range);
> +};
> +
> +#define CONT_DMA_MAP(cont, map) \
> +    ((cont)->io_ops->dma_map((cont), (map)))
> +#define CONT_DMA_UNMAP(cont, unmap, bitmap) \
> +    ((cont)->io_ops->dma_unmap((cont), (unmap), (bitmap)))
> +#define CONT_DIRTY_BITMAP(cont, bitmap, range) \
> +    ((cont)->io_ops->dirty_bitmap((cont), (bitmap), (range)))
> +
> +extern VFIOContIO vfio_cont_io_ioctl;
> +
> +#endif /* CONFIG_LINUX */
> +
>   typedef struct VFIOGroup {
>       int fd;
>       int groupid;


