* [PATCH v6 1/9] vfio/iommufd: Introduce auto domain creation
2024-07-22 21:13 [PATCH v6 0/9] hw/iommufd: IOMMUFD Dirty Tracking Joao Martins
@ 2024-07-22 21:13 ` Joao Martins
2024-07-23 4:38 ` Duan, Zhenzhong
2024-07-23 7:18 ` Eric Auger
2024-07-22 21:13 ` [PATCH v6 2/9] vfio/{iommufd,container}: Remove caps::aw_bits Joao Martins
` (9 subsequent siblings)
10 siblings, 2 replies; 51+ messages in thread
From: Joao Martins @ 2024-07-22 21:13 UTC (permalink / raw)
To: qemu-devel
Cc: Yi Liu, Eric Auger, Zhenzhong Duan, Alex Williamson,
Cedric Le Goater, Jason Gunthorpe, Avihai Horon, Joao Martins
There are generally two modes of operation for IOMMUFD:
1) The simple user API which intends to perform relatively simple things
with IOMMUs e.g. DPDK. The process generally creates an IOAS and attaches
to VFIO and mainly performs IOAS_MAP and UNMAP.
2) The native IOMMUFD API where you have fine-grained control of the
IOMMU domain and model it accordingly. This is where most new features
are being steered.
For dirty tracking 2) is required, as it needs to ensure that
the stage-2/parent IOMMU domain will only attach devices
that support dirty tracking (so far it is all homogeneous in x86, likely
not the case for smmuv3). Such an invariant on dirty tracking provides a
useful guarantee to VMMs that will refuse incompatible device
attachments for IOMMU domains.
The dirty tracking guarantee is enforced via HWPT_ALLOC, which is
responsible for creating an IOMMU domain. This is in contrast to the
'simple API', where the IOMMU domain is created by IOMMUFD automatically
when it attaches to VFIO (usually referred to as autodomains), but which
has the needed handling for mdevs.
To support dirty tracking with the advanced IOMMUFD API, it needs
similar logic, where IOMMU domains are created and devices attached to
compatible domains, essentially mimicking the kernel's
iommufd_device_auto_get_domain(). For mdevs, given there's no IOMMU domain,
it falls back to IOAS attach.
The auto domain logic allows different IOMMU domains to be created when
DMA dirty tracking is not desired (and VF can provide it), and others where
it is. Here it is not used in this way given how VFIODevice migration
state is initialized after the device attachment. But such mixed mode of
IOMMU dirty tracking + device dirty tracking is an improvement that can
be added on. Keep the 'all or nothing' approach of type1 that we have
been using so far between container vs device dirty tracking.
Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
---
include/hw/vfio/vfio-common.h | 9 ++++
include/sysemu/iommufd.h | 5 +++
backends/iommufd.c | 30 +++++++++++++
hw/vfio/iommufd.c | 84 +++++++++++++++++++++++++++++++++++
backends/trace-events | 1 +
5 files changed, 129 insertions(+)
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 98acae8c1c97..1a96678f8c38 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -95,10 +95,17 @@ typedef struct VFIOHostDMAWindow {
typedef struct IOMMUFDBackend IOMMUFDBackend;
+typedef struct VFIOIOASHwpt {
+ uint32_t hwpt_id;
+ QLIST_HEAD(, VFIODevice) device_list;
+ QLIST_ENTRY(VFIOIOASHwpt) next;
+} VFIOIOASHwpt;
+
typedef struct VFIOIOMMUFDContainer {
VFIOContainerBase bcontainer;
IOMMUFDBackend *be;
uint32_t ioas_id;
+ QLIST_HEAD(, VFIOIOASHwpt) hwpt_list;
} VFIOIOMMUFDContainer;
OBJECT_DECLARE_SIMPLE_TYPE(VFIOIOMMUFDContainer, VFIO_IOMMU_IOMMUFD);
@@ -135,6 +142,8 @@ typedef struct VFIODevice {
HostIOMMUDevice *hiod;
int devid;
IOMMUFDBackend *iommufd;
+ VFIOIOASHwpt *hwpt;
+ QLIST_ENTRY(VFIODevice) hwpt_next;
} VFIODevice;
struct VFIODeviceOps {
diff --git a/include/sysemu/iommufd.h b/include/sysemu/iommufd.h
index 57d502a1c79a..e917e7591d05 100644
--- a/include/sysemu/iommufd.h
+++ b/include/sysemu/iommufd.h
@@ -50,6 +50,11 @@ int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id,
bool iommufd_backend_get_device_info(IOMMUFDBackend *be, uint32_t devid,
uint32_t *type, void *data, uint32_t len,
uint64_t *caps, Error **errp);
+bool iommufd_backend_alloc_hwpt(IOMMUFDBackend *be, uint32_t dev_id,
+ uint32_t pt_id, uint32_t flags,
+ uint32_t data_type, uint32_t data_len,
+ void *data_ptr, uint32_t *out_hwpt,
+ Error **errp);
#define TYPE_HOST_IOMMU_DEVICE_IOMMUFD TYPE_HOST_IOMMU_DEVICE "-iommufd"
#endif
diff --git a/backends/iommufd.c b/backends/iommufd.c
index 48dfd3962474..60a3d14bfab4 100644
--- a/backends/iommufd.c
+++ b/backends/iommufd.c
@@ -207,6 +207,36 @@ int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id,
return ret;
}
+bool iommufd_backend_alloc_hwpt(IOMMUFDBackend *be, uint32_t dev_id,
+ uint32_t pt_id, uint32_t flags,
+ uint32_t data_type, uint32_t data_len,
+ void *data_ptr, uint32_t *out_hwpt,
+ Error **errp)
+{
+ int ret, fd = be->fd;
+ struct iommu_hwpt_alloc alloc_hwpt = {
+ .size = sizeof(struct iommu_hwpt_alloc),
+ .flags = flags,
+ .dev_id = dev_id,
+ .pt_id = pt_id,
+ .data_type = data_type,
+ .data_len = data_len,
+ .data_uptr = (uintptr_t)data_ptr,
+ };
+
+ ret = ioctl(fd, IOMMU_HWPT_ALLOC, &alloc_hwpt);
+ trace_iommufd_backend_alloc_hwpt(fd, dev_id, pt_id, flags, data_type,
+ data_len, (uintptr_t)data_ptr,
+ alloc_hwpt.out_hwpt_id, ret);
+ if (ret) {
+ error_setg_errno(errp, errno, "Failed to allocate hwpt");
+ return false;
+ }
+
+ *out_hwpt = alloc_hwpt.out_hwpt_id;
+ return true;
+}
+
bool iommufd_backend_get_device_info(IOMMUFDBackend *be, uint32_t devid,
uint32_t *type, void *data, uint32_t len,
uint64_t *caps, Error **errp)
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
index 7390621ee927..172553b1f7f8 100644
--- a/hw/vfio/iommufd.c
+++ b/hw/vfio/iommufd.c
@@ -212,10 +212,88 @@ static bool iommufd_cdev_detach_ioas_hwpt(VFIODevice *vbasedev, Error **errp)
return true;
}
+static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
+ VFIOIOMMUFDContainer *container,
+ Error **errp)
+{
+ IOMMUFDBackend *iommufd = vbasedev->iommufd;
+ uint32_t flags = 0;
+ VFIOIOASHwpt *hwpt;
+ uint32_t hwpt_id;
+ int ret;
+
+ /* Try to find a domain */
+ QLIST_FOREACH(hwpt, &container->hwpt_list, next) {
+ ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt->hwpt_id, errp);
+ if (ret) {
+ /* -EINVAL means the domain is incompatible with the device. */
+ if (ret == -EINVAL) {
+ /*
+ * It is an expected failure and it just means we will try
+ * another domain, or create one if no existing compatible
+ * domain is found. Hence why the error is discarded below.
+ */
+ error_free(*errp);
+ *errp = NULL;
+ continue;
+ }
+
+ return false;
+ } else {
+ vbasedev->hwpt = hwpt;
+ QLIST_INSERT_HEAD(&hwpt->device_list, vbasedev, hwpt_next);
+ return true;
+ }
+ }
+
+ if (!iommufd_backend_alloc_hwpt(iommufd, vbasedev->devid,
+ container->ioas_id, flags,
+ IOMMU_HWPT_DATA_NONE, 0, NULL,
+ &hwpt_id, errp)) {
+ return false;
+ }
+
+ hwpt = g_malloc0(sizeof(*hwpt));
+ hwpt->hwpt_id = hwpt_id;
+ QLIST_INIT(&hwpt->device_list);
+
+ ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt->hwpt_id, errp);
+ if (ret) {
+ iommufd_backend_free_id(container->be, hwpt->hwpt_id);
+ g_free(hwpt);
+ return false;
+ }
+
+ vbasedev->hwpt = hwpt;
+ QLIST_INSERT_HEAD(&hwpt->device_list, vbasedev, hwpt_next);
+ QLIST_INSERT_HEAD(&container->hwpt_list, hwpt, next);
+ return true;
+}
+
+static void iommufd_cdev_autodomains_put(VFIODevice *vbasedev,
+ VFIOIOMMUFDContainer *container)
+{
+ VFIOIOASHwpt *hwpt = vbasedev->hwpt;
+
+ QLIST_REMOVE(vbasedev, hwpt_next);
+ vbasedev->hwpt = NULL;
+
+ if (QLIST_EMPTY(&hwpt->device_list)) {
+ QLIST_REMOVE(hwpt, next);
+ iommufd_backend_free_id(container->be, hwpt->hwpt_id);
+ g_free(hwpt);
+ }
+}
+
static bool iommufd_cdev_attach_container(VFIODevice *vbasedev,
VFIOIOMMUFDContainer *container,
Error **errp)
{
+ /* mdevs aren't physical devices and will fail with auto domains */
+ if (!vbasedev->mdev) {
+ return iommufd_cdev_autodomains_get(vbasedev, container, errp);
+ }
+
return !iommufd_cdev_attach_ioas_hwpt(vbasedev, container->ioas_id, errp);
}
@@ -227,6 +305,11 @@ static void iommufd_cdev_detach_container(VFIODevice *vbasedev,
if (!iommufd_cdev_detach_ioas_hwpt(vbasedev, &err)) {
error_report_err(err);
}
+
+ if (vbasedev->hwpt) {
+ iommufd_cdev_autodomains_put(vbasedev, container);
+ }
+
}
static void iommufd_cdev_container_destroy(VFIOIOMMUFDContainer *container)
@@ -354,6 +437,7 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
container = VFIO_IOMMU_IOMMUFD(object_new(TYPE_VFIO_IOMMU_IOMMUFD));
container->be = vbasedev->iommufd;
container->ioas_id = ioas_id;
+ QLIST_INIT(&container->hwpt_list);
bcontainer = &container->bcontainer;
vfio_address_space_insert(space, bcontainer);
diff --git a/backends/trace-events b/backends/trace-events
index 211e6f374adc..4d8ac02fe7d6 100644
--- a/backends/trace-events
+++ b/backends/trace-events
@@ -14,4 +14,5 @@ iommufd_backend_map_dma(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size
iommufd_backend_unmap_dma_non_exist(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, int ret) " Unmap nonexistent mapping: iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" (%d)"
iommufd_backend_unmap_dma(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, int ret) " iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" (%d)"
iommufd_backend_alloc_ioas(int iommufd, uint32_t ioas) " iommufd=%d ioas=%d"
+iommufd_backend_alloc_hwpt(int iommufd, uint32_t dev_id, uint32_t pt_id, uint32_t flags, uint32_t hwpt_type, uint32_t len, uint64_t data_ptr, uint32_t out_hwpt_id, int ret) " iommufd=%d dev_id=%u pt_id=%u flags=0x%x hwpt_type=%u len=%u data_ptr=0x%"PRIx64" out_hwpt=%u (%d)"
iommufd_backend_free_id(int iommufd, uint32_t id, int ret) " iommufd=%d id=%d (%d)"
--
2.17.2
^ permalink raw reply related [flat|nested] 51+ messages in thread
* RE: [PATCH v6 1/9] vfio/iommufd: Introduce auto domain creation
2024-07-22 21:13 ` [PATCH v6 1/9] vfio/iommufd: Introduce auto domain creation Joao Martins
@ 2024-07-23 4:38 ` Duan, Zhenzhong
2024-07-23 6:57 ` Cédric Le Goater
2024-07-23 7:18 ` Eric Auger
1 sibling, 1 reply; 51+ messages in thread
From: Duan, Zhenzhong @ 2024-07-23 4:38 UTC (permalink / raw)
To: Joao Martins, qemu-devel@nongnu.org
Cc: Liu, Yi L, Eric Auger, Alex Williamson, Cedric Le Goater,
Jason Gunthorpe, Avihai Horon
>-----Original Message-----
>From: Joao Martins <joao.m.martins@oracle.com>
>Subject: [PATCH v6 1/9] vfio/iommufd: Introduce auto domain creation
>
>There's generally two modes of operation for IOMMUFD:
>
>1) The simple user API which intends to perform relatively simple things
>with IOMMUs e.g. DPDK. The process generally creates an IOAS and attaches
>to VFIO and mainly performs IOAS_MAP and UNMAP.
>
>2) The native IOMMUFD API where you have fine grained control of the
>IOMMU domain and model it accordingly. This is where most new feature
>are being steered to.
>
>For dirty tracking 2) is required, as it needs to ensure that
>the stage-2/parent IOMMU domain will only attach devices
>that support dirty tracking (so far it is all homogeneous in x86, likely
>not the case for smmuv3). Such invariant on dirty tracking provides a
>useful guarantee to VMMs that will refuse incompatible device
>attachments for IOMMU domains.
>
>Dirty tracking insurance is enforced via HWPT_ALLOC, which is
>responsible for creating an IOMMU domain. This is contrast to the
>'simple API' where the IOMMU domain is created by IOMMUFD
>automatically
>when it attaches to VFIO (usually referred as autodomains) but it has
>the needed handling for mdevs.
>
>To support dirty tracking with the advanced IOMMUFD API, it needs
>similar logic, where IOMMU domains are created and devices attached to
>compatible domains. Essentially mimicking kernel
>iommufd_device_auto_get_domain(). With mdevs given there's no IOMMU
>domain
>it falls back to IOAS attach.
>
>The auto domain logic allows different IOMMU domains to be created when
>DMA dirty tracking is not desired (and VF can provide it), and others where
>it is. Here it is not used in this way given how VFIODevice migration
>state is initialized after the device attachment. But such mixed mode of
>IOMMU dirty tracking + device dirty tracking is an improvement that can
>be added on. Keep the 'all of nothing' of type1 approach that we have
>been using so far between container vs device dirty tracking.
>
>Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
>---
> include/hw/vfio/vfio-common.h | 9 ++++
> include/sysemu/iommufd.h | 5 +++
> backends/iommufd.c | 30 +++++++++++++
> hw/vfio/iommufd.c | 84
>+++++++++++++++++++++++++++++++++++
> backends/trace-events | 1 +
> 5 files changed, 129 insertions(+)
>
>diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-
>common.h
>index 98acae8c1c97..1a96678f8c38 100644
>--- a/include/hw/vfio/vfio-common.h
>+++ b/include/hw/vfio/vfio-common.h
>@@ -95,10 +95,17 @@ typedef struct VFIOHostDMAWindow {
>
> typedef struct IOMMUFDBackend IOMMUFDBackend;
>
>+typedef struct VFIOIOASHwpt {
>+ uint32_t hwpt_id;
>+ QLIST_HEAD(, VFIODevice) device_list;
>+ QLIST_ENTRY(VFIOIOASHwpt) next;
>+} VFIOIOASHwpt;
>+
> typedef struct VFIOIOMMUFDContainer {
> VFIOContainerBase bcontainer;
> IOMMUFDBackend *be;
> uint32_t ioas_id;
>+ QLIST_HEAD(, VFIOIOASHwpt) hwpt_list;
> } VFIOIOMMUFDContainer;
>
> OBJECT_DECLARE_SIMPLE_TYPE(VFIOIOMMUFDContainer,
>VFIO_IOMMU_IOMMUFD);
>@@ -135,6 +142,8 @@ typedef struct VFIODevice {
> HostIOMMUDevice *hiod;
> int devid;
> IOMMUFDBackend *iommufd;
>+ VFIOIOASHwpt *hwpt;
>+ QLIST_ENTRY(VFIODevice) hwpt_next;
> } VFIODevice;
>
> struct VFIODeviceOps {
>diff --git a/include/sysemu/iommufd.h b/include/sysemu/iommufd.h
>index 57d502a1c79a..e917e7591d05 100644
>--- a/include/sysemu/iommufd.h
>+++ b/include/sysemu/iommufd.h
>@@ -50,6 +50,11 @@ int
>iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id,
> bool iommufd_backend_get_device_info(IOMMUFDBackend *be, uint32_t
>devid,
> uint32_t *type, void *data, uint32_t len,
> uint64_t *caps, Error **errp);
>+bool iommufd_backend_alloc_hwpt(IOMMUFDBackend *be, uint32_t
>dev_id,
>+ uint32_t pt_id, uint32_t flags,
>+ uint32_t data_type, uint32_t data_len,
>+ void *data_ptr, uint32_t *out_hwpt,
>+ Error **errp);
>
> #define TYPE_HOST_IOMMU_DEVICE_IOMMUFD
>TYPE_HOST_IOMMU_DEVICE "-iommufd"
> #endif
>diff --git a/backends/iommufd.c b/backends/iommufd.c
>index 48dfd3962474..60a3d14bfab4 100644
>--- a/backends/iommufd.c
>+++ b/backends/iommufd.c
>@@ -207,6 +207,36 @@ int
>iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id,
> return ret;
> }
>
>+bool iommufd_backend_alloc_hwpt(IOMMUFDBackend *be, uint32_t
>dev_id,
>+ uint32_t pt_id, uint32_t flags,
>+ uint32_t data_type, uint32_t data_len,
>+ void *data_ptr, uint32_t *out_hwpt,
>+ Error **errp)
>+{
>+ int ret, fd = be->fd;
>+ struct iommu_hwpt_alloc alloc_hwpt = {
>+ .size = sizeof(struct iommu_hwpt_alloc),
>+ .flags = flags,
>+ .dev_id = dev_id,
>+ .pt_id = pt_id,
>+ .data_type = data_type,
>+ .data_len = data_len,
>+ .data_uptr = (uintptr_t)data_ptr,
>+ };
>+
>+ ret = ioctl(fd, IOMMU_HWPT_ALLOC, &alloc_hwpt);
>+ trace_iommufd_backend_alloc_hwpt(fd, dev_id, pt_id, flags, data_type,
>+ data_len, (uintptr_t)data_ptr,
>+ alloc_hwpt.out_hwpt_id, ret);
>+ if (ret) {
>+ error_setg_errno(errp, errno, "Failed to allocate hwpt");
>+ return false;
>+ }
>+
>+ *out_hwpt = alloc_hwpt.out_hwpt_id;
>+ return true;
>+}
>+
> bool iommufd_backend_get_device_info(IOMMUFDBackend *be, uint32_t
>devid,
> uint32_t *type, void *data, uint32_t len,
> uint64_t *caps, Error **errp)
>diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
>index 7390621ee927..172553b1f7f8 100644
>--- a/hw/vfio/iommufd.c
>+++ b/hw/vfio/iommufd.c
>@@ -212,10 +212,88 @@ static bool
>iommufd_cdev_detach_ioas_hwpt(VFIODevice *vbasedev, Error **errp)
> return true;
> }
>
>+static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
>+ VFIOIOMMUFDContainer *container,
>+ Error **errp)
>+{
>+ IOMMUFDBackend *iommufd = vbasedev->iommufd;
>+ uint32_t flags = 0;
>+ VFIOIOASHwpt *hwpt;
>+ uint32_t hwpt_id;
>+ int ret;
>+
>+ /* Try to find a domain */
>+ QLIST_FOREACH(hwpt, &container->hwpt_list, next) {
>+ ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt->hwpt_id,
>errp);
>+ if (ret) {
>+ /* -EINVAL means the domain is incompatible with the device. */
>+ if (ret == -EINVAL) {
>+ /*
>+ * It is an expected failure and it just means we will try
>+ * another domain, or create one if no existing compatible
>+ * domain is found. Hence why the error is discarded below.
>+ */
>+ error_free(*errp);
Better to have ERRP_GUARD(), other than that,
Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Thanks
Zhenzhong
>+ *errp = NULL;
>+ continue;
>+ }
>+
>+ return false;
>+ } else {
>+ vbasedev->hwpt = hwpt;
>+ QLIST_INSERT_HEAD(&hwpt->device_list, vbasedev, hwpt_next);
>+ return true;
>+ }
>+ }
>+
>+ if (!iommufd_backend_alloc_hwpt(iommufd, vbasedev->devid,
>+ container->ioas_id, flags,
>+ IOMMU_HWPT_DATA_NONE, 0, NULL,
>+ &hwpt_id, errp)) {
>+ return false;
>+ }
>+
>+ hwpt = g_malloc0(sizeof(*hwpt));
>+ hwpt->hwpt_id = hwpt_id;
>+ QLIST_INIT(&hwpt->device_list);
>+
>+ ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt->hwpt_id, errp);
>+ if (ret) {
>+ iommufd_backend_free_id(container->be, hwpt->hwpt_id);
>+ g_free(hwpt);
>+ return false;
>+ }
>+
>+ vbasedev->hwpt = hwpt;
>+ QLIST_INSERT_HEAD(&hwpt->device_list, vbasedev, hwpt_next);
>+ QLIST_INSERT_HEAD(&container->hwpt_list, hwpt, next);
>+ return true;
>+}
>+
>+static void iommufd_cdev_autodomains_put(VFIODevice *vbasedev,
>+ VFIOIOMMUFDContainer *container)
>+{
>+ VFIOIOASHwpt *hwpt = vbasedev->hwpt;
>+
>+ QLIST_REMOVE(vbasedev, hwpt_next);
>+ vbasedev->hwpt = NULL;
>+
>+ if (QLIST_EMPTY(&hwpt->device_list)) {
>+ QLIST_REMOVE(hwpt, next);
>+ iommufd_backend_free_id(container->be, hwpt->hwpt_id);
>+ g_free(hwpt);
>+ }
>+}
>+
> static bool iommufd_cdev_attach_container(VFIODevice *vbasedev,
> VFIOIOMMUFDContainer *container,
> Error **errp)
> {
>+ /* mdevs aren't physical devices and will fail with auto domains */
>+ if (!vbasedev->mdev) {
>+ return iommufd_cdev_autodomains_get(vbasedev, container, errp);
>+ }
>+
> return !iommufd_cdev_attach_ioas_hwpt(vbasedev, container->ioas_id,
>errp);
> }
>
>@@ -227,6 +305,11 @@ static void
>iommufd_cdev_detach_container(VFIODevice *vbasedev,
> if (!iommufd_cdev_detach_ioas_hwpt(vbasedev, &err)) {
> error_report_err(err);
> }
>+
>+ if (vbasedev->hwpt) {
>+ iommufd_cdev_autodomains_put(vbasedev, container);
>+ }
>+
> }
>
> static void iommufd_cdev_container_destroy(VFIOIOMMUFDContainer
>*container)
>@@ -354,6 +437,7 @@ static bool iommufd_cdev_attach(const char *name,
>VFIODevice *vbasedev,
> container =
>VFIO_IOMMU_IOMMUFD(object_new(TYPE_VFIO_IOMMU_IOMMUFD));
> container->be = vbasedev->iommufd;
> container->ioas_id = ioas_id;
>+ QLIST_INIT(&container->hwpt_list);
>
> bcontainer = &container->bcontainer;
> vfio_address_space_insert(space, bcontainer);
>diff --git a/backends/trace-events b/backends/trace-events
>index 211e6f374adc..4d8ac02fe7d6 100644
>--- a/backends/trace-events
>+++ b/backends/trace-events
>@@ -14,4 +14,5 @@ iommufd_backend_map_dma(int iommufd, uint32_t
>ioas, uint64_t iova, uint64_t size
> iommufd_backend_unmap_dma_non_exist(int iommufd, uint32_t ioas,
>uint64_t iova, uint64_t size, int ret) " Unmap nonexistent mapping:
>iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" (%d)"
> iommufd_backend_unmap_dma(int iommufd, uint32_t ioas, uint64_t iova,
>uint64_t size, int ret) " iommufd=%d ioas=%d iova=0x%"PRIx64"
>size=0x%"PRIx64" (%d)"
> iommufd_backend_alloc_ioas(int iommufd, uint32_t ioas) " iommufd=%d
>ioas=%d"
>+iommufd_backend_alloc_hwpt(int iommufd, uint32_t dev_id, uint32_t
>pt_id, uint32_t flags, uint32_t hwpt_type, uint32_t len, uint64_t data_ptr,
>uint32_t out_hwpt_id, int ret) " iommufd=%d dev_id=%u pt_id=%u
>flags=0x%x hwpt_type=%u len=%u data_ptr=0x%"PRIx64" out_hwpt=%u
>(%d)"
> iommufd_backend_free_id(int iommufd, uint32_t id, int ret) " iommufd=%d
>id=%d (%d)"
>--
>2.17.2
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [PATCH v6 1/9] vfio/iommufd: Introduce auto domain creation
2024-07-23 4:38 ` Duan, Zhenzhong
@ 2024-07-23 6:57 ` Cédric Le Goater
0 siblings, 0 replies; 51+ messages in thread
From: Cédric Le Goater @ 2024-07-23 6:57 UTC (permalink / raw)
To: Duan, Zhenzhong, Joao Martins, qemu-devel@nongnu.org
Cc: Liu, Yi L, Eric Auger, Alex Williamson, Jason Gunthorpe,
Avihai Horon
On 7/23/24 06:38, Duan, Zhenzhong wrote:
>
>
>> -----Original Message-----
>> From: Joao Martins <joao.m.martins@oracle.com>
>> Subject: [PATCH v6 1/9] vfio/iommufd: Introduce auto domain creation
>>
>> There's generally two modes of operation for IOMMUFD:
>>
>> 1) The simple user API which intends to perform relatively simple things
>> with IOMMUs e.g. DPDK. The process generally creates an IOAS and attaches
>> to VFIO and mainly performs IOAS_MAP and UNMAP.
>>
>> 2) The native IOMMUFD API where you have fine grained control of the
>> IOMMU domain and model it accordingly. This is where most new feature
>> are being steered to.
>>
>> For dirty tracking 2) is required, as it needs to ensure that
>> the stage-2/parent IOMMU domain will only attach devices
>> that support dirty tracking (so far it is all homogeneous in x86, likely
>> not the case for smmuv3). Such invariant on dirty tracking provides a
>> useful guarantee to VMMs that will refuse incompatible device
>> attachments for IOMMU domains.
>>
>> Dirty tracking insurance is enforced via HWPT_ALLOC, which is
>> responsible for creating an IOMMU domain. This is contrast to the
>> 'simple API' where the IOMMU domain is created by IOMMUFD
>> automatically
>> when it attaches to VFIO (usually referred as autodomains) but it has
>> the needed handling for mdevs.
>>
>> To support dirty tracking with the advanced IOMMUFD API, it needs
>> similar logic, where IOMMU domains are created and devices attached to
>> compatible domains. Essentially mimicking kernel
>> iommufd_device_auto_get_domain(). With mdevs given there's no IOMMU
>> domain
>> it falls back to IOAS attach.
>>
>> The auto domain logic allows different IOMMU domains to be created when
>> DMA dirty tracking is not desired (and VF can provide it), and others where
>> it is. Here it is not used in this way given how VFIODevice migration
>> state is initialized after the device attachment. But such mixed mode of
>> IOMMU dirty tracking + device dirty tracking is an improvement that can
>> be added on. Keep the 'all of nothing' of type1 approach that we have
>> been using so far between container vs device dirty tracking.
>>
>> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
>> ---
>> include/hw/vfio/vfio-common.h | 9 ++++
>> include/sysemu/iommufd.h | 5 +++
>> backends/iommufd.c | 30 +++++++++++++
>> hw/vfio/iommufd.c | 84
>> +++++++++++++++++++++++++++++++++++
>> backends/trace-events | 1 +
>> 5 files changed, 129 insertions(+)
>>
>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-
>> common.h
>> index 98acae8c1c97..1a96678f8c38 100644
>> --- a/include/hw/vfio/vfio-common.h
>> +++ b/include/hw/vfio/vfio-common.h
>> @@ -95,10 +95,17 @@ typedef struct VFIOHostDMAWindow {
>>
>> typedef struct IOMMUFDBackend IOMMUFDBackend;
>>
>> +typedef struct VFIOIOASHwpt {
>> + uint32_t hwpt_id;
>> + QLIST_HEAD(, VFIODevice) device_list;
>> + QLIST_ENTRY(VFIOIOASHwpt) next;
>> +} VFIOIOASHwpt;
>> +
>> typedef struct VFIOIOMMUFDContainer {
>> VFIOContainerBase bcontainer;
>> IOMMUFDBackend *be;
>> uint32_t ioas_id;
>> + QLIST_HEAD(, VFIOIOASHwpt) hwpt_list;
>> } VFIOIOMMUFDContainer;
>>
>> OBJECT_DECLARE_SIMPLE_TYPE(VFIOIOMMUFDContainer,
>> VFIO_IOMMU_IOMMUFD);
>> @@ -135,6 +142,8 @@ typedef struct VFIODevice {
>> HostIOMMUDevice *hiod;
>> int devid;
>> IOMMUFDBackend *iommufd;
>> + VFIOIOASHwpt *hwpt;
>> + QLIST_ENTRY(VFIODevice) hwpt_next;
>> } VFIODevice;
>>
>> struct VFIODeviceOps {
>> diff --git a/include/sysemu/iommufd.h b/include/sysemu/iommufd.h
>> index 57d502a1c79a..e917e7591d05 100644
>> --- a/include/sysemu/iommufd.h
>> +++ b/include/sysemu/iommufd.h
>> @@ -50,6 +50,11 @@ int
>> iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id,
>> bool iommufd_backend_get_device_info(IOMMUFDBackend *be, uint32_t
>> devid,
>> uint32_t *type, void *data, uint32_t len,
>> uint64_t *caps, Error **errp);
>> +bool iommufd_backend_alloc_hwpt(IOMMUFDBackend *be, uint32_t
>> dev_id,
>> + uint32_t pt_id, uint32_t flags,
>> + uint32_t data_type, uint32_t data_len,
>> + void *data_ptr, uint32_t *out_hwpt,
>> + Error **errp);
>>
>> #define TYPE_HOST_IOMMU_DEVICE_IOMMUFD
>> TYPE_HOST_IOMMU_DEVICE "-iommufd"
>> #endif
>> diff --git a/backends/iommufd.c b/backends/iommufd.c
>> index 48dfd3962474..60a3d14bfab4 100644
>> --- a/backends/iommufd.c
>> +++ b/backends/iommufd.c
>> @@ -207,6 +207,36 @@ int
>> iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id,
>> return ret;
>> }
>>
>> +bool iommufd_backend_alloc_hwpt(IOMMUFDBackend *be, uint32_t
>> dev_id,
>> + uint32_t pt_id, uint32_t flags,
>> + uint32_t data_type, uint32_t data_len,
>> + void *data_ptr, uint32_t *out_hwpt,
>> + Error **errp)
>> +{
>> + int ret, fd = be->fd;
>> + struct iommu_hwpt_alloc alloc_hwpt = {
>> + .size = sizeof(struct iommu_hwpt_alloc),
>> + .flags = flags,
>> + .dev_id = dev_id,
>> + .pt_id = pt_id,
>> + .data_type = data_type,
>> + .data_len = data_len,
>> + .data_uptr = (uintptr_t)data_ptr,
>> + };
>> +
>> + ret = ioctl(fd, IOMMU_HWPT_ALLOC, &alloc_hwpt);
>> + trace_iommufd_backend_alloc_hwpt(fd, dev_id, pt_id, flags, data_type,
>> + data_len, (uintptr_t)data_ptr,
>> + alloc_hwpt.out_hwpt_id, ret);
>> + if (ret) {
>> + error_setg_errno(errp, errno, "Failed to allocate hwpt");
>> + return false;
>> + }
>> +
>> + *out_hwpt = alloc_hwpt.out_hwpt_id;
>> + return true;
>> +}
>> +
>> bool iommufd_backend_get_device_info(IOMMUFDBackend *be, uint32_t
>> devid,
>> uint32_t *type, void *data, uint32_t len,
>> uint64_t *caps, Error **errp)
>> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
>> index 7390621ee927..172553b1f7f8 100644
>> --- a/hw/vfio/iommufd.c
>> +++ b/hw/vfio/iommufd.c
>> @@ -212,10 +212,88 @@ static bool
>> iommufd_cdev_detach_ioas_hwpt(VFIODevice *vbasedev, Error **errp)
>> return true;
>> }
>>
>> +static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
>> + VFIOIOMMUFDContainer *container,
>> + Error **errp)
>> +{
>> + IOMMUFDBackend *iommufd = vbasedev->iommufd;
>> + uint32_t flags = 0;
>> + VFIOIOASHwpt *hwpt;
>> + uint32_t hwpt_id;
>> + int ret;
>> +
>> + /* Try to find a domain */
>> + QLIST_FOREACH(hwpt, &container->hwpt_list, next) {
>> + ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt->hwpt_id,
>> errp);
>> + if (ret) {
>> + /* -EINVAL means the domain is incompatible with the device. */
>> + if (ret == -EINVAL) {
>> + /*
>> + * It is an expected failure and it just means we will try
>> + * another domain, or create one if no existing compatible
>> + * domain is found. Hence why the error is discarded below.
>> + */
>> + error_free(*errp);
>
> Better to have ERRP_GUARD(), other than that,
Fixed on vfio-9.1.
Thanks,
C.
>
> Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>
> Thanks
> Zhenzhong
>
>> + *errp = NULL;
>> + continue;
>> + }
>> +
>> + return false;
>> + } else {
>> + vbasedev->hwpt = hwpt;
>> + QLIST_INSERT_HEAD(&hwpt->device_list, vbasedev, hwpt_next);
>> + return true;
>> + }
>> + }
>> +
>> + if (!iommufd_backend_alloc_hwpt(iommufd, vbasedev->devid,
>> + container->ioas_id, flags,
>> + IOMMU_HWPT_DATA_NONE, 0, NULL,
>> + &hwpt_id, errp)) {
>> + return false;
>> + }
>> +
>> + hwpt = g_malloc0(sizeof(*hwpt));
>> + hwpt->hwpt_id = hwpt_id;
>> + QLIST_INIT(&hwpt->device_list);
>> +
>> + ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt->hwpt_id, errp);
>> + if (ret) {
>> + iommufd_backend_free_id(container->be, hwpt->hwpt_id);
>> + g_free(hwpt);
>> + return false;
>> + }
>> +
>> + vbasedev->hwpt = hwpt;
>> + QLIST_INSERT_HEAD(&hwpt->device_list, vbasedev, hwpt_next);
>> + QLIST_INSERT_HEAD(&container->hwpt_list, hwpt, next);
>> + return true;
>> +}
>> +
>> +static void iommufd_cdev_autodomains_put(VFIODevice *vbasedev,
>> + VFIOIOMMUFDContainer *container)
>> +{
>> + VFIOIOASHwpt *hwpt = vbasedev->hwpt;
>> +
>> + QLIST_REMOVE(vbasedev, hwpt_next);
>> + vbasedev->hwpt = NULL;
>> +
>> + if (QLIST_EMPTY(&hwpt->device_list)) {
>> + QLIST_REMOVE(hwpt, next);
>> + iommufd_backend_free_id(container->be, hwpt->hwpt_id);
>> + g_free(hwpt);
>> + }
>> +}
>> +
>> static bool iommufd_cdev_attach_container(VFIODevice *vbasedev,
>> VFIOIOMMUFDContainer *container,
>> Error **errp)
>> {
>> + /* mdevs aren't physical devices and will fail with auto domains */
>> + if (!vbasedev->mdev) {
>> + return iommufd_cdev_autodomains_get(vbasedev, container, errp);
>> + }
>> +
>> return !iommufd_cdev_attach_ioas_hwpt(vbasedev, container->ioas_id,
>> errp);
>> }
>>
>> @@ -227,6 +305,11 @@ static void
>> iommufd_cdev_detach_container(VFIODevice *vbasedev,
>> if (!iommufd_cdev_detach_ioas_hwpt(vbasedev, &err)) {
>> error_report_err(err);
>> }
>> +
>> + if (vbasedev->hwpt) {
>> + iommufd_cdev_autodomains_put(vbasedev, container);
>> + }
>> +
>> }
>>
>> static void iommufd_cdev_container_destroy(VFIOIOMMUFDContainer
>> *container)
>> @@ -354,6 +437,7 @@ static bool iommufd_cdev_attach(const char *name,
>> VFIODevice *vbasedev,
>> container =
>> VFIO_IOMMU_IOMMUFD(object_new(TYPE_VFIO_IOMMU_IOMMUFD));
>> container->be = vbasedev->iommufd;
>> container->ioas_id = ioas_id;
>> + QLIST_INIT(&container->hwpt_list);
>>
>> bcontainer = &container->bcontainer;
>> vfio_address_space_insert(space, bcontainer);
>> diff --git a/backends/trace-events b/backends/trace-events
>> index 211e6f374adc..4d8ac02fe7d6 100644
>> --- a/backends/trace-events
>> +++ b/backends/trace-events
>> @@ -14,4 +14,5 @@ iommufd_backend_map_dma(int iommufd, uint32_t
>> ioas, uint64_t iova, uint64_t size
>> iommufd_backend_unmap_dma_non_exist(int iommufd, uint32_t ioas,
>> uint64_t iova, uint64_t size, int ret) " Unmap nonexistent mapping:
>> iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" (%d)"
>> iommufd_backend_unmap_dma(int iommufd, uint32_t ioas, uint64_t iova,
>> uint64_t size, int ret) " iommufd=%d ioas=%d iova=0x%"PRIx64"
>> size=0x%"PRIx64" (%d)"
>> iommufd_backend_alloc_ioas(int iommufd, uint32_t ioas) " iommufd=%d
>> ioas=%d"
>> +iommufd_backend_alloc_hwpt(int iommufd, uint32_t dev_id, uint32_t
>> pt_id, uint32_t flags, uint32_t hwpt_type, uint32_t len, uint64_t data_ptr,
>> uint32_t out_hwpt_id, int ret) " iommufd=%d dev_id=%u pt_id=%u
>> flags=0x%x hwpt_type=%u len=%u data_ptr=0x%"PRIx64" out_hwpt=%u
>> (%d)"
>> iommufd_backend_free_id(int iommufd, uint32_t id, int ret) " iommufd=%d
>> id=%d (%d)"
>> --
>> 2.17.2
>
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [PATCH v6 1/9] vfio/iommufd: Introduce auto domain creation
2024-07-22 21:13 ` [PATCH v6 1/9] vfio/iommufd: Introduce auto domain creation Joao Martins
2024-07-23 4:38 ` Duan, Zhenzhong
@ 2024-07-23 7:18 ` Eric Auger
1 sibling, 0 replies; 51+ messages in thread
From: Eric Auger @ 2024-07-23 7:18 UTC (permalink / raw)
To: Joao Martins, qemu-devel
Cc: Yi Liu, Zhenzhong Duan, Alex Williamson, Cedric Le Goater,
Jason Gunthorpe, Avihai Horon
On 7/22/24 23:13, Joao Martins wrote:
> There are generally two modes of operation for IOMMUFD:
>
> 1) The simple user API which intends to perform relatively simple things
> with IOMMUs e.g. DPDK. The process generally creates an IOAS and attaches
> to VFIO and mainly performs IOAS_MAP and UNMAP.
>
> 2) The native IOMMUFD API where you have fine grained control of the
> IOMMU domain and model it accordingly. This is where most new features
> are being steered to.
>
> For dirty tracking 2) is required, as it needs to ensure that
> the stage-2/parent IOMMU domain will only attach devices
> that support dirty tracking (so far it is all homogeneous in x86, likely
> not the case for smmuv3). Such invariant on dirty tracking provides a
> useful guarantee to VMMs that will refuse incompatible device
> attachments for IOMMU domains.
>
> Dirty tracking insurance is enforced via HWPT_ALLOC, which is
> responsible for creating an IOMMU domain. This is in contrast to the
> 'simple API' where the IOMMU domain is created by IOMMUFD automatically
> when it attaches to VFIO (usually referred as autodomains) but it has
> the needed handling for mdevs.
>
> To support dirty tracking with the advanced IOMMUFD API, it needs
> similar logic, where IOMMU domains are created and devices attached to
> compatible domains. Essentially mimicking kernel
> iommufd_device_auto_get_domain(). With mdevs given there's no IOMMU domain
> it falls back to IOAS attach.
>
> The auto domain logic allows different IOMMU domains to be created when
> DMA dirty tracking is not desired (and VF can provide it), and others where
> it is. Here it is not used in this way given how VFIODevice migration
> state is initialized after the device attachment. But such mixed mode of
> IOMMU dirty tracking + device dirty tracking is an improvement that can
> be added on. Keep the 'all or nothing' type1 approach that we have
> been using so far between container vs device dirty tracking.
>
> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Eric
> ---
> include/hw/vfio/vfio-common.h | 9 ++++
> include/sysemu/iommufd.h | 5 +++
> backends/iommufd.c | 30 +++++++++++++
> hw/vfio/iommufd.c | 84 +++++++++++++++++++++++++++++++++++
> backends/trace-events | 1 +
> 5 files changed, 129 insertions(+)
>
> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
> index 98acae8c1c97..1a96678f8c38 100644
> --- a/include/hw/vfio/vfio-common.h
> +++ b/include/hw/vfio/vfio-common.h
> @@ -95,10 +95,17 @@ typedef struct VFIOHostDMAWindow {
>
> typedef struct IOMMUFDBackend IOMMUFDBackend;
>
> +typedef struct VFIOIOASHwpt {
> + uint32_t hwpt_id;
> + QLIST_HEAD(, VFIODevice) device_list;
> + QLIST_ENTRY(VFIOIOASHwpt) next;
> +} VFIOIOASHwpt;
> +
> typedef struct VFIOIOMMUFDContainer {
> VFIOContainerBase bcontainer;
> IOMMUFDBackend *be;
> uint32_t ioas_id;
> + QLIST_HEAD(, VFIOIOASHwpt) hwpt_list;
> } VFIOIOMMUFDContainer;
>
> OBJECT_DECLARE_SIMPLE_TYPE(VFIOIOMMUFDContainer, VFIO_IOMMU_IOMMUFD);
> @@ -135,6 +142,8 @@ typedef struct VFIODevice {
> HostIOMMUDevice *hiod;
> int devid;
> IOMMUFDBackend *iommufd;
> + VFIOIOASHwpt *hwpt;
> + QLIST_ENTRY(VFIODevice) hwpt_next;
> } VFIODevice;
>
> struct VFIODeviceOps {
> diff --git a/include/sysemu/iommufd.h b/include/sysemu/iommufd.h
> index 57d502a1c79a..e917e7591d05 100644
> --- a/include/sysemu/iommufd.h
> +++ b/include/sysemu/iommufd.h
> @@ -50,6 +50,11 @@ int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id,
> bool iommufd_backend_get_device_info(IOMMUFDBackend *be, uint32_t devid,
> uint32_t *type, void *data, uint32_t len,
> uint64_t *caps, Error **errp);
> +bool iommufd_backend_alloc_hwpt(IOMMUFDBackend *be, uint32_t dev_id,
> + uint32_t pt_id, uint32_t flags,
> + uint32_t data_type, uint32_t data_len,
> + void *data_ptr, uint32_t *out_hwpt,
> + Error **errp);
>
> #define TYPE_HOST_IOMMU_DEVICE_IOMMUFD TYPE_HOST_IOMMU_DEVICE "-iommufd"
> #endif
> diff --git a/backends/iommufd.c b/backends/iommufd.c
> index 48dfd3962474..60a3d14bfab4 100644
> --- a/backends/iommufd.c
> +++ b/backends/iommufd.c
> @@ -207,6 +207,36 @@ int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id,
> return ret;
> }
>
> +bool iommufd_backend_alloc_hwpt(IOMMUFDBackend *be, uint32_t dev_id,
> + uint32_t pt_id, uint32_t flags,
> + uint32_t data_type, uint32_t data_len,
> + void *data_ptr, uint32_t *out_hwpt,
> + Error **errp)
> +{
> + int ret, fd = be->fd;
> + struct iommu_hwpt_alloc alloc_hwpt = {
> + .size = sizeof(struct iommu_hwpt_alloc),
> + .flags = flags,
> + .dev_id = dev_id,
> + .pt_id = pt_id,
> + .data_type = data_type,
> + .data_len = data_len,
> + .data_uptr = (uintptr_t)data_ptr,
> + };
> +
> + ret = ioctl(fd, IOMMU_HWPT_ALLOC, &alloc_hwpt);
> + trace_iommufd_backend_alloc_hwpt(fd, dev_id, pt_id, flags, data_type,
> + data_len, (uintptr_t)data_ptr,
> + alloc_hwpt.out_hwpt_id, ret);
> + if (ret) {
> + error_setg_errno(errp, errno, "Failed to allocate hwpt");
> + return false;
> + }
> +
> + *out_hwpt = alloc_hwpt.out_hwpt_id;
> + return true;
> +}
> +
> bool iommufd_backend_get_device_info(IOMMUFDBackend *be, uint32_t devid,
> uint32_t *type, void *data, uint32_t len,
> uint64_t *caps, Error **errp)
> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
> index 7390621ee927..172553b1f7f8 100644
> --- a/hw/vfio/iommufd.c
> +++ b/hw/vfio/iommufd.c
> @@ -212,10 +212,88 @@ static bool iommufd_cdev_detach_ioas_hwpt(VFIODevice *vbasedev, Error **errp)
> return true;
> }
>
> +static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
> + VFIOIOMMUFDContainer *container,
> + Error **errp)
> +{
> + IOMMUFDBackend *iommufd = vbasedev->iommufd;
> + uint32_t flags = 0;
> + VFIOIOASHwpt *hwpt;
> + uint32_t hwpt_id;
> + int ret;
> +
> + /* Try to find a domain */
> + QLIST_FOREACH(hwpt, &container->hwpt_list, next) {
> + ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt->hwpt_id, errp);
> + if (ret) {
> + /* -EINVAL means the domain is incompatible with the device. */
> + if (ret == -EINVAL) {
> + /*
> + * It is an expected failure and it just means we will try
> + * another domain, or create one if no existing compatible
> + * domain is found. Hence why the error is discarded below.
> + */
> + error_free(*errp);
> + *errp = NULL;
> + continue;
> + }
> +
> + return false;
> + } else {
> + vbasedev->hwpt = hwpt;
> + QLIST_INSERT_HEAD(&hwpt->device_list, vbasedev, hwpt_next);
> + return true;
> + }
> + }
> +
> + if (!iommufd_backend_alloc_hwpt(iommufd, vbasedev->devid,
> + container->ioas_id, flags,
> + IOMMU_HWPT_DATA_NONE, 0, NULL,
> + &hwpt_id, errp)) {
> + return false;
> + }
> +
> + hwpt = g_malloc0(sizeof(*hwpt));
> + hwpt->hwpt_id = hwpt_id;
> + QLIST_INIT(&hwpt->device_list);
> +
> + ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt->hwpt_id, errp);
> + if (ret) {
> + iommufd_backend_free_id(container->be, hwpt->hwpt_id);
> + g_free(hwpt);
> + return false;
> + }
> +
> + vbasedev->hwpt = hwpt;
> + QLIST_INSERT_HEAD(&hwpt->device_list, vbasedev, hwpt_next);
> + QLIST_INSERT_HEAD(&container->hwpt_list, hwpt, next);
> + return true;
> +}
> +
> +static void iommufd_cdev_autodomains_put(VFIODevice *vbasedev,
> + VFIOIOMMUFDContainer *container)
> +{
> + VFIOIOASHwpt *hwpt = vbasedev->hwpt;
> +
> + QLIST_REMOVE(vbasedev, hwpt_next);
> + vbasedev->hwpt = NULL;
> +
> + if (QLIST_EMPTY(&hwpt->device_list)) {
> + QLIST_REMOVE(hwpt, next);
> + iommufd_backend_free_id(container->be, hwpt->hwpt_id);
> + g_free(hwpt);
> + }
> +}
> +
> static bool iommufd_cdev_attach_container(VFIODevice *vbasedev,
> VFIOIOMMUFDContainer *container,
> Error **errp)
> {
> + /* mdevs aren't physical devices and will fail with auto domains */
> + if (!vbasedev->mdev) {
> + return iommufd_cdev_autodomains_get(vbasedev, container, errp);
> + }
> +
> return !iommufd_cdev_attach_ioas_hwpt(vbasedev, container->ioas_id, errp);
> }
>
> @@ -227,6 +305,11 @@ static void iommufd_cdev_detach_container(VFIODevice *vbasedev,
> if (!iommufd_cdev_detach_ioas_hwpt(vbasedev, &err)) {
> error_report_err(err);
> }
> +
> + if (vbasedev->hwpt) {
> + iommufd_cdev_autodomains_put(vbasedev, container);
> + }
> +
> }
>
> static void iommufd_cdev_container_destroy(VFIOIOMMUFDContainer *container)
> @@ -354,6 +437,7 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
> container = VFIO_IOMMU_IOMMUFD(object_new(TYPE_VFIO_IOMMU_IOMMUFD));
> container->be = vbasedev->iommufd;
> container->ioas_id = ioas_id;
> + QLIST_INIT(&container->hwpt_list);
>
> bcontainer = &container->bcontainer;
> vfio_address_space_insert(space, bcontainer);
> diff --git a/backends/trace-events b/backends/trace-events
> index 211e6f374adc..4d8ac02fe7d6 100644
> --- a/backends/trace-events
> +++ b/backends/trace-events
> @@ -14,4 +14,5 @@ iommufd_backend_map_dma(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size
> iommufd_backend_unmap_dma_non_exist(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, int ret) " Unmap nonexistent mapping: iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" (%d)"
> iommufd_backend_unmap_dma(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, int ret) " iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" (%d)"
> iommufd_backend_alloc_ioas(int iommufd, uint32_t ioas) " iommufd=%d ioas=%d"
> +iommufd_backend_alloc_hwpt(int iommufd, uint32_t dev_id, uint32_t pt_id, uint32_t flags, uint32_t hwpt_type, uint32_t len, uint64_t data_ptr, uint32_t out_hwpt_id, int ret) " iommufd=%d dev_id=%u pt_id=%u flags=0x%x hwpt_type=%u len=%u data_ptr=0x%"PRIx64" out_hwpt=%u (%d)"
> iommufd_backend_free_id(int iommufd, uint32_t id, int ret) " iommufd=%d id=%d (%d)"
^ permalink raw reply [flat|nested] 51+ messages in thread
* [PATCH v6 2/9] vfio/{iommufd,container}: Remove caps::aw_bits
2024-07-22 21:13 [PATCH v6 0/9] hw/iommufd: IOMMUFD Dirty Tracking Joao Martins
2024-07-22 21:13 ` [PATCH v6 1/9] vfio/iommufd: Introduce auto domain creation Joao Martins
@ 2024-07-22 21:13 ` Joao Martins
2024-07-23 7:21 ` Eric Auger
2024-07-22 21:13 ` [PATCH v6 3/9] vfio/iommufd: Add hw_caps field to HostIOMMUDeviceCaps Joao Martins
` (8 subsequent siblings)
10 siblings, 1 reply; 51+ messages in thread
From: Joao Martins @ 2024-07-22 21:13 UTC (permalink / raw)
To: qemu-devel
Cc: Yi Liu, Eric Auger, Zhenzhong Duan, Alex Williamson,
Cedric Le Goater, Jason Gunthorpe, Avihai Horon, Joao Martins
Remove caps::aw_bits which requires the bcontainer::iova_ranges being
initialized after device is actually attached. Instead defer that to
.get_cap() and call vfio_device_get_aw_bits() directly.
This is in preparation for HostIOMMUDevice::realize() being called early
during attach_device().
Suggested-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
Reviewed-by: Cédric Le Goater <clg@redhat.com>
---
include/sysemu/host_iommu_device.h | 3 ---
backends/iommufd.c | 3 ++-
hw/vfio/container.c | 5 +----
hw/vfio/iommufd.c | 1 -
4 files changed, 3 insertions(+), 9 deletions(-)
diff --git a/include/sysemu/host_iommu_device.h b/include/sysemu/host_iommu_device.h
index c1bf74ae2c7a..d1c10ff7c239 100644
--- a/include/sysemu/host_iommu_device.h
+++ b/include/sysemu/host_iommu_device.h
@@ -19,12 +19,9 @@
* struct HostIOMMUDeviceCaps - Define host IOMMU device capabilities.
*
* @type: host platform IOMMU type.
- *
- * @aw_bits: host IOMMU address width. 0xff if no limitation.
*/
typedef struct HostIOMMUDeviceCaps {
uint32_t type;
- uint8_t aw_bits;
} HostIOMMUDeviceCaps;
#define TYPE_HOST_IOMMU_DEVICE "host-iommu-device"
diff --git a/backends/iommufd.c b/backends/iommufd.c
index 60a3d14bfab4..06b135111f30 100644
--- a/backends/iommufd.c
+++ b/backends/iommufd.c
@@ -18,6 +18,7 @@
#include "qemu/error-report.h"
#include "monitor/monitor.h"
#include "trace.h"
+#include "hw/vfio/vfio-common.h"
#include <sys/ioctl.h>
#include <linux/iommufd.h>
@@ -269,7 +270,7 @@ static int hiod_iommufd_get_cap(HostIOMMUDevice *hiod, int cap, Error **errp)
case HOST_IOMMU_DEVICE_CAP_IOMMU_TYPE:
return caps->type;
case HOST_IOMMU_DEVICE_CAP_AW_BITS:
- return caps->aw_bits;
+ return vfio_device_get_aw_bits(hiod->agent);
default:
error_setg(errp, "%s: unsupported capability %x", hiod->name, cap);
return -EINVAL;
diff --git a/hw/vfio/container.c b/hw/vfio/container.c
index ce9a858e5621..10cb4b4320ac 100644
--- a/hw/vfio/container.c
+++ b/hw/vfio/container.c
@@ -1141,7 +1141,6 @@ static bool hiod_legacy_vfio_realize(HostIOMMUDevice *hiod, void *opaque,
VFIODevice *vdev = opaque;
hiod->name = g_strdup(vdev->name);
- hiod->caps.aw_bits = vfio_device_get_aw_bits(vdev);
hiod->agent = opaque;
return true;
@@ -1150,11 +1149,9 @@ static bool hiod_legacy_vfio_realize(HostIOMMUDevice *hiod, void *opaque,
static int hiod_legacy_vfio_get_cap(HostIOMMUDevice *hiod, int cap,
Error **errp)
{
- HostIOMMUDeviceCaps *caps = &hiod->caps;
-
switch (cap) {
case HOST_IOMMU_DEVICE_CAP_AW_BITS:
- return caps->aw_bits;
+ return vfio_device_get_aw_bits(hiod->agent);
default:
error_setg(errp, "%s: unsupported capability %x", hiod->name, cap);
return -EINVAL;
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
index 172553b1f7f8..5bb623879abe 100644
--- a/hw/vfio/iommufd.c
+++ b/hw/vfio/iommufd.c
@@ -724,7 +724,6 @@ static bool hiod_iommufd_vfio_realize(HostIOMMUDevice *hiod, void *opaque,
hiod->name = g_strdup(vdev->name);
caps->type = type;
- caps->aw_bits = vfio_device_get_aw_bits(vdev);
return true;
}
--
2.17.2
^ permalink raw reply related [flat|nested] 51+ messages in thread
* Re: [PATCH v6 2/9] vfio/{iommufd,container}: Remove caps::aw_bits
2024-07-22 21:13 ` [PATCH v6 2/9] vfio/{iommufd,container}: Remove caps::aw_bits Joao Martins
@ 2024-07-23 7:21 ` Eric Auger
0 siblings, 0 replies; 51+ messages in thread
From: Eric Auger @ 2024-07-23 7:21 UTC (permalink / raw)
To: Joao Martins, qemu-devel
Cc: Yi Liu, Zhenzhong Duan, Alex Williamson, Cedric Le Goater,
Jason Gunthorpe, Avihai Horon
On 7/22/24 23:13, Joao Martins wrote:
> Remove caps::aw_bits which requires the bcontainer::iova_ranges being
> initialized after device is actually attached. Instead defer that to
> .get_cap() and call vfio_device_get_aw_bits() directly.
>
> This is in preparation for HostIOMMUDevice::realize() being called early
> during attach_device().
>
> Suggested-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
> Reviewed-by: Cédric Le Goater <clg@redhat.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Eric
> ---
> include/sysemu/host_iommu_device.h | 3 ---
> backends/iommufd.c | 3 ++-
> hw/vfio/container.c | 5 +----
> hw/vfio/iommufd.c | 1 -
> 4 files changed, 3 insertions(+), 9 deletions(-)
>
> diff --git a/include/sysemu/host_iommu_device.h b/include/sysemu/host_iommu_device.h
> index c1bf74ae2c7a..d1c10ff7c239 100644
> --- a/include/sysemu/host_iommu_device.h
> +++ b/include/sysemu/host_iommu_device.h
> @@ -19,12 +19,9 @@
> * struct HostIOMMUDeviceCaps - Define host IOMMU device capabilities.
> *
> * @type: host platform IOMMU type.
> - *
> - * @aw_bits: host IOMMU address width. 0xff if no limitation.
> */
> typedef struct HostIOMMUDeviceCaps {
> uint32_t type;
> - uint8_t aw_bits;
> } HostIOMMUDeviceCaps;
>
> #define TYPE_HOST_IOMMU_DEVICE "host-iommu-device"
> diff --git a/backends/iommufd.c b/backends/iommufd.c
> index 60a3d14bfab4..06b135111f30 100644
> --- a/backends/iommufd.c
> +++ b/backends/iommufd.c
> @@ -18,6 +18,7 @@
> #include "qemu/error-report.h"
> #include "monitor/monitor.h"
> #include "trace.h"
> +#include "hw/vfio/vfio-common.h"
> #include <sys/ioctl.h>
> #include <linux/iommufd.h>
>
> @@ -269,7 +270,7 @@ static int hiod_iommufd_get_cap(HostIOMMUDevice *hiod, int cap, Error **errp)
> case HOST_IOMMU_DEVICE_CAP_IOMMU_TYPE:
> return caps->type;
> case HOST_IOMMU_DEVICE_CAP_AW_BITS:
> - return caps->aw_bits;
> + return vfio_device_get_aw_bits(hiod->agent);
> default:
> error_setg(errp, "%s: unsupported capability %x", hiod->name, cap);
> return -EINVAL;
> diff --git a/hw/vfio/container.c b/hw/vfio/container.c
> index ce9a858e5621..10cb4b4320ac 100644
> --- a/hw/vfio/container.c
> +++ b/hw/vfio/container.c
> @@ -1141,7 +1141,6 @@ static bool hiod_legacy_vfio_realize(HostIOMMUDevice *hiod, void *opaque,
> VFIODevice *vdev = opaque;
>
> hiod->name = g_strdup(vdev->name);
> - hiod->caps.aw_bits = vfio_device_get_aw_bits(vdev);
> hiod->agent = opaque;
>
> return true;
> @@ -1150,11 +1149,9 @@ static bool hiod_legacy_vfio_realize(HostIOMMUDevice *hiod, void *opaque,
> static int hiod_legacy_vfio_get_cap(HostIOMMUDevice *hiod, int cap,
> Error **errp)
> {
> - HostIOMMUDeviceCaps *caps = &hiod->caps;
> -
> switch (cap) {
> case HOST_IOMMU_DEVICE_CAP_AW_BITS:
> - return caps->aw_bits;
> + return vfio_device_get_aw_bits(hiod->agent);
> default:
> error_setg(errp, "%s: unsupported capability %x", hiod->name, cap);
> return -EINVAL;
> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
> index 172553b1f7f8..5bb623879abe 100644
> --- a/hw/vfio/iommufd.c
> +++ b/hw/vfio/iommufd.c
> @@ -724,7 +724,6 @@ static bool hiod_iommufd_vfio_realize(HostIOMMUDevice *hiod, void *opaque,
>
> hiod->name = g_strdup(vdev->name);
> caps->type = type;
> - caps->aw_bits = vfio_device_get_aw_bits(vdev);
>
> return true;
> }
^ permalink raw reply [flat|nested] 51+ messages in thread
* [PATCH v6 3/9] vfio/iommufd: Add hw_caps field to HostIOMMUDeviceCaps
2024-07-22 21:13 [PATCH v6 0/9] hw/iommufd: IOMMUFD Dirty Tracking Joao Martins
2024-07-22 21:13 ` [PATCH v6 1/9] vfio/iommufd: Introduce auto domain creation Joao Martins
2024-07-22 21:13 ` [PATCH v6 2/9] vfio/{iommufd,container}: Remove caps::aw_bits Joao Martins
@ 2024-07-22 21:13 ` Joao Martins
2024-07-23 5:11 ` Duan, Zhenzhong
2024-07-23 7:26 ` Eric Auger
2024-07-22 21:13 ` [PATCH v6 4/9] vfio/{iommufd, container}: Invoke HostIOMMUDevice::realize() during attach_device() Joao Martins via
` (7 subsequent siblings)
10 siblings, 2 replies; 51+ messages in thread
From: Joao Martins @ 2024-07-22 21:13 UTC (permalink / raw)
To: qemu-devel
Cc: Yi Liu, Eric Auger, Zhenzhong Duan, Alex Williamson,
Cedric Le Goater, Jason Gunthorpe, Avihai Horon, Joao Martins
Store the value of @caps returned by iommufd_backend_get_device_info()
in a new field HostIOMMUDeviceCaps::hw_caps. Right now the only value is
whether device IOMMU supports dirty tracking (IOMMU_HW_CAP_DIRTY_TRACKING).
This is in preparation for HostIOMMUDevice::realize() being called early
during attach_device().
Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
Reviewed-by: Cédric Le Goater <clg@redhat.com>
---
include/sysemu/host_iommu_device.h | 4 ++++
hw/vfio/iommufd.c | 1 +
2 files changed, 5 insertions(+)
diff --git a/include/sysemu/host_iommu_device.h b/include/sysemu/host_iommu_device.h
index d1c10ff7c239..809cced4ba5c 100644
--- a/include/sysemu/host_iommu_device.h
+++ b/include/sysemu/host_iommu_device.h
@@ -19,9 +19,13 @@
* struct HostIOMMUDeviceCaps - Define host IOMMU device capabilities.
*
* @type: host platform IOMMU type.
+ *
+ * @hw_caps: host platform IOMMU capabilities (e.g. on IOMMUFD this represents
+ * the @out_capabilities value returned from IOMMU_GET_HW_INFO ioctl)
*/
typedef struct HostIOMMUDeviceCaps {
uint32_t type;
+ uint64_t hw_caps;
} HostIOMMUDeviceCaps;
#define TYPE_HOST_IOMMU_DEVICE "host-iommu-device"
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
index 5bb623879abe..5e2fc1ce089d 100644
--- a/hw/vfio/iommufd.c
+++ b/hw/vfio/iommufd.c
@@ -724,6 +724,7 @@ static bool hiod_iommufd_vfio_realize(HostIOMMUDevice *hiod, void *opaque,
hiod->name = g_strdup(vdev->name);
caps->type = type;
+ caps->hw_caps = hw_caps;
return true;
}
--
2.17.2
^ permalink raw reply related [flat|nested] 51+ messages in thread
* RE: [PATCH v6 3/9] vfio/iommufd: Add hw_caps field to HostIOMMUDeviceCaps
2024-07-22 21:13 ` [PATCH v6 3/9] vfio/iommufd: Add hw_caps field to HostIOMMUDeviceCaps Joao Martins
@ 2024-07-23 5:11 ` Duan, Zhenzhong
2024-07-23 7:26 ` Eric Auger
1 sibling, 0 replies; 51+ messages in thread
From: Duan, Zhenzhong @ 2024-07-23 5:11 UTC (permalink / raw)
To: Joao Martins, qemu-devel@nongnu.org
Cc: Liu, Yi L, Eric Auger, Alex Williamson, Cedric Le Goater,
Jason Gunthorpe, Avihai Horon
>-----Original Message-----
>From: Joao Martins <joao.m.martins@oracle.com>
>Subject: [PATCH v6 3/9] vfio/iommufd: Add hw_caps field to
>HostIOMMUDeviceCaps
>
>Store the value of @caps returned by iommufd_backend_get_device_info()
>in a new field HostIOMMUDeviceCaps::hw_caps. Right now the only value is
>whether device IOMMU supports dirty tracking
>(IOMMU_HW_CAP_DIRTY_TRACKING).
>
>This is in preparation for HostIOMMUDevice::realize() being called early
>during attach_device().
>
>Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
>Reviewed-by: Cédric Le Goater <clg@redhat.com>
Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Thanks
Zhenzhong
>---
> include/sysemu/host_iommu_device.h | 4 ++++
> hw/vfio/iommufd.c | 1 +
> 2 files changed, 5 insertions(+)
>
>diff --git a/include/sysemu/host_iommu_device.h
>b/include/sysemu/host_iommu_device.h
>index d1c10ff7c239..809cced4ba5c 100644
>--- a/include/sysemu/host_iommu_device.h
>+++ b/include/sysemu/host_iommu_device.h
>@@ -19,9 +19,13 @@
> * struct HostIOMMUDeviceCaps - Define host IOMMU device capabilities.
> *
> * @type: host platform IOMMU type.
>+ *
>+ * @hw_caps: host platform IOMMU capabilities (e.g. on IOMMUFD this
>represents
>+ * the @out_capabilities value returned from
>IOMMU_GET_HW_INFO ioctl)
> */
> typedef struct HostIOMMUDeviceCaps {
> uint32_t type;
>+ uint64_t hw_caps;
> } HostIOMMUDeviceCaps;
>
> #define TYPE_HOST_IOMMU_DEVICE "host-iommu-device"
>diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
>index 5bb623879abe..5e2fc1ce089d 100644
>--- a/hw/vfio/iommufd.c
>+++ b/hw/vfio/iommufd.c
>@@ -724,6 +724,7 @@ static bool
>hiod_iommufd_vfio_realize(HostIOMMUDevice *hiod, void *opaque,
>
> hiod->name = g_strdup(vdev->name);
> caps->type = type;
>+ caps->hw_caps = hw_caps;
>
> return true;
> }
>--
>2.17.2
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [PATCH v6 3/9] vfio/iommufd: Add hw_caps field to HostIOMMUDeviceCaps
2024-07-22 21:13 ` [PATCH v6 3/9] vfio/iommufd: Add hw_caps field to HostIOMMUDeviceCaps Joao Martins
2024-07-23 5:11 ` Duan, Zhenzhong
@ 2024-07-23 7:26 ` Eric Auger
1 sibling, 0 replies; 51+ messages in thread
From: Eric Auger @ 2024-07-23 7:26 UTC (permalink / raw)
To: Joao Martins, qemu-devel
Cc: Yi Liu, Zhenzhong Duan, Alex Williamson, Cedric Le Goater,
Jason Gunthorpe, Avihai Horon
On 7/22/24 23:13, Joao Martins wrote:
> Store the value of @caps returned by iommufd_backend_get_device_info()
> in a new field HostIOMMUDeviceCaps::hw_caps. Right now the only value is
> whether device IOMMU supports dirty tracking (IOMMU_HW_CAP_DIRTY_TRACKING).
>
> This is in preparation for HostIOMMUDevice::realize() being called early
> during attach_device().
>
> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
> Reviewed-by: Cédric Le Goater <clg@redhat.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Eric
> ---
> include/sysemu/host_iommu_device.h | 4 ++++
> hw/vfio/iommufd.c | 1 +
> 2 files changed, 5 insertions(+)
>
> diff --git a/include/sysemu/host_iommu_device.h b/include/sysemu/host_iommu_device.h
> index d1c10ff7c239..809cced4ba5c 100644
> --- a/include/sysemu/host_iommu_device.h
> +++ b/include/sysemu/host_iommu_device.h
> @@ -19,9 +19,13 @@
> * struct HostIOMMUDeviceCaps - Define host IOMMU device capabilities.
> *
> * @type: host platform IOMMU type.
> + *
> + * @hw_caps: host platform IOMMU capabilities (e.g. on IOMMUFD this represents
> + * the @out_capabilities value returned from IOMMU_GET_HW_INFO ioctl)
> */
> typedef struct HostIOMMUDeviceCaps {
> uint32_t type;
> + uint64_t hw_caps;
> } HostIOMMUDeviceCaps;
>
> #define TYPE_HOST_IOMMU_DEVICE "host-iommu-device"
> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
> index 5bb623879abe..5e2fc1ce089d 100644
> --- a/hw/vfio/iommufd.c
> +++ b/hw/vfio/iommufd.c
> @@ -724,6 +724,7 @@ static bool hiod_iommufd_vfio_realize(HostIOMMUDevice *hiod, void *opaque,
>
> hiod->name = g_strdup(vdev->name);
> caps->type = type;
> + caps->hw_caps = hw_caps;
>
> return true;
> }
^ permalink raw reply [flat|nested] 51+ messages in thread
* [PATCH v6 4/9] vfio/{iommufd, container}: Invoke HostIOMMUDevice::realize() during attach_device()
2024-07-22 21:13 [PATCH v6 0/9] hw/iommufd: IOMMUFD Dirty Tracking Joao Martins
` (2 preceding siblings ...)
2024-07-22 21:13 ` [PATCH v6 3/9] vfio/iommufd: Add hw_caps field to HostIOMMUDeviceCaps Joao Martins
@ 2024-07-22 21:13 ` Joao Martins via
2024-07-23 7:38 ` [PATCH v6 4/9] vfio/{iommufd,container}: " Eric Auger
2024-07-22 21:13 ` [PATCH v6 5/9] vfio/iommufd: Probe and request hwpt dirty tracking capability Joao Martins
` (6 subsequent siblings)
10 siblings, 1 reply; 51+ messages in thread
From: Joao Martins via @ 2024-07-22 21:13 UTC (permalink / raw)
To: qemu-devel
Cc: Yi Liu, Eric Auger, Zhenzhong Duan, Alex Williamson,
Cedric Le Goater, Jason Gunthorpe, Avihai Horon, Joao Martins
Move the HostIOMMUDevice::realize() to be invoked during the attach of the device
before we allocate IOMMUFD hardware pagetable objects (HWPT). This allows the use
of the hw_caps obtained by IOMMU_GET_HW_INFO that essentially tell if the IOMMU
behind the device supports dirty tracking.
Note: The HostIOMMUDevice data from legacy backend is static and doesn't
need any information from the (type1-iommu) backend to be initialized.
In contrast however, the IOMMUFD HostIOMMUDevice data requires the
iommufd FD to be connected and to have a devid to be able to successfully
GET_HW_INFO. This means vfio_device_hiod_realize() is called in
different places within the backend .attach_device() implementation.
Suggested-by: Cédric Le Goater <clg@redhat.com>
Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
---
include/hw/vfio/vfio-common.h | 1 +
hw/vfio/common.c | 16 ++++++----------
hw/vfio/container.c | 4 ++++
hw/vfio/helpers.c | 11 +++++++++++
hw/vfio/iommufd.c | 4 ++++
5 files changed, 26 insertions(+), 10 deletions(-)
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 1a96678f8c38..4e44b26d3c45 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -242,6 +242,7 @@ void vfio_region_finalize(VFIORegion *region);
void vfio_reset_handler(void *opaque);
struct vfio_device_info *vfio_get_device_info(int fd);
bool vfio_device_is_mdev(VFIODevice *vbasedev);
+bool vfio_device_hiod_realize(VFIODevice *vbasedev, Error **errp);
bool vfio_attach_device(char *name, VFIODevice *vbasedev,
AddressSpace *as, Error **errp);
void vfio_detach_device(VFIODevice *vbasedev);
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 784e266e6aab..da12cbd56408 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -1537,7 +1537,7 @@ bool vfio_attach_device(char *name, VFIODevice *vbasedev,
{
const VFIOIOMMUClass *ops =
VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_LEGACY));
- HostIOMMUDevice *hiod;
+ HostIOMMUDevice *hiod = NULL;
if (vbasedev->iommufd) {
ops = VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD));
@@ -1545,21 +1545,17 @@ bool vfio_attach_device(char *name, VFIODevice *vbasedev,
assert(ops);
- if (!ops->attach_device(name, vbasedev, as, errp)) {
- return false;
- }
- if (vbasedev->mdev) {
- return true;
+ if (!vbasedev->mdev) {
+ hiod = HOST_IOMMU_DEVICE(object_new(ops->hiod_typename));
+ vbasedev->hiod = hiod;
}
- hiod = HOST_IOMMU_DEVICE(object_new(ops->hiod_typename));
- if (!HOST_IOMMU_DEVICE_GET_CLASS(hiod)->realize(hiod, vbasedev, errp)) {
+ if (!ops->attach_device(name, vbasedev, as, errp)) {
object_unref(hiod);
- ops->detach_device(vbasedev);
+ vbasedev->hiod = NULL;
return false;
}
- vbasedev->hiod = hiod;
return true;
}
diff --git a/hw/vfio/container.c b/hw/vfio/container.c
index 10cb4b4320ac..9ccdb639ac84 100644
--- a/hw/vfio/container.c
+++ b/hw/vfio/container.c
@@ -914,6 +914,10 @@ static bool vfio_legacy_attach_device(const char *name, VFIODevice *vbasedev,
trace_vfio_attach_device(vbasedev->name, groupid);
+ if (!vfio_device_hiod_realize(vbasedev, errp)) {
+ return false;
+ }
+
group = vfio_get_group(groupid, as, errp);
if (!group) {
return false;
diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c
index 7e23e9080c9d..ea15c79db0a3 100644
--- a/hw/vfio/helpers.c
+++ b/hw/vfio/helpers.c
@@ -689,3 +689,14 @@ bool vfio_device_is_mdev(VFIODevice *vbasedev)
subsys = realpath(tmp, NULL);
return subsys && (strcmp(subsys, "/sys/bus/mdev") == 0);
}
+
+bool vfio_device_hiod_realize(VFIODevice *vbasedev, Error **errp)
+{
+ HostIOMMUDevice *hiod = vbasedev->hiod;
+
+ if (!hiod) {
+ return true;
+ }
+
+ return HOST_IOMMU_DEVICE_GET_CLASS(hiod)->realize(hiod, vbasedev, errp);
+}
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
index 5e2fc1ce089d..2324bf892c56 100644
--- a/hw/vfio/iommufd.c
+++ b/hw/vfio/iommufd.c
@@ -403,6 +403,10 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
space = vfio_get_address_space(as);
+ if (!vfio_device_hiod_realize(vbasedev, errp)) {
+ return false;
+ }
+
/* try to attach to an existing container in this space */
QLIST_FOREACH(bcontainer, &space->containers, next) {
container = container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer);
--
2.17.2
^ permalink raw reply related [flat|nested] 51+ messages in thread
* Re: [PATCH v6 4/9] vfio/{iommufd,container}: Invoke HostIOMMUDevice::realize() during attach_device()
2024-07-22 21:13 ` [PATCH v6 4/9] vfio/{iommufd, container}: Invoke HostIOMMUDevice::realize() during attach_device() Joao Martins via
@ 2024-07-23 7:38 ` Eric Auger
2024-07-23 7:44 ` Cédric Le Goater
2024-07-23 7:53 ` Joao Martins
0 siblings, 2 replies; 51+ messages in thread
From: Eric Auger @ 2024-07-23 7:38 UTC (permalink / raw)
To: Joao Martins, qemu-devel
Cc: Yi Liu, Zhenzhong Duan, Alex Williamson, Cedric Le Goater,
Jason Gunthorpe, Avihai Horon
Hi Joao,
On 7/22/24 23:13, Joao Martins wrote:
> Move the HostIOMMUDevice::realize() to be invoked during the attach of the device
> before we allocate IOMMUFD hardware pagetable objects (HWPT). This allows the use
> of the hw_caps obtained by IOMMU_GET_HW_INFO that essentially tell if the IOMMU
> behind the device supports dirty tracking.
>
> Note: The HostIOMMUDevice data from legacy backend is static and doesn't
> need any information from the (type1-iommu) backend to be initialized.
> In contrast however, the IOMMUFD HostIOMMUDevice data requires the
> iommufd FD to be connected and having a devid to be able to successfully
Nit: maybe this comment should also be added in iommufd.c before the call
to vfio_device_hiod_realize(), to prevent someone from moving that call
earlier at some point.
> GET_HW_INFO. This means vfio_device_hiod_realize() is called in
> different places within the backend .attach_device() implementation.
>
> Suggested-by: Cédric Le Goater <clg@redhat.cm>
> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
> Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> ---
> include/hw/vfio/vfio-common.h | 1 +
> hw/vfio/common.c | 16 ++++++----------
> hw/vfio/container.c | 4 ++++
> hw/vfio/helpers.c | 11 +++++++++++
> hw/vfio/iommufd.c | 4 ++++
> 5 files changed, 26 insertions(+), 10 deletions(-)
>
> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
> index 1a96678f8c38..4e44b26d3c45 100644
> --- a/include/hw/vfio/vfio-common.h
> +++ b/include/hw/vfio/vfio-common.h
> @@ -242,6 +242,7 @@ void vfio_region_finalize(VFIORegion *region);
> void vfio_reset_handler(void *opaque);
> struct vfio_device_info *vfio_get_device_info(int fd);
> bool vfio_device_is_mdev(VFIODevice *vbasedev);
> +bool vfio_device_hiod_realize(VFIODevice *vbasedev, Error **errp);
> bool vfio_attach_device(char *name, VFIODevice *vbasedev,
> AddressSpace *as, Error **errp);
> void vfio_detach_device(VFIODevice *vbasedev);
> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> index 784e266e6aab..da12cbd56408 100644
> --- a/hw/vfio/common.c
> +++ b/hw/vfio/common.c
> @@ -1537,7 +1537,7 @@ bool vfio_attach_device(char *name, VFIODevice *vbasedev,
> {
> const VFIOIOMMUClass *ops =
> VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_LEGACY));
> - HostIOMMUDevice *hiod;
> + HostIOMMUDevice *hiod = NULL;
>
> if (vbasedev->iommufd) {
> ops = VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD));
> @@ -1545,21 +1545,17 @@ bool vfio_attach_device(char *name, VFIODevice *vbasedev,
>
> assert(ops);
>
> - if (!ops->attach_device(name, vbasedev, as, errp)) {
> - return false;
> - }
>
> - if (vbasedev->mdev) {
> - return true;
> + if (!vbasedev->mdev) {
> + hiod = HOST_IOMMU_DEVICE(object_new(ops->hiod_typename));
> + vbasedev->hiod = hiod;
> }
>
> - hiod = HOST_IOMMU_DEVICE(object_new(ops->hiod_typename));
> - if (!HOST_IOMMU_DEVICE_GET_CLASS(hiod)->realize(hiod, vbasedev, errp)) {
> + if (!ops->attach_device(name, vbasedev, as, errp)) {
> object_unref(hiod);
> - ops->detach_device(vbasedev);
> + vbasedev->hiod = NULL;
> return false;
> }
> - vbasedev->hiod = hiod;
>
> return true;
> }
> diff --git a/hw/vfio/container.c b/hw/vfio/container.c
> index 10cb4b4320ac..9ccdb639ac84 100644
> --- a/hw/vfio/container.c
> +++ b/hw/vfio/container.c
> @@ -914,6 +914,10 @@ static bool vfio_legacy_attach_device(const char *name, VFIODevice *vbasedev,
>
> trace_vfio_attach_device(vbasedev->name, groupid);
>
> + if (!vfio_device_hiod_realize(vbasedev, errp)) {
> + return false;
don't you want to go to err_alloc_ioas instead?
> + }
> +
> group = vfio_get_group(groupid, as, errp);
> if (!group) {
> return false;
> diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c
> index 7e23e9080c9d..ea15c79db0a3 100644
> --- a/hw/vfio/helpers.c
> +++ b/hw/vfio/helpers.c
> @@ -689,3 +689,14 @@ bool vfio_device_is_mdev(VFIODevice *vbasedev)
> subsys = realpath(tmp, NULL);
> return subsys && (strcmp(subsys, "/sys/bus/mdev") == 0);
> }
> +
> +bool vfio_device_hiod_realize(VFIODevice *vbasedev, Error **errp)
> +{
> + HostIOMMUDevice *hiod = vbasedev->hiod;
> +
> + if (!hiod) {
> + return true;
> + }
> +
> + return HOST_IOMMU_DEVICE_GET_CLASS(hiod)->realize(hiod, vbasedev, errp);
> +}
> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
> index 5e2fc1ce089d..2324bf892c56 100644
> --- a/hw/vfio/iommufd.c
> +++ b/hw/vfio/iommufd.c
> @@ -403,6 +403,10 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
>
> space = vfio_get_address_space(as);
>
> + if (!vfio_device_hiod_realize(vbasedev, errp)) {
> + return false;
> + }
> +
> /* try to attach to an existing container in this space */
> QLIST_FOREACH(bcontainer, &space->containers, next) {
> container = container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer);
Eric
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [PATCH v6 4/9] vfio/{iommufd,container}: Invoke HostIOMMUDevice::realize() during attach_device()
2024-07-23 7:38 ` [PATCH v6 4/9] vfio/{iommufd,container}: " Eric Auger
@ 2024-07-23 7:44 ` Cédric Le Goater
2024-07-23 7:55 ` Eric Auger
2024-07-23 7:53 ` Joao Martins
1 sibling, 1 reply; 51+ messages in thread
From: Cédric Le Goater @ 2024-07-23 7:44 UTC (permalink / raw)
To: eric.auger, Joao Martins, qemu-devel
Cc: Yi Liu, Zhenzhong Duan, Alex Williamson, Jason Gunthorpe,
Avihai Horon
On 7/23/24 09:38, Eric Auger wrote:
> Hi Joao,
>
> On 7/22/24 23:13, Joao Martins wrote:
>> Move the HostIOMMUDevice::realize() to be invoked during the attach of the device
>> before we allocate IOMMUFD hardware pagetable objects (HWPT). This allows the use
>> of the hw_caps obtained by IOMMU_GET_HW_INFO that essentially tell if the IOMMU
>> behind the device supports dirty tracking.
>>
>> Note: The HostIOMMUDevice data from legacy backend is static and doesn't
>> need any information from the (type1-iommu) backend to be initialized.
>> In contrast however, the IOMMUFD HostIOMMUDevice data requires the
>> iommufd FD to be connected and having a devid to be able to successfully
> Nit: maybe this comment shall be also added in iommufd.c before the call
> to vfio_device_hiod_realize() to avoid someone else to move that call
> earlier at some point
>> GET_HW_INFO. This means vfio_device_hiod_realize() is called in
>> different places within the backend .attach_device() implementation.
>>
>> Suggested-by: Cédric Le Goater <clg@redhat.cm>
>> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
>> Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>> ---
>> include/hw/vfio/vfio-common.h | 1 +
>> hw/vfio/common.c | 16 ++++++----------
>> hw/vfio/container.c | 4 ++++
>> hw/vfio/helpers.c | 11 +++++++++++
>> hw/vfio/iommufd.c | 4 ++++
>> 5 files changed, 26 insertions(+), 10 deletions(-)
>>
>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
>> index 1a96678f8c38..4e44b26d3c45 100644
>> --- a/include/hw/vfio/vfio-common.h
>> +++ b/include/hw/vfio/vfio-common.h
>> @@ -242,6 +242,7 @@ void vfio_region_finalize(VFIORegion *region);
>> void vfio_reset_handler(void *opaque);
>> struct vfio_device_info *vfio_get_device_info(int fd);
>> bool vfio_device_is_mdev(VFIODevice *vbasedev);
>> +bool vfio_device_hiod_realize(VFIODevice *vbasedev, Error **errp);
>> bool vfio_attach_device(char *name, VFIODevice *vbasedev,
>> AddressSpace *as, Error **errp);
>> void vfio_detach_device(VFIODevice *vbasedev);
>> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
>> index 784e266e6aab..da12cbd56408 100644
>> --- a/hw/vfio/common.c
>> +++ b/hw/vfio/common.c
>> @@ -1537,7 +1537,7 @@ bool vfio_attach_device(char *name, VFIODevice *vbasedev,
>> {
>> const VFIOIOMMUClass *ops =
>> VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_LEGACY));
>> - HostIOMMUDevice *hiod;
>> + HostIOMMUDevice *hiod = NULL;
>>
>> if (vbasedev->iommufd) {
>> ops = VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD));
>> @@ -1545,21 +1545,17 @@ bool vfio_attach_device(char *name, VFIODevice *vbasedev,
>>
>> assert(ops);
>>
>> - if (!ops->attach_device(name, vbasedev, as, errp)) {
>> - return false;
>> - }
>>
>> - if (vbasedev->mdev) {
>> - return true;
>> + if (!vbasedev->mdev) {
>> + hiod = HOST_IOMMU_DEVICE(object_new(ops->hiod_typename));
>> + vbasedev->hiod = hiod;
>> }
>>
>> - hiod = HOST_IOMMU_DEVICE(object_new(ops->hiod_typename));
>> - if (!HOST_IOMMU_DEVICE_GET_CLASS(hiod)->realize(hiod, vbasedev, errp)) {
>> + if (!ops->attach_device(name, vbasedev, as, errp)) {
>> object_unref(hiod);
>> - ops->detach_device(vbasedev);
>> + vbasedev->hiod = NULL;
>> return false;
>> }
>> - vbasedev->hiod = hiod;
>>
>> return true;
>> }
>> diff --git a/hw/vfio/container.c b/hw/vfio/container.c
>> index 10cb4b4320ac..9ccdb639ac84 100644
>> --- a/hw/vfio/container.c
>> +++ b/hw/vfio/container.c
>> @@ -914,6 +914,10 @@ static bool vfio_legacy_attach_device(const char *name, VFIODevice *vbasedev,
>>
>> trace_vfio_attach_device(vbasedev->name, groupid);
>>
>> + if (!vfio_device_hiod_realize(vbasedev, errp)) {
>> + return false;
> don't you want to go to err_alloc_ioas instead?
Hmm, the err_alloc_ioas label is in a different function, iommufd_cdev_attach().
Maybe you meant the comment for the routine iommufd_cdev_attach() and the
label err_connect_bind?
Thanks,
C.
>> + }
>> +
>> group = vfio_get_group(groupid, as, errp);
>> if (!group) {
>> return false;
>> diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c
>> index 7e23e9080c9d..ea15c79db0a3 100644
>> --- a/hw/vfio/helpers.c
>> +++ b/hw/vfio/helpers.c
>> @@ -689,3 +689,14 @@ bool vfio_device_is_mdev(VFIODevice *vbasedev)
>> subsys = realpath(tmp, NULL);
>> return subsys && (strcmp(subsys, "/sys/bus/mdev") == 0);
>> }
>> +
>> +bool vfio_device_hiod_realize(VFIODevice *vbasedev, Error **errp)
>> +{
>> + HostIOMMUDevice *hiod = vbasedev->hiod;
>> +
>> + if (!hiod) {
>> + return true;
>> + }
>> +
>> + return HOST_IOMMU_DEVICE_GET_CLASS(hiod)->realize(hiod, vbasedev, errp);
>> +}
>> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
>> index 5e2fc1ce089d..2324bf892c56 100644
>> --- a/hw/vfio/iommufd.c
>> +++ b/hw/vfio/iommufd.c
>> @@ -403,6 +403,10 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
>>
>> space = vfio_get_address_space(as);
>>
>> + if (!vfio_device_hiod_realize(vbasedev, errp)) {
>> + return false;
>> + }
>> +
>> /* try to attach to an existing container in this space */
>> QLIST_FOREACH(bcontainer, &space->containers, next) {
>> container = container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer);
> Eric
>
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [PATCH v6 4/9] vfio/{iommufd,container}: Invoke HostIOMMUDevice::realize() during attach_device()
2024-07-23 7:44 ` Cédric Le Goater
@ 2024-07-23 7:55 ` Eric Auger
2024-07-23 8:05 ` Joao Martins
0 siblings, 1 reply; 51+ messages in thread
From: Eric Auger @ 2024-07-23 7:55 UTC (permalink / raw)
To: Cédric Le Goater, Joao Martins, qemu-devel
Cc: Yi Liu, Zhenzhong Duan, Alex Williamson, Jason Gunthorpe,
Avihai Horon
On 7/23/24 09:44, Cédric Le Goater wrote:
> On 7/23/24 09:38, Eric Auger wrote:
>> Hi Joao,
>>
>> On 7/22/24 23:13, Joao Martins wrote:
>>> Move the HostIOMMUDevice::realize() to be invoked during the attach
>>> of the device
>>> before we allocate IOMMUFD hardware pagetable objects (HWPT). This
>>> allows the use
>>> of the hw_caps obtained by IOMMU_GET_HW_INFO that essentially tell
>>> if the IOMMU
>>> behind the device supports dirty tracking.
>>>
>>> Note: The HostIOMMUDevice data from legacy backend is static and
>>> doesn't
>>> need any information from the (type1-iommu) backend to be initialized.
>>> In contrast however, the IOMMUFD HostIOMMUDevice data requires the
>>> iommufd FD to be connected and having a devid to be able to
>>> successfully
>> Nit: maybe this comment shall be also added in iommufd.c before the call
>> to vfio_device_hiod_realize() to avoid someone else to move that call
>> earlier at some point
>>> GET_HW_INFO. This means vfio_device_hiod_realize() is called in
>>> different places within the backend .attach_device() implementation.
>>>
>>> Suggested-by: Cédric Le Goater <clg@redhat.cm>
>>> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
>>> Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>>> ---
>>> include/hw/vfio/vfio-common.h | 1 +
>>> hw/vfio/common.c | 16 ++++++----------
>>> hw/vfio/container.c | 4 ++++
>>> hw/vfio/helpers.c | 11 +++++++++++
>>> hw/vfio/iommufd.c | 4 ++++
>>> 5 files changed, 26 insertions(+), 10 deletions(-)
>>>
>>> diff --git a/include/hw/vfio/vfio-common.h
>>> b/include/hw/vfio/vfio-common.h
>>> index 1a96678f8c38..4e44b26d3c45 100644
>>> --- a/include/hw/vfio/vfio-common.h
>>> +++ b/include/hw/vfio/vfio-common.h
>>> @@ -242,6 +242,7 @@ void vfio_region_finalize(VFIORegion *region);
>>> void vfio_reset_handler(void *opaque);
>>> struct vfio_device_info *vfio_get_device_info(int fd);
>>> bool vfio_device_is_mdev(VFIODevice *vbasedev);
>>> +bool vfio_device_hiod_realize(VFIODevice *vbasedev, Error **errp);
>>> bool vfio_attach_device(char *name, VFIODevice *vbasedev,
>>> AddressSpace *as, Error **errp);
>>> void vfio_detach_device(VFIODevice *vbasedev);
>>> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
>>> index 784e266e6aab..da12cbd56408 100644
>>> --- a/hw/vfio/common.c
>>> +++ b/hw/vfio/common.c
>>> @@ -1537,7 +1537,7 @@ bool vfio_attach_device(char *name, VFIODevice
>>> *vbasedev,
>>> {
>>> const VFIOIOMMUClass *ops =
>>>
>>> VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_LEGACY));
>>> - HostIOMMUDevice *hiod;
>>> + HostIOMMUDevice *hiod = NULL;
>>> if (vbasedev->iommufd) {
>>> ops =
>>> VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD));
>>> @@ -1545,21 +1545,17 @@ bool vfio_attach_device(char *name,
>>> VFIODevice *vbasedev,
>>> assert(ops);
>>> - if (!ops->attach_device(name, vbasedev, as, errp)) {
>>> - return false;
>>> - }
>>> - if (vbasedev->mdev) {
>>> - return true;
>>> + if (!vbasedev->mdev) {
>>> + hiod = HOST_IOMMU_DEVICE(object_new(ops->hiod_typename));
>>> + vbasedev->hiod = hiod;
>>> }
>>> - hiod = HOST_IOMMU_DEVICE(object_new(ops->hiod_typename));
>>> - if (!HOST_IOMMU_DEVICE_GET_CLASS(hiod)->realize(hiod, vbasedev,
>>> errp)) {
>>> + if (!ops->attach_device(name, vbasedev, as, errp)) {
>>> object_unref(hiod);
>>> - ops->detach_device(vbasedev);
>>> + vbasedev->hiod = NULL;
>>> return false;
>>> }
>>> - vbasedev->hiod = hiod;
>>> return true;
>>> }
>>> diff --git a/hw/vfio/container.c b/hw/vfio/container.c
>>> index 10cb4b4320ac..9ccdb639ac84 100644
>>> --- a/hw/vfio/container.c
>>> +++ b/hw/vfio/container.c
>>> @@ -914,6 +914,10 @@ static bool vfio_legacy_attach_device(const
>>> char *name, VFIODevice *vbasedev,
>>> trace_vfio_attach_device(vbasedev->name, groupid);
>>> + if (!vfio_device_hiod_realize(vbasedev, errp)) {
>>> + return false;
>> don't you want to go to err_alloc_ioas instead?
>
> hmm, the err_alloc_ioas label is in a different function
> iommufd_cdev_attach().
>
> may be you meant the comment for routine iommufd_cdev_attach() and
> label err_connect_bind ?
>
>
> Thanks,
>
> C.
>
>
>>> + }
>>> +
>>> group = vfio_get_group(groupid, as, errp);
>>> if (!group) {
>>> return false;
>>> diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c
>>> index 7e23e9080c9d..ea15c79db0a3 100644
>>> --- a/hw/vfio/helpers.c
>>> +++ b/hw/vfio/helpers.c
>>> @@ -689,3 +689,14 @@ bool vfio_device_is_mdev(VFIODevice *vbasedev)
>>> subsys = realpath(tmp, NULL);
>>> return subsys && (strcmp(subsys, "/sys/bus/mdev") == 0);
>>> }
>>> +
>>> +bool vfio_device_hiod_realize(VFIODevice *vbasedev, Error **errp)
>>> +{
>>> + HostIOMMUDevice *hiod = vbasedev->hiod;
>>> +
>>> + if (!hiod) {
>>> + return true;
>>> + }
>>> +
>>> + return HOST_IOMMU_DEVICE_GET_CLASS(hiod)->realize(hiod,
>>> vbasedev, errp);
>>> +}
>>> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
>>> index 5e2fc1ce089d..2324bf892c56 100644
>>> --- a/hw/vfio/iommufd.c
>>> +++ b/hw/vfio/iommufd.c
>>> @@ -403,6 +403,10 @@ static bool iommufd_cdev_attach(const char
>>> *name, VFIODevice *vbasedev,
>>> space = vfio_get_address_space(as);
>>> + if (!vfio_device_hiod_realize(vbasedev, errp)) {
>>> + return false;
Hmm, sorry, my previous comment was targeting this place. I think
unrolling is needed up to put_address_space,
so effectively this does not match err_alloc_ioas, but I guess we would
need another label.
Eric
>>> + }
>>> +
>>> /* try to attach to an existing container in this space */
>>> QLIST_FOREACH(bcontainer, &space->containers, next) {
>>> container = container_of(bcontainer, VFIOIOMMUFDContainer,
>>> bcontainer);
>> Eric
>>
>
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [PATCH v6 4/9] vfio/{iommufd,container}: Invoke HostIOMMUDevice::realize() during attach_device()
2024-07-23 7:55 ` Eric Auger
@ 2024-07-23 8:05 ` Joao Martins
2024-07-23 8:08 ` Cédric Le Goater
` (2 more replies)
0 siblings, 3 replies; 51+ messages in thread
From: Joao Martins @ 2024-07-23 8:05 UTC (permalink / raw)
To: eric.auger, Cédric Le Goater, qemu-devel
Cc: Yi Liu, Zhenzhong Duan, Alex Williamson, Jason Gunthorpe,
Avihai Horon
On 23/07/2024 08:55, Eric Auger wrote:
>
>
> On 7/23/24 09:44, Cédric Le Goater wrote:
>> On 7/23/24 09:38, Eric Auger wrote:
>>> Hi Joao,
>>>
>>> On 7/22/24 23:13, Joao Martins wrote:
>>>> Move the HostIOMMUDevice::realize() to be invoked during the attach
>>>> of the device
>>>> before we allocate IOMMUFD hardware pagetable objects (HWPT). This
>>>> allows the use
>>>> of the hw_caps obtained by IOMMU_GET_HW_INFO that essentially tell
>>>> if the IOMMU
>>>> behind the device supports dirty tracking.
>>>>
>>>> Note: The HostIOMMUDevice data from legacy backend is static and
>>>> doesn't
>>>> need any information from the (type1-iommu) backend to be initialized.
>>>> In contrast however, the IOMMUFD HostIOMMUDevice data requires the
>>>> iommufd FD to be connected and having a devid to be able to
>>>> successfully
>>> Nit: maybe this comment shall be also added in iommufd.c before the call
>>> to vfio_device_hiod_realize() to avoid someone else to move that call
>>> earlier at some point
>>>> GET_HW_INFO. This means vfio_device_hiod_realize() is called in
>>>> different places within the backend .attach_device() implementation.
>>>>
>>>> Suggested-by: Cédric Le Goater <clg@redhat.cm>
>>>> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
>>>> Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>>>> ---
>>>> include/hw/vfio/vfio-common.h | 1 +
>>>> hw/vfio/common.c | 16 ++++++----------
>>>> hw/vfio/container.c | 4 ++++
>>>> hw/vfio/helpers.c | 11 +++++++++++
>>>> hw/vfio/iommufd.c | 4 ++++
>>>> 5 files changed, 26 insertions(+), 10 deletions(-)
>>>>
>>>> diff --git a/include/hw/vfio/vfio-common.h
>>>> b/include/hw/vfio/vfio-common.h
>>>> index 1a96678f8c38..4e44b26d3c45 100644
>>>> --- a/include/hw/vfio/vfio-common.h
>>>> +++ b/include/hw/vfio/vfio-common.h
>>>> @@ -242,6 +242,7 @@ void vfio_region_finalize(VFIORegion *region);
>>>> void vfio_reset_handler(void *opaque);
>>>> struct vfio_device_info *vfio_get_device_info(int fd);
>>>> bool vfio_device_is_mdev(VFIODevice *vbasedev);
>>>> +bool vfio_device_hiod_realize(VFIODevice *vbasedev, Error **errp);
>>>> bool vfio_attach_device(char *name, VFIODevice *vbasedev,
>>>> AddressSpace *as, Error **errp);
>>>> void vfio_detach_device(VFIODevice *vbasedev);
>>>> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
>>>> index 784e266e6aab..da12cbd56408 100644
>>>> --- a/hw/vfio/common.c
>>>> +++ b/hw/vfio/common.c
>>>> @@ -1537,7 +1537,7 @@ bool vfio_attach_device(char *name, VFIODevice
>>>> *vbasedev,
>>>> {
>>>> const VFIOIOMMUClass *ops =
>>>>
>>>> VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_LEGACY));
>>>> - HostIOMMUDevice *hiod;
>>>> + HostIOMMUDevice *hiod = NULL;
>>>> if (vbasedev->iommufd) {
>>>> ops =
>>>> VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD));
>>>> @@ -1545,21 +1545,17 @@ bool vfio_attach_device(char *name,
>>>> VFIODevice *vbasedev,
>>>> assert(ops);
>>>> - if (!ops->attach_device(name, vbasedev, as, errp)) {
>>>> - return false;
>>>> - }
>>>> - if (vbasedev->mdev) {
>>>> - return true;
>>>> + if (!vbasedev->mdev) {
>>>> + hiod = HOST_IOMMU_DEVICE(object_new(ops->hiod_typename));
>>>> + vbasedev->hiod = hiod;
>>>> }
>>>> - hiod = HOST_IOMMU_DEVICE(object_new(ops->hiod_typename));
>>>> - if (!HOST_IOMMU_DEVICE_GET_CLASS(hiod)->realize(hiod, vbasedev,
>>>> errp)) {
>>>> + if (!ops->attach_device(name, vbasedev, as, errp)) {
>>>> object_unref(hiod);
>>>> - ops->detach_device(vbasedev);
>>>> + vbasedev->hiod = NULL;
>>>> return false;
>>>> }
>>>> - vbasedev->hiod = hiod;
>>>> return true;
>>>> }
>>>> diff --git a/hw/vfio/container.c b/hw/vfio/container.c
>>>> index 10cb4b4320ac..9ccdb639ac84 100644
>>>> --- a/hw/vfio/container.c
>>>> +++ b/hw/vfio/container.c
>>>> @@ -914,6 +914,10 @@ static bool vfio_legacy_attach_device(const
>>>> char *name, VFIODevice *vbasedev,
>>>> trace_vfio_attach_device(vbasedev->name, groupid);
>>>> + if (!vfio_device_hiod_realize(vbasedev, errp)) {
>>>> + return false;
>>> don't you want to go to err_alloc_ioas instead?
>>
>> hmm, the err_alloc_ioas label is in a different function
>> iommufd_cdev_attach().
>>
>> may be you meant the comment for routine iommufd_cdev_attach() and
>> label err_connect_bind ?
>>
>>
>> Thanks,
>>
>> C.
>>
>>
>>>> + }
>>>> +
>>>> group = vfio_get_group(groupid, as, errp);
>>>> if (!group) {
>>>> return false;
>>>> diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c
>>>> index 7e23e9080c9d..ea15c79db0a3 100644
>>>> --- a/hw/vfio/helpers.c
>>>> +++ b/hw/vfio/helpers.c
>>>> @@ -689,3 +689,14 @@ bool vfio_device_is_mdev(VFIODevice *vbasedev)
>>>> subsys = realpath(tmp, NULL);
>>>> return subsys && (strcmp(subsys, "/sys/bus/mdev") == 0);
>>>> }
>>>> +
>>>> +bool vfio_device_hiod_realize(VFIODevice *vbasedev, Error **errp)
>>>> +{
>>>> + HostIOMMUDevice *hiod = vbasedev->hiod;
>>>> +
>>>> + if (!hiod) {
>>>> + return true;
>>>> + }
>>>> +
>>>> + return HOST_IOMMU_DEVICE_GET_CLASS(hiod)->realize(hiod,
>>>> vbasedev, errp);
>>>> +}
>>>> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
>>>> index 5e2fc1ce089d..2324bf892c56 100644
>>>> --- a/hw/vfio/iommufd.c
>>>> +++ b/hw/vfio/iommufd.c
>>>> @@ -403,6 +403,10 @@ static bool iommufd_cdev_attach(const char
>>>> *name, VFIODevice *vbasedev,
>>>> space = vfio_get_address_space(as);
>>>> + if (!vfio_device_hiod_realize(vbasedev, errp)) {
>>>> + return false;
> Hum sorry my previous comment was targetting that place. I think
> unrolling is needed up to put_address_space
>
> so effectively this does not match err_alloc_ioas but I guess we would
> need another label
>
You're right. We haven't yet attached the device and that's what err_alloc_ioas
would do. I'm not sure adding another label would make things cleaner given the
ordering requirement. So maybe this instead?
@@ -482,7 +483,8 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice
*vbasedev,
space = vfio_get_address_space(as);
if (!vfio_device_hiod_realize(vbasedev, errp)) {
- return false;
+ vfio_put_address_space(space);
+ goto err_connect_bind;
}
/* try to attach to an existing container in this space */
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [PATCH v6 4/9] vfio/{iommufd,container}: Invoke HostIOMMUDevice::realize() during attach_device()
2024-07-23 8:05 ` Joao Martins
@ 2024-07-23 8:08 ` Cédric Le Goater
2024-07-23 8:10 ` Eric Auger
2024-07-23 8:20 ` Duan, Zhenzhong
2 siblings, 0 replies; 51+ messages in thread
From: Cédric Le Goater @ 2024-07-23 8:08 UTC (permalink / raw)
To: Joao Martins, eric.auger, qemu-devel
Cc: Yi Liu, Zhenzhong Duan, Alex Williamson, Jason Gunthorpe,
Avihai Horon
On 7/23/24 10:05, Joao Martins wrote:
> On 23/07/2024 08:55, Eric Auger wrote:
>>
>>
>> On 7/23/24 09:44, Cédric Le Goater wrote:
>>> On 7/23/24 09:38, Eric Auger wrote:
>>>> Hi Joao,
>>>>
>>>> On 7/22/24 23:13, Joao Martins wrote:
>>>>> Move the HostIOMMUDevice::realize() to be invoked during the attach
>>>>> of the device
>>>>> before we allocate IOMMUFD hardware pagetable objects (HWPT). This
>>>>> allows the use
>>>>> of the hw_caps obtained by IOMMU_GET_HW_INFO that essentially tell
>>>>> if the IOMMU
>>>>> behind the device supports dirty tracking.
>>>>>
>>>>> Note: The HostIOMMUDevice data from legacy backend is static and
>>>>> doesn't
>>>>> need any information from the (type1-iommu) backend to be initialized.
>>>>> In contrast however, the IOMMUFD HostIOMMUDevice data requires the
>>>>> iommufd FD to be connected and having a devid to be able to
>>>>> successfully
>>>> Nit: maybe this comment shall be also added in iommufd.c before the call
>>>> to vfio_device_hiod_realize() to avoid someone else to move that call
>>>> earlier at some point
>>>>> GET_HW_INFO. This means vfio_device_hiod_realize() is called in
>>>>> different places within the backend .attach_device() implementation.
>>>>>
>>>>> Suggested-by: Cédric Le Goater <clg@redhat.cm>
>>>>> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
>>>>> Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>>>>> ---
>>>>> include/hw/vfio/vfio-common.h | 1 +
>>>>> hw/vfio/common.c | 16 ++++++----------
>>>>> hw/vfio/container.c | 4 ++++
>>>>> hw/vfio/helpers.c | 11 +++++++++++
>>>>> hw/vfio/iommufd.c | 4 ++++
>>>>> 5 files changed, 26 insertions(+), 10 deletions(-)
>>>>>
>>>>> diff --git a/include/hw/vfio/vfio-common.h
>>>>> b/include/hw/vfio/vfio-common.h
>>>>> index 1a96678f8c38..4e44b26d3c45 100644
>>>>> --- a/include/hw/vfio/vfio-common.h
>>>>> +++ b/include/hw/vfio/vfio-common.h
>>>>> @@ -242,6 +242,7 @@ void vfio_region_finalize(VFIORegion *region);
>>>>> void vfio_reset_handler(void *opaque);
>>>>> struct vfio_device_info *vfio_get_device_info(int fd);
>>>>> bool vfio_device_is_mdev(VFIODevice *vbasedev);
>>>>> +bool vfio_device_hiod_realize(VFIODevice *vbasedev, Error **errp);
>>>>> bool vfio_attach_device(char *name, VFIODevice *vbasedev,
>>>>> AddressSpace *as, Error **errp);
>>>>> void vfio_detach_device(VFIODevice *vbasedev);
>>>>> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
>>>>> index 784e266e6aab..da12cbd56408 100644
>>>>> --- a/hw/vfio/common.c
>>>>> +++ b/hw/vfio/common.c
>>>>> @@ -1537,7 +1537,7 @@ bool vfio_attach_device(char *name, VFIODevice
>>>>> *vbasedev,
>>>>> {
>>>>> const VFIOIOMMUClass *ops =
>>>>>
>>>>> VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_LEGACY));
>>>>> - HostIOMMUDevice *hiod;
>>>>> + HostIOMMUDevice *hiod = NULL;
>>>>> if (vbasedev->iommufd) {
>>>>> ops =
>>>>> VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD));
>>>>> @@ -1545,21 +1545,17 @@ bool vfio_attach_device(char *name,
>>>>> VFIODevice *vbasedev,
>>>>> assert(ops);
>>>>> - if (!ops->attach_device(name, vbasedev, as, errp)) {
>>>>> - return false;
>>>>> - }
>>>>> - if (vbasedev->mdev) {
>>>>> - return true;
>>>>> + if (!vbasedev->mdev) {
>>>>> + hiod = HOST_IOMMU_DEVICE(object_new(ops->hiod_typename));
>>>>> + vbasedev->hiod = hiod;
>>>>> }
>>>>> - hiod = HOST_IOMMU_DEVICE(object_new(ops->hiod_typename));
>>>>> - if (!HOST_IOMMU_DEVICE_GET_CLASS(hiod)->realize(hiod, vbasedev,
>>>>> errp)) {
>>>>> + if (!ops->attach_device(name, vbasedev, as, errp)) {
>>>>> object_unref(hiod);
>>>>> - ops->detach_device(vbasedev);
>>>>> + vbasedev->hiod = NULL;
>>>>> return false;
>>>>> }
>>>>> - vbasedev->hiod = hiod;
>>>>> return true;
>>>>> }
>>>>> diff --git a/hw/vfio/container.c b/hw/vfio/container.c
>>>>> index 10cb4b4320ac..9ccdb639ac84 100644
>>>>> --- a/hw/vfio/container.c
>>>>> +++ b/hw/vfio/container.c
>>>>> @@ -914,6 +914,10 @@ static bool vfio_legacy_attach_device(const
>>>>> char *name, VFIODevice *vbasedev,
>>>>> trace_vfio_attach_device(vbasedev->name, groupid);
>>>>> + if (!vfio_device_hiod_realize(vbasedev, errp)) {
>>>>> + return false;
>>>> don't you want to go to err_alloc_ioas instead?
>>>
>>> hmm, the err_alloc_ioas label is in a different function
>>> iommufd_cdev_attach().
>>>
>>> may be you meant the comment for routine iommufd_cdev_attach() and
>>> label err_connect_bind ?
>>>
>>>
>>> Thanks,
>>>
>>> C.
>>>
>>>
>>>>> + }
>>>>> +
>>>>> group = vfio_get_group(groupid, as, errp);
>>>>> if (!group) {
>>>>> return false;
>>>>> diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c
>>>>> index 7e23e9080c9d..ea15c79db0a3 100644
>>>>> --- a/hw/vfio/helpers.c
>>>>> +++ b/hw/vfio/helpers.c
>>>>> @@ -689,3 +689,14 @@ bool vfio_device_is_mdev(VFIODevice *vbasedev)
>>>>> subsys = realpath(tmp, NULL);
>>>>> return subsys && (strcmp(subsys, "/sys/bus/mdev") == 0);
>>>>> }
>>>>> +
>>>>> +bool vfio_device_hiod_realize(VFIODevice *vbasedev, Error **errp)
>>>>> +{
>>>>> + HostIOMMUDevice *hiod = vbasedev->hiod;
>>>>> +
>>>>> + if (!hiod) {
>>>>> + return true;
>>>>> + }
>>>>> +
>>>>> + return HOST_IOMMU_DEVICE_GET_CLASS(hiod)->realize(hiod,
>>>>> vbasedev, errp);
>>>>> +}
>>>>> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
>>>>> index 5e2fc1ce089d..2324bf892c56 100644
>>>>> --- a/hw/vfio/iommufd.c
>>>>> +++ b/hw/vfio/iommufd.c
>>>>> @@ -403,6 +403,10 @@ static bool iommufd_cdev_attach(const char
>>>>> *name, VFIODevice *vbasedev,
>>>>> space = vfio_get_address_space(as);
>>>>> + if (!vfio_device_hiod_realize(vbasedev, errp)) {
>>>>> + return false;
>> Hum sorry my previous comment was targetting that place. I think
>> unrolling is needed up to put_address_space
>>
>> so effectively this does not match err_alloc_ioas but I guess we would
>> need another label
>>
>
> You're right. We haven't yet attached rthe device and that's what err_alloc_ioas
> would do. Adding another label not sure would make things cleaner given the
> ordering requirement. So maybe this instead?
>
> @@ -482,7 +483,8 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice
> *vbasedev,
> space = vfio_get_address_space(as);
>
> if (!vfio_device_hiod_realize(vbasedev, errp)) {
> - return false;
> + vfio_put_address_space(space);
> + goto err_connect_bind;
> }
>
> /* try to attach to an existing container in this space */
>
LGTM.
Thanks,
C.
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [PATCH v6 4/9] vfio/{iommufd,container}: Invoke HostIOMMUDevice::realize() during attach_device()
2024-07-23 8:05 ` Joao Martins
2024-07-23 8:08 ` Cédric Le Goater
@ 2024-07-23 8:10 ` Eric Auger
2024-07-23 8:20 ` Duan, Zhenzhong
2 siblings, 0 replies; 51+ messages in thread
From: Eric Auger @ 2024-07-23 8:10 UTC (permalink / raw)
To: Joao Martins, Cédric Le Goater, qemu-devel
Cc: Yi Liu, Zhenzhong Duan, Alex Williamson, Jason Gunthorpe,
Avihai Horon
On 7/23/24 10:05, Joao Martins wrote:
> On 23/07/2024 08:55, Eric Auger wrote:
>>
>> On 7/23/24 09:44, Cédric Le Goater wrote:
>>> On 7/23/24 09:38, Eric Auger wrote:
>>>> Hi Joao,
>>>>
>>>> On 7/22/24 23:13, Joao Martins wrote:
>>>>> Move the HostIOMMUDevice::realize() to be invoked during the attach
>>>>> of the device
>>>>> before we allocate IOMMUFD hardware pagetable objects (HWPT). This
>>>>> allows the use
>>>>> of the hw_caps obtained by IOMMU_GET_HW_INFO that essentially tell
>>>>> if the IOMMU
>>>>> behind the device supports dirty tracking.
>>>>>
>>>>> Note: The HostIOMMUDevice data from legacy backend is static and
>>>>> doesn't
>>>>> need any information from the (type1-iommu) backend to be initialized.
>>>>> In contrast however, the IOMMUFD HostIOMMUDevice data requires the
>>>>> iommufd FD to be connected and having a devid to be able to
>>>>> successfully
>>>> Nit: maybe this comment shall be also added in iommufd.c before the call
>>>> to vfio_device_hiod_realize() to avoid someone else to move that call
>>>> earlier at some point
>>>>> GET_HW_INFO. This means vfio_device_hiod_realize() is called in
>>>>> different places within the backend .attach_device() implementation.
>>>>>
>>>>> Suggested-by: Cédric Le Goater <clg@redhat.cm>
>>>>> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
>>>>> Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>>>>> ---
>>>>> include/hw/vfio/vfio-common.h | 1 +
>>>>> hw/vfio/common.c | 16 ++++++----------
>>>>> hw/vfio/container.c | 4 ++++
>>>>> hw/vfio/helpers.c | 11 +++++++++++
>>>>> hw/vfio/iommufd.c | 4 ++++
>>>>> 5 files changed, 26 insertions(+), 10 deletions(-)
>>>>>
>>>>> diff --git a/include/hw/vfio/vfio-common.h
>>>>> b/include/hw/vfio/vfio-common.h
>>>>> index 1a96678f8c38..4e44b26d3c45 100644
>>>>> --- a/include/hw/vfio/vfio-common.h
>>>>> +++ b/include/hw/vfio/vfio-common.h
>>>>> @@ -242,6 +242,7 @@ void vfio_region_finalize(VFIORegion *region);
>>>>> void vfio_reset_handler(void *opaque);
>>>>> struct vfio_device_info *vfio_get_device_info(int fd);
>>>>> bool vfio_device_is_mdev(VFIODevice *vbasedev);
>>>>> +bool vfio_device_hiod_realize(VFIODevice *vbasedev, Error **errp);
>>>>> bool vfio_attach_device(char *name, VFIODevice *vbasedev,
>>>>> AddressSpace *as, Error **errp);
>>>>> void vfio_detach_device(VFIODevice *vbasedev);
>>>>> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
>>>>> index 784e266e6aab..da12cbd56408 100644
>>>>> --- a/hw/vfio/common.c
>>>>> +++ b/hw/vfio/common.c
>>>>> @@ -1537,7 +1537,7 @@ bool vfio_attach_device(char *name, VFIODevice
>>>>> *vbasedev,
>>>>> {
>>>>> const VFIOIOMMUClass *ops =
>>>>>
>>>>> VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_LEGACY));
>>>>> - HostIOMMUDevice *hiod;
>>>>> + HostIOMMUDevice *hiod = NULL;
>>>>> if (vbasedev->iommufd) {
>>>>> ops =
>>>>> VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD));
>>>>> @@ -1545,21 +1545,17 @@ bool vfio_attach_device(char *name,
>>>>> VFIODevice *vbasedev,
>>>>> assert(ops);
>>>>> - if (!ops->attach_device(name, vbasedev, as, errp)) {
>>>>> - return false;
>>>>> - }
>>>>> - if (vbasedev->mdev) {
>>>>> - return true;
>>>>> + if (!vbasedev->mdev) {
>>>>> + hiod = HOST_IOMMU_DEVICE(object_new(ops->hiod_typename));
>>>>> + vbasedev->hiod = hiod;
>>>>> }
>>>>> - hiod = HOST_IOMMU_DEVICE(object_new(ops->hiod_typename));
>>>>> - if (!HOST_IOMMU_DEVICE_GET_CLASS(hiod)->realize(hiod, vbasedev,
>>>>> errp)) {
>>>>> + if (!ops->attach_device(name, vbasedev, as, errp)) {
>>>>> object_unref(hiod);
>>>>> - ops->detach_device(vbasedev);
>>>>> + vbasedev->hiod = NULL;
>>>>> return false;
>>>>> }
>>>>> - vbasedev->hiod = hiod;
>>>>> return true;
>>>>> }
>>>>> diff --git a/hw/vfio/container.c b/hw/vfio/container.c
>>>>> index 10cb4b4320ac..9ccdb639ac84 100644
>>>>> --- a/hw/vfio/container.c
>>>>> +++ b/hw/vfio/container.c
>>>>> @@ -914,6 +914,10 @@ static bool vfio_legacy_attach_device(const
>>>>> char *name, VFIODevice *vbasedev,
>>>>> trace_vfio_attach_device(vbasedev->name, groupid);
>>>>> + if (!vfio_device_hiod_realize(vbasedev, errp)) {
>>>>> + return false;
>>>> don't you want to go to err_alloc_ioas instead?
>>> hmm, the err_alloc_ioas label is in a different function
>>> iommufd_cdev_attach().
>>>
>>> may be you meant the comment for routine iommufd_cdev_attach() and
>>> label err_connect_bind ?
>>>
>>>
>>> Thanks,
>>>
>>> C.
>>>
>>>
>>>>> + }
>>>>> +
>>>>> group = vfio_get_group(groupid, as, errp);
>>>>> if (!group) {
>>>>> return false;
>>>>> diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c
>>>>> index 7e23e9080c9d..ea15c79db0a3 100644
>>>>> --- a/hw/vfio/helpers.c
>>>>> +++ b/hw/vfio/helpers.c
>>>>> @@ -689,3 +689,14 @@ bool vfio_device_is_mdev(VFIODevice *vbasedev)
>>>>> subsys = realpath(tmp, NULL);
>>>>> return subsys && (strcmp(subsys, "/sys/bus/mdev") == 0);
>>>>> }
>>>>> +
>>>>> +bool vfio_device_hiod_realize(VFIODevice *vbasedev, Error **errp)
>>>>> +{
>>>>> + HostIOMMUDevice *hiod = vbasedev->hiod;
>>>>> +
>>>>> + if (!hiod) {
>>>>> + return true;
>>>>> + }
>>>>> +
>>>>> + return HOST_IOMMU_DEVICE_GET_CLASS(hiod)->realize(hiod,
>>>>> vbasedev, errp);
>>>>> +}
>>>>> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
>>>>> index 5e2fc1ce089d..2324bf892c56 100644
>>>>> --- a/hw/vfio/iommufd.c
>>>>> +++ b/hw/vfio/iommufd.c
>>>>> @@ -403,6 +403,10 @@ static bool iommufd_cdev_attach(const char
>>>>> *name, VFIODevice *vbasedev,
>>>>> space = vfio_get_address_space(as);
>>>>> + if (!vfio_device_hiod_realize(vbasedev, errp)) {
>>>>> + return false;
>> Hum sorry my previous comment was targetting that place. I think
>> unrolling is needed up to put_address_space
>>
>> so effectively this does not match err_alloc_ioas but I guess we would
>> need another label
>>
> You're right. We haven't yet attached the device and that's what err_alloc_ioas
> would do. Adding another label not sure would make things cleaner given the
> ordering requirement. So maybe this instead?
>
> @@ -482,7 +483,8 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice
> *vbasedev,
> space = vfio_get_address_space(as);
>
> if (!vfio_device_hiod_realize(vbasedev, errp)) {
> - return false;
> + vfio_put_address_space(space);
> + goto err_connect_bind;
> }
>
> /* try to attach to an existing container in this space */
>
With that addition
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Eric
^ permalink raw reply [flat|nested] 51+ messages in thread
* RE: [PATCH v6 4/9] vfio/{iommufd,container}: Invoke HostIOMMUDevice::realize() during attach_device()
2024-07-23 8:05 ` Joao Martins
2024-07-23 8:08 ` Cédric Le Goater
2024-07-23 8:10 ` Eric Auger
@ 2024-07-23 8:20 ` Duan, Zhenzhong
2024-07-23 8:24 ` Eric Auger
2 siblings, 1 reply; 51+ messages in thread
From: Duan, Zhenzhong @ 2024-07-23 8:20 UTC (permalink / raw)
To: Joao Martins, eric.auger@redhat.com, Cédric Le Goater,
qemu-devel@nongnu.org
Cc: Liu, Yi L, Alex Williamson, Jason Gunthorpe, Avihai Horon
>-----Original Message-----
>From: Joao Martins <joao.m.martins@oracle.com>
>Subject: Re: [PATCH v6 4/9] vfio/{iommufd,container}: Invoke
>HostIOMMUDevice::realize() during attach_device()
>
>On 23/07/2024 08:55, Eric Auger wrote:
>>
>>
>> On 7/23/24 09:44, Cédric Le Goater wrote:
>>> On 7/23/24 09:38, Eric Auger wrote:
>>>> Hi Joao,
>>>>
>>>> On 7/22/24 23:13, Joao Martins wrote:
>>>>> Move the HostIOMMUDevice::realize() to be invoked during the attach
>>>>> of the device
>>>>> before we allocate IOMMUFD hardware pagetable objects (HWPT). This
>>>>> allows the use
>>>>> of the hw_caps obtained by IOMMU_GET_HW_INFO that essentially
>tell
>>>>> if the IOMMU
>>>>> behind the device supports dirty tracking.
>>>>>
>>>>> Note: The HostIOMMUDevice data from legacy backend is static and
>>>>> doesn't
>>>>> need any information from the (type1-iommu) backend to be
>initialized.
>>>>> In contrast however, the IOMMUFD HostIOMMUDevice data requires
>the
>>>>> iommufd FD to be connected and having a devid to be able to
>>>>> successfully
>>>> Nit: maybe this comment shall be also added in iommufd.c before the
>call
>>>> to vfio_device_hiod_realize() to avoid someone else to move that call
>>>> earlier at some point
>>>>> GET_HW_INFO. This means vfio_device_hiod_realize() is called in
>>>>> different places within the backend .attach_device() implementation.
>>>>>
>>>>> Suggested-by: Cédric Le Goater <clg@redhat.cm>
>>>>> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
>>>>> Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>>>>> ---
>>>>> include/hw/vfio/vfio-common.h | 1 +
>>>>> hw/vfio/common.c | 16 ++++++----------
>>>>> hw/vfio/container.c | 4 ++++
>>>>> hw/vfio/helpers.c | 11 +++++++++++
>>>>> hw/vfio/iommufd.c | 4 ++++
>>>>> 5 files changed, 26 insertions(+), 10 deletions(-)
>>>>>
>>>>> diff --git a/include/hw/vfio/vfio-common.h
>>>>> b/include/hw/vfio/vfio-common.h
>>>>> index 1a96678f8c38..4e44b26d3c45 100644
>>>>> --- a/include/hw/vfio/vfio-common.h
>>>>> +++ b/include/hw/vfio/vfio-common.h
>>>>> @@ -242,6 +242,7 @@ void vfio_region_finalize(VFIORegion *region);
>>>>> void vfio_reset_handler(void *opaque);
>>>>> struct vfio_device_info *vfio_get_device_info(int fd);
>>>>> bool vfio_device_is_mdev(VFIODevice *vbasedev);
>>>>> +bool vfio_device_hiod_realize(VFIODevice *vbasedev, Error **errp);
>>>>> bool vfio_attach_device(char *name, VFIODevice *vbasedev,
>>>>> AddressSpace *as, Error **errp);
>>>>> void vfio_detach_device(VFIODevice *vbasedev);
>>>>> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
>>>>> index 784e266e6aab..da12cbd56408 100644
>>>>> --- a/hw/vfio/common.c
>>>>> +++ b/hw/vfio/common.c
>>>>> @@ -1537,7 +1537,7 @@ bool vfio_attach_device(char *name,
>VFIODevice
>>>>> *vbasedev,
>>>>> {
>>>>> const VFIOIOMMUClass *ops =
>>>>>
>>>>>
>VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_LEGACY));
>>>>> - HostIOMMUDevice *hiod;
>>>>> + HostIOMMUDevice *hiod = NULL;
>>>>> if (vbasedev->iommufd) {
>>>>> ops =
>>>>>
>VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUF
>D));
>>>>> @@ -1545,21 +1545,17 @@ bool vfio_attach_device(char *name,
>>>>> VFIODevice *vbasedev,
>>>>> assert(ops);
>>>>> - if (!ops->attach_device(name, vbasedev, as, errp)) {
>>>>> - return false;
>>>>> - }
>>>>> - if (vbasedev->mdev) {
>>>>> - return true;
>>>>> + if (!vbasedev->mdev) {
>>>>> + hiod = HOST_IOMMU_DEVICE(object_new(ops-
>>hiod_typename));
>>>>> + vbasedev->hiod = hiod;
>>>>> }
>>>>> - hiod = HOST_IOMMU_DEVICE(object_new(ops->hiod_typename));
>>>>> - if (!HOST_IOMMU_DEVICE_GET_CLASS(hiod)->realize(hiod,
>vbasedev,
>>>>> errp)) {
>>>>> + if (!ops->attach_device(name, vbasedev, as, errp)) {
>>>>> object_unref(hiod);
>>>>> - ops->detach_device(vbasedev);
>>>>> + vbasedev->hiod = NULL;
>>>>> return false;
>>>>> }
>>>>> - vbasedev->hiod = hiod;
>>>>> return true;
>>>>> }
>>>>> diff --git a/hw/vfio/container.c b/hw/vfio/container.c
>>>>> index 10cb4b4320ac..9ccdb639ac84 100644
>>>>> --- a/hw/vfio/container.c
>>>>> +++ b/hw/vfio/container.c
>>>>> @@ -914,6 +914,10 @@ static bool vfio_legacy_attach_device(const
>>>>> char *name, VFIODevice *vbasedev,
>>>>> trace_vfio_attach_device(vbasedev->name, groupid);
>>>>> + if (!vfio_device_hiod_realize(vbasedev, errp)) {
>>>>> + return false;
>>>> don't you want to go to err_alloc_ioas instead?
>>>
>>> hmm, the err_alloc_ioas label is in a different function
>>> iommufd_cdev_attach().
>>>
>>> may be you meant the comment for routine iommufd_cdev_attach() and
>>> label err_connect_bind ?
>>>
>>>
>>> Thanks,
>>>
>>> C.
>>>
>>>
>>>>> + }
>>>>> +
>>>>> group = vfio_get_group(groupid, as, errp);
>>>>> if (!group) {
>>>>> return false;
>>>>> diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c
>>>>> index 7e23e9080c9d..ea15c79db0a3 100644
>>>>> --- a/hw/vfio/helpers.c
>>>>> +++ b/hw/vfio/helpers.c
>>>>> @@ -689,3 +689,14 @@ bool vfio_device_is_mdev(VFIODevice
>*vbasedev)
>>>>> subsys = realpath(tmp, NULL);
>>>>> return subsys && (strcmp(subsys, "/sys/bus/mdev") == 0);
>>>>> }
>>>>> +
>>>>> +bool vfio_device_hiod_realize(VFIODevice *vbasedev, Error **errp)
>>>>> +{
>>>>> + HostIOMMUDevice *hiod = vbasedev->hiod;
>>>>> +
>>>>> + if (!hiod) {
>>>>> + return true;
>>>>> + }
>>>>> +
>>>>> + return HOST_IOMMU_DEVICE_GET_CLASS(hiod)->realize(hiod,
>>>>> vbasedev, errp);
>>>>> +}
>>>>> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
>>>>> index 5e2fc1ce089d..2324bf892c56 100644
>>>>> --- a/hw/vfio/iommufd.c
>>>>> +++ b/hw/vfio/iommufd.c
>>>>> @@ -403,6 +403,10 @@ static bool iommufd_cdev_attach(const char
>>>>> *name, VFIODevice *vbasedev,
>>>>> space = vfio_get_address_space(as);
>>>>> + if (!vfio_device_hiod_realize(vbasedev, errp)) {
>>>>> + return false;
>> Hum sorry my previous comment was targetting that place. I think
>> unrolling is needed up to put_address_space
>>
>> so effectively this does not match err_alloc_ioas but I guess we would
>> need another label
>>
>
>You're right. We haven't yet attached the device and that's what
>err_alloc_ioas
>would do. Adding another label not sure would make things cleaner given
>the
>ordering requirement. So maybe this instead?
>
>@@ -482,7 +483,8 @@ static bool iommufd_cdev_attach(const char *name,
>VFIODevice
>*vbasedev,
> space = vfio_get_address_space(as);
>
> if (!vfio_device_hiod_realize(vbasedev, errp)) {
>- return false;
>+ vfio_put_address_space(space);
>+ goto err_connect_bind;
> }
>
> /* try to attach to an existing container in this space */
I am confused, even though Cedric and Eric both ACKed this change: aren't we missing the iommufd_cdev_unbind_and_disconnect() call?
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [PATCH v6 4/9] vfio/{iommufd,container}: Invoke HostIOMMUDevice::realize() during attach_device()
2024-07-23 8:20 ` Duan, Zhenzhong
@ 2024-07-23 8:24 ` Eric Auger
2024-07-23 8:26 ` Joao Martins
0 siblings, 1 reply; 51+ messages in thread
From: Eric Auger @ 2024-07-23 8:24 UTC (permalink / raw)
To: Duan, Zhenzhong, Joao Martins, Cédric Le Goater,
qemu-devel@nongnu.org
Cc: Liu, Yi L, Alex Williamson, Jason Gunthorpe, Avihai Horon
On 7/23/24 10:20, Duan, Zhenzhong wrote:
>
>> -----Original Message-----
>> From: Joao Martins <joao.m.martins@oracle.com>
>> Subject: Re: [PATCH v6 4/9] vfio/{iommufd,container}: Invoke
>> HostIOMMUDevice::realize() during attach_device()
>>
>> On 23/07/2024 08:55, Eric Auger wrote:
>>>
>>> On 7/23/24 09:44, Cédric Le Goater wrote:
>>>> On 7/23/24 09:38, Eric Auger wrote:
>>>>> Hi Joao,
>>>>>
>>>>> On 7/22/24 23:13, Joao Martins wrote:
>>>>>> Move the HostIOMMUDevice::realize() to be invoked during the attach
>>>>>> of the device
>>>>>> before we allocate IOMMUFD hardware pagetable objects (HWPT). This
>>>>>> allows the use
>>>>>> of the hw_caps obtained by IOMMU_GET_HW_INFO that essentially
>> tell
>>>>>> if the IOMMU
>>>>>> behind the device supports dirty tracking.
>>>>>>
>>>>>> Note: The HostIOMMUDevice data from legacy backend is static and
>>>>>> doesn't
>>>>>> need any information from the (type1-iommu) backend to be
>> initialized.
>>>>>> In contrast however, the IOMMUFD HostIOMMUDevice data requires
>> the
>>>>>> iommufd FD to be connected and having a devid to be able to
>>>>>> successfully
>>>>> Nit: maybe this comment shall be also added in iommufd.c before the
>> call
>>>>> to vfio_device_hiod_realize() to avoid someone else to move that call
>>>>> earlier at some point
>>>>>> GET_HW_INFO. This means vfio_device_hiod_realize() is called in
>>>>>> different places within the backend .attach_device() implementation.
>>>>>>
>>>>>> Suggested-by: Cédric Le Goater <clg@redhat.cm>
>>>>>> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
>>>>>> Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>>>>>> ---
>>>>>> include/hw/vfio/vfio-common.h | 1 +
>>>>>> hw/vfio/common.c | 16 ++++++----------
>>>>>> hw/vfio/container.c | 4 ++++
>>>>>> hw/vfio/helpers.c | 11 +++++++++++
>>>>>> hw/vfio/iommufd.c | 4 ++++
>>>>>> 5 files changed, 26 insertions(+), 10 deletions(-)
>>>>>>
>>>>>> diff --git a/include/hw/vfio/vfio-common.h
>>>>>> b/include/hw/vfio/vfio-common.h
>>>>>> index 1a96678f8c38..4e44b26d3c45 100644
>>>>>> --- a/include/hw/vfio/vfio-common.h
>>>>>> +++ b/include/hw/vfio/vfio-common.h
>>>>>> @@ -242,6 +242,7 @@ void vfio_region_finalize(VFIORegion *region);
>>>>>> void vfio_reset_handler(void *opaque);
>>>>>> struct vfio_device_info *vfio_get_device_info(int fd);
>>>>>> bool vfio_device_is_mdev(VFIODevice *vbasedev);
>>>>>> +bool vfio_device_hiod_realize(VFIODevice *vbasedev, Error **errp);
>>>>>> bool vfio_attach_device(char *name, VFIODevice *vbasedev,
>>>>>> AddressSpace *as, Error **errp);
>>>>>> void vfio_detach_device(VFIODevice *vbasedev);
>>>>>> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
>>>>>> index 784e266e6aab..da12cbd56408 100644
>>>>>> --- a/hw/vfio/common.c
>>>>>> +++ b/hw/vfio/common.c
>>>>>> @@ -1537,7 +1537,7 @@ bool vfio_attach_device(char *name,
>> VFIODevice
>>>>>> *vbasedev,
>>>>>> {
>>>>>> const VFIOIOMMUClass *ops =
>>>>>>
>>>>>>
>> VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_LEGACY));
>>>>>> - HostIOMMUDevice *hiod;
>>>>>> + HostIOMMUDevice *hiod = NULL;
>>>>>> if (vbasedev->iommufd) {
>>>>>> ops =
>>>>>>
>> VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUF
>> D));
>>>>>> @@ -1545,21 +1545,17 @@ bool vfio_attach_device(char *name,
>>>>>> VFIODevice *vbasedev,
>>>>>> assert(ops);
>>>>>> - if (!ops->attach_device(name, vbasedev, as, errp)) {
>>>>>> - return false;
>>>>>> - }
>>>>>> - if (vbasedev->mdev) {
>>>>>> - return true;
>>>>>> + if (!vbasedev->mdev) {
>>>>>> + hiod = HOST_IOMMU_DEVICE(object_new(ops-
>>> hiod_typename));
>>>>>> + vbasedev->hiod = hiod;
>>>>>> }
>>>>>> - hiod = HOST_IOMMU_DEVICE(object_new(ops->hiod_typename));
>>>>>> - if (!HOST_IOMMU_DEVICE_GET_CLASS(hiod)->realize(hiod,
>> vbasedev,
>>>>>> errp)) {
>>>>>> + if (!ops->attach_device(name, vbasedev, as, errp)) {
>>>>>> object_unref(hiod);
>>>>>> - ops->detach_device(vbasedev);
>>>>>> + vbasedev->hiod = NULL;
>>>>>> return false;
>>>>>> }
>>>>>> - vbasedev->hiod = hiod;
>>>>>> return true;
>>>>>> }
>>>>>> diff --git a/hw/vfio/container.c b/hw/vfio/container.c
>>>>>> index 10cb4b4320ac..9ccdb639ac84 100644
>>>>>> --- a/hw/vfio/container.c
>>>>>> +++ b/hw/vfio/container.c
>>>>>> @@ -914,6 +914,10 @@ static bool vfio_legacy_attach_device(const
>>>>>> char *name, VFIODevice *vbasedev,
>>>>>> trace_vfio_attach_device(vbasedev->name, groupid);
>>>>>> + if (!vfio_device_hiod_realize(vbasedev, errp)) {
>>>>>> + return false;
>>>>> don't you want to go to err_alloc_ioas instead?
>>>> hmm, the err_alloc_ioas label is in a different function
>>>> iommufd_cdev_attach().
>>>>
>>>> may be you meant the comment for routine iommufd_cdev_attach() and
>>>> label err_connect_bind ?
>>>>
>>>>
>>>> Thanks,
>>>>
>>>> C.
>>>>
>>>>
>>>>>> + }
>>>>>> +
>>>>>> group = vfio_get_group(groupid, as, errp);
>>>>>> if (!group) {
>>>>>> return false;
>>>>>> diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c
>>>>>> index 7e23e9080c9d..ea15c79db0a3 100644
>>>>>> --- a/hw/vfio/helpers.c
>>>>>> +++ b/hw/vfio/helpers.c
>>>>>> @@ -689,3 +689,14 @@ bool vfio_device_is_mdev(VFIODevice
>> *vbasedev)
>>>>>> subsys = realpath(tmp, NULL);
>>>>>> return subsys && (strcmp(subsys, "/sys/bus/mdev") == 0);
>>>>>> }
>>>>>> +
>>>>>> +bool vfio_device_hiod_realize(VFIODevice *vbasedev, Error **errp)
>>>>>> +{
>>>>>> + HostIOMMUDevice *hiod = vbasedev->hiod;
>>>>>> +
>>>>>> + if (!hiod) {
>>>>>> + return true;
>>>>>> + }
>>>>>> +
>>>>>> + return HOST_IOMMU_DEVICE_GET_CLASS(hiod)->realize(hiod,
>>>>>> vbasedev, errp);
>>>>>> +}
>>>>>> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
>>>>>> index 5e2fc1ce089d..2324bf892c56 100644
>>>>>> --- a/hw/vfio/iommufd.c
>>>>>> +++ b/hw/vfio/iommufd.c
>>>>>> @@ -403,6 +403,10 @@ static bool iommufd_cdev_attach(const char
>>>>>> *name, VFIODevice *vbasedev,
>>>>>> space = vfio_get_address_space(as);
>>>>>> + if (!vfio_device_hiod_realize(vbasedev, errp)) {
>>>>>> + return false;
>>> Hum sorry my previous comment was targetting that place. I think
>>> unrolling is needed up to put_address_space
>>>
>>> so effectively this does not match err_alloc_ioas but I guess we would
>>> need another label
>>>
>> You're right. We haven't yet attached the device and that's what
>> err_alloc_ioas
>> would do. Adding another label not sure would make things cleaner given
>> the
>> ordering requirement. So maybe this instead?
>>
>> @@ -482,7 +483,8 @@ static bool iommufd_cdev_attach(const char *name,
>> VFIODevice
>> *vbasedev,
>> space = vfio_get_address_space(as);
>>
>> if (!vfio_device_hiod_realize(vbasedev, errp)) {
>> - return false;
>> + vfio_put_address_space(space);
>> + goto err_connect_bind;
>> }
>>
>> /* try to attach to an existing container in this space */
> I was confused though Cedric and Eric both ACK this change. Don't we miss the iommufd_cdev_unbind_and_disconnect() call?
Hum, yes, you're right. Connect and bind were already done; I thought that
happened later. So the err_alloc_ioas label looks good.
Eric
>
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [PATCH v6 4/9] vfio/{iommufd,container}: Invoke HostIOMMUDevice::realize() during attach_device()
2024-07-23 8:24 ` Eric Auger
@ 2024-07-23 8:26 ` Joao Martins
0 siblings, 0 replies; 51+ messages in thread
From: Joao Martins @ 2024-07-23 8:26 UTC (permalink / raw)
To: eric.auger, Duan, Zhenzhong, Cédric Le Goater,
qemu-devel@nongnu.org
Cc: Liu, Yi L, Alex Williamson, Jason Gunthorpe, Avihai Horon
On 23/07/2024 09:24, Eric Auger wrote:
>
>
> On 7/23/24 10:20, Duan, Zhenzhong wrote:
>>
>>> -----Original Message-----
>>> From: Joao Martins <joao.m.martins@oracle.com>
>>> Subject: Re: [PATCH v6 4/9] vfio/{iommufd,container}: Invoke
>>> HostIOMMUDevice::realize() during attach_device()
>>>
>>> On 23/07/2024 08:55, Eric Auger wrote:
>>>>
>>>> On 7/23/24 09:44, Cédric Le Goater wrote:
>>>>> On 7/23/24 09:38, Eric Auger wrote:
>>>>>> Hi Joao,
>>>>>>
>>>>>> On 7/22/24 23:13, Joao Martins wrote:
>>>>>>> Move the HostIOMMUDevice::realize() to be invoked during the attach
>>>>>>> of the device
>>>>>>> before we allocate IOMMUFD hardware pagetable objects (HWPT). This
>>>>>>> allows the use
>>>>>>> of the hw_caps obtained by IOMMU_GET_HW_INFO that essentially
>>> tell
>>>>>>> if the IOMMU
>>>>>>> behind the device supports dirty tracking.
>>>>>>>
>>>>>>> Note: The HostIOMMUDevice data from legacy backend is static and
>>>>>>> doesn't
>>>>>>> need any information from the (type1-iommu) backend to be
>>> initialized.
>>>>>>> In contrast however, the IOMMUFD HostIOMMUDevice data requires
>>> the
>>>>>>> iommufd FD to be connected and having a devid to be able to
>>>>>>> successfully
>>>>>> Nit: maybe this comment shall be also added in iommufd.c before the
>>> call
>>>>>> to vfio_device_hiod_realize() to avoid someone else to move that call
>>>>>> earlier at some point
>>>>>>> GET_HW_INFO. This means vfio_device_hiod_realize() is called in
>>>>>>> different places within the backend .attach_device() implementation.
>>>>>>>
>>>>>>> Suggested-by: Cédric Le Goater <clg@redhat.cm>
>>>>>>> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
>>>>>>> Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>>>>>>> ---
>>>>>>> include/hw/vfio/vfio-common.h | 1 +
>>>>>>> hw/vfio/common.c | 16 ++++++----------
>>>>>>> hw/vfio/container.c | 4 ++++
>>>>>>> hw/vfio/helpers.c | 11 +++++++++++
>>>>>>> hw/vfio/iommufd.c | 4 ++++
>>>>>>> 5 files changed, 26 insertions(+), 10 deletions(-)
>>>>>>>
>>>>>>> diff --git a/include/hw/vfio/vfio-common.h
>>>>>>> b/include/hw/vfio/vfio-common.h
>>>>>>> index 1a96678f8c38..4e44b26d3c45 100644
>>>>>>> --- a/include/hw/vfio/vfio-common.h
>>>>>>> +++ b/include/hw/vfio/vfio-common.h
>>>>>>> @@ -242,6 +242,7 @@ void vfio_region_finalize(VFIORegion *region);
>>>>>>> void vfio_reset_handler(void *opaque);
>>>>>>> struct vfio_device_info *vfio_get_device_info(int fd);
>>>>>>> bool vfio_device_is_mdev(VFIODevice *vbasedev);
>>>>>>> +bool vfio_device_hiod_realize(VFIODevice *vbasedev, Error **errp);
>>>>>>> bool vfio_attach_device(char *name, VFIODevice *vbasedev,
>>>>>>> AddressSpace *as, Error **errp);
>>>>>>> void vfio_detach_device(VFIODevice *vbasedev);
>>>>>>> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
>>>>>>> index 784e266e6aab..da12cbd56408 100644
>>>>>>> --- a/hw/vfio/common.c
>>>>>>> +++ b/hw/vfio/common.c
>>>>>>> @@ -1537,7 +1537,7 @@ bool vfio_attach_device(char *name,
>>> VFIODevice
>>>>>>> *vbasedev,
>>>>>>> {
>>>>>>> const VFIOIOMMUClass *ops =
>>>>>>>
>>>>>>>
>>> VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_LEGACY));
>>>>>>> - HostIOMMUDevice *hiod;
>>>>>>> + HostIOMMUDevice *hiod = NULL;
>>>>>>> if (vbasedev->iommufd) {
>>>>>>> ops =
>>>>>>>
>>> VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUF
>>> D));
>>>>>>> @@ -1545,21 +1545,17 @@ bool vfio_attach_device(char *name,
>>>>>>> VFIODevice *vbasedev,
>>>>>>> assert(ops);
>>>>>>> - if (!ops->attach_device(name, vbasedev, as, errp)) {
>>>>>>> - return false;
>>>>>>> - }
>>>>>>> - if (vbasedev->mdev) {
>>>>>>> - return true;
>>>>>>> + if (!vbasedev->mdev) {
>>>>>>> + hiod = HOST_IOMMU_DEVICE(object_new(ops-
>>>> hiod_typename));
>>>>>>> + vbasedev->hiod = hiod;
>>>>>>> }
>>>>>>> - hiod = HOST_IOMMU_DEVICE(object_new(ops->hiod_typename));
>>>>>>> - if (!HOST_IOMMU_DEVICE_GET_CLASS(hiod)->realize(hiod,
>>> vbasedev,
>>>>>>> errp)) {
>>>>>>> + if (!ops->attach_device(name, vbasedev, as, errp)) {
>>>>>>> object_unref(hiod);
>>>>>>> - ops->detach_device(vbasedev);
>>>>>>> + vbasedev->hiod = NULL;
>>>>>>> return false;
>>>>>>> }
>>>>>>> - vbasedev->hiod = hiod;
>>>>>>> return true;
>>>>>>> }
>>>>>>> diff --git a/hw/vfio/container.c b/hw/vfio/container.c
>>>>>>> index 10cb4b4320ac..9ccdb639ac84 100644
>>>>>>> --- a/hw/vfio/container.c
>>>>>>> +++ b/hw/vfio/container.c
>>>>>>> @@ -914,6 +914,10 @@ static bool vfio_legacy_attach_device(const
>>>>>>> char *name, VFIODevice *vbasedev,
>>>>>>> trace_vfio_attach_device(vbasedev->name, groupid);
>>>>>>> + if (!vfio_device_hiod_realize(vbasedev, errp)) {
>>>>>>> + return false;
>>>>>> don't you want to go to err_alloc_ioas instead?
>>>>> hmm, the err_alloc_ioas label is in a different function
>>>>> iommufd_cdev_attach().
>>>>>
>>>>> may be you meant the comment for routine iommufd_cdev_attach() and
>>>>> label err_connect_bind ?
>>>>>
>>>>>
>>>>> Thanks,
>>>>>
>>>>> C.
>>>>>
>>>>>
>>>>>>> + }
>>>>>>> +
>>>>>>> group = vfio_get_group(groupid, as, errp);
>>>>>>> if (!group) {
>>>>>>> return false;
>>>>>>> diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c
>>>>>>> index 7e23e9080c9d..ea15c79db0a3 100644
>>>>>>> --- a/hw/vfio/helpers.c
>>>>>>> +++ b/hw/vfio/helpers.c
>>>>>>> @@ -689,3 +689,14 @@ bool vfio_device_is_mdev(VFIODevice
>>> *vbasedev)
>>>>>>> subsys = realpath(tmp, NULL);
>>>>>>> return subsys && (strcmp(subsys, "/sys/bus/mdev") == 0);
>>>>>>> }
>>>>>>> +
>>>>>>> +bool vfio_device_hiod_realize(VFIODevice *vbasedev, Error **errp)
>>>>>>> +{
>>>>>>> + HostIOMMUDevice *hiod = vbasedev->hiod;
>>>>>>> +
>>>>>>> + if (!hiod) {
>>>>>>> + return true;
>>>>>>> + }
>>>>>>> +
>>>>>>> + return HOST_IOMMU_DEVICE_GET_CLASS(hiod)->realize(hiod,
>>>>>>> vbasedev, errp);
>>>>>>> +}
>>>>>>> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
>>>>>>> index 5e2fc1ce089d..2324bf892c56 100644
>>>>>>> --- a/hw/vfio/iommufd.c
>>>>>>> +++ b/hw/vfio/iommufd.c
>>>>>>> @@ -403,6 +403,10 @@ static bool iommufd_cdev_attach(const char
>>>>>>> *name, VFIODevice *vbasedev,
>>>>>>> space = vfio_get_address_space(as);
>>>>>>> + if (!vfio_device_hiod_realize(vbasedev, errp)) {
>>>>>>> + return false;
>>>> Hum sorry my previous comment was targetting that place. I think
>>>> unrolling is needed up to put_address_space
>>>>
>>>> so effectively this does not match err_alloc_ioas but I guess we would
>>>> need another label
>>>>
>>> You're right. We haven't yet attached the device and that's what
>>> err_alloc_ioas
>>> would do. Adding another label not sure would make things cleaner given
>>> the
>>> ordering requirement. So maybe this instead?
>>>
>>> @@ -482,7 +483,8 @@ static bool iommufd_cdev_attach(const char *name,
>>> VFIODevice
>>> *vbasedev,
>>> space = vfio_get_address_space(as);
>>>
>>> if (!vfio_device_hiod_realize(vbasedev, errp)) {
>>> - return false;
>>> + vfio_put_address_space(space);
>>> + goto err_connect_bind;
>>> }
>>>
>>> /* try to attach to an existing container in this space */
>> I was confused though Cedric and Eric both ACK this change. Don't we miss the iommufd_cdev_unbind_and_disconnect() call?
> Hum yes you're right. connect and bind was done. I thought this was done
> later. so err_alloc_ioas label looks good
>
It seems you were right the first time.
I definitely haven't got coffee yet.
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [PATCH v6 4/9] vfio/{iommufd,container}: Invoke HostIOMMUDevice::realize() during attach_device()
2024-07-23 7:38 ` [PATCH v6 4/9] vfio/{iommufd,container}: " Eric Auger
2024-07-23 7:44 ` Cédric Le Goater
@ 2024-07-23 7:53 ` Joao Martins
2024-07-23 8:00 ` Cédric Le Goater
1 sibling, 1 reply; 51+ messages in thread
From: Joao Martins @ 2024-07-23 7:53 UTC (permalink / raw)
To: Cedric Le Goater, eric.auger, qemu-devel
Cc: Yi Liu, Zhenzhong Duan, Alex Williamson, Jason Gunthorpe,
Avihai Horon
On 23/07/2024 08:38, Eric Auger wrote:
> Hi Joao,
>
> On 7/22/24 23:13, Joao Martins wrote:
>> Move the HostIOMMUDevice::realize() to be invoked during the attach of the device
>> before we allocate IOMMUFD hardware pagetable objects (HWPT). This allows the use
>> of the hw_caps obtained by IOMMU_GET_HW_INFO that essentially tell if the IOMMU
>> behind the device supports dirty tracking.
>>
>> Note: The HostIOMMUDevice data from legacy backend is static and doesn't
>> need any information from the (type1-iommu) backend to be initialized.
>> In contrast however, the IOMMUFD HostIOMMUDevice data requires the
>> iommufd FD to be connected and having a devid to be able to successfully
> Nit: maybe this comment shall be also added in iommufd.c before the call
> to vfio_device_hiod_realize() to avoid someone else to move that call
> earlier at some point
>> GET_HW_INFO. This means vfio_device_hiod_realize() is called in
>> different places within the backend .attach_device() implementation.
>>
>> Suggested-by: Cédric Le Goater <clg@redhat.cm>
>> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
>> Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>> ---
>> include/hw/vfio/vfio-common.h | 1 +
>> hw/vfio/common.c | 16 ++++++----------
>> hw/vfio/container.c | 4 ++++
>> hw/vfio/helpers.c | 11 +++++++++++
>> hw/vfio/iommufd.c | 4 ++++
>> 5 files changed, 26 insertions(+), 10 deletions(-)
>>
>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
>> index 1a96678f8c38..4e44b26d3c45 100644
>> --- a/include/hw/vfio/vfio-common.h
>> +++ b/include/hw/vfio/vfio-common.h
>> @@ -242,6 +242,7 @@ void vfio_region_finalize(VFIORegion *region);
>> void vfio_reset_handler(void *opaque);
>> struct vfio_device_info *vfio_get_device_info(int fd);
>> bool vfio_device_is_mdev(VFIODevice *vbasedev);
>> +bool vfio_device_hiod_realize(VFIODevice *vbasedev, Error **errp);
>> bool vfio_attach_device(char *name, VFIODevice *vbasedev,
>> AddressSpace *as, Error **errp);
>> void vfio_detach_device(VFIODevice *vbasedev);
>> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
>> index 784e266e6aab..da12cbd56408 100644
>> --- a/hw/vfio/common.c
>> +++ b/hw/vfio/common.c
>> @@ -1537,7 +1537,7 @@ bool vfio_attach_device(char *name, VFIODevice *vbasedev,
>> {
>> const VFIOIOMMUClass *ops =
>> VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_LEGACY));
>> - HostIOMMUDevice *hiod;
>> + HostIOMMUDevice *hiod = NULL;
>>
>> if (vbasedev->iommufd) {
>> ops = VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD));
>> @@ -1545,21 +1545,17 @@ bool vfio_attach_device(char *name, VFIODevice *vbasedev,
>>
>> assert(ops);
>>
>> - if (!ops->attach_device(name, vbasedev, as, errp)) {
>> - return false;
>> - }
>>
>> - if (vbasedev->mdev) {
>> - return true;
>> + if (!vbasedev->mdev) {
>> + hiod = HOST_IOMMU_DEVICE(object_new(ops->hiod_typename));
>> + vbasedev->hiod = hiod;
>> }
>>
>> - hiod = HOST_IOMMU_DEVICE(object_new(ops->hiod_typename));
>> - if (!HOST_IOMMU_DEVICE_GET_CLASS(hiod)->realize(hiod, vbasedev, errp)) {
>> + if (!ops->attach_device(name, vbasedev, as, errp)) {
>> object_unref(hiod);
>> - ops->detach_device(vbasedev);
>> + vbasedev->hiod = NULL;
>> return false;
>> }
>> - vbasedev->hiod = hiod;
>>
>> return true;
>> }
>> diff --git a/hw/vfio/container.c b/hw/vfio/container.c
>> index 10cb4b4320ac..9ccdb639ac84 100644
>> --- a/hw/vfio/container.c
>> +++ b/hw/vfio/container.c
>> @@ -914,6 +914,10 @@ static bool vfio_legacy_attach_device(const char *name, VFIODevice *vbasedev,
>>
>> trace_vfio_attach_device(vbasedev->name, groupid);
>>
>> + if (!vfio_device_hiod_realize(vbasedev, errp)) {
>> + return false;
> don't you want to go to err_alloc_ioas instead?
Oh, yes, I thought I was doing that, but I am not :( Thanks for catching that.
Your comment is spot on, but in the wrong place:
vfio_legacy_attach_device() can just return false, as it's at the top of the
function, but here (...)
>> + }
>> +
>> group = vfio_get_group(groupid, as, errp);
>> if (!group) {
>> return false;
>> diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c
>> index 7e23e9080c9d..ea15c79db0a3 100644
>> --- a/hw/vfio/helpers.c
>> +++ b/hw/vfio/helpers.c
>> @@ -689,3 +689,14 @@ bool vfio_device_is_mdev(VFIODevice *vbasedev)
>> subsys = realpath(tmp, NULL);
>> return subsys && (strcmp(subsys, "/sys/bus/mdev") == 0);
>> }
>> +
>> +bool vfio_device_hiod_realize(VFIODevice *vbasedev, Error **errp)
>> +{
>> + HostIOMMUDevice *hiod = vbasedev->hiod;
>> +
>> + if (!hiod) {
>> + return true;
>> + }
>> +
>> + return HOST_IOMMU_DEVICE_GET_CLASS(hiod)->realize(hiod, vbasedev, errp);
>> +}
>> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
>> index 5e2fc1ce089d..2324bf892c56 100644
>> --- a/hw/vfio/iommufd.c
>> +++ b/hw/vfio/iommufd.c
>> @@ -403,6 +403,10 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
>>
>> space = vfio_get_address_space(as);
>>
>> + if (!vfio_device_hiod_realize(vbasedev, errp)) {
>> + return false;
>> + }
>> +
(...) we definitely need a goto err_alloc_ioas here.
Snip below:
@@ -482,7 +483,7 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice
*vbasedev,
space = vfio_get_address_space(as);
if (!vfio_device_hiod_realize(vbasedev, errp)) {
- return false;
+ goto err_alloc_ioas;
}
/* try to attach to an existing container in this space */
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [PATCH v6 4/9] vfio/{iommufd,container}: Invoke HostIOMMUDevice::realize() during attach_device()
2024-07-23 7:53 ` Joao Martins
@ 2024-07-23 8:00 ` Cédric Le Goater
0 siblings, 0 replies; 51+ messages in thread
From: Cédric Le Goater @ 2024-07-23 8:00 UTC (permalink / raw)
To: Joao Martins, eric.auger, qemu-devel
Cc: Yi Liu, Zhenzhong Duan, Alex Williamson, Jason Gunthorpe,
Avihai Horon
On 7/23/24 09:53, Joao Martins wrote:
> On 23/07/2024 08:38, Eric Auger wrote:
>> Hi Joao,
>>
>> On 7/22/24 23:13, Joao Martins wrote:
>>> Move the HostIOMMUDevice::realize() to be invoked during the attach of the device
>>> before we allocate IOMMUFD hardware pagetable objects (HWPT). This allows the use
>>> of the hw_caps obtained by IOMMU_GET_HW_INFO that essentially tell if the IOMMU
>>> behind the device supports dirty tracking.
>>>
>>> Note: The HostIOMMUDevice data from legacy backend is static and doesn't
>>> need any information from the (type1-iommu) backend to be initialized.
>>> In contrast however, the IOMMUFD HostIOMMUDevice data requires the
>>> iommufd FD to be connected and having a devid to be able to successfully
>> Nit: maybe this comment should also be added in iommufd.c before the call
>> to vfio_device_hiod_realize(), to prevent someone from moving that call
>> earlier at some point
>>> GET_HW_INFO. This means vfio_device_hiod_realize() is called in
>>> different places within the backend .attach_device() implementation.
>>>
>>> Suggested-by: Cédric Le Goater <clg@redhat.com>
>>> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
>>> Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>>> ---
>>> include/hw/vfio/vfio-common.h | 1 +
>>> hw/vfio/common.c | 16 ++++++----------
>>> hw/vfio/container.c | 4 ++++
>>> hw/vfio/helpers.c | 11 +++++++++++
>>> hw/vfio/iommufd.c | 4 ++++
>>> 5 files changed, 26 insertions(+), 10 deletions(-)
>>>
>>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
>>> index 1a96678f8c38..4e44b26d3c45 100644
>>> --- a/include/hw/vfio/vfio-common.h
>>> +++ b/include/hw/vfio/vfio-common.h
>>> @@ -242,6 +242,7 @@ void vfio_region_finalize(VFIORegion *region);
>>> void vfio_reset_handler(void *opaque);
>>> struct vfio_device_info *vfio_get_device_info(int fd);
>>> bool vfio_device_is_mdev(VFIODevice *vbasedev);
>>> +bool vfio_device_hiod_realize(VFIODevice *vbasedev, Error **errp);
>>> bool vfio_attach_device(char *name, VFIODevice *vbasedev,
>>> AddressSpace *as, Error **errp);
>>> void vfio_detach_device(VFIODevice *vbasedev);
>>> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
>>> index 784e266e6aab..da12cbd56408 100644
>>> --- a/hw/vfio/common.c
>>> +++ b/hw/vfio/common.c
>>> @@ -1537,7 +1537,7 @@ bool vfio_attach_device(char *name, VFIODevice *vbasedev,
>>> {
>>> const VFIOIOMMUClass *ops =
>>> VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_LEGACY));
>>> - HostIOMMUDevice *hiod;
>>> + HostIOMMUDevice *hiod = NULL;
>>>
>>> if (vbasedev->iommufd) {
>>> ops = VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD));
>>> @@ -1545,21 +1545,17 @@ bool vfio_attach_device(char *name, VFIODevice *vbasedev,
>>>
>>> assert(ops);
>>>
>>> - if (!ops->attach_device(name, vbasedev, as, errp)) {
>>> - return false;
>>> - }
>>>
>>> - if (vbasedev->mdev) {
>>> - return true;
>>> + if (!vbasedev->mdev) {
>>> + hiod = HOST_IOMMU_DEVICE(object_new(ops->hiod_typename));
>>> + vbasedev->hiod = hiod;
>>> }
>>>
>>> - hiod = HOST_IOMMU_DEVICE(object_new(ops->hiod_typename));
>>> - if (!HOST_IOMMU_DEVICE_GET_CLASS(hiod)->realize(hiod, vbasedev, errp)) {
>>> + if (!ops->attach_device(name, vbasedev, as, errp)) {
>>> object_unref(hiod);
>>> - ops->detach_device(vbasedev);
>>> + vbasedev->hiod = NULL;
>>> return false;
>>> }
>>> - vbasedev->hiod = hiod;
>>>
>>> return true;
>>> }
>>> diff --git a/hw/vfio/container.c b/hw/vfio/container.c
>>> index 10cb4b4320ac..9ccdb639ac84 100644
>>> --- a/hw/vfio/container.c
>>> +++ b/hw/vfio/container.c
>>> @@ -914,6 +914,10 @@ static bool vfio_legacy_attach_device(const char *name, VFIODevice *vbasedev,
>>>
>>> trace_vfio_attach_device(vbasedev->name, groupid);
>>>
>>> + if (!vfio_device_hiod_realize(vbasedev, errp)) {
>>> + return false;
>> don't you want to go to err_alloc_ioas instead?
>
> Oh, yes, I thought I was doing that, but I am not :( Thanks for catching that
>
> Your comment is spot on but in the wrong place.
>
> vfio_legacy_attach_device() can just return false as it's at the top of the
> function but here (...)
>
>>> + }
>>> +
>>> group = vfio_get_group(groupid, as, errp);
>>> if (!group) {
>>> return false;
>>> diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c
>>> index 7e23e9080c9d..ea15c79db0a3 100644
>>> --- a/hw/vfio/helpers.c
>>> +++ b/hw/vfio/helpers.c
>>> @@ -689,3 +689,14 @@ bool vfio_device_is_mdev(VFIODevice *vbasedev)
>>> subsys = realpath(tmp, NULL);
>>> return subsys && (strcmp(subsys, "/sys/bus/mdev") == 0);
>>> }
>>> +
>>> +bool vfio_device_hiod_realize(VFIODevice *vbasedev, Error **errp)
>>> +{
>>> + HostIOMMUDevice *hiod = vbasedev->hiod;
>>> +
>>> + if (!hiod) {
>>> + return true;
>>> + }
>>> +
>>> + return HOST_IOMMU_DEVICE_GET_CLASS(hiod)->realize(hiod, vbasedev, errp);
>>> +}
>>> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
>>> index 5e2fc1ce089d..2324bf892c56 100644
>>> --- a/hw/vfio/iommufd.c
>>> +++ b/hw/vfio/iommufd.c
>>> @@ -403,6 +403,10 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
>>>
>>> space = vfio_get_address_space(as);
>>>
>>> + if (!vfio_device_hiod_realize(vbasedev, errp)) {
>>> + return false;
>>> + }
>>> +
>
> (...) we definitely need a goto err_alloc_ioas here.
>
> Snip below:
>
> @@ -482,7 +483,7 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice
> *vbasedev,
> space = vfio_get_address_space(as);
>
> if (!vfio_device_hiod_realize(vbasedev, errp)) {
> - return false;
> + goto err_alloc_ioas;
> }
>
> /* try to attach to an existing container in this space */
>
ok. Applied the changes.
Thanks,
C.
^ permalink raw reply [flat|nested] 51+ messages in thread
* [PATCH v6 5/9] vfio/iommufd: Probe and request hwpt dirty tracking capability
2024-07-22 21:13 [PATCH v6 0/9] hw/iommufd: IOMMUFD Dirty Tracking Joao Martins
` (3 preceding siblings ...)
2024-07-22 21:13 ` [PATCH v6 4/9] vfio/{iommufd, container}: Invoke HostIOMMUDevice::realize() during attach_device() Joao Martins via
@ 2024-07-22 21:13 ` Joao Martins
2024-07-23 5:11 ` Duan, Zhenzhong
2024-07-23 7:50 ` Eric Auger
2024-07-22 21:13 ` [PATCH v6 6/9] vfio/iommufd: Implement VFIOIOMMUClass::set_dirty_tracking support Joao Martins
` (5 subsequent siblings)
10 siblings, 2 replies; 51+ messages in thread
From: Joao Martins @ 2024-07-22 21:13 UTC (permalink / raw)
To: qemu-devel
Cc: Yi Liu, Eric Auger, Zhenzhong Duan, Alex Williamson,
Cedric Le Goater, Jason Gunthorpe, Avihai Horon, Joao Martins
In preparation for using the dirty tracking UAPI, probe whether the IOMMU
supports dirty tracking. This is done via the data stored in
hiod::caps::hw_caps initialized from GET_HW_INFO.
QEMU doesn't know if VF dirty tracking is supported when allocating
hardware pagetable in iommufd_cdev_autodomains_get(). This is because
VFIODevice migration state hasn't been initialized *yet* hence it can't pick
between VF dirty tracking vs IOMMU dirty tracking. So, if IOMMU supports
dirty tracking it always creates HWPTs with IOMMU_HWPT_ALLOC_DIRTY_TRACKING
even if later on VFIOMigration decides to use VF dirty tracking instead.
Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
---
include/hw/vfio/vfio-common.h | 2 ++
hw/vfio/iommufd.c | 20 ++++++++++++++++++++
2 files changed, 22 insertions(+)
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 4e44b26d3c45..1e02c98b09ba 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -97,6 +97,7 @@ typedef struct IOMMUFDBackend IOMMUFDBackend;
typedef struct VFIOIOASHwpt {
uint32_t hwpt_id;
+ uint32_t hwpt_flags;
QLIST_HEAD(, VFIODevice) device_list;
QLIST_ENTRY(VFIOIOASHwpt) next;
} VFIOIOASHwpt;
@@ -139,6 +140,7 @@ typedef struct VFIODevice {
OnOffAuto pre_copy_dirty_page_tracking;
bool dirty_pages_supported;
bool dirty_tracking;
+ bool iommu_dirty_tracking;
HostIOMMUDevice *hiod;
int devid;
IOMMUFDBackend *iommufd;
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
index 2324bf892c56..7afea0b041ed 100644
--- a/hw/vfio/iommufd.c
+++ b/hw/vfio/iommufd.c
@@ -110,6 +110,11 @@ static void iommufd_cdev_unbind_and_disconnect(VFIODevice *vbasedev)
iommufd_backend_disconnect(vbasedev->iommufd);
}
+static bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt)
+{
+ return hwpt && hwpt->hwpt_flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
+}
+
static int iommufd_cdev_getfd(const char *sysfs_path, Error **errp)
{
ERRP_GUARD();
@@ -246,6 +251,17 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
}
}
+ /*
+ * This is quite early and VFIO Migration state isn't yet fully
+ * initialized, thus rely only on IOMMU hardware capabilities as to
+ * whether IOMMU dirty tracking is going to be requested. Later
+ * vfio_migration_realize() may decide to use VF dirty tracking
+ * instead.
+ */
+ if (vbasedev->hiod->caps.hw_caps & IOMMU_HW_CAP_DIRTY_TRACKING) {
+ flags = IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
+ }
+
if (!iommufd_backend_alloc_hwpt(iommufd, vbasedev->devid,
container->ioas_id, flags,
IOMMU_HWPT_DATA_NONE, 0, NULL,
@@ -255,6 +271,7 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
hwpt = g_malloc0(sizeof(*hwpt));
hwpt->hwpt_id = hwpt_id;
+ hwpt->hwpt_flags = flags;
QLIST_INIT(&hwpt->device_list);
ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt->hwpt_id, errp);
@@ -265,8 +282,11 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
}
vbasedev->hwpt = hwpt;
+ vbasedev->iommu_dirty_tracking = iommufd_hwpt_dirty_tracking(hwpt);
QLIST_INSERT_HEAD(&hwpt->device_list, vbasedev, hwpt_next);
QLIST_INSERT_HEAD(&container->hwpt_list, hwpt, next);
+ container->bcontainer.dirty_pages_supported |=
+ vbasedev->iommu_dirty_tracking;
return true;
}
--
2.17.2
^ permalink raw reply related [flat|nested] 51+ messages in thread
* RE: [PATCH v6 5/9] vfio/iommufd: Probe and request hwpt dirty tracking capability
2024-07-22 21:13 ` [PATCH v6 5/9] vfio/iommufd: Probe and request hwpt dirty tracking capability Joao Martins
@ 2024-07-23 5:11 ` Duan, Zhenzhong
2024-07-23 6:13 ` Joao Martins
2024-07-23 7:50 ` Eric Auger
1 sibling, 1 reply; 51+ messages in thread
From: Duan, Zhenzhong @ 2024-07-23 5:11 UTC (permalink / raw)
To: Joao Martins, qemu-devel@nongnu.org
Cc: Liu, Yi L, Eric Auger, Alex Williamson, Cedric Le Goater,
Jason Gunthorpe, Avihai Horon
>-----Original Message-----
>From: Joao Martins <joao.m.martins@oracle.com>
>Subject: [PATCH v6 5/9] vfio/iommufd: Probe and request hwpt dirty
>tracking capability
>
>In preparation to using the dirty tracking UAPI, probe whether the IOMMU
>supports dirty tracking. This is done via the data stored in
>hiod::caps::hw_caps initialized from GET_HW_INFO.
>
>Qemu doesn't know if VF dirty tracking is supported when allocating
>hardware pagetable in iommufd_cdev_autodomains_get(). This is because
>VFIODevice migration state hasn't been initialized *yet* hence it can't pick
>between VF dirty tracking vs IOMMU dirty tracking. So, if IOMMU supports
>dirty tracking it always creates HWPTs with
>IOMMU_HWPT_ALLOC_DIRTY_TRACKING
>even if later on VFIOMigration decides to use VF dirty tracking instead.
>
>Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
>---
> include/hw/vfio/vfio-common.h | 2 ++
> hw/vfio/iommufd.c | 20 ++++++++++++++++++++
> 2 files changed, 22 insertions(+)
>
>diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-
>common.h
>index 4e44b26d3c45..1e02c98b09ba 100644
>--- a/include/hw/vfio/vfio-common.h
>+++ b/include/hw/vfio/vfio-common.h
>@@ -97,6 +97,7 @@ typedef struct IOMMUFDBackend IOMMUFDBackend;
>
> typedef struct VFIOIOASHwpt {
> uint32_t hwpt_id;
>+ uint32_t hwpt_flags;
> QLIST_HEAD(, VFIODevice) device_list;
> QLIST_ENTRY(VFIOIOASHwpt) next;
> } VFIOIOASHwpt;
>@@ -139,6 +140,7 @@ typedef struct VFIODevice {
> OnOffAuto pre_copy_dirty_page_tracking;
> bool dirty_pages_supported;
> bool dirty_tracking;
>+ bool iommu_dirty_tracking;
> HostIOMMUDevice *hiod;
> int devid;
> IOMMUFDBackend *iommufd;
>diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
>index 2324bf892c56..7afea0b041ed 100644
>--- a/hw/vfio/iommufd.c
>+++ b/hw/vfio/iommufd.c
>@@ -110,6 +110,11 @@ static void
>iommufd_cdev_unbind_and_disconnect(VFIODevice *vbasedev)
> iommufd_backend_disconnect(vbasedev->iommufd);
> }
>
>+static bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt)
>+{
>+ return hwpt && hwpt->hwpt_flags &
>IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
>+}
>+
> static int iommufd_cdev_getfd(const char *sysfs_path, Error **errp)
> {
> ERRP_GUARD();
>@@ -246,6 +251,17 @@ static bool
>iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
> }
> }
>
>+ /*
>+ * This is quite early and VFIO Migration state isn't yet fully
>+ * initialized, thus rely only on IOMMU hardware capabilities as to
>+ * whether IOMMU dirty tracking is going to be requested. Later
>+ * vfio_migration_realize() may decide to use VF dirty tracking
>+ * instead.
>+ */
>+ if (vbasedev->hiod->caps.hw_caps &
>IOMMU_HW_CAP_DIRTY_TRACKING) {
>+ flags = IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
>+ }
>+
> if (!iommufd_backend_alloc_hwpt(iommufd, vbasedev->devid,
> container->ioas_id, flags,
> IOMMU_HWPT_DATA_NONE, 0, NULL,
>@@ -255,6 +271,7 @@ static bool
>iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
>
> hwpt = g_malloc0(sizeof(*hwpt));
> hwpt->hwpt_id = hwpt_id;
>+ hwpt->hwpt_flags = flags;
> QLIST_INIT(&hwpt->device_list);
>
> ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt->hwpt_id, errp);
>@@ -265,8 +282,11 @@ static bool
>iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
> }
>
> vbasedev->hwpt = hwpt;
>+ vbasedev->iommu_dirty_tracking =
>iommufd_hwpt_dirty_tracking(hwpt);
Don't we need to do the same when attaching to an existing hwpt?
> QLIST_INSERT_HEAD(&hwpt->device_list, vbasedev, hwpt_next);
> QLIST_INSERT_HEAD(&container->hwpt_list, hwpt, next);
>+ container->bcontainer.dirty_pages_supported |=
>+ vbasedev->iommu_dirty_tracking;
> return true;
> }
>
>--
>2.17.2
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [PATCH v6 5/9] vfio/iommufd: Probe and request hwpt dirty tracking capability
2024-07-23 5:11 ` Duan, Zhenzhong
@ 2024-07-23 6:13 ` Joao Martins
2024-07-23 6:57 ` Cédric Le Goater
0 siblings, 1 reply; 51+ messages in thread
From: Joao Martins @ 2024-07-23 6:13 UTC (permalink / raw)
To: Duan, Zhenzhong, qemu-devel@nongnu.org
Cc: Liu, Yi L, Eric Auger, Alex Williamson, Cedric Le Goater,
Jason Gunthorpe, Avihai Horon
On 23/07/2024 06:11, Duan, Zhenzhong wrote:
>
>
>> -----Original Message-----
>> From: Joao Martins <joao.m.martins@oracle.com>
>> Subject: [PATCH v6 5/9] vfio/iommufd: Probe and request hwpt dirty
>> tracking capability
>>
>> In preparation to using the dirty tracking UAPI, probe whether the IOMMU
>> supports dirty tracking. This is done via the data stored in
>> hiod::caps::hw_caps initialized from GET_HW_INFO.
>>
>> Qemu doesn't know if VF dirty tracking is supported when allocating
>> hardware pagetable in iommufd_cdev_autodomains_get(). This is because
>> VFIODevice migration state hasn't been initialized *yet* hence it can't pick
>> between VF dirty tracking vs IOMMU dirty tracking. So, if IOMMU supports
>> dirty tracking it always creates HWPTs with
>> IOMMU_HWPT_ALLOC_DIRTY_TRACKING
>> even if later on VFIOMigration decides to use VF dirty tracking instead.
>>
>> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
>> ---
>> include/hw/vfio/vfio-common.h | 2 ++
>> hw/vfio/iommufd.c | 20 ++++++++++++++++++++
>> 2 files changed, 22 insertions(+)
>>
>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-
>> common.h
>> index 4e44b26d3c45..1e02c98b09ba 100644
>> --- a/include/hw/vfio/vfio-common.h
>> +++ b/include/hw/vfio/vfio-common.h
>> @@ -97,6 +97,7 @@ typedef struct IOMMUFDBackend IOMMUFDBackend;
>>
>> typedef struct VFIOIOASHwpt {
>> uint32_t hwpt_id;
>> + uint32_t hwpt_flags;
>> QLIST_HEAD(, VFIODevice) device_list;
>> QLIST_ENTRY(VFIOIOASHwpt) next;
>> } VFIOIOASHwpt;
>> @@ -139,6 +140,7 @@ typedef struct VFIODevice {
>> OnOffAuto pre_copy_dirty_page_tracking;
>> bool dirty_pages_supported;
>> bool dirty_tracking;
>> + bool iommu_dirty_tracking;
>> HostIOMMUDevice *hiod;
>> int devid;
>> IOMMUFDBackend *iommufd;
>> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
>> index 2324bf892c56..7afea0b041ed 100644
>> --- a/hw/vfio/iommufd.c
>> +++ b/hw/vfio/iommufd.c
>> @@ -110,6 +110,11 @@ static void
>> iommufd_cdev_unbind_and_disconnect(VFIODevice *vbasedev)
>> iommufd_backend_disconnect(vbasedev->iommufd);
>> }
>>
>> +static bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt)
>> +{
>> + return hwpt && hwpt->hwpt_flags &
>> IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
>> +}
>> +
>> static int iommufd_cdev_getfd(const char *sysfs_path, Error **errp)
>> {
>> ERRP_GUARD();
>> @@ -246,6 +251,17 @@ static bool
>> iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
>> }
>> }
>>
>> + /*
>> + * This is quite early and VFIO Migration state isn't yet fully
>> + * initialized, thus rely only on IOMMU hardware capabilities as to
>> + * whether IOMMU dirty tracking is going to be requested. Later
>> + * vfio_migration_realize() may decide to use VF dirty tracking
>> + * instead.
>> + */
>> + if (vbasedev->hiod->caps.hw_caps &
>> IOMMU_HW_CAP_DIRTY_TRACKING) {
>> + flags = IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
>> + }
>> +
>> if (!iommufd_backend_alloc_hwpt(iommufd, vbasedev->devid,
>> container->ioas_id, flags,
>> IOMMU_HWPT_DATA_NONE, 0, NULL,
>> @@ -255,6 +271,7 @@ static bool
>> iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
>>
>> hwpt = g_malloc0(sizeof(*hwpt));
>> hwpt->hwpt_id = hwpt_id;
>> + hwpt->hwpt_flags = flags;
>> QLIST_INIT(&hwpt->device_list);
>>
>> ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt->hwpt_id, errp);
>> @@ -265,8 +282,11 @@ static bool
>> iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
>> }
>>
>> vbasedev->hwpt = hwpt;
>> + vbasedev->iommu_dirty_tracking =
>> iommufd_hwpt_dirty_tracking(hwpt);
>
> Don't we need to do same if attach to existing hwpt?
>
Nice catch!
Yes, we do need it, e.g. we will need this fix-up for this patch:
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
index 92b976464283..833a7400486c 100644
--- a/hw/vfio/iommufd.c
+++ b/hw/vfio/iommufd.c
@@ -305,6 +305,7 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
} else {
vbasedev->hwpt = hwpt;
QLIST_INSERT_HEAD(&hwpt->device_list, vbasedev, hwpt_next);
+ vbasedev->iommu_dirty_tracking = iommufd_hwpt_dirty_tracking(hwpt);
return true;
}
}
^ permalink raw reply related [flat|nested] 51+ messages in thread
* Re: [PATCH v6 5/9] vfio/iommufd: Probe and request hwpt dirty tracking capability
2024-07-23 6:13 ` Joao Martins
@ 2024-07-23 6:57 ` Cédric Le Goater
2024-07-23 7:02 ` Duan, Zhenzhong
0 siblings, 1 reply; 51+ messages in thread
From: Cédric Le Goater @ 2024-07-23 6:57 UTC (permalink / raw)
To: Joao Martins, Duan, Zhenzhong, qemu-devel@nongnu.org
Cc: Liu, Yi L, Eric Auger, Alex Williamson, Jason Gunthorpe,
Avihai Horon
On 7/23/24 08:13, Joao Martins wrote:
> On 23/07/2024 06:11, Duan, Zhenzhong wrote:
>>
>>
>>> -----Original Message-----
>>> From: Joao Martins <joao.m.martins@oracle.com>
>>> Subject: [PATCH v6 5/9] vfio/iommufd: Probe and request hwpt dirty
>>> tracking capability
>>>
>>> In preparation to using the dirty tracking UAPI, probe whether the IOMMU
>>> supports dirty tracking. This is done via the data stored in
>>> hiod::caps::hw_caps initialized from GET_HW_INFO.
>>>
>>> Qemu doesn't know if VF dirty tracking is supported when allocating
>>> hardware pagetable in iommufd_cdev_autodomains_get(). This is because
>>> VFIODevice migration state hasn't been initialized *yet* hence it can't pick
>>> between VF dirty tracking vs IOMMU dirty tracking. So, if IOMMU supports
>>> dirty tracking it always creates HWPTs with
>>> IOMMU_HWPT_ALLOC_DIRTY_TRACKING
>>> even if later on VFIOMigration decides to use VF dirty tracking instead.
>>>
>>> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
>>> ---
>>> include/hw/vfio/vfio-common.h | 2 ++
>>> hw/vfio/iommufd.c | 20 ++++++++++++++++++++
>>> 2 files changed, 22 insertions(+)
>>>
>>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-
>>> common.h
>>> index 4e44b26d3c45..1e02c98b09ba 100644
>>> --- a/include/hw/vfio/vfio-common.h
>>> +++ b/include/hw/vfio/vfio-common.h
>>> @@ -97,6 +97,7 @@ typedef struct IOMMUFDBackend IOMMUFDBackend;
>>>
>>> typedef struct VFIOIOASHwpt {
>>> uint32_t hwpt_id;
>>> + uint32_t hwpt_flags;
>>> QLIST_HEAD(, VFIODevice) device_list;
>>> QLIST_ENTRY(VFIOIOASHwpt) next;
>>> } VFIOIOASHwpt;
>>> @@ -139,6 +140,7 @@ typedef struct VFIODevice {
>>> OnOffAuto pre_copy_dirty_page_tracking;
>>> bool dirty_pages_supported;
>>> bool dirty_tracking;
>>> + bool iommu_dirty_tracking;
>>> HostIOMMUDevice *hiod;
>>> int devid;
>>> IOMMUFDBackend *iommufd;
>>> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
>>> index 2324bf892c56..7afea0b041ed 100644
>>> --- a/hw/vfio/iommufd.c
>>> +++ b/hw/vfio/iommufd.c
>>> @@ -110,6 +110,11 @@ static void
>>> iommufd_cdev_unbind_and_disconnect(VFIODevice *vbasedev)
>>> iommufd_backend_disconnect(vbasedev->iommufd);
>>> }
>>>
>>> +static bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt)
>>> +{
>>> + return hwpt && hwpt->hwpt_flags &
>>> IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
>>> +}
>>> +
>>> static int iommufd_cdev_getfd(const char *sysfs_path, Error **errp)
>>> {
>>> ERRP_GUARD();
>>> @@ -246,6 +251,17 @@ static bool
>>> iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
>>> }
>>> }
>>>
>>> + /*
>>> + * This is quite early and VFIO Migration state isn't yet fully
>>> + * initialized, thus rely only on IOMMU hardware capabilities as to
>>> + * whether IOMMU dirty tracking is going to be requested. Later
>>> + * vfio_migration_realize() may decide to use VF dirty tracking
>>> + * instead.
>>> + */
>>> + if (vbasedev->hiod->caps.hw_caps &
>>> IOMMU_HW_CAP_DIRTY_TRACKING) {
>>> + flags = IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
>>> + }
>>> +
>>> if (!iommufd_backend_alloc_hwpt(iommufd, vbasedev->devid,
>>> container->ioas_id, flags,
>>> IOMMU_HWPT_DATA_NONE, 0, NULL,
>>> @@ -255,6 +271,7 @@ static bool
>>> iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
>>>
>>> hwpt = g_malloc0(sizeof(*hwpt));
>>> hwpt->hwpt_id = hwpt_id;
>>> + hwpt->hwpt_flags = flags;
>>> QLIST_INIT(&hwpt->device_list);
>>>
>>> ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt->hwpt_id, errp);
>>> @@ -265,8 +282,11 @@ static bool
>>> iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
>>> }
>>>
>>> vbasedev->hwpt = hwpt;
>>> + vbasedev->iommu_dirty_tracking =
>>> iommufd_hwpt_dirty_tracking(hwpt);
>>
>> Don't we need to do same if attach to existing hwpt?
>>
>
> Nice catch!
>
> Yes, we do need it e.g. we will need this fix up fo this patch
Fixed on vfio-9.1.
Thanks,
C.
>
> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
> index 92b976464283..833a7400486c 100644
> --- a/hw/vfio/iommufd.c
> +++ b/hw/vfio/iommufd.c
> @@ -305,6 +305,7 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
> } else {
> vbasedev->hwpt = hwpt;
> QLIST_INSERT_HEAD(&hwpt->device_list, vbasedev, hwpt_next);
> + vbasedev->iommu_dirty_tracking = iommufd_hwpt_dirty_tracking(hwpt);
> return true;
> }
> }
>
^ permalink raw reply [flat|nested] 51+ messages in thread
* RE: [PATCH v6 5/9] vfio/iommufd: Probe and request hwpt dirty tracking capability
2024-07-23 6:57 ` Cédric Le Goater
@ 2024-07-23 7:02 ` Duan, Zhenzhong
0 siblings, 0 replies; 51+ messages in thread
From: Duan, Zhenzhong @ 2024-07-23 7:02 UTC (permalink / raw)
To: Cédric Le Goater, Joao Martins, qemu-devel@nongnu.org
Cc: Liu, Yi L, Eric Auger, Alex Williamson, Jason Gunthorpe,
Avihai Horon
>-----Original Message-----
>From: Cédric Le Goater <clg@redhat.com>
>Subject: Re: [PATCH v6 5/9] vfio/iommufd: Probe and request hwpt dirty
>tracking capability
>
>On 7/23/24 08:13, Joao Martins wrote:
>> On 23/07/2024 06:11, Duan, Zhenzhong wrote:
>>>
>>>
>>>> -----Original Message-----
>>>> From: Joao Martins <joao.m.martins@oracle.com>
>>>> Subject: [PATCH v6 5/9] vfio/iommufd: Probe and request hwpt dirty
>>>> tracking capability
>>>>
>>>> In preparation to using the dirty tracking UAPI, probe whether the
>IOMMU
>>>> supports dirty tracking. This is done via the data stored in
>>>> hiod::caps::hw_caps initialized from GET_HW_INFO.
>>>>
>>>> Qemu doesn't know if VF dirty tracking is supported when allocating
>>>> hardware pagetable in iommufd_cdev_autodomains_get(). This is
>because
>>>> VFIODevice migration state hasn't been initialized *yet* hence it can't
>pick
>>>> between VF dirty tracking vs IOMMU dirty tracking. So, if IOMMU
>supports
>>>> dirty tracking it always creates HWPTs with
>>>> IOMMU_HWPT_ALLOC_DIRTY_TRACKING
>>>> even if later on VFIOMigration decides to use VF dirty tracking instead.
>>>>
>>>> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
>>>> ---
>>>> include/hw/vfio/vfio-common.h | 2 ++
>>>> hw/vfio/iommufd.c | 20 ++++++++++++++++++++
>>>> 2 files changed, 22 insertions(+)
>>>>
>>>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-
>>>> common.h
>>>> index 4e44b26d3c45..1e02c98b09ba 100644
>>>> --- a/include/hw/vfio/vfio-common.h
>>>> +++ b/include/hw/vfio/vfio-common.h
>>>> @@ -97,6 +97,7 @@ typedef struct IOMMUFDBackend
>IOMMUFDBackend;
>>>>
>>>> typedef struct VFIOIOASHwpt {
>>>> uint32_t hwpt_id;
>>>> + uint32_t hwpt_flags;
>>>> QLIST_HEAD(, VFIODevice) device_list;
>>>> QLIST_ENTRY(VFIOIOASHwpt) next;
>>>> } VFIOIOASHwpt;
>>>> @@ -139,6 +140,7 @@ typedef struct VFIODevice {
>>>> OnOffAuto pre_copy_dirty_page_tracking;
>>>> bool dirty_pages_supported;
>>>> bool dirty_tracking;
>>>> + bool iommu_dirty_tracking;
>>>> HostIOMMUDevice *hiod;
>>>> int devid;
>>>> IOMMUFDBackend *iommufd;
>>>> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
>>>> index 2324bf892c56..7afea0b041ed 100644
>>>> --- a/hw/vfio/iommufd.c
>>>> +++ b/hw/vfio/iommufd.c
>>>> @@ -110,6 +110,11 @@ static void
>>>> iommufd_cdev_unbind_and_disconnect(VFIODevice *vbasedev)
>>>> iommufd_backend_disconnect(vbasedev->iommufd);
>>>> }
>>>>
>>>> +static bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt)
>>>> +{
>>>> + return hwpt && hwpt->hwpt_flags &
>>>> IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
>>>> +}
>>>> +
>>>> static int iommufd_cdev_getfd(const char *sysfs_path, Error **errp)
>>>> {
>>>> ERRP_GUARD();
>>>> @@ -246,6 +251,17 @@ static bool
>>>> iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
>>>> }
>>>> }
>>>>
>>>> + /*
>>>> + * This is quite early and VFIO Migration state isn't yet fully
>>>> + * initialized, thus rely only on IOMMU hardware capabilities as to
>>>> + * whether IOMMU dirty tracking is going to be requested. Later
>>>> + * vfio_migration_realize() may decide to use VF dirty tracking
>>>> + * instead.
>>>> + */
>>>> + if (vbasedev->hiod->caps.hw_caps &
>>>> IOMMU_HW_CAP_DIRTY_TRACKING) {
>>>> + flags = IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
>>>> + }
>>>> +
>>>> if (!iommufd_backend_alloc_hwpt(iommufd, vbasedev->devid,
>>>> container->ioas_id, flags,
>>>> IOMMU_HWPT_DATA_NONE, 0, NULL,
>>>> @@ -255,6 +271,7 @@ static bool
>>>> iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
>>>>
>>>> hwpt = g_malloc0(sizeof(*hwpt));
>>>> hwpt->hwpt_id = hwpt_id;
>>>> + hwpt->hwpt_flags = flags;
>>>> QLIST_INIT(&hwpt->device_list);
>>>>
>>>> ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt->hwpt_id,
>errp);
>>>> @@ -265,8 +282,11 @@ static bool
>>>> iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
>>>> }
>>>>
>>>> vbasedev->hwpt = hwpt;
>>>> + vbasedev->iommu_dirty_tracking =
>>>> iommufd_hwpt_dirty_tracking(hwpt);
>>>
>>> Don't we need to do same if attach to existing hwpt?
>>>
>>
>> Nice catch!
>>
>> Yes, we do need it e.g. we will need this fix up fo this patch
>
>
>Fixed on vfio-9.1.
Feel free to add my RB,
Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Thanks
Zhenzhong
>
>Thanks,
>
>C.
>
>
>
>>
>> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
>> index 92b976464283..833a7400486c 100644
>> --- a/hw/vfio/iommufd.c
>> +++ b/hw/vfio/iommufd.c
>> @@ -305,6 +305,7 @@ static bool
>iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
>> } else {
>> vbasedev->hwpt = hwpt;
>> QLIST_INSERT_HEAD(&hwpt->device_list, vbasedev, hwpt_next);
>> + vbasedev->iommu_dirty_tracking =
>iommufd_hwpt_dirty_tracking(hwpt);
>> return true;
>> }
>> }
>>
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [PATCH v6 5/9] vfio/iommufd: Probe and request hwpt dirty tracking capability
2024-07-22 21:13 ` [PATCH v6 5/9] vfio/iommufd: Probe and request hwpt dirty tracking capability Joao Martins
2024-07-23 5:11 ` Duan, Zhenzhong
@ 2024-07-23 7:50 ` Eric Auger
2024-07-23 8:00 ` Joao Martins
1 sibling, 1 reply; 51+ messages in thread
From: Eric Auger @ 2024-07-23 7:50 UTC (permalink / raw)
To: Joao Martins, qemu-devel
Cc: Yi Liu, Zhenzhong Duan, Alex Williamson, Cedric Le Goater,
Jason Gunthorpe, Avihai Horon
Hi Joao,
On 7/22/24 23:13, Joao Martins wrote:
> In preparation to using the dirty tracking UAPI, probe whether the IOMMU
> supports dirty tracking. This is done via the data stored in
> hiod::caps::hw_caps initialized from GET_HW_INFO.
>
> Qemu doesn't know if VF dirty tracking is supported when allocating
> hardware pagetable in iommufd_cdev_autodomains_get(). This is because
> VFIODevice migration state hasn't been initialized *yet* hence it can't pick
> between VF dirty tracking vs IOMMU dirty tracking. So, if IOMMU supports
> dirty tracking it always creates HWPTs with IOMMU_HWPT_ALLOC_DIRTY_TRACKING
> even if later on VFIOMigration decides to use VF dirty tracking instead.
>
> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
> ---
> include/hw/vfio/vfio-common.h | 2 ++
> hw/vfio/iommufd.c | 20 ++++++++++++++++++++
> 2 files changed, 22 insertions(+)
>
> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
> index 4e44b26d3c45..1e02c98b09ba 100644
> --- a/include/hw/vfio/vfio-common.h
> +++ b/include/hw/vfio/vfio-common.h
> @@ -97,6 +97,7 @@ typedef struct IOMMUFDBackend IOMMUFDBackend;
>
> typedef struct VFIOIOASHwpt {
> uint32_t hwpt_id;
> + uint32_t hwpt_flags;
> QLIST_HEAD(, VFIODevice) device_list;
> QLIST_ENTRY(VFIOIOASHwpt) next;
> } VFIOIOASHwpt;
> @@ -139,6 +140,7 @@ typedef struct VFIODevice {
> OnOffAuto pre_copy_dirty_page_tracking;
> bool dirty_pages_supported;
> bool dirty_tracking;
> + bool iommu_dirty_tracking;
> HostIOMMUDevice *hiod;
> int devid;
> IOMMUFDBackend *iommufd;
> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
> index 2324bf892c56..7afea0b041ed 100644
> --- a/hw/vfio/iommufd.c
> +++ b/hw/vfio/iommufd.c
> @@ -110,6 +110,11 @@ static void iommufd_cdev_unbind_and_disconnect(VFIODevice *vbasedev)
> iommufd_backend_disconnect(vbasedev->iommufd);
> }
>
> +static bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt)
> +{
> + return hwpt && hwpt->hwpt_flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
> +}
> +
> static int iommufd_cdev_getfd(const char *sysfs_path, Error **errp)
> {
> ERRP_GUARD();
> @@ -246,6 +251,17 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
> }
> }
>
> + /*
> + * This is quite early and VFIO Migration state isn't yet fully
> + * initialized, thus rely only on IOMMU hardware capabilities as to
> + * whether IOMMU dirty tracking is going to be requested. Later
> + * vfio_migration_realize() may decide to use VF dirty tracking
> + * instead.
> + */
> + if (vbasedev->hiod->caps.hw_caps & IOMMU_HW_CAP_DIRTY_TRACKING) {
> + flags = IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
> + }
> +
> if (!iommufd_backend_alloc_hwpt(iommufd, vbasedev->devid,
> container->ioas_id, flags,
> IOMMU_HWPT_DATA_NONE, 0, NULL,
> @@ -255,6 +271,7 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
>
> hwpt = g_malloc0(sizeof(*hwpt));
> hwpt->hwpt_id = hwpt_id;
> + hwpt->hwpt_flags = flags;
> QLIST_INIT(&hwpt->device_list);
>
> ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt->hwpt_id, errp);
> @@ -265,8 +282,11 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
> }
>
> vbasedev->hwpt = hwpt;
> + vbasedev->iommu_dirty_tracking = iommufd_hwpt_dirty_tracking(hwpt);
> QLIST_INSERT_HEAD(&hwpt->device_list, vbasedev, hwpt_next);
> QLIST_INSERT_HEAD(&container->hwpt_list, hwpt, next);
> + container->bcontainer.dirty_pages_supported |=
> + vbasedev->iommu_dirty_tracking;
Is it possible to have several devices with different
iommu_dirty_tracking values in the same container? In other words, would they be attached to different containers/ioas?
Eric
> return true;
> }
>
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [PATCH v6 5/9] vfio/iommufd: Probe and request hwpt dirty tracking capability
2024-07-23 7:50 ` Eric Auger
@ 2024-07-23 8:00 ` Joao Martins
2024-07-23 8:09 ` Eric Auger
0 siblings, 1 reply; 51+ messages in thread
From: Joao Martins @ 2024-07-23 8:00 UTC (permalink / raw)
To: eric.auger, qemu-devel
Cc: Yi Liu, Zhenzhong Duan, Alex Williamson, Cedric Le Goater,
Jason Gunthorpe, Avihai Horon
On 23/07/2024 08:50, Eric Auger wrote:
> Hi Joao,
>
> On 7/22/24 23:13, Joao Martins wrote:
>> In preparation to using the dirty tracking UAPI, probe whether the IOMMU
>> supports dirty tracking. This is done via the data stored in
>> hiod::caps::hw_caps initialized from GET_HW_INFO.
>>
>> Qemu doesn't know if VF dirty tracking is supported when allocating
>> hardware pagetable in iommufd_cdev_autodomains_get(). This is because
>> VFIODevice migration state hasn't been initialized *yet* hence it can't pick
>> between VF dirty tracking vs IOMMU dirty tracking. So, if IOMMU supports
>> dirty tracking it always creates HWPTs with IOMMU_HWPT_ALLOC_DIRTY_TRACKING
>> even if later on VFIOMigration decides to use VF dirty tracking instead.
>>
>> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
>> ---
>> include/hw/vfio/vfio-common.h | 2 ++
>> hw/vfio/iommufd.c | 20 ++++++++++++++++++++
>> 2 files changed, 22 insertions(+)
>>
>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
>> index 4e44b26d3c45..1e02c98b09ba 100644
>> --- a/include/hw/vfio/vfio-common.h
>> +++ b/include/hw/vfio/vfio-common.h
>> @@ -97,6 +97,7 @@ typedef struct IOMMUFDBackend IOMMUFDBackend;
>>
>> typedef struct VFIOIOASHwpt {
>> uint32_t hwpt_id;
>> + uint32_t hwpt_flags;
>> QLIST_HEAD(, VFIODevice) device_list;
>> QLIST_ENTRY(VFIOIOASHwpt) next;
>> } VFIOIOASHwpt;
>> @@ -139,6 +140,7 @@ typedef struct VFIODevice {
>> OnOffAuto pre_copy_dirty_page_tracking;
>> bool dirty_pages_supported;
>> bool dirty_tracking;
>> + bool iommu_dirty_tracking;
>> HostIOMMUDevice *hiod;
>> int devid;
>> IOMMUFDBackend *iommufd;
>> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
>> index 2324bf892c56..7afea0b041ed 100644
>> --- a/hw/vfio/iommufd.c
>> +++ b/hw/vfio/iommufd.c
>> @@ -110,6 +110,11 @@ static void iommufd_cdev_unbind_and_disconnect(VFIODevice *vbasedev)
>> iommufd_backend_disconnect(vbasedev->iommufd);
>> }
>>
>> +static bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt)
>> +{
>> + return hwpt && hwpt->hwpt_flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
>> +}
>> +
>> static int iommufd_cdev_getfd(const char *sysfs_path, Error **errp)
>> {
>> ERRP_GUARD();
>> @@ -246,6 +251,17 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
>> }
>> }
>>
>> + /*
>> + * This is quite early and VFIO Migration state isn't yet fully
>> + * initialized, thus rely only on IOMMU hardware capabilities as to
>> + * whether IOMMU dirty tracking is going to be requested. Later
>> + * vfio_migration_realize() may decide to use VF dirty tracking
>> + * instead.
>> + */
>> + if (vbasedev->hiod->caps.hw_caps & IOMMU_HW_CAP_DIRTY_TRACKING) {
>> + flags = IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
>> + }
>> +
>> if (!iommufd_backend_alloc_hwpt(iommufd, vbasedev->devid,
>> container->ioas_id, flags,
>> IOMMU_HWPT_DATA_NONE, 0, NULL,
>> @@ -255,6 +271,7 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
>>
>> hwpt = g_malloc0(sizeof(*hwpt));
>> hwpt->hwpt_id = hwpt_id;
>> + hwpt->hwpt_flags = flags;
>> QLIST_INIT(&hwpt->device_list);
>>
>> ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt->hwpt_id, errp);
>> @@ -265,8 +282,11 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
>> }
>>
>> vbasedev->hwpt = hwpt;
>> + vbasedev->iommu_dirty_tracking = iommufd_hwpt_dirty_tracking(hwpt);
>> QLIST_INSERT_HEAD(&hwpt->device_list, vbasedev, hwpt_next);
>> QLIST_INSERT_HEAD(&container->hwpt_list, hwpt, next);
>> + container->bcontainer.dirty_pages_supported |=
>> + vbasedev->iommu_dirty_tracking;
> Is it possible to have several devices with different
>
> iommu_dirty_tracking value in the same container? In other words would they be attached to different container/ioas?
>
In theory, yes, they can be in the same container/ioas. But I guess with IOMMUFD
it's possible that we can allocate different containers for different devices
given that we can manipulate/pass a different IOMMUFD object.
In practice I don't know if such HW platforms even exist where different IOMMU
instances present different values of dirty tracking, given that this is an IOMMU
feature, rather than endpoint dependent. In x86 it's homogeneous, and likely on
smmuv3 server too. There are indeed endpoint related features which may be
different in IOMMU instances, but those only reflect on logic that the device
needs to implement (e.g. PCIe PRS).
Having said that I can only think of mdevs, where the realize() will block
migration because the vbasedev->iommu_dirty_tracking is 0 should the mdev not
support dma-logging vfio (but it doesn't go via this codepath above anyhow).
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [PATCH v6 5/9] vfio/iommufd: Probe and request hwpt dirty tracking capability
2024-07-23 8:00 ` Joao Martins
@ 2024-07-23 8:09 ` Eric Auger
2024-07-23 8:17 ` Joao Martins
2024-07-23 11:59 ` Jason Gunthorpe
0 siblings, 2 replies; 51+ messages in thread
From: Eric Auger @ 2024-07-23 8:09 UTC (permalink / raw)
To: Joao Martins, qemu-devel
Cc: Yi Liu, Zhenzhong Duan, Alex Williamson, Cedric Le Goater,
Jason Gunthorpe, Avihai Horon
On 7/23/24 10:00, Joao Martins wrote:
> On 23/07/2024 08:50, Eric Auger wrote:
>> Hi Joao,
>>
>> On 7/22/24 23:13, Joao Martins wrote:
>>> In preparation to using the dirty tracking UAPI, probe whether the IOMMU
>>> supports dirty tracking. This is done via the data stored in
>>> hiod::caps::hw_caps initialized from GET_HW_INFO.
>>>
>>> Qemu doesn't know if VF dirty tracking is supported when allocating
>>> hardware pagetable in iommufd_cdev_autodomains_get(). This is because
>>> VFIODevice migration state hasn't been initialized *yet* hence it can't pick
>>> between VF dirty tracking vs IOMMU dirty tracking. So, if IOMMU supports
>>> dirty tracking it always creates HWPTs with IOMMU_HWPT_ALLOC_DIRTY_TRACKING
>>> even if later on VFIOMigration decides to use VF dirty tracking instead.
>>>
>>> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
>>> ---
>>> include/hw/vfio/vfio-common.h | 2 ++
>>> hw/vfio/iommufd.c | 20 ++++++++++++++++++++
>>> 2 files changed, 22 insertions(+)
>>>
>>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
>>> index 4e44b26d3c45..1e02c98b09ba 100644
>>> --- a/include/hw/vfio/vfio-common.h
>>> +++ b/include/hw/vfio/vfio-common.h
>>> @@ -97,6 +97,7 @@ typedef struct IOMMUFDBackend IOMMUFDBackend;
>>>
>>> typedef struct VFIOIOASHwpt {
>>> uint32_t hwpt_id;
>>> + uint32_t hwpt_flags;
>>> QLIST_HEAD(, VFIODevice) device_list;
>>> QLIST_ENTRY(VFIOIOASHwpt) next;
>>> } VFIOIOASHwpt;
>>> @@ -139,6 +140,7 @@ typedef struct VFIODevice {
>>> OnOffAuto pre_copy_dirty_page_tracking;
>>> bool dirty_pages_supported;
>>> bool dirty_tracking;
>>> + bool iommu_dirty_tracking;
>>> HostIOMMUDevice *hiod;
>>> int devid;
>>> IOMMUFDBackend *iommufd;
>>> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
>>> index 2324bf892c56..7afea0b041ed 100644
>>> --- a/hw/vfio/iommufd.c
>>> +++ b/hw/vfio/iommufd.c
>>> @@ -110,6 +110,11 @@ static void iommufd_cdev_unbind_and_disconnect(VFIODevice *vbasedev)
>>> iommufd_backend_disconnect(vbasedev->iommufd);
>>> }
>>>
>>> +static bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt)
>>> +{
>>> + return hwpt && hwpt->hwpt_flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
>>> +}
>>> +
>>> static int iommufd_cdev_getfd(const char *sysfs_path, Error **errp)
>>> {
>>> ERRP_GUARD();
>>> @@ -246,6 +251,17 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
>>> }
>>> }
>>>
>>> + /*
>>> + * This is quite early and VFIO Migration state isn't yet fully
>>> + * initialized, thus rely only on IOMMU hardware capabilities as to
>>> + * whether IOMMU dirty tracking is going to be requested. Later
>>> + * vfio_migration_realize() may decide to use VF dirty tracking
>>> + * instead.
>>> + */
>>> + if (vbasedev->hiod->caps.hw_caps & IOMMU_HW_CAP_DIRTY_TRACKING) {
>>> + flags = IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
>>> + }
>>> +
>>> if (!iommufd_backend_alloc_hwpt(iommufd, vbasedev->devid,
>>> container->ioas_id, flags,
>>> IOMMU_HWPT_DATA_NONE, 0, NULL,
>>> @@ -255,6 +271,7 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
>>>
>>> hwpt = g_malloc0(sizeof(*hwpt));
>>> hwpt->hwpt_id = hwpt_id;
>>> + hwpt->hwpt_flags = flags;
>>> QLIST_INIT(&hwpt->device_list);
>>>
>>> ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt->hwpt_id, errp);
>>> @@ -265,8 +282,11 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
>>> }
>>>
>>> vbasedev->hwpt = hwpt;
>>> + vbasedev->iommu_dirty_tracking = iommufd_hwpt_dirty_tracking(hwpt);
>>> QLIST_INSERT_HEAD(&hwpt->device_list, vbasedev, hwpt_next);
>>> QLIST_INSERT_HEAD(&container->hwpt_list, hwpt, next);
>>> + container->bcontainer.dirty_pages_supported |=
>>> + vbasedev->iommu_dirty_tracking;
>> Is it possible to have several devices with different
>>
>> iommu_dirty_tracking value in the same container? In other words would they be attached to different container/ioas?
>>
> In theory, yes, they can be in the same container/ioas. But I guess with IOMMUFD
> it's possible that we can allocate different containers for different devices
> given that we can manipulate/pass a different IOMMUFD object.
Yes I would have suspected they would end up in different
containers/ioas but I am not sure.
>
> In pratice I don't know if such HW platforms even exist where different IOMMU
> instances present different value of dirty tracking, given that this is a IOMMU
> feature, rather than endpoint dependent. In x86 it's homogeneous, and likely on
> smmuv3 server too. There are indeed endpoint related features which may be
on ARM you may have several SMMU instances. I do agree that the
likelihood of those instances having heterogeneous dirty page tracking
support is low but well I don't know. Maybe we should add a warning at
least, later on if this case arises.
Eric
> different in IOMMU instances, but those only reflect on logic that the device
> needs to implement (e.g. PCIe PRS).
>
> Having said that I can only think of mdevs, where the realize() will block
> migration because the vbasedev->iommu_dirty_tracking is 0 should the mdev not
> support dma-logging vfio (but it doesn't go via this codepath above anyhow).
>
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [PATCH v6 5/9] vfio/iommufd: Probe and request hwpt dirty tracking capability
2024-07-23 8:09 ` Eric Auger
@ 2024-07-23 8:17 ` Joao Martins
2024-07-23 11:59 ` Jason Gunthorpe
1 sibling, 0 replies; 51+ messages in thread
From: Joao Martins @ 2024-07-23 8:17 UTC (permalink / raw)
To: eric.auger, qemu-devel
Cc: Yi Liu, Zhenzhong Duan, Alex Williamson, Cedric Le Goater,
Jason Gunthorpe, Avihai Horon
On 23/07/2024 09:09, Eric Auger wrote:
>
>
> On 7/23/24 10:00, Joao Martins wrote:
>> On 23/07/2024 08:50, Eric Auger wrote:
>>> Hi Joao,
>>>
>>> On 7/22/24 23:13, Joao Martins wrote:
>>>> In preparation to using the dirty tracking UAPI, probe whether the IOMMU
>>>> supports dirty tracking. This is done via the data stored in
>>>> hiod::caps::hw_caps initialized from GET_HW_INFO.
>>>>
>>>> Qemu doesn't know if VF dirty tracking is supported when allocating
>>>> hardware pagetable in iommufd_cdev_autodomains_get(). This is because
>>>> VFIODevice migration state hasn't been initialized *yet* hence it can't pick
>>>> between VF dirty tracking vs IOMMU dirty tracking. So, if IOMMU supports
>>>> dirty tracking it always creates HWPTs with IOMMU_HWPT_ALLOC_DIRTY_TRACKING
>>>> even if later on VFIOMigration decides to use VF dirty tracking instead.
>>>>
>>>> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
>>>> ---
>>>> include/hw/vfio/vfio-common.h | 2 ++
>>>> hw/vfio/iommufd.c | 20 ++++++++++++++++++++
>>>> 2 files changed, 22 insertions(+)
>>>>
>>>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
>>>> index 4e44b26d3c45..1e02c98b09ba 100644
>>>> --- a/include/hw/vfio/vfio-common.h
>>>> +++ b/include/hw/vfio/vfio-common.h
>>>> @@ -97,6 +97,7 @@ typedef struct IOMMUFDBackend IOMMUFDBackend;
>>>>
>>>> typedef struct VFIOIOASHwpt {
>>>> uint32_t hwpt_id;
>>>> + uint32_t hwpt_flags;
>>>> QLIST_HEAD(, VFIODevice) device_list;
>>>> QLIST_ENTRY(VFIOIOASHwpt) next;
>>>> } VFIOIOASHwpt;
>>>> @@ -139,6 +140,7 @@ typedef struct VFIODevice {
>>>> OnOffAuto pre_copy_dirty_page_tracking;
>>>> bool dirty_pages_supported;
>>>> bool dirty_tracking;
>>>> + bool iommu_dirty_tracking;
>>>> HostIOMMUDevice *hiod;
>>>> int devid;
>>>> IOMMUFDBackend *iommufd;
>>>> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
>>>> index 2324bf892c56..7afea0b041ed 100644
>>>> --- a/hw/vfio/iommufd.c
>>>> +++ b/hw/vfio/iommufd.c
>>>> @@ -110,6 +110,11 @@ static void iommufd_cdev_unbind_and_disconnect(VFIODevice *vbasedev)
>>>> iommufd_backend_disconnect(vbasedev->iommufd);
>>>> }
>>>>
>>>> +static bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt)
>>>> +{
>>>> + return hwpt && hwpt->hwpt_flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
>>>> +}
>>>> +
>>>> static int iommufd_cdev_getfd(const char *sysfs_path, Error **errp)
>>>> {
>>>> ERRP_GUARD();
>>>> @@ -246,6 +251,17 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
>>>> }
>>>> }
>>>>
>>>> + /*
>>>> + * This is quite early and VFIO Migration state isn't yet fully
>>>> + * initialized, thus rely only on IOMMU hardware capabilities as to
>>>> + * whether IOMMU dirty tracking is going to be requested. Later
>>>> + * vfio_migration_realize() may decide to use VF dirty tracking
>>>> + * instead.
>>>> + */
>>>> + if (vbasedev->hiod->caps.hw_caps & IOMMU_HW_CAP_DIRTY_TRACKING) {
>>>> + flags = IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
>>>> + }
>>>> +
>>>> if (!iommufd_backend_alloc_hwpt(iommufd, vbasedev->devid,
>>>> container->ioas_id, flags,
>>>> IOMMU_HWPT_DATA_NONE, 0, NULL,
>>>> @@ -255,6 +271,7 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
>>>>
>>>> hwpt = g_malloc0(sizeof(*hwpt));
>>>> hwpt->hwpt_id = hwpt_id;
>>>> + hwpt->hwpt_flags = flags;
>>>> QLIST_INIT(&hwpt->device_list);
>>>>
>>>> ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt->hwpt_id, errp);
>>>> @@ -265,8 +282,11 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
>>>> }
>>>>
>>>> vbasedev->hwpt = hwpt;
>>>> + vbasedev->iommu_dirty_tracking = iommufd_hwpt_dirty_tracking(hwpt);
>>>> QLIST_INSERT_HEAD(&hwpt->device_list, vbasedev, hwpt_next);
>>>> QLIST_INSERT_HEAD(&container->hwpt_list, hwpt, next);
>>>> + container->bcontainer.dirty_pages_supported |=
>>>> + vbasedev->iommu_dirty_tracking;
>>> Is it possible to have several devices with different
>>>
>>> iommu_dirty_tracking value in the same container? In other words would they be attached to different container/ioas?
>>>
>> In theory, yes, they can be in the same container/ioas. But I guess with IOMMUFD
>> it's possible that we can allocate different containers for different devices
>> given that we can manipulate/pass a different IOMMUFD object.
> Yes I would have suspected they would end up in different
> containers/ioas but I am not sure.
>>
>> In pratice I don't know if such HW platforms even exist where different IOMMU
>> instances present different value of dirty tracking, given that this is a IOMMU
>> feature, rather than endpoint dependent. In x86 it's homogeneous, and likely on
>> smmuv3 server too. There are indeed endpoint related features which may be
> on ARM you may have several SMMU instances. I do agree that the
> likelyhood of those instances having heterogeneous dirty page tracking
> support is low but well I don't know. Maybe we should add a wanrning at
> least, later on if this case arises.
>
Yeap that's sensible as it's not immediately obvious. Something like:
@@ -345,6 +346,11 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
QLIST_INSERT_HEAD(&container->hwpt_list, hwpt, next);
container->bcontainer.dirty_pages_supported |=
vbasedev->iommu_dirty_tracking;
+ if (container->bcontainer.dirty_pages_supported &&
+ !vbasedev->iommu_dirty_tracking) {
+ warn_report("IOMMU instance for device %s doesn't support dirty tracking",
+ vbasedev->name);
+ }
return true;
}
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [PATCH v6 5/9] vfio/iommufd: Probe and request hwpt dirty tracking capability
2024-07-23 8:09 ` Eric Auger
2024-07-23 8:17 ` Joao Martins
@ 2024-07-23 11:59 ` Jason Gunthorpe
1 sibling, 0 replies; 51+ messages in thread
From: Jason Gunthorpe @ 2024-07-23 11:59 UTC (permalink / raw)
To: Eric Auger
Cc: Joao Martins, qemu-devel, Yi Liu, Zhenzhong Duan, Alex Williamson,
Cedric Le Goater, Avihai Horon
On Tue, Jul 23, 2024 at 10:09:13AM +0200, Eric Auger wrote:
> > In pratice I don't know if such HW platforms even exist where different IOMMU
> > instances present different value of dirty tracking, given that this is a IOMMU
> > feature, rather than endpoint dependent. In x86 it's homogeneous, and likely on
> > smmuv3 server too. There are indeed endpoint related features
> > which may be
> on ARM you may have several SMMU instances. I do agree that the
> likelyhood of those instances having heterogeneous dirty page
> tracking support is low but well I don't know. Maybe we should add a
> wanrning at least, later on if this case arises.
From what I understand about ARM IP there are additional system wide
complexities to implement HTTU, it requires the SMMU have CPU coherent
atomics, which means it has to use a different kind of bus..
Hopefully nobody does this, but still, I wouldn't assume the same
consistency as x86...
Jason
^ permalink raw reply [flat|nested] 51+ messages in thread
* [PATCH v6 6/9] vfio/iommufd: Implement VFIOIOMMUClass::set_dirty_tracking support
2024-07-22 21:13 [PATCH v6 0/9] hw/iommufd: IOMMUFD Dirty Tracking Joao Martins
` (4 preceding siblings ...)
2024-07-22 21:13 ` [PATCH v6 5/9] vfio/iommufd: Probe and request hwpt dirty tracking capability Joao Martins
@ 2024-07-22 21:13 ` Joao Martins
2024-07-23 8:03 ` Eric Auger
2024-07-22 21:13 ` [PATCH v6 7/9] vfio/iommufd: Implement VFIOIOMMUClass::query_dirty_bitmap support Joao Martins
` (4 subsequent siblings)
10 siblings, 1 reply; 51+ messages in thread
From: Joao Martins @ 2024-07-22 21:13 UTC (permalink / raw)
To: qemu-devel
Cc: Yi Liu, Eric Auger, Zhenzhong Duan, Alex Williamson,
Cedric Le Goater, Jason Gunthorpe, Avihai Horon, Joao Martins
ioctl(iommufd, IOMMU_HWPT_SET_DIRTY_TRACKING, arg) is the UAPI that
enables or disables dirty page tracking. The ioctl is used if the hwpt
has been created with dirty tracking supported domain (stored in
hwpt::flags) and it is called on the whole list of iommu domains.
Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
---
include/sysemu/iommufd.h | 2 ++
backends/iommufd.c | 23 +++++++++++++++++++++++
hw/vfio/iommufd.c | 32 ++++++++++++++++++++++++++++++++
backends/trace-events | 1 +
4 files changed, 58 insertions(+)
diff --git a/include/sysemu/iommufd.h b/include/sysemu/iommufd.h
index e917e7591d05..6fb412f61144 100644
--- a/include/sysemu/iommufd.h
+++ b/include/sysemu/iommufd.h
@@ -55,6 +55,8 @@ bool iommufd_backend_alloc_hwpt(IOMMUFDBackend *be, uint32_t dev_id,
uint32_t data_type, uint32_t data_len,
void *data_ptr, uint32_t *out_hwpt,
Error **errp);
+bool iommufd_backend_set_dirty_tracking(IOMMUFDBackend *be, uint32_t hwpt_id,
+ bool start, Error **errp);
#define TYPE_HOST_IOMMU_DEVICE_IOMMUFD TYPE_HOST_IOMMU_DEVICE "-iommufd"
#endif
diff --git a/backends/iommufd.c b/backends/iommufd.c
index 06b135111f30..b97883503884 100644
--- a/backends/iommufd.c
+++ b/backends/iommufd.c
@@ -238,6 +238,29 @@ bool iommufd_backend_alloc_hwpt(IOMMUFDBackend *be, uint32_t dev_id,
return true;
}
+bool iommufd_backend_set_dirty_tracking(IOMMUFDBackend *be,
+ uint32_t hwpt_id, bool start,
+ Error **errp)
+{
+ int ret;
+ struct iommu_hwpt_set_dirty_tracking set_dirty = {
+ .size = sizeof(set_dirty),
+ .hwpt_id = hwpt_id,
+ .flags = start ? IOMMU_HWPT_DIRTY_TRACKING_ENABLE : 0,
+ };
+
+ ret = ioctl(be->fd, IOMMU_HWPT_SET_DIRTY_TRACKING, &set_dirty);
+ trace_iommufd_backend_set_dirty(be->fd, hwpt_id, start, ret ? errno : 0);
+ if (ret) {
+ error_setg_errno(errp, errno,
+ "IOMMU_HWPT_SET_DIRTY_TRACKING(hwpt_id %u) failed",
+ hwpt_id);
+ return false;
+ }
+
+ return true;
+}
+
bool iommufd_backend_get_device_info(IOMMUFDBackend *be, uint32_t devid,
uint32_t *type, void *data, uint32_t len,
uint64_t *caps, Error **errp)
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
index 7afea0b041ed..b882a3f59a6e 100644
--- a/hw/vfio/iommufd.c
+++ b/hw/vfio/iommufd.c
@@ -115,6 +115,37 @@ static bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt)
return hwpt && hwpt->hwpt_flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
}
+static int iommufd_set_dirty_page_tracking(const VFIOContainerBase *bcontainer,
+ bool start, Error **errp)
+{
+ const VFIOIOMMUFDContainer *container =
+ container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer);
+ VFIOIOASHwpt *hwpt;
+
+ QLIST_FOREACH(hwpt, &container->hwpt_list, next) {
+ if (!iommufd_hwpt_dirty_tracking(hwpt)) {
+ continue;
+ }
+
+ if (!iommufd_backend_set_dirty_tracking(container->be,
+ hwpt->hwpt_id, start, errp)) {
+ goto err;
+ }
+ }
+
+ return 0;
+
+err:
+ QLIST_FOREACH(hwpt, &container->hwpt_list, next) {
+ if (!iommufd_hwpt_dirty_tracking(hwpt)) {
+ continue;
+ }
+ iommufd_backend_set_dirty_tracking(container->be,
+ hwpt->hwpt_id, !start, NULL);
+ }
+ return -EINVAL;
+}
+
static int iommufd_cdev_getfd(const char *sysfs_path, Error **errp)
{
ERRP_GUARD();
@@ -725,6 +756,7 @@ static void vfio_iommu_iommufd_class_init(ObjectClass *klass, void *data)
vioc->attach_device = iommufd_cdev_attach;
vioc->detach_device = iommufd_cdev_detach;
vioc->pci_hot_reset = iommufd_cdev_pci_hot_reset;
+ vioc->set_dirty_page_tracking = iommufd_set_dirty_page_tracking;
};
static bool hiod_iommufd_vfio_realize(HostIOMMUDevice *hiod, void *opaque,
diff --git a/backends/trace-events b/backends/trace-events
index 4d8ac02fe7d6..28aca3b859d4 100644
--- a/backends/trace-events
+++ b/backends/trace-events
@@ -16,3 +16,4 @@ iommufd_backend_unmap_dma(int iommufd, uint32_t ioas, uint64_t iova, uint64_t si
iommufd_backend_alloc_ioas(int iommufd, uint32_t ioas) " iommufd=%d ioas=%d"
iommufd_backend_alloc_hwpt(int iommufd, uint32_t dev_id, uint32_t pt_id, uint32_t flags, uint32_t hwpt_type, uint32_t len, uint64_t data_ptr, uint32_t out_hwpt_id, int ret) " iommufd=%d dev_id=%u pt_id=%u flags=0x%x hwpt_type=%u len=%u data_ptr=0x%"PRIx64" out_hwpt=%u (%d)"
iommufd_backend_free_id(int iommufd, uint32_t id, int ret) " iommufd=%d id=%d (%d)"
+iommufd_backend_set_dirty(int iommufd, uint32_t hwpt_id, bool start, int ret) " iommufd=%d hwpt=%u enable=%d (%d)"
--
2.17.2
^ permalink raw reply related [flat|nested] 51+ messages in thread
* Re: [PATCH v6 6/9] vfio/iommufd: Implement VFIOIOMMUClass::set_dirty_tracking support
2024-07-22 21:13 ` [PATCH v6 6/9] vfio/iommufd: Implement VFIOIOMMUClass::set_dirty_tracking support Joao Martins
@ 2024-07-23 8:03 ` Eric Auger
2024-07-23 8:14 ` Joao Martins
0 siblings, 1 reply; 51+ messages in thread
From: Eric Auger @ 2024-07-23 8:03 UTC (permalink / raw)
To: Joao Martins, qemu-devel
Cc: Yi Liu, Zhenzhong Duan, Alex Williamson, Cedric Le Goater,
Jason Gunthorpe, Avihai Horon
Hi Joao,
On 7/22/24 23:13, Joao Martins wrote:
> ioctl(iommufd, IOMMU_HWPT_SET_DIRTY_TRACKING, arg) is the UAPI that
> enables or disables dirty page tracking. The ioctl is used if the hwpt
> has been created with dirty tracking supported domain (stored in
> hwpt::flags) and it is called on the whole list of iommu domains.
>
> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
> Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> ---
> include/sysemu/iommufd.h | 2 ++
> backends/iommufd.c | 23 +++++++++++++++++++++++
> hw/vfio/iommufd.c | 32 ++++++++++++++++++++++++++++++++
> backends/trace-events | 1 +
> 4 files changed, 58 insertions(+)
>
> diff --git a/include/sysemu/iommufd.h b/include/sysemu/iommufd.h
> index e917e7591d05..6fb412f61144 100644
> --- a/include/sysemu/iommufd.h
> +++ b/include/sysemu/iommufd.h
> @@ -55,6 +55,8 @@ bool iommufd_backend_alloc_hwpt(IOMMUFDBackend *be, uint32_t dev_id,
> uint32_t data_type, uint32_t data_len,
> void *data_ptr, uint32_t *out_hwpt,
> Error **errp);
> +bool iommufd_backend_set_dirty_tracking(IOMMUFDBackend *be, uint32_t hwpt_id,
> + bool start, Error **errp);
>
> #define TYPE_HOST_IOMMU_DEVICE_IOMMUFD TYPE_HOST_IOMMU_DEVICE "-iommufd"
> #endif
> diff --git a/backends/iommufd.c b/backends/iommufd.c
> index 06b135111f30..b97883503884 100644
> --- a/backends/iommufd.c
> +++ b/backends/iommufd.c
> @@ -238,6 +238,29 @@ bool iommufd_backend_alloc_hwpt(IOMMUFDBackend *be, uint32_t dev_id,
> return true;
> }
>
> +bool iommufd_backend_set_dirty_tracking(IOMMUFDBackend *be,
> + uint32_t hwpt_id, bool start,
> + Error **errp)
> +{
> + int ret;
> + struct iommu_hwpt_set_dirty_tracking set_dirty = {
> + .size = sizeof(set_dirty),
> + .hwpt_id = hwpt_id,
> + .flags = start ? IOMMU_HWPT_DIRTY_TRACKING_ENABLE : 0,
> + };
> +
> + ret = ioctl(be->fd, IOMMU_HWPT_SET_DIRTY_TRACKING, &set_dirty);
> + trace_iommufd_backend_set_dirty(be->fd, hwpt_id, start, ret ? errno : 0);
> + if (ret) {
> + error_setg_errno(errp, errno,
> + "IOMMU_HWPT_SET_DIRTY_TRACKING(hwpt_id %u) failed",
> + hwpt_id);
> + return false;
> + }
> +
> + return true;
> +}
> +
> bool iommufd_backend_get_device_info(IOMMUFDBackend *be, uint32_t devid,
> uint32_t *type, void *data, uint32_t len,
> uint64_t *caps, Error **errp)
> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
> index 7afea0b041ed..b882a3f59a6e 100644
> --- a/hw/vfio/iommufd.c
> +++ b/hw/vfio/iommufd.c
> @@ -115,6 +115,37 @@ static bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt)
> return hwpt && hwpt->hwpt_flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
> }
>
> +static int iommufd_set_dirty_page_tracking(const VFIOContainerBase *bcontainer,
> + bool start, Error **errp)
> +{
> + const VFIOIOMMUFDContainer *container =
> + container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer);
> + VFIOIOASHwpt *hwpt;
> +
> + QLIST_FOREACH(hwpt, &container->hwpt_list, next) {
> + if (!iommufd_hwpt_dirty_tracking(hwpt)) {
> + continue;
> + }
so here I see you handle the case where we have hwpts with and without
support for dirty tracking within the same container so I guess this
answers my previous question. So do you want to tag a container as
dirty_pages_supported = true as soon as one device is backed by a
dirty tracking hwpt within that container? I think I am missing the high level
view of when this case may happen and why the devices do not end up in
different containers/ioas. But maybe I am completely mixing up things &
objects. Eric
> +
> + if (!iommufd_backend_set_dirty_tracking(container->be,
> + hwpt->hwpt_id, start, errp)) {
> + goto err;
> + }
> + }
> +
> + return 0;
> +
> +err:
> + QLIST_FOREACH(hwpt, &container->hwpt_list, next) {
> + if (!iommufd_hwpt_dirty_tracking(hwpt)) {
> + continue;
> + }
> + iommufd_backend_set_dirty_tracking(container->be,
> + hwpt->hwpt_id, !start, NULL);
> + }
> + return -EINVAL;
> +}
> +
> static int iommufd_cdev_getfd(const char *sysfs_path, Error **errp)
> {
> ERRP_GUARD();
> @@ -725,6 +756,7 @@ static void vfio_iommu_iommufd_class_init(ObjectClass *klass, void *data)
> vioc->attach_device = iommufd_cdev_attach;
> vioc->detach_device = iommufd_cdev_detach;
> vioc->pci_hot_reset = iommufd_cdev_pci_hot_reset;
> + vioc->set_dirty_page_tracking = iommufd_set_dirty_page_tracking;
> };
>
> static bool hiod_iommufd_vfio_realize(HostIOMMUDevice *hiod, void *opaque,
> diff --git a/backends/trace-events b/backends/trace-events
> index 4d8ac02fe7d6..28aca3b859d4 100644
> --- a/backends/trace-events
> +++ b/backends/trace-events
> @@ -16,3 +16,4 @@ iommufd_backend_unmap_dma(int iommufd, uint32_t ioas, uint64_t iova, uint64_t si
> iommufd_backend_alloc_ioas(int iommufd, uint32_t ioas) " iommufd=%d ioas=%d"
> iommufd_backend_alloc_hwpt(int iommufd, uint32_t dev_id, uint32_t pt_id, uint32_t flags, uint32_t hwpt_type, uint32_t len, uint64_t data_ptr, uint32_t out_hwpt_id, int ret) " iommufd=%d dev_id=%u pt_id=%u flags=0x%x hwpt_type=%u len=%u data_ptr=0x%"PRIx64" out_hwpt=%u (%d)"
> iommufd_backend_free_id(int iommufd, uint32_t id, int ret) " iommufd=%d id=%d (%d)"
> +iommufd_backend_set_dirty(int iommufd, uint32_t hwpt_id, bool start, int ret) " iommufd=%d hwpt=%u enable=%d (%d)"
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [PATCH v6 6/9] vfio/iommufd: Implement VFIOIOMMUClass::set_dirty_tracking support
2024-07-23 8:03 ` Eric Auger
@ 2024-07-23 8:14 ` Joao Martins
2024-07-23 8:17 ` Eric Auger
0 siblings, 1 reply; 51+ messages in thread
From: Joao Martins @ 2024-07-23 8:14 UTC (permalink / raw)
To: eric.auger, qemu-devel
Cc: Yi Liu, Zhenzhong Duan, Alex Williamson, Cedric Le Goater,
Jason Gunthorpe, Avihai Horon
On 23/07/2024 09:03, Eric Auger wrote:
> Hi Joao,
>
> On 7/22/24 23:13, Joao Martins wrote:
>> ioctl(iommufd, IOMMU_HWPT_SET_DIRTY_TRACKING, arg) is the UAPI that
>> enables or disables dirty page tracking. The ioctl is used if the hwpt
>> has been created with dirty tracking supported domain (stored in
>> hwpt::flags) and it is called on the whole list of iommu domains.
>>
>> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
>> Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>> ---
>> include/sysemu/iommufd.h | 2 ++
>> backends/iommufd.c | 23 +++++++++++++++++++++++
>> hw/vfio/iommufd.c | 32 ++++++++++++++++++++++++++++++++
>> backends/trace-events | 1 +
>> 4 files changed, 58 insertions(+)
>>
>> diff --git a/include/sysemu/iommufd.h b/include/sysemu/iommufd.h
>> index e917e7591d05..6fb412f61144 100644
>> --- a/include/sysemu/iommufd.h
>> +++ b/include/sysemu/iommufd.h
>> @@ -55,6 +55,8 @@ bool iommufd_backend_alloc_hwpt(IOMMUFDBackend *be, uint32_t dev_id,
>> uint32_t data_type, uint32_t data_len,
>> void *data_ptr, uint32_t *out_hwpt,
>> Error **errp);
>> +bool iommufd_backend_set_dirty_tracking(IOMMUFDBackend *be, uint32_t hwpt_id,
>> + bool start, Error **errp);
>>
>> #define TYPE_HOST_IOMMU_DEVICE_IOMMUFD TYPE_HOST_IOMMU_DEVICE "-iommufd"
>> #endif
>> diff --git a/backends/iommufd.c b/backends/iommufd.c
>> index 06b135111f30..b97883503884 100644
>> --- a/backends/iommufd.c
>> +++ b/backends/iommufd.c
>> @@ -238,6 +238,29 @@ bool iommufd_backend_alloc_hwpt(IOMMUFDBackend *be, uint32_t dev_id,
>> return true;
>> }
>>
>> +bool iommufd_backend_set_dirty_tracking(IOMMUFDBackend *be,
>> + uint32_t hwpt_id, bool start,
>> + Error **errp)
>> +{
>> + int ret;
>> + struct iommu_hwpt_set_dirty_tracking set_dirty = {
>> + .size = sizeof(set_dirty),
>> + .hwpt_id = hwpt_id,
>> + .flags = start ? IOMMU_HWPT_DIRTY_TRACKING_ENABLE : 0,
>> + };
>> +
>> + ret = ioctl(be->fd, IOMMU_HWPT_SET_DIRTY_TRACKING, &set_dirty);
>> + trace_iommufd_backend_set_dirty(be->fd, hwpt_id, start, ret ? errno : 0);
>> + if (ret) {
>> + error_setg_errno(errp, errno,
>> + "IOMMU_HWPT_SET_DIRTY_TRACKING(hwpt_id %u) failed",
>> + hwpt_id);
>> + return false;
>> + }
>> +
>> + return true;
>> +}
>> +
>> bool iommufd_backend_get_device_info(IOMMUFDBackend *be, uint32_t devid,
>> uint32_t *type, void *data, uint32_t len,
>> uint64_t *caps, Error **errp)
>> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
>> index 7afea0b041ed..b882a3f59a6e 100644
>> --- a/hw/vfio/iommufd.c
>> +++ b/hw/vfio/iommufd.c
>> @@ -115,6 +115,37 @@ static bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt)
>> return hwpt && hwpt->hwpt_flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
>> }
>>
>> +static int iommufd_set_dirty_page_tracking(const VFIOContainerBase *bcontainer,
>> + bool start, Error **errp)
>> +{
>> + const VFIOIOMMUFDContainer *container =
>> + container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer);
>> + VFIOIOASHwpt *hwpt;
>> +
>> + QLIST_FOREACH(hwpt, &container->hwpt_list, next) {
>> + if (!iommufd_hwpt_dirty_tracking(hwpt)) {
>> + continue;
>> + }
> so here I see you handle the case where we have hwpts with and without
> support for dirty tracking within the same container so I guess this
> answers my previous question. So do you want to tag a container as
> dirty_pages_supported = true as soon as one device is backed up with a
> dirty tracking hwpt within that container? I think I miss the high level
> view of when this case may happen and why the devices do not end up in
> different containers/ioas. But maybe I completely mix up things &
> objects. Eric
I think the only gap I have in this series, which I don't catch exactly right in
all this logic, is when the IOMMU dirty tracking is not homogeneous, which
remains to be seen in practice (from a h/w perspective). That's where we currently
have a gap. Other than that, either we do 'all VFs do dirty tracking' or
'all devices are backed by IOMMU'.
A container may have different hwpts with different capabilities, e.g. systems
where IOMMU instances have different dirty tracking abilities (again, I don't know
if these exist). In that case, for a hwpt without dirty tracking, the devices
attached to it that lack VF dirty tracking should have an LM blocker added.
These checks were added in the previous cycles, but I was actually thinking of
making these g_assert() to make sure this is not exercised like that. But
considering I plan to improve the mixed usage of VF dirty tracking with IOMMU, I
left them as simple checks.
>> +
>> + if (!iommufd_backend_set_dirty_tracking(container->be,
>> + hwpt->hwpt_id, start, errp)) {
>> + goto err;
>> + }
>> + }
>> +
>> + return 0;
>> +
>> +err:
>> + QLIST_FOREACH(hwpt, &container->hwpt_list, next) {
>> + if (!iommufd_hwpt_dirty_tracking(hwpt)) {
>> + continue;
>> + }
>> + iommufd_backend_set_dirty_tracking(container->be,
>> + hwpt->hwpt_id, !start, NULL);
>> + }
>> + return -EINVAL;
>> +}
>> +
>> static int iommufd_cdev_getfd(const char *sysfs_path, Error **errp)
>> {
>> ERRP_GUARD();
>> @@ -725,6 +756,7 @@ static void vfio_iommu_iommufd_class_init(ObjectClass *klass, void *data)
>> vioc->attach_device = iommufd_cdev_attach;
>> vioc->detach_device = iommufd_cdev_detach;
>> vioc->pci_hot_reset = iommufd_cdev_pci_hot_reset;
>> + vioc->set_dirty_page_tracking = iommufd_set_dirty_page_tracking;
>> };
>>
>> static bool hiod_iommufd_vfio_realize(HostIOMMUDevice *hiod, void *opaque,
>> diff --git a/backends/trace-events b/backends/trace-events
>> index 4d8ac02fe7d6..28aca3b859d4 100644
>> --- a/backends/trace-events
>> +++ b/backends/trace-events
>> @@ -16,3 +16,4 @@ iommufd_backend_unmap_dma(int iommufd, uint32_t ioas, uint64_t iova, uint64_t si
>> iommufd_backend_alloc_ioas(int iommufd, uint32_t ioas) " iommufd=%d ioas=%d"
>> iommufd_backend_alloc_hwpt(int iommufd, uint32_t dev_id, uint32_t pt_id, uint32_t flags, uint32_t hwpt_type, uint32_t len, uint64_t data_ptr, uint32_t out_hwpt_id, int ret) " iommufd=%d dev_id=%u pt_id=%u flags=0x%x hwpt_type=%u len=%u data_ptr=0x%"PRIx64" out_hwpt=%u (%d)"
>> iommufd_backend_free_id(int iommufd, uint32_t id, int ret) " iommufd=%d id=%d (%d)"
>> +iommufd_backend_set_dirty(int iommufd, uint32_t hwpt_id, bool start, int ret) " iommufd=%d hwpt=%u enable=%d (%d)"
>
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [PATCH v6 6/9] vfio/iommufd: Implement VFIOIOMMUClass::set_dirty_tracking support
2024-07-23 8:14 ` Joao Martins
@ 2024-07-23 8:17 ` Eric Auger
0 siblings, 0 replies; 51+ messages in thread
From: Eric Auger @ 2024-07-23 8:17 UTC (permalink / raw)
To: Joao Martins, qemu-devel
Cc: Yi Liu, Zhenzhong Duan, Alex Williamson, Cedric Le Goater,
Jason Gunthorpe, Avihai Horon
On 7/23/24 10:14, Joao Martins wrote:
> On 23/07/2024 09:03, Eric Auger wrote:
>> Hi Joao,
>>
>> On 7/22/24 23:13, Joao Martins wrote:
>>> ioctl(iommufd, IOMMU_HWPT_SET_DIRTY_TRACKING, arg) is the UAPI that
>>> enables or disables dirty page tracking. The ioctl is used if the hwpt
>>> has been created with dirty tracking supported domain (stored in
>>> hwpt::flags) and it is called on the whole list of iommu domains.
>>>
>>> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
>>> Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>>> ---
>>> include/sysemu/iommufd.h | 2 ++
>>> backends/iommufd.c | 23 +++++++++++++++++++++++
>>> hw/vfio/iommufd.c | 32 ++++++++++++++++++++++++++++++++
>>> backends/trace-events | 1 +
>>> 4 files changed, 58 insertions(+)
>>>
>>> diff --git a/include/sysemu/iommufd.h b/include/sysemu/iommufd.h
>>> index e917e7591d05..6fb412f61144 100644
>>> --- a/include/sysemu/iommufd.h
>>> +++ b/include/sysemu/iommufd.h
>>> @@ -55,6 +55,8 @@ bool iommufd_backend_alloc_hwpt(IOMMUFDBackend *be, uint32_t dev_id,
>>> uint32_t data_type, uint32_t data_len,
>>> void *data_ptr, uint32_t *out_hwpt,
>>> Error **errp);
>>> +bool iommufd_backend_set_dirty_tracking(IOMMUFDBackend *be, uint32_t hwpt_id,
>>> + bool start, Error **errp);
>>>
>>> #define TYPE_HOST_IOMMU_DEVICE_IOMMUFD TYPE_HOST_IOMMU_DEVICE "-iommufd"
>>> #endif
>>> diff --git a/backends/iommufd.c b/backends/iommufd.c
>>> index 06b135111f30..b97883503884 100644
>>> --- a/backends/iommufd.c
>>> +++ b/backends/iommufd.c
>>> @@ -238,6 +238,29 @@ bool iommufd_backend_alloc_hwpt(IOMMUFDBackend *be, uint32_t dev_id,
>>> return true;
>>> }
>>>
>>> +bool iommufd_backend_set_dirty_tracking(IOMMUFDBackend *be,
>>> + uint32_t hwpt_id, bool start,
>>> + Error **errp)
>>> +{
>>> + int ret;
>>> + struct iommu_hwpt_set_dirty_tracking set_dirty = {
>>> + .size = sizeof(set_dirty),
>>> + .hwpt_id = hwpt_id,
>>> + .flags = start ? IOMMU_HWPT_DIRTY_TRACKING_ENABLE : 0,
>>> + };
>>> +
>>> + ret = ioctl(be->fd, IOMMU_HWPT_SET_DIRTY_TRACKING, &set_dirty);
>>> + trace_iommufd_backend_set_dirty(be->fd, hwpt_id, start, ret ? errno : 0);
>>> + if (ret) {
>>> + error_setg_errno(errp, errno,
>>> + "IOMMU_HWPT_SET_DIRTY_TRACKING(hwpt_id %u) failed",
>>> + hwpt_id);
>>> + return false;
>>> + }
>>> +
>>> + return true;
>>> +}
>>> +
>>> bool iommufd_backend_get_device_info(IOMMUFDBackend *be, uint32_t devid,
>>> uint32_t *type, void *data, uint32_t len,
>>> uint64_t *caps, Error **errp)
>>> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
>>> index 7afea0b041ed..b882a3f59a6e 100644
>>> --- a/hw/vfio/iommufd.c
>>> +++ b/hw/vfio/iommufd.c
>>> @@ -115,6 +115,37 @@ static bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt)
>>> return hwpt && hwpt->hwpt_flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
>>> }
>>>
>>> +static int iommufd_set_dirty_page_tracking(const VFIOContainerBase *bcontainer,
>>> + bool start, Error **errp)
>>> +{
>>> + const VFIOIOMMUFDContainer *container =
>>> + container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer);
>>> + VFIOIOASHwpt *hwpt;
>>> +
>>> + QLIST_FOREACH(hwpt, &container->hwpt_list, next) {
>>> + if (!iommufd_hwpt_dirty_tracking(hwpt)) {
>>> + continue;
>>> + }
>> so here I see you handle the case where we have hwpts with and without
>> support for dirty tracking within the same container so I guess this
>> answers my previous question. So do you want to tag a container as
>> dirty_pages_supported = true as soon as one device is backed up with a
>> dirty tracking hwpt within that container? I think I miss the high level
>> view of when this case may happen and why the devices do not end up in
>> different containers/ioas. But maybe I completely mix up things &
>> objects. Eric
> I think the only gap I have in this series that I don't catch exactly right in
> all this logic, is when the IOMMU dirty tracking is not homogeneous, which
> remains to be seen in pratice (from h/w perspective). That's where we currently
> have a gap here. Other than that, either we do 'all VFs do dirty tracking' or
> 'all devices are backed by IOMMU'.
>
> A container may have different hwpt with different capabilities e.g. systems
> where IOMMU instances have different dirty tracking ability (again I don't know
> if these exist). Which, on hwpt without dirty tracking, on which the devices
> attached to it that lack VF dirty tracking should be added an LM blocker.
>
> These checks were added from the previous cycles, but I was actually thinking in
> making these g_assert() to make sure this is not exercised like that. But
> considering I plan on improve a mixed usage of VF dirty tracking with IOMMU I
> left them as simply checks.
OK fair enough. Keeping that in mind for future consolidations
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Eric
>
>>> +
>>> + if (!iommufd_backend_set_dirty_tracking(container->be,
>>> + hwpt->hwpt_id, start, errp)) {
>>> + goto err;
>>> + }
>>> + }
>>> +
>>> + return 0;
>>> +
>>> +err:
>>> + QLIST_FOREACH(hwpt, &container->hwpt_list, next) {
>>> + if (!iommufd_hwpt_dirty_tracking(hwpt)) {
>>> + continue;
>>> + }
>>> + iommufd_backend_set_dirty_tracking(container->be,
>>> + hwpt->hwpt_id, !start, NULL);
>>> + }
>>> + return -EINVAL;
>>> +}
>>> +
>>> static int iommufd_cdev_getfd(const char *sysfs_path, Error **errp)
>>> {
>>> ERRP_GUARD();
>>> @@ -725,6 +756,7 @@ static void vfio_iommu_iommufd_class_init(ObjectClass *klass, void *data)
>>> vioc->attach_device = iommufd_cdev_attach;
>>> vioc->detach_device = iommufd_cdev_detach;
>>> vioc->pci_hot_reset = iommufd_cdev_pci_hot_reset;
>>> + vioc->set_dirty_page_tracking = iommufd_set_dirty_page_tracking;
>>> };
>>>
>>> static bool hiod_iommufd_vfio_realize(HostIOMMUDevice *hiod, void *opaque,
>>> diff --git a/backends/trace-events b/backends/trace-events
>>> index 4d8ac02fe7d6..28aca3b859d4 100644
>>> --- a/backends/trace-events
>>> +++ b/backends/trace-events
>>> @@ -16,3 +16,4 @@ iommufd_backend_unmap_dma(int iommufd, uint32_t ioas, uint64_t iova, uint64_t si
>>> iommufd_backend_alloc_ioas(int iommufd, uint32_t ioas) " iommufd=%d ioas=%d"
>>> iommufd_backend_alloc_hwpt(int iommufd, uint32_t dev_id, uint32_t pt_id, uint32_t flags, uint32_t hwpt_type, uint32_t len, uint64_t data_ptr, uint32_t out_hwpt_id, int ret) " iommufd=%d dev_id=%u pt_id=%u flags=0x%x hwpt_type=%u len=%u data_ptr=0x%"PRIx64" out_hwpt=%u (%d)"
>>> iommufd_backend_free_id(int iommufd, uint32_t id, int ret) " iommufd=%d id=%d (%d)"
>>> +iommufd_backend_set_dirty(int iommufd, uint32_t hwpt_id, bool start, int ret) " iommufd=%d hwpt=%u enable=%d (%d)"
^ permalink raw reply [flat|nested] 51+ messages in thread
* [PATCH v6 7/9] vfio/iommufd: Implement VFIOIOMMUClass::query_dirty_bitmap support
2024-07-22 21:13 [PATCH v6 0/9] hw/iommufd: IOMMUFD Dirty Tracking Joao Martins
` (5 preceding siblings ...)
2024-07-22 21:13 ` [PATCH v6 6/9] vfio/iommufd: Implement VFIOIOMMUClass::set_dirty_tracking support Joao Martins
@ 2024-07-22 21:13 ` Joao Martins
2024-07-22 21:13 ` [PATCH v6 8/9] vfio/migration: Don't block migration device dirty tracking is unsupported Joao Martins
` (3 subsequent siblings)
10 siblings, 0 replies; 51+ messages in thread
From: Joao Martins @ 2024-07-22 21:13 UTC (permalink / raw)
To: qemu-devel
Cc: Yi Liu, Eric Auger, Zhenzhong Duan, Alex Williamson,
Cedric Le Goater, Jason Gunthorpe, Avihai Horon, Joao Martins
ioctl(iommufd, IOMMU_HWPT_GET_DIRTY_BITMAP, arg) is the UAPI
that fetches the bitmap that tells what was dirty in an IOVA
range.
A single bitmap is allocated and used across all the hwpts
sharing an IOAS, which is then used in log_sync() to set QEMU
global bitmaps.
Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
Reviewed-by: Cédric Le Goater <clg@redhat.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
---
include/sysemu/iommufd.h | 4 ++++
backends/iommufd.c | 29 +++++++++++++++++++++++++++++
hw/vfio/iommufd.c | 28 ++++++++++++++++++++++++++++
backends/trace-events | 1 +
4 files changed, 62 insertions(+)
diff --git a/include/sysemu/iommufd.h b/include/sysemu/iommufd.h
index 6fb412f61144..4c4886c7787b 100644
--- a/include/sysemu/iommufd.h
+++ b/include/sysemu/iommufd.h
@@ -57,6 +57,10 @@ bool iommufd_backend_alloc_hwpt(IOMMUFDBackend *be, uint32_t dev_id,
Error **errp);
bool iommufd_backend_set_dirty_tracking(IOMMUFDBackend *be, uint32_t hwpt_id,
bool start, Error **errp);
+bool iommufd_backend_get_dirty_bitmap(IOMMUFDBackend *be, uint32_t hwpt_id,
+ uint64_t iova, ram_addr_t size,
+ uint64_t page_size, uint64_t *data,
+ Error **errp);
#define TYPE_HOST_IOMMU_DEVICE_IOMMUFD TYPE_HOST_IOMMU_DEVICE "-iommufd"
#endif
diff --git a/backends/iommufd.c b/backends/iommufd.c
index b97883503884..9bc466a89c47 100644
--- a/backends/iommufd.c
+++ b/backends/iommufd.c
@@ -261,6 +261,35 @@ bool iommufd_backend_set_dirty_tracking(IOMMUFDBackend *be,
return true;
}
+bool iommufd_backend_get_dirty_bitmap(IOMMUFDBackend *be,
+ uint32_t hwpt_id,
+ uint64_t iova, ram_addr_t size,
+ uint64_t page_size, uint64_t *data,
+ Error **errp)
+{
+ int ret;
+ struct iommu_hwpt_get_dirty_bitmap get_dirty_bitmap = {
+ .size = sizeof(get_dirty_bitmap),
+ .hwpt_id = hwpt_id,
+ .iova = iova,
+ .length = size,
+ .page_size = page_size,
+ .data = (uintptr_t)data,
+ };
+
+ ret = ioctl(be->fd, IOMMU_HWPT_GET_DIRTY_BITMAP, &get_dirty_bitmap);
+ trace_iommufd_backend_get_dirty_bitmap(be->fd, hwpt_id, iova, size,
+ page_size, ret ? errno : 0);
+ if (ret) {
+ error_setg_errno(errp, errno,
+ "IOMMU_HWPT_GET_DIRTY_BITMAP (iova: 0x%"HWADDR_PRIx
+ " size: 0x"RAM_ADDR_FMT") failed", iova, size);
+ return false;
+ }
+
+ return true;
+}
+
bool iommufd_backend_get_device_info(IOMMUFDBackend *be, uint32_t devid,
uint32_t *type, void *data, uint32_t len,
uint64_t *caps, Error **errp)
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
index b882a3f59a6e..92b976464283 100644
--- a/hw/vfio/iommufd.c
+++ b/hw/vfio/iommufd.c
@@ -25,6 +25,7 @@
#include "qemu/cutils.h"
#include "qemu/chardev_open.h"
#include "pci.h"
+#include "exec/ram_addr.h"
static int iommufd_cdev_map(const VFIOContainerBase *bcontainer, hwaddr iova,
ram_addr_t size, void *vaddr, bool readonly)
@@ -146,6 +147,32 @@ err:
return -EINVAL;
}
+static int iommufd_query_dirty_bitmap(const VFIOContainerBase *bcontainer,
+ VFIOBitmap *vbmap, hwaddr iova,
+ hwaddr size, Error **errp)
+{
+ VFIOIOMMUFDContainer *container = container_of(bcontainer,
+ VFIOIOMMUFDContainer,
+ bcontainer);
+ unsigned long page_size = qemu_real_host_page_size();
+ VFIOIOASHwpt *hwpt;
+
+ QLIST_FOREACH(hwpt, &container->hwpt_list, next) {
+ if (!iommufd_hwpt_dirty_tracking(hwpt)) {
+ continue;
+ }
+
+ if (!iommufd_backend_get_dirty_bitmap(container->be, hwpt->hwpt_id,
+ iova, size, page_size,
+ (uint64_t *)vbmap->bitmap,
+ errp)) {
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
static int iommufd_cdev_getfd(const char *sysfs_path, Error **errp)
{
ERRP_GUARD();
@@ -757,6 +784,7 @@ static void vfio_iommu_iommufd_class_init(ObjectClass *klass, void *data)
vioc->detach_device = iommufd_cdev_detach;
vioc->pci_hot_reset = iommufd_cdev_pci_hot_reset;
vioc->set_dirty_page_tracking = iommufd_set_dirty_page_tracking;
+ vioc->query_dirty_bitmap = iommufd_query_dirty_bitmap;
};
static bool hiod_iommufd_vfio_realize(HostIOMMUDevice *hiod, void *opaque,
diff --git a/backends/trace-events b/backends/trace-events
index 28aca3b859d4..40811a316215 100644
--- a/backends/trace-events
+++ b/backends/trace-events
@@ -17,3 +17,4 @@ iommufd_backend_alloc_ioas(int iommufd, uint32_t ioas) " iommufd=%d ioas=%d"
iommufd_backend_alloc_hwpt(int iommufd, uint32_t dev_id, uint32_t pt_id, uint32_t flags, uint32_t hwpt_type, uint32_t len, uint64_t data_ptr, uint32_t out_hwpt_id, int ret) " iommufd=%d dev_id=%u pt_id=%u flags=0x%x hwpt_type=%u len=%u data_ptr=0x%"PRIx64" out_hwpt=%u (%d)"
iommufd_backend_free_id(int iommufd, uint32_t id, int ret) " iommufd=%d id=%d (%d)"
iommufd_backend_set_dirty(int iommufd, uint32_t hwpt_id, bool start, int ret) " iommufd=%d hwpt=%u enable=%d (%d)"
+iommufd_backend_get_dirty_bitmap(int iommufd, uint32_t hwpt_id, uint64_t iova, uint64_t size, uint64_t page_size, int ret) " iommufd=%d hwpt=%u iova=0x%"PRIx64" size=0x%"PRIx64" page_size=0x%"PRIx64" (%d)"
--
2.17.2
^ permalink raw reply related [flat|nested] 51+ messages in thread
* [PATCH v6 8/9] vfio/migration: Don't block migration device dirty tracking is unsupported
2024-07-22 21:13 [PATCH v6 0/9] hw/iommufd: IOMMUFD Dirty Tracking Joao Martins
` (6 preceding siblings ...)
2024-07-22 21:13 ` [PATCH v6 7/9] vfio/iommufd: Implement VFIOIOMMUClass::query_dirty_bitmap support Joao Martins
@ 2024-07-22 21:13 ` Joao Martins
2024-07-23 4:45 ` Duan, Zhenzhong
2024-07-23 8:22 ` Eric Auger
2024-07-22 21:13 ` [PATCH v6 9/9] vfio/common: Allow disabling device dirty page tracking Joao Martins
` (2 subsequent siblings)
10 siblings, 2 replies; 51+ messages in thread
From: Joao Martins @ 2024-07-22 21:13 UTC (permalink / raw)
To: qemu-devel
Cc: Yi Liu, Eric Auger, Zhenzhong Duan, Alex Williamson,
Cedric Le Goater, Jason Gunthorpe, Avihai Horon, Joao Martins
By default VFIO migration is set to auto, which will support live
migration if the migration capability is set *and* also dirty page
tracking is supported.
For testing purposes one can force enable without dirty page tracking
via enable-migration=on, but that option is generally left for testing
purposes.
So starting with IOMMU dirty tracking, it can be used to accommodate the lack of
VF dirty page tracking, allowing us to minimize the VF requirements for
migration and thus enabling migration by default for those too.
While at it change the error messages to mention IOMMU dirty tracking as
well.
Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
---
hw/vfio/migration.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index 34d4be2ce1b1..cbfaef7afffe 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -1036,16 +1036,16 @@ bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp)
return !vfio_block_migration(vbasedev, err, errp);
}
- if (!vbasedev->dirty_pages_supported) {
+ if (!vbasedev->dirty_pages_supported && !vbasedev->iommu_dirty_tracking) {
if (vbasedev->enable_migration == ON_OFF_AUTO_AUTO) {
error_setg(&err,
- "%s: VFIO device doesn't support device dirty tracking",
- vbasedev->name);
+ "%s: VFIO device doesn't support device and "
+ "IOMMU dirty tracking", vbasedev->name);
goto add_blocker;
}
- warn_report("%s: VFIO device doesn't support device dirty tracking",
- vbasedev->name);
+ warn_report("%s: VFIO device doesn't support device and "
+ "IOMMU dirty tracking", vbasedev->name);
}
ret = vfio_block_multiple_devices_migration(vbasedev, errp);
--
2.17.2
^ permalink raw reply related [flat|nested] 51+ messages in thread
* RE: [PATCH v6 8/9] vfio/migration: Don't block migration device dirty tracking is unsupported
2024-07-22 21:13 ` [PATCH v6 8/9] vfio/migration: Don't block migration device dirty tracking is unsupported Joao Martins
@ 2024-07-23 4:45 ` Duan, Zhenzhong
2024-07-23 8:22 ` Eric Auger
1 sibling, 0 replies; 51+ messages in thread
From: Duan, Zhenzhong @ 2024-07-23 4:45 UTC (permalink / raw)
To: Joao Martins, qemu-devel@nongnu.org
Cc: Liu, Yi L, Eric Auger, Alex Williamson, Cedric Le Goater,
Jason Gunthorpe, Avihai Horon
>-----Original Message-----
>From: Joao Martins <joao.m.martins@oracle.com>
>Subject: [PATCH v6 8/9] vfio/migration: Don't block migration device dirty
>tracking is unsupported
>
>By default VFIO migration is set to auto, which will support live
>migration if the migration capability is set *and* also dirty page
>tracking is supported.
>
>For testing purposes one can force enable without dirty page tracking
>via enable-migration=on, but that option is generally left for testing
>purposes.
>
>So starting with IOMMU dirty tracking it can use to accomodate the lack of
>VF dirty page tracking allowing us to minimize the VF requirements for
>migration and thus enabling migration by default for those too.
>
>While at it change the error messages to mention IOMMU dirty tracking as
>well.
>
>Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Thanks
Zhenzhong
>---
> hw/vfio/migration.c | 10 +++++-----
> 1 file changed, 5 insertions(+), 5 deletions(-)
>
>diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
>index 34d4be2ce1b1..cbfaef7afffe 100644
>--- a/hw/vfio/migration.c
>+++ b/hw/vfio/migration.c
>@@ -1036,16 +1036,16 @@ bool vfio_migration_realize(VFIODevice
>*vbasedev, Error **errp)
> return !vfio_block_migration(vbasedev, err, errp);
> }
>
>- if (!vbasedev->dirty_pages_supported) {
>+ if (!vbasedev->dirty_pages_supported && !vbasedev-
>>iommu_dirty_tracking) {
> if (vbasedev->enable_migration == ON_OFF_AUTO_AUTO) {
> error_setg(&err,
>- "%s: VFIO device doesn't support device dirty tracking",
>- vbasedev->name);
>+ "%s: VFIO device doesn't support device and "
>+ "IOMMU dirty tracking", vbasedev->name);
> goto add_blocker;
> }
>
>- warn_report("%s: VFIO device doesn't support device dirty tracking",
>- vbasedev->name);
>+ warn_report("%s: VFIO device doesn't support device and "
>+ "IOMMU dirty tracking", vbasedev->name);
> }
>
> ret = vfio_block_multiple_devices_migration(vbasedev, errp);
>--
>2.17.2
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [PATCH v6 8/9] vfio/migration: Don't block migration device dirty tracking is unsupported
2024-07-22 21:13 ` [PATCH v6 8/9] vfio/migration: Don't block migration device dirty tracking is unsupported Joao Martins
2024-07-23 4:45 ` Duan, Zhenzhong
@ 2024-07-23 8:22 ` Eric Auger
1 sibling, 0 replies; 51+ messages in thread
From: Eric Auger @ 2024-07-23 8:22 UTC (permalink / raw)
To: Joao Martins, qemu-devel
Cc: Yi Liu, Zhenzhong Duan, Alex Williamson, Cedric Le Goater,
Jason Gunthorpe, Avihai Horon
On 7/22/24 23:13, Joao Martins wrote:
> By default VFIO migration is set to auto, which will support live
> migration if the migration capability is set *and* also dirty page
> tracking is supported.
>
> For testing purposes one can force enable without dirty page tracking
> via enable-migration=on, but that option is generally left for testing
> purposes.
>
> So starting with IOMMU dirty tracking it can use to accomodate the lack of
> VF dirty page tracking allowing us to minimize the VF requirements for
> migration and thus enabling migration by default for those too.
>
> While at it change the error messages to mention IOMMU dirty tracking as
> well.
>
> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
> ---
> hw/vfio/migration.c | 10 +++++-----
> 1 file changed, 5 insertions(+), 5 deletions(-)
>
> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
> index 34d4be2ce1b1..cbfaef7afffe 100644
> --- a/hw/vfio/migration.c
> +++ b/hw/vfio/migration.c
> @@ -1036,16 +1036,16 @@ bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp)
> return !vfio_block_migration(vbasedev, err, errp);
> }
>
> - if (!vbasedev->dirty_pages_supported) {
> + if (!vbasedev->dirty_pages_supported && !vbasedev->iommu_dirty_tracking) {
> if (vbasedev->enable_migration == ON_OFF_AUTO_AUTO) {
> error_setg(&err,
> - "%s: VFIO device doesn't support device dirty tracking",
> - vbasedev->name);
nit: a "supports neither device nor IOMMU dirty tracking" wording may be better
here and below, but I am not a native English speaker
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Eric
> + "%s: VFIO device doesn't support device and "
> + "IOMMU dirty tracking", vbasedev->name);
> goto add_blocker;
> }
>
> - warn_report("%s: VFIO device doesn't support device dirty tracking",
> - vbasedev->name);
> + warn_report("%s: VFIO device doesn't support device and "
> + "IOMMU dirty tracking", vbasedev->name);
> }
>
> ret = vfio_block_multiple_devices_migration(vbasedev, errp);
^ permalink raw reply [flat|nested] 51+ messages in thread
* [PATCH v6 9/9] vfio/common: Allow disabling device dirty page tracking
2024-07-22 21:13 [PATCH v6 0/9] hw/iommufd: IOMMUFD Dirty Tracking Joao Martins
` (7 preceding siblings ...)
2024-07-22 21:13 ` [PATCH v6 8/9] vfio/migration: Don't block migration device dirty tracking is unsupported Joao Martins
@ 2024-07-22 21:13 ` Joao Martins
2024-07-23 5:05 ` Duan, Zhenzhong
2024-07-23 8:31 ` Eric Auger
2024-07-23 8:35 ` [PATCH v6 0/9] hw/iommufd: IOMMUFD Dirty Tracking Cédric Le Goater
2024-07-23 14:23 ` Yi Liu
10 siblings, 2 replies; 51+ messages in thread
From: Joao Martins @ 2024-07-22 21:13 UTC (permalink / raw)
To: qemu-devel
Cc: Yi Liu, Eric Auger, Zhenzhong Duan, Alex Williamson,
Cedric Le Goater, Jason Gunthorpe, Avihai Horon, Joao Martins
The property 'x-pre-copy-dirty-page-tracking' allows disabling VF dirty
page tracking during the whole pre-copy phase, though it means
that it will only be used at the start of the switchover phase.
Add an option that disables the VF dirty page tracking, and falls
back to container-based dirty page tracking. This also allows
using IOMMU dirty tracking even on VFs with their own dirty
tracker scheme.
Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
---
include/hw/vfio/vfio-common.h | 1 +
hw/vfio/common.c | 3 +++
hw/vfio/migration.c | 4 +++-
hw/vfio/pci.c | 3 +++
4 files changed, 10 insertions(+), 1 deletion(-)
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 1e02c98b09ba..fed499b199f0 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -138,6 +138,7 @@ typedef struct VFIODevice {
VFIOMigration *migration;
Error *migration_blocker;
OnOffAuto pre_copy_dirty_page_tracking;
+ OnOffAuto device_dirty_page_tracking;
bool dirty_pages_supported;
bool dirty_tracking;
bool iommu_dirty_tracking;
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index da12cbd56408..36d0cf6585b2 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -199,6 +199,9 @@ bool vfio_devices_all_device_dirty_tracking(const VFIOContainerBase *bcontainer)
VFIODevice *vbasedev;
QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) {
+ if (vbasedev->device_dirty_page_tracking == ON_OFF_AUTO_OFF) {
+ return false;
+ }
if (!vbasedev->dirty_pages_supported) {
return false;
}
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index cbfaef7afffe..262d42a46e58 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -1036,7 +1036,9 @@ bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp)
return !vfio_block_migration(vbasedev, err, errp);
}
- if (!vbasedev->dirty_pages_supported && !vbasedev->iommu_dirty_tracking) {
+ if ((!vbasedev->dirty_pages_supported ||
+ vbasedev->device_dirty_page_tracking == ON_OFF_AUTO_OFF) &&
+ !vbasedev->iommu_dirty_tracking) {
if (vbasedev->enable_migration == ON_OFF_AUTO_AUTO) {
error_setg(&err,
"%s: VFIO device doesn't support device and "
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 8c0f212a163e..a0767de54b8d 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -3364,6 +3364,9 @@ static Property vfio_pci_dev_properties[] = {
DEFINE_PROP_ON_OFF_AUTO("x-pre-copy-dirty-page-tracking", VFIOPCIDevice,
vbasedev.pre_copy_dirty_page_tracking,
ON_OFF_AUTO_ON),
+ DEFINE_PROP_ON_OFF_AUTO("x-device-dirty-page-tracking", VFIOPCIDevice,
+ vbasedev.device_dirty_page_tracking,
+ ON_OFF_AUTO_ON),
DEFINE_PROP_ON_OFF_AUTO("display", VFIOPCIDevice,
display, ON_OFF_AUTO_OFF),
DEFINE_PROP_UINT32("xres", VFIOPCIDevice, display_xres, 0),
--
2.17.2
^ permalink raw reply related [flat|nested] 51+ messages in thread
* RE: [PATCH v6 9/9] vfio/common: Allow disabling device dirty page tracking
2024-07-22 21:13 ` [PATCH v6 9/9] vfio/common: Allow disabling device dirty page tracking Joao Martins
@ 2024-07-23 5:05 ` Duan, Zhenzhong
2024-07-23 8:31 ` Eric Auger
1 sibling, 0 replies; 51+ messages in thread
From: Duan, Zhenzhong @ 2024-07-23 5:05 UTC (permalink / raw)
To: Joao Martins, qemu-devel@nongnu.org
Cc: Liu, Yi L, Eric Auger, Alex Williamson, Cedric Le Goater,
Jason Gunthorpe, Avihai Horon
>-----Original Message-----
>From: Joao Martins <joao.m.martins@oracle.com>
>Subject: [PATCH v6 9/9] vfio/common: Allow disabling device dirty page
>tracking
>
>The property 'x-pre-copy-dirty-page-tracking' allows disabling dirty page
>tracking during the whole VF pre-copy phase, though it means that dirty
>page tracking will only be used at the start of the switchover phase.
>
>Add an option that disables the VF dirty page tracking and falls
>back to container-based dirty page tracking. This also allows
>using IOMMU dirty tracking even on VFs with their own dirty
>tracking scheme.
>
>Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Thanks
Zhenzhong
>---
> include/hw/vfio/vfio-common.h | 1 +
> hw/vfio/common.c | 3 +++
> hw/vfio/migration.c | 4 +++-
> hw/vfio/pci.c | 3 +++
> 4 files changed, 10 insertions(+), 1 deletion(-)
>
>diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-
>common.h
>index 1e02c98b09ba..fed499b199f0 100644
>--- a/include/hw/vfio/vfio-common.h
>+++ b/include/hw/vfio/vfio-common.h
>@@ -138,6 +138,7 @@ typedef struct VFIODevice {
> VFIOMigration *migration;
> Error *migration_blocker;
> OnOffAuto pre_copy_dirty_page_tracking;
>+ OnOffAuto device_dirty_page_tracking;
> bool dirty_pages_supported;
> bool dirty_tracking;
> bool iommu_dirty_tracking;
>diff --git a/hw/vfio/common.c b/hw/vfio/common.c
>index da12cbd56408..36d0cf6585b2 100644
>--- a/hw/vfio/common.c
>+++ b/hw/vfio/common.c
>@@ -199,6 +199,9 @@ bool vfio_devices_all_device_dirty_tracking(const
>VFIOContainerBase *bcontainer)
> VFIODevice *vbasedev;
>
> QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) {
>+ if (vbasedev->device_dirty_page_tracking == ON_OFF_AUTO_OFF) {
>+ return false;
>+ }
> if (!vbasedev->dirty_pages_supported) {
> return false;
> }
>diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
>index cbfaef7afffe..262d42a46e58 100644
>--- a/hw/vfio/migration.c
>+++ b/hw/vfio/migration.c
>@@ -1036,7 +1036,9 @@ bool vfio_migration_realize(VFIODevice
>*vbasedev, Error **errp)
> return !vfio_block_migration(vbasedev, err, errp);
> }
>
>- if (!vbasedev->dirty_pages_supported && !vbasedev-
>>iommu_dirty_tracking) {
>+ if ((!vbasedev->dirty_pages_supported ||
>+ vbasedev->device_dirty_page_tracking == ON_OFF_AUTO_OFF) &&
>+ !vbasedev->iommu_dirty_tracking) {
> if (vbasedev->enable_migration == ON_OFF_AUTO_AUTO) {
> error_setg(&err,
> "%s: VFIO device doesn't support device and "
>diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>index 8c0f212a163e..a0767de54b8d 100644
>--- a/hw/vfio/pci.c
>+++ b/hw/vfio/pci.c
>@@ -3364,6 +3364,9 @@ static Property vfio_pci_dev_properties[] = {
> DEFINE_PROP_ON_OFF_AUTO("x-pre-copy-dirty-page-tracking",
>VFIOPCIDevice,
> vbasedev.pre_copy_dirty_page_tracking,
> ON_OFF_AUTO_ON),
>+ DEFINE_PROP_ON_OFF_AUTO("x-device-dirty-page-tracking",
>VFIOPCIDevice,
>+ vbasedev.device_dirty_page_tracking,
>+ ON_OFF_AUTO_ON),
> DEFINE_PROP_ON_OFF_AUTO("display", VFIOPCIDevice,
> display, ON_OFF_AUTO_OFF),
> DEFINE_PROP_UINT32("xres", VFIOPCIDevice, display_xres, 0),
>--
>2.17.2
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [PATCH v6 9/9] vfio/common: Allow disabling device dirty page tracking
2024-07-22 21:13 ` [PATCH v6 9/9] vfio/common: Allow disabling device dirty page tracking Joao Martins
2024-07-23 5:05 ` Duan, Zhenzhong
@ 2024-07-23 8:31 ` Eric Auger
2024-07-23 8:42 ` Joao Martins
1 sibling, 1 reply; 51+ messages in thread
From: Eric Auger @ 2024-07-23 8:31 UTC (permalink / raw)
To: Joao Martins, qemu-devel
Cc: Yi Liu, Zhenzhong Duan, Alex Williamson, Cedric Le Goater,
Jason Gunthorpe, Avihai Horon
On 7/22/24 23:13, Joao Martins wrote:
> The property 'x-pre-copy-dirty-page-tracking' allows disabling the whole
> tracking of VF pre-copy phase of dirty page tracking, though it means
> that it will only be used at the start of the switchover phase.
>
> Add an option that disables the VF dirty page tracking, and fall
> back into container-based dirty page tracking. This also allows to
> use IOMMU dirty tracking even on VFs with their own dirty
> tracker scheme.
Forced ON value isn't really meaningful though. I mean only valid values
are AUTO and OFF, correct?
Eric
>
> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
> ---
> include/hw/vfio/vfio-common.h | 1 +
> hw/vfio/common.c | 3 +++
> hw/vfio/migration.c | 4 +++-
> hw/vfio/pci.c | 3 +++
> 4 files changed, 10 insertions(+), 1 deletion(-)
>
> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
> index 1e02c98b09ba..fed499b199f0 100644
> --- a/include/hw/vfio/vfio-common.h
> +++ b/include/hw/vfio/vfio-common.h
> @@ -138,6 +138,7 @@ typedef struct VFIODevice {
> VFIOMigration *migration;
> Error *migration_blocker;
> OnOffAuto pre_copy_dirty_page_tracking;
> + OnOffAuto device_dirty_page_tracking;
> bool dirty_pages_supported;
> bool dirty_tracking;
> bool iommu_dirty_tracking;
> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> index da12cbd56408..36d0cf6585b2 100644
> --- a/hw/vfio/common.c
> +++ b/hw/vfio/common.c
> @@ -199,6 +199,9 @@ bool vfio_devices_all_device_dirty_tracking(const VFIOContainerBase *bcontainer)
> VFIODevice *vbasedev;
>
> QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) {
> + if (vbasedev->device_dirty_page_tracking == ON_OFF_AUTO_OFF) {
> + return false;
> + }
> if (!vbasedev->dirty_pages_supported) {
> return false;
> }
> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
> index cbfaef7afffe..262d42a46e58 100644
> --- a/hw/vfio/migration.c
> +++ b/hw/vfio/migration.c
> @@ -1036,7 +1036,9 @@ bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp)
> return !vfio_block_migration(vbasedev, err, errp);
> }
>
> - if (!vbasedev->dirty_pages_supported && !vbasedev->iommu_dirty_tracking) {
> + if ((!vbasedev->dirty_pages_supported ||
> + vbasedev->device_dirty_page_tracking == ON_OFF_AUTO_OFF) &&
> + !vbasedev->iommu_dirty_tracking) {
> if (vbasedev->enable_migration == ON_OFF_AUTO_AUTO) {
> error_setg(&err,
> "%s: VFIO device doesn't support device and "
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index 8c0f212a163e..a0767de54b8d 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -3364,6 +3364,9 @@ static Property vfio_pci_dev_properties[] = {
> DEFINE_PROP_ON_OFF_AUTO("x-pre-copy-dirty-page-tracking", VFIOPCIDevice,
> vbasedev.pre_copy_dirty_page_tracking,
> ON_OFF_AUTO_ON),
> + DEFINE_PROP_ON_OFF_AUTO("x-device-dirty-page-tracking", VFIOPCIDevice,
> + vbasedev.device_dirty_page_tracking,
> + ON_OFF_AUTO_ON),
> DEFINE_PROP_ON_OFF_AUTO("display", VFIOPCIDevice,
> display, ON_OFF_AUTO_OFF),
> DEFINE_PROP_UINT32("xres", VFIOPCIDevice, display_xres, 0),
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [PATCH v6 9/9] vfio/common: Allow disabling device dirty page tracking
2024-07-23 8:31 ` Eric Auger
@ 2024-07-23 8:42 ` Joao Martins
2024-07-23 10:11 ` Eric Auger
0 siblings, 1 reply; 51+ messages in thread
From: Joao Martins @ 2024-07-23 8:42 UTC (permalink / raw)
To: eric.auger, qemu-devel
Cc: Yi Liu, Zhenzhong Duan, Alex Williamson, Cedric Le Goater,
Jason Gunthorpe, Avihai Horon
On 23/07/2024 09:31, Eric Auger wrote:
>
>
> On 7/22/24 23:13, Joao Martins wrote:
>> The property 'x-pre-copy-dirty-page-tracking' allows disabling the whole
>> tracking of VF pre-copy phase of dirty page tracking, though it means
>> that it will only be used at the start of the switchover phase.
>>
>> Add an option that disables the VF dirty page tracking, and fall
>> back into container-based dirty page tracking. This also allows to
>> use IOMMU dirty tracking even on VFs with their own dirty
>> tracker scheme.
>
> Forced ON value isn't really meaningful though. I mean only valid values
> are AUTO and OFF, correct?
>
It's more like 'on' if supported.
You could argue that AUTO_AUTO is the right value, considering that the device may
or may not support it. But really both AUTO/ON behave the same, as the property that
indicates device dirty tracking is ::dirty_tracking for the VF, which is
eventually seeded from the VFIO PCI feature. So this is mostly to override the
default, and hence AUTO_OFF is the only value that is tested against in the
various places.
My picking AUTO_ON just mirrored how it's already done for
x-pre-copy-dirty-page-tracking (and I see it used for e.g. kernel_irqchip and others)
> Eric
>>
>> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
>> ---
>> include/hw/vfio/vfio-common.h | 1 +
>> hw/vfio/common.c | 3 +++
>> hw/vfio/migration.c | 4 +++-
>> hw/vfio/pci.c | 3 +++
>> 4 files changed, 10 insertions(+), 1 deletion(-)
>>
>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
>> index 1e02c98b09ba..fed499b199f0 100644
>> --- a/include/hw/vfio/vfio-common.h
>> +++ b/include/hw/vfio/vfio-common.h
>> @@ -138,6 +138,7 @@ typedef struct VFIODevice {
>> VFIOMigration *migration;
>> Error *migration_blocker;
>> OnOffAuto pre_copy_dirty_page_tracking;
>> + OnOffAuto device_dirty_page_tracking;
>> bool dirty_pages_supported;
>> bool dirty_tracking;
>> bool iommu_dirty_tracking;
>> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
>> index da12cbd56408..36d0cf6585b2 100644
>> --- a/hw/vfio/common.c
>> +++ b/hw/vfio/common.c
>> @@ -199,6 +199,9 @@ bool vfio_devices_all_device_dirty_tracking(const VFIOContainerBase *bcontainer)
>> VFIODevice *vbasedev;
>>
>> QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) {
>> + if (vbasedev->device_dirty_page_tracking == ON_OFF_AUTO_OFF) {
>> + return false;
>> + }
>> if (!vbasedev->dirty_pages_supported) {
>> return false;
>> }
>> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
>> index cbfaef7afffe..262d42a46e58 100644
>> --- a/hw/vfio/migration.c
>> +++ b/hw/vfio/migration.c
>> @@ -1036,7 +1036,9 @@ bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp)
>> return !vfio_block_migration(vbasedev, err, errp);
>> }
>>
>> - if (!vbasedev->dirty_pages_supported && !vbasedev->iommu_dirty_tracking) {
>> + if ((!vbasedev->dirty_pages_supported ||
>> + vbasedev->device_dirty_page_tracking == ON_OFF_AUTO_OFF) &&
>> + !vbasedev->iommu_dirty_tracking) {
>> if (vbasedev->enable_migration == ON_OFF_AUTO_AUTO) {
>> error_setg(&err,
>> "%s: VFIO device doesn't support device and "
>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>> index 8c0f212a163e..a0767de54b8d 100644
>> --- a/hw/vfio/pci.c
>> +++ b/hw/vfio/pci.c
>> @@ -3364,6 +3364,9 @@ static Property vfio_pci_dev_properties[] = {
>> DEFINE_PROP_ON_OFF_AUTO("x-pre-copy-dirty-page-tracking", VFIOPCIDevice,
>> vbasedev.pre_copy_dirty_page_tracking,
>> ON_OFF_AUTO_ON),
>> + DEFINE_PROP_ON_OFF_AUTO("x-device-dirty-page-tracking", VFIOPCIDevice,
>> + vbasedev.device_dirty_page_tracking,
>> + ON_OFF_AUTO_ON),
>> DEFINE_PROP_ON_OFF_AUTO("display", VFIOPCIDevice,
>> display, ON_OFF_AUTO_OFF),
>> DEFINE_PROP_UINT32("xres", VFIOPCIDevice, display_xres, 0),
>
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [PATCH v6 9/9] vfio/common: Allow disabling device dirty page tracking
2024-07-23 8:42 ` Joao Martins
@ 2024-07-23 10:11 ` Eric Auger
0 siblings, 0 replies; 51+ messages in thread
From: Eric Auger @ 2024-07-23 10:11 UTC (permalink / raw)
To: Joao Martins, qemu-devel
Cc: Yi Liu, Zhenzhong Duan, Alex Williamson, Cedric Le Goater,
Jason Gunthorpe, Avihai Horon
On 7/23/24 10:42, Joao Martins wrote:
> On 23/07/2024 09:31, Eric Auger wrote:
>>
>> On 7/22/24 23:13, Joao Martins wrote:
>>> The property 'x-pre-copy-dirty-page-tracking' allows disabling the whole
>>> tracking of VF pre-copy phase of dirty page tracking, though it means
>>> that it will only be used at the start of the switchover phase.
>>>
>>> Add an option that disables the VF dirty page tracking, and fall
>>> back into container-based dirty page tracking. This also allows to
>>> use IOMMU dirty tracking even on VFs with their own dirty
>>> tracker scheme.
>> Forced ON value isn't really meaningful though. I mean only valid values
>> are AUTO and OFF, correct?
>>
> It's more like 'on' if supported.
>
> You could argue that AUTO_AUTO is the right value considering that device may or
> may not support. But really both AUTO/ON are the same as the property that
> outlines device dirty tracking is ::dirty_tracking for the VF which is
> eventually seeded from VFIO pci feature. So this is mostly to override the
> default and hence AUTO_OFF is the only thing that it's tested against in the
> various places.
>
> Me picking AUTO_ON just mirrored how it's already done for
> x-pre-copy-dirty-page-tracking (and I see used e.g. kernel_irqchip and others)
OK
Thanks
Eric
>
>> Eric
>>> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
>>> ---
>>> include/hw/vfio/vfio-common.h | 1 +
>>> hw/vfio/common.c | 3 +++
>>> hw/vfio/migration.c | 4 +++-
>>> hw/vfio/pci.c | 3 +++
>>> 4 files changed, 10 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
>>> index 1e02c98b09ba..fed499b199f0 100644
>>> --- a/include/hw/vfio/vfio-common.h
>>> +++ b/include/hw/vfio/vfio-common.h
>>> @@ -138,6 +138,7 @@ typedef struct VFIODevice {
>>> VFIOMigration *migration;
>>> Error *migration_blocker;
>>> OnOffAuto pre_copy_dirty_page_tracking;
>>> + OnOffAuto device_dirty_page_tracking;
>>> bool dirty_pages_supported;
>>> bool dirty_tracking;
>>> bool iommu_dirty_tracking;
>>> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
>>> index da12cbd56408..36d0cf6585b2 100644
>>> --- a/hw/vfio/common.c
>>> +++ b/hw/vfio/common.c
>>> @@ -199,6 +199,9 @@ bool vfio_devices_all_device_dirty_tracking(const VFIOContainerBase *bcontainer)
>>> VFIODevice *vbasedev;
>>>
>>> QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) {
>>> + if (vbasedev->device_dirty_page_tracking == ON_OFF_AUTO_OFF) {
>>> + return false;
>>> + }
>>> if (!vbasedev->dirty_pages_supported) {
>>> return false;
>>> }
>>> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
>>> index cbfaef7afffe..262d42a46e58 100644
>>> --- a/hw/vfio/migration.c
>>> +++ b/hw/vfio/migration.c
>>> @@ -1036,7 +1036,9 @@ bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp)
>>> return !vfio_block_migration(vbasedev, err, errp);
>>> }
>>>
>>> - if (!vbasedev->dirty_pages_supported && !vbasedev->iommu_dirty_tracking) {
>>> + if ((!vbasedev->dirty_pages_supported ||
>>> + vbasedev->device_dirty_page_tracking == ON_OFF_AUTO_OFF) &&
>>> + !vbasedev->iommu_dirty_tracking) {
>>> if (vbasedev->enable_migration == ON_OFF_AUTO_AUTO) {
>>> error_setg(&err,
>>> "%s: VFIO device doesn't support device and "
>>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>>> index 8c0f212a163e..a0767de54b8d 100644
>>> --- a/hw/vfio/pci.c
>>> +++ b/hw/vfio/pci.c
>>> @@ -3364,6 +3364,9 @@ static Property vfio_pci_dev_properties[] = {
>>> DEFINE_PROP_ON_OFF_AUTO("x-pre-copy-dirty-page-tracking", VFIOPCIDevice,
>>> vbasedev.pre_copy_dirty_page_tracking,
>>> ON_OFF_AUTO_ON),
>>> + DEFINE_PROP_ON_OFF_AUTO("x-device-dirty-page-tracking", VFIOPCIDevice,
>>> + vbasedev.device_dirty_page_tracking,
>>> + ON_OFF_AUTO_ON),
>>> DEFINE_PROP_ON_OFF_AUTO("display", VFIOPCIDevice,
>>> display, ON_OFF_AUTO_OFF),
>>> DEFINE_PROP_UINT32("xres", VFIOPCIDevice, display_xres, 0),
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [PATCH v6 0/9] hw/iommufd: IOMMUFD Dirty Tracking
2024-07-22 21:13 [PATCH v6 0/9] hw/iommufd: IOMMUFD Dirty Tracking Joao Martins
` (8 preceding siblings ...)
2024-07-22 21:13 ` [PATCH v6 9/9] vfio/common: Allow disabling device dirty page tracking Joao Martins
@ 2024-07-23 8:35 ` Cédric Le Goater
2024-07-23 8:56 ` Joao Martins
2024-07-23 14:23 ` Yi Liu
10 siblings, 1 reply; 51+ messages in thread
From: Cédric Le Goater @ 2024-07-23 8:35 UTC (permalink / raw)
To: Joao Martins, qemu-devel
Cc: Yi Liu, Eric Auger, Zhenzhong Duan, Alex Williamson,
Jason Gunthorpe, Avihai Horon
On 7/22/24 23:13, Joao Martins wrote:
> This small series adds support for IOMMU dirty tracking via the
> IOMMUFD backend. The hardware capability is available on most recent x86
> hardware (and on SMMUv3 in the upcoming v6.11). The series is
> organized as follows:
>
> * Patches 1 - 7: IOMMUFD backend support for dirty tracking;
>
> Introduce auto domains -- Patch 3 goes into more detail, but the gist is that
> we will find and attach a device to a compatible IOMMU domain, or allocate a new
> hardware pagetable *or* rely on kernel IOAS attach (for mdevs). Afterwards the
> workflow is relatively simple:
>
> 1) Probe device and allow dirty tracking in the HWPT
> 2) Toggling dirty tracking on/off
> 3) Read-and-clear of Dirty IOVAs
>
> The heuristics selected for (1) were to always request the HWPT for
> dirty tracking if supported, or rely on device dirty page tracking. This
> is a little simplistic and we aren't necessarily utilizing IOMMU dirty
> tracking even if we ask during hwpt allocation.
>
> The unmap case is deferred until further vIOMMU support with migration
> is added[3] which will then introduce the usage of
> IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR in GET_DIRTY_BITMAP ioctl in the
> dma unmap bitmap flow.
>
> * Patches 8 - 9: Don't block live migration where there's no VF dirty
> tracker, considering that we have IOMMU dirty tracking.
>
> Comments and feedback appreciated (on patches 1, 5, 8, 9)
>
> Cheers,
> Joao
>
> P.S. Suggest v6.11-rc as hypervisor kernel as there's
> some bugs fixed there with regards to IOMMU hugepage dirty tracking.
>
> Changes since v5[6]:
> * Remove patches 1-4 as these were committed to vfio-next
> * Add the Rb by Cedric and Zhenzhong (previously patches 7, 8, 10, 11)
> * Introduce VFIODevice::iommu_dirty_tracking and use it on patch 5, 8
> to store whether we can use IOMMU dirty tracking.
>
> Changes since v4[5]:
> * Add various Reviewed-by in patches 2,3,4,6,8,11
> * Change error messages to mention IOMMU (Zhenzhong)
> * Better improve the checking of dirty page tracking in
> vfio_migration_realize() to detect per-device IOMMU instead of using
> container dirty_page_supported().
> * Improve various commit messages (Eric)
> * Extract the caps::hw_caps into its own patch as it was misleading to
> be hidden in another patch (new patch 7)
> * Restructure patch 1 helper to be vfio_device_is_mdev() and use
> vfio::mdev directly in rest of patches (Cedric)
> * Improve error messages of set,query dirty tracking (Cedric)
> * Add missing casts to uintptr and uint64_t* (Cedric)
> * Add missing comments to struct doc from aw_bits removal (and hw_caps
> addition) (Eric)
> * Fix the detach flow in auto domains (Eric)
> * Set hwpt to NULL on detach (Eric)
> * Spurious line (Eric)
>
> Changes since v3[5]:
> * Skip HostIOMMUDevice::realize for mdev, and introduce a helper to check if the VFIO
> device is mdev. (Zhenzhong)
> * Skip setting IOMMU device for mdev (Zhenzhong)
> * Add Zhenzhong review tag in patch 3
> * Utilize vbasedev::bcontainer::dirty_pages_supported instead of introducing
> a new HostIOMMUDevice capability and thus remove the cap patch from the series (Zhenzhong)
> * Move the HostIOMMUDevice::realize() to be part of VFIODevice initialization in attach_device()
> while skipping it all together for mdev. (Cedric)
> * Due to the previous item, had to remove aw_bits because it depends on device attach being
> finished, instead defer it to when get_cap() gets called.
> * Skip auto domains for mdev instead of purposely erroring out (Zhenzhong)
> * Pass errp in all cases, and instead just free the error in case of -EINVAL
> in most of all patches, and also pass Error* in iommufd_backend_alloc_hwpt() and
> set/query dirty. This is made better thanks in part to skipping auto domains for mdev (Cedric)
>
> Changes since RFCv2[4]:
> * Always allocate hwpt with IOMMU_HWPT_ALLOC_DIRTY_TRACKING even if
> we end up not actually toggling dirty tracking. (Avihai)
> * Fix error handling widely in auto domains logic and all patches (Avihai)
> * Reuse iommufd_backend_get_device_info() for capabilities (Zhenzhong)
> * New patches 1 and 2 taking into consideration previous comments.
> * Store hwpt::flags to know if we have dirty tracking (Avihai)
> * New patch 8, that allows to query dirty tracking support after
> provisioning. This is a cleaner way to check IOMMU dirty tracking support
> when vfio::migration is initialized, as opposed to RFCv2 via device caps.
> device caps way is still used because at vfio attach we aren't yet with
> a fully initialized migration state.
> * Adopt error propagation in query,set dirty tracking
> * Misc improvements overall broadly and Avihai
> * Drop hugepages as it's a bit unrelated; I can pursue that patch
> separately. The main motivation is to provide a way to test
> without hugepages similar to what vfio_type1_iommu.disable_hugepages=1
> does.
>
> Changes since RFCv1[2]:
> * Remove intel/amd dirty tracking emulation enabling
> * Remove the dirtyrate improvement for VF/IOMMU dirty tracking
> [Will pursue these two in separate series]
> * Introduce auto domains support
> * Enforce dirty tracking following the IOMMUFD UAPI for this
> * Add support for toggling hugepages in IOMMUFD
> * Auto enable support when VF supports migration to use IOMMU
> when it doesn't have VF dirty tracking
> * Add a parameter to toggle VF dirty tracking
>
> [0] https://lore.kernel.org/qemu-devel/20240201072818.327930-1-zhenzhong.duan@intel.com/
> [1] https://lore.kernel.org/qemu-devel/20240201072818.327930-10-zhenzhong.duan@intel.com/
> [2] https://lore.kernel.org/qemu-devel/20220428211351.3897-1-joao.m.martins@oracle.com/
> [3] https://lore.kernel.org/qemu-devel/20230622214845.3980-1-joao.m.martins@oracle.com/
> [4] https://lore.kernel.org/qemu-devel/20240212135643.5858-1-joao.m.martins@oracle.com/
> [5] https://lore.kernel.org/qemu-devel/20240708143420.16953-1-joao.m.martins@oracle.com/
> [6] https://lore.kernel.org/qemu-devel/20240719120501.81279-1-joao.m.martins@oracle.com/
>
> Joao Martins (9):
> vfio/iommufd: Introduce auto domain creation
> vfio/{iommufd,container}: Remove caps::aw_bits
> vfio/iommufd: Add hw_caps field to HostIOMMUDeviceCaps
> vfio/{iommufd,container}: Invoke HostIOMMUDevice::realize() during
> attach_device()
> vfio/iommufd: Probe and request hwpt dirty tracking capability
> vfio/iommufd: Implement VFIOIOMMUClass::set_dirty_tracking support
> vfio/iommufd: Implement VFIOIOMMUClass::query_dirty_bitmap support
> vfio/migration: Don't block migration device dirty tracking is
> unsupported
> vfio/common: Allow disabling device dirty page tracking
>
> include/hw/vfio/vfio-common.h | 13 +++
> include/sysemu/host_iommu_device.h | 5 +-
> include/sysemu/iommufd.h | 11 ++
> backends/iommufd.c | 85 ++++++++++++++-
> hw/vfio/common.c | 19 ++--
> hw/vfio/container.c | 9 +-
> hw/vfio/helpers.c | 11 ++
> hw/vfio/iommufd.c | 170 ++++++++++++++++++++++++++++-
> hw/vfio/migration.c | 12 +-
> hw/vfio/pci.c | 3 +
> backends/trace-events | 3 +
> 11 files changed, 318 insertions(+), 23 deletions(-)
>
Applied to vfio-next with the changes that were discussed this morning.
Please check.
Thanks,
C.
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [PATCH v6 0/9] hw/iommufd: IOMMUFD Dirty Tracking
2024-07-23 8:35 ` [PATCH v6 0/9] hw/iommufd: IOMMUFD Dirty Tracking Cédric Le Goater
@ 2024-07-23 8:56 ` Joao Martins
2024-07-23 9:08 ` Cédric Le Goater
0 siblings, 1 reply; 51+ messages in thread
From: Joao Martins @ 2024-07-23 8:56 UTC (permalink / raw)
To: Cédric Le Goater, qemu-devel
Cc: Yi Liu, Eric Auger, Zhenzhong Duan, Alex Williamson,
Jason Gunthorpe, Avihai Horon
On 23/07/2024 09:35, Cédric Le Goater wrote:
> On 7/22/24 23:13, Joao Martins wrote:
>> This small series adds support for IOMMU dirty tracking via the
>> IOMMUFD backend. The hardware capability is available on most recent x86
>> hardware (and on SMMUv3 in the upcoming v6.11). The series is
>> organized as follows:
>>
>> * Patches 1 - 7: IOMMUFD backend support for dirty tracking;
>>
>> Introduce auto domains -- Patch 3 goes into more detail, but the gist is that
>> we will find and attach a device to a compatible IOMMU domain, or allocate a new
>> hardware pagetable *or* rely on kernel IOAS attach (for mdevs). Afterwards the
>> workflow is relatively simple:
>>
>> 1) Probe device and allow dirty tracking in the HWPT
>> 2) Toggling dirty tracking on/off
>> 3) Read-and-clear of Dirty IOVAs
>>
>> The heuristics selected for (1) were to always request the HWPT for
>> dirty tracking if supported, or rely on device dirty page tracking. This
>> is a little simplistic and we aren't necessarily utilizing IOMMU dirty
>> tracking even if we ask during hwpt allocation.
>>
>> The unmap case is deferred until further vIOMMU support with migration
>> is added[3] which will then introduce the usage of
>> IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR in GET_DIRTY_BITMAP ioctl in the
>> dma unmap bitmap flow.
>>
>> * Patches 8 - 9: Don't block live migration where there's no VF dirty
>> tracker, considering that we have IOMMU dirty tracking.
>>
>> Comments and feedback appreciated (on patches 1, 5, 8, 9)
>>
>> Cheers,
>> Joao
>>
>> P.S. Suggest v6.11-rc as hypervisor kernel as there's
>> some bugs fixed there with regards to IOMMU hugepage dirty tracking.
>>
>> Changes since v5[6]:
>> * Remove patches 1-4 as these were committed to vfio-next
>> * Add the Rb by Cedric and Zhenzhong (previously patches 7, 8, 10, 11)
>> * Introduce VFIODevice::iommu_dirty_tracking and use it on patch 5, 8
>> to store whether we can use IOMMU dirty tracking.
>>
>> Changes since v4[5]:
>> * Add various Reviewed-by in patches 2,3,4,6,8,11
>> * Change error messages to mention IOMMU (Zhenzhong)
>> * Better improve the checking of dirty page tracking in
>> vfio_migration_realize() to detect per-device IOMMU instead of using
>> container dirty_page_supported().
>> * Improve various commit messages (Eric)
>> * Extract the caps::hw_caps into its own patch as it was misleading to
>> be hidden in another patch (new patch 7)
>> * Restructure patch 1 helper to be vfio_device_is_mdev() and use
>> vfio::mdev directly in rest of patches (Cedric)
>> * Improve error messages of set,query dirty tracking (Cedric)
>> * Add missing casts to uintptr and uint64_t* (Cedric)
>> * Add missing comments to struct doc from aw_bits removal (and hw_caps
>> addition) (Eric)
>> * Fix the detach flow in auto domains (Eric)
>> * Set hwpt to NULL on detach (Eric)
>> * Spurious line (Eric)
>>
>> Changes since v3[5]:
>> * Skip HostIOMMUDevice::realize for mdev, and introduce a helper to check if
>> the VFIO
>> device is mdev. (Zhenzhong)
>> * Skip setting IOMMU device for mdev (Zhenzhong)
>> * Add Zhenzhong review tag in patch 3
>> * Utilize vbasedev::bcontainer::dirty_pages_supported instead of introducing
>> a new HostIOMMUDevice capability and thus remove the cap patch from the
>> series (Zhenzhong)
>> * Move the HostIOMMUDevice::realize() to be part of VFIODevice initialization
>> in attach_device()
>> while skipping it all together for mdev. (Cedric)
>> * Due to the previous item, had to remove aw_bits because it depends on device
>> attach being
>> finished, instead defer it to when get_cap() gets called.
>> * Skip auto domains for mdev instead of purposely erroring out (Zhenzhong)
>> * Pass errp in all cases, and instead just free the error in case of -EINVAL
>> in most of all patches, and also pass Error* in
>> iommufd_backend_alloc_hwpt() and
>> set/query dirty. This is made better thanks in part to skipping auto
>> domains for mdev (Cedric)
>>
>> Changes since RFCv2[4]:
>> * Always allocate hwpt with IOMMU_HWPT_ALLOC_DIRTY_TRACKING even if
>> we end up not actually toggling dirty tracking. (Avihai)
>> * Fix error handling widely in auto domains logic and all patches (Avihai)
>> * Reuse iommufd_backend_get_device_info() for capabilities (Zhenzhong)
>> * New patches 1 and 2 taking into consideration previous comments.
>> * Store hwpt::flags to know if we have dirty tracking (Avihai)
>> * New patch 8, that allows to query dirty tracking support after
>> provisioning. This is a cleaner way to check IOMMU dirty tracking support
>> when vfio::migration is initialized, as opposed to RFCv2 via device caps.
>> device caps way is still used because at vfio attach we aren't yet with
>> a fully initialized migration state.
>> * Adopt error propagation in query,set dirty tracking
>> * Misc improvements overall broadly and Avihai
>> * Drop hugepages as it's a bit unrelated; I can pursue that patch
>> separately. The main motivation is to provide a way to test
>> without hugepages similar to what vfio_type1_iommu.disable_hugepages=1
>> does.
>>
>> Changes since RFCv1[2]:
>> * Remove intel/amd dirty tracking emulation enabling
>> * Remove the dirtyrate improvement for VF/IOMMU dirty tracking
>> [Will pursue these two in separate series]
>> * Introduce auto domains support
>> * Enforce dirty tracking following the IOMMUFD UAPI for this
>> * Add support for toggling hugepages in IOMMUFD
>> * Auto enable support when VF supports migration to use IOMMU
>> when it doesn't have VF dirty tracking
>> * Add a parameter to toggle VF dirty tracking
>>
>> [0]
>> https://lore.kernel.org/qemu-devel/20240201072818.327930-1-zhenzhong.duan@intel.com/
>> [1]
>> https://lore.kernel.org/qemu-devel/20240201072818.327930-10-zhenzhong.duan@intel.com/
>> [2]
>> https://lore.kernel.org/qemu-devel/20220428211351.3897-1-joao.m.martins@oracle.com/
>> [3]
>> https://lore.kernel.org/qemu-devel/20230622214845.3980-1-joao.m.martins@oracle.com/
>> [4]
>> https://lore.kernel.org/qemu-devel/20240212135643.5858-1-joao.m.martins@oracle.com/
>> [5]
>> https://lore.kernel.org/qemu-devel/20240708143420.16953-1-joao.m.martins@oracle.com/
>> [6]
>> https://lore.kernel.org/qemu-devel/20240719120501.81279-1-joao.m.martins@oracle.com/
>>
>> Joao Martins (9):
>> vfio/iommufd: Introduce auto domain creation
>> vfio/{iommufd,container}: Remove caps::aw_bits
>> vfio/iommufd: Add hw_caps field to HostIOMMUDeviceCaps
>> vfio/{iommufd,container}: Invoke HostIOMMUDevice::realize() during
>> attach_device()
>> vfio/iommufd: Probe and request hwpt dirty tracking capability
>> vfio/iommufd: Implement VFIOIOMMUClass::set_dirty_tracking support
>> vfio/iommufd: Implement VFIOIOMMUClass::query_dirty_bitmap support
>> vfio/migration: Don't block migration device dirty tracking is
>> unsupported
>> vfio/common: Allow disabling device dirty page tracking
>>
>> include/hw/vfio/vfio-common.h | 13 +++
>> include/sysemu/host_iommu_device.h | 5 +-
>> include/sysemu/iommufd.h | 11 ++
>> backends/iommufd.c | 85 ++++++++++++++-
>> hw/vfio/common.c | 19 ++--
>> hw/vfio/container.c | 9 +-
>> hw/vfio/helpers.c | 11 ++
>> hw/vfio/iommufd.c | 170 ++++++++++++++++++++++++++++-
>> hw/vfio/migration.c | 12 +-
>> hw/vfio/pci.c | 3 +
>> backends/trace-events | 3 +
>> 11 files changed, 318 insertions(+), 23 deletions(-)
>
> Applied to vfio-next with the changes that were discussed this morning.
> Please check.
>
I think the only thing missing is adding the comment Eric suggested to the
fourth patch (see below). Other than that, looks good to me.
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
index 50ffa4b77090..abb6f1a4b4a8 100644
--- a/hw/vfio/iommufd.c
+++ b/hw/vfio/iommufd.c
@@ -488,6 +488,13 @@ static bool iommufd_cdev_attach(const char *name,
VFIODevice *vbasedev,
space = vfio_get_address_space(as);
+ /*
+ * The HostIOMMUDevice data from legacy backend is static and doesn't need
+ * any information from the (type1-iommu) backend to be initialized. In
+ * contrast however, the IOMMUFD HostIOMMUDevice data requires the iommufd
+ * FD to be connected and having a devid to be able to successfully call
+ * iommufd_backend_get_device_info().
+ */
if (!vfio_device_hiod_realize(vbasedev, errp)) {
goto err_alloc_ioas;
}
^ permalink raw reply related [flat|nested] 51+ messages in thread
* Re: [PATCH v6 0/9] hw/iommufd: IOMMUFD Dirty Tracking
2024-07-23 8:56 ` Joao Martins
@ 2024-07-23 9:08 ` Cédric Le Goater
0 siblings, 0 replies; 51+ messages in thread
From: Cédric Le Goater @ 2024-07-23 9:08 UTC (permalink / raw)
To: Joao Martins, qemu-devel
Cc: Yi Liu, Eric Auger, Zhenzhong Duan, Alex Williamson,
Jason Gunthorpe, Avihai Horon
On 7/23/24 10:56, Joao Martins wrote:
> On 23/07/2024 09:35, Cédric Le Goater wrote:
>> On 7/22/24 23:13, Joao Martins wrote:
>>> This small series adds support for IOMMU dirty tracking via the
>>> IOMMUFD backend. The hardware capability is available on most recent x86
>>> hardware (and on SMMUv3 in the upcoming v6.11). The series is
>>> organized as follows:
>>>
>>> * Patches 1 - 7: IOMMUFD backend support for dirty tracking;
>>>
>>> Introduce auto domains -- Patch 3 goes into more detail, but the gist is that
>>> we will find and attach a device to a compatible IOMMU domain, or allocate a new
>>> hardware pagetable *or* rely on kernel IOAS attach (for mdevs). Afterwards the
>>> workflow is relatively simple:
>>>
>>> 1) Probe device and allow dirty tracking in the HWPT
>>> 2) Toggling dirty tracking on/off
>>> 3) Read-and-clear of Dirty IOVAs
>>>
>>> The heuristics selected for (1) were to always request the HWPT for
>>> dirty tracking if supported, or rely on device dirty page tracking. This
>>> is a little simplistic and we aren't necessarily utilizing IOMMU dirty
>>> tracking even if we ask during hwpt allocation.
>>>
>>> The unmap case is deferred until further vIOMMU support with migration
>>> is added[3] which will then introduce the usage of
>>> IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR in GET_DIRTY_BITMAP ioctl in the
>>> dma unmap bitmap flow.
>>>
>>> * Patches 8 - 9: Don't block live migration where there's no VF dirty
>>> tracker, considering that we have IOMMU dirty tracking.
>>>
>>> Comments and feedback appreciated (on patches 1, 5, 8, 9)
>>>
>>> Cheers,
>>> Joao
>>>
>>> P.S. Suggest v6.11-rc as hypervisor kernel as there's
>>> some bugs fixed there with regards to IOMMU hugepage dirty tracking.
>>>
>>> Changes since v5[6]:
>>> * Remove patches 1-4 as these were committed to vfio-next
>>> * Add the Rb by Cedric and Zhenzhong (previously patches 7, 8, 10, 11)
>>> * Introduce VFIODevice::iommu_dirty_tracking and use it on patch 5, 8
>>> to store whether we can use IOMMU dirty tracking.
>>>
>>> Changes since v4[5]:
>>> * Add various Reviewed-by in patches 2,3,4,6,8,11
>>> * Change error messages to mention IOMMU (Zhenzhong)
>>> * Better improve the checking of dirty page tracking in
>>> vfio_migration_realize() to detect per-device IOMMU instead of using
>>> container dirty_page_supported().
>>> * Improve various commit messages (Eric)
>>> * Extract the caps::hw_caps into its own patch as it was misleading to
>>> be hidden in another patch (new patch 7)
>>> * Restructure patch 1 helper to be vfio_device_is_mdev() and use
>>> vfio::mdev directly in rest of patches (Cedric)
>>> * Improve error messages of set,query dirty tracking (Cedric)
>>> * Add missing casts to uintptr and uint64_t* (Cedric)
>>> * Add missing comments to struct doc from aw_bits removal (and hw_caps
>>> addition) (Eric)
>>> * Fix the detach flow in auto domains (Eric)
>>> * Set hwpt to NULL on detach (Eric)
>>> * Spurious line (Eric)
>>>
>>> Changes since v3[5]:
>>> * Skip HostIOMMUDevice::realize for mdev, and introduce a helper to check if
>>> the VFIO
>>> device is mdev. (Zhenzhong)
>>> * Skip setting IOMMU device for mdev (Zhenzhong)
>>> * Add Zhenzhong review tag in patch 3
>>> * Utilize vbasedev::bcontainer::dirty_pages_supported instead of introducing
>>> a new HostIOMMUDevice capability and thus remove the cap patch from the
>>> series (Zhenzhong)
>>> * Move the HostIOMMUDevice::realize() to be part of VFIODevice initialization
>>> in attach_device()
>>> while skipping it altogether for mdev. (Cedric)
>>> * Due to the previous item, had to remove aw_bits because it depends on device
>>> attach being
>>> finished, instead defer it to when get_cap() gets called.
>>> * Skip auto domains for mdev instead of purposely erroring out (Zhenzhong)
>>> * Pass errp in all cases, and instead just free the error in case of -EINVAL
>>> in most of all patches, and also pass Error* in
>>> iommufd_backend_alloc_hwpt() and
>>> set/query dirty. This is made better thanks in part to skipping auto
>>> domains for mdev (Cedric)
>>>
>>> Changes since RFCv2[4]:
>>> * Always allocate hwpt with IOMMU_HWPT_ALLOC_DIRTY_TRACKING even if
>>> we end up not actually toggling dirty tracking. (Avihai)
>>> * Fix error handling widely in auto domains logic and all patches (Avihai)
>>> * Reuse iommufd_backend_get_device_info() for capabilities (Zhenzhong)
>>> * New patches 1 and 2 taking into consideration previous comments.
>>> * Store hwpt::flags to know if we have dirty tracking (Avihai)
>>> * New patch 8, that allows to query dirty tracking support after
>>> provisioning. This is a cleaner way to check IOMMU dirty tracking support
>>> when vfio::migration is initialized, as opposed to RFCv2 via device caps.
>>> device caps way is still used because at vfio attach we aren't yet with
>>> a fully initialized migration state.
>>> * Adopt error propagation in query,set dirty tracking
>>> * Misc improvements overall broadly and Avihai
>>> * Drop hugepages as it's a bit unrelated; I can pursue that patch
>>> * separately. The main motivation is to provide a way to test
>>> without hugepages similar to what vfio_type1_iommu.disable_hugepages=1
>>> does.
>>>
>>> Changes since RFCv1[2]:
>>> * Remove intel/amd dirty tracking emulation enabling
>>> * Remove the dirtyrate improvement for VF/IOMMU dirty tracking
>>> [Will pursue these two in separate series]
>>> * Introduce auto domains support
>>> * Enforce dirty tracking following the IOMMUFD UAPI for this
>>> * Add support for toggling hugepages in IOMMUFD
>>> * Auto enable support when VF supports migration to use IOMMU
>>> when it doesn't have VF dirty tracking
>>> * Add a parameter to toggle VF dirty tracking
>>>
>>> [0]
>>> https://lore.kernel.org/qemu-devel/20240201072818.327930-1-zhenzhong.duan@intel.com/
>>> [1]
>>> https://lore.kernel.org/qemu-devel/20240201072818.327930-10-zhenzhong.duan@intel.com/
>>> [2]
>>> https://lore.kernel.org/qemu-devel/20220428211351.3897-1-joao.m.martins@oracle.com/
>>> [3]
>>> https://lore.kernel.org/qemu-devel/20230622214845.3980-1-joao.m.martins@oracle.com/
>>> [4]
>>> https://lore.kernel.org/qemu-devel/20240212135643.5858-1-joao.m.martins@oracle.com/
>>> [5]
>>> https://lore.kernel.org/qemu-devel/20240708143420.16953-1-joao.m.martins@oracle.com/
>>> [6]
>>> https://lore.kernel.org/qemu-devel/20240719120501.81279-1-joao.m.martins@oracle.com/
>>>
>>> Joao Martins (9):
>>> vfio/iommufd: Introduce auto domain creation
>>> vfio/{iommufd,container}: Remove caps::aw_bits
>>> vfio/iommufd: Add hw_caps field to HostIOMMUDeviceCaps
>>> vfio/{iommufd,container}: Invoke HostIOMMUDevice::realize() during
>>> attach_device()
>>> vfio/iommufd: Probe and request hwpt dirty tracking capability
>>> vfio/iommufd: Implement VFIOIOMMUClass::set_dirty_tracking support
>>> vfio/iommufd: Implement VFIOIOMMUClass::query_dirty_bitmap support
>>> vfio/migration: Don't block migration device dirty tracking is
>>> unsupported
>>> vfio/common: Allow disabling device dirty page tracking
>>>
>>> include/hw/vfio/vfio-common.h | 13 +++
>>> include/sysemu/host_iommu_device.h | 5 +-
>>> include/sysemu/iommufd.h | 11 ++
>>> backends/iommufd.c | 85 ++++++++++++++-
>>> hw/vfio/common.c | 19 ++--
>>> hw/vfio/container.c | 9 +-
>>> hw/vfio/helpers.c | 11 ++
>>> hw/vfio/iommufd.c | 170 ++++++++++++++++++++++++++++-
>>> hw/vfio/migration.c | 12 +-
>>> hw/vfio/pci.c | 3 +
>>> backends/trace-events | 3 +
>>> 11 files changed, 318 insertions(+), 23 deletions(-)
>>
>> Applied to vfio-next with the changes that were discussed this morning.
>> Please check.
>>
>
> I think the only thing missing is the comment Eric suggested for the fourth
> patch (see below). Other than that, looks good to me.
>
> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
> index 50ffa4b77090..abb6f1a4b4a8 100644
> --- a/hw/vfio/iommufd.c
> +++ b/hw/vfio/iommufd.c
> @@ -488,6 +488,13 @@ static bool iommufd_cdev_attach(const char *name,
> VFIODevice *vbasedev,
>
> space = vfio_get_address_space(as);
>
> + /*
> + * The HostIOMMUDevice data from legacy backend is static and doesn't need
> + * any information from the (type1-iommu) backend to be initialized. In
> + * contrast however, the IOMMUFD HostIOMMUDevice data requires the iommufd
> + * FD to be connected and having a devid to be able to successfully call
> + * iommufd_backend_get_device_info().
> + */
> if (!vfio_device_hiod_realize(vbasedev, errp)) {
> goto err_alloc_ioas;
> }
>
Yep. This is fixed now. I will send a vfio PR in a couple of hours.
Thanks,
C.
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [PATCH v6 0/9] hw/iommufd: IOMMUFD Dirty Tracking
2024-07-22 21:13 [PATCH v6 0/9] hw/iommufd: IOMMUFD Dirty Tracking Joao Martins
` (9 preceding siblings ...)
2024-07-23 8:35 ` [PATCH v6 0/9] hw/iommufd: IOMMUFD Dirty Tracking Cédric Le Goater
@ 2024-07-23 14:23 ` Yi Liu
2024-07-23 14:21 ` Joao Martins
2024-07-23 14:24 ` Cédric Le Goater
10 siblings, 2 replies; 51+ messages in thread
From: Yi Liu @ 2024-07-23 14:23 UTC (permalink / raw)
To: Joao Martins, qemu-devel
Cc: Eric Auger, Zhenzhong Duan, Alex Williamson, Cedric Le Goater,
Jason Gunthorpe, Avihai Horon
On 2024/7/23 05:13, Joao Martins wrote:
> This small series adds support for IOMMU dirty tracking via the
> IOMMUFD backend. The hardware capability is available on most recent x86
> hardware (and on SMMUv3 in the upcoming v6.11). The series is
> organized as follows:
>
> * Patches 1 - 7: IOMMUFD backend support for dirty tracking;
>
> Introduce auto domains -- Patch 3 goes into more detail, but the gist is that
> we will find and attach a device to a compatible IOMMU domain, or allocate a new
> hardware pagetable *or* rely on kernel IOAS attach (for mdevs). Afterwards the
> workflow is relatively simple:
>
> 1) Probe device and allow dirty tracking in the HWPT
> 2) Toggling dirty tracking on/off
> 3) Read-and-clear of Dirty IOVAs
>
> The heuristics selected for (1) were to always request the HWPT for
> dirty tracking if supported, or rely on device dirty page tracking. This
> is a little simplistic and we aren't necessarily utilizing IOMMU dirty
> tracking even if we ask during hwpt allocation.
>
> The unmap case is deferred until further vIOMMU support with migration
> is added[3] which will then introduce the usage of
> IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR in GET_DIRTY_BITMAP ioctl in the
> dma unmap bitmap flow.
>
> * Patches 8 - 9: Don't block live migration where there's no VF dirty
> tracker, considering that we have IOMMU dirty tracking.
>
> Comments and feedback appreciated (on patches 1, 5, 8, 9)
Hi Joao,
Do you have a github branch for this version? :)
> Cheers,
> Joao
>
> P.S. Suggest v6.11-rc as hypervisor kernel as there's
> some bugs fixed there with regards to IOMMU hugepage dirty tracking.
>
> Changes since v5[6]:
> * Remove patches 1-4 as these were committed to vfio-next
> * Add the Rb by Cedric and Zhenzhong (previously patches 7, 8, 10, 11)
> * Introduce VFIODevice::iommu_dirty_tracking and use it on patch 5, 8
> to store whether we can use IOMMU dirty tracking.
>
> Changes since v4[5]:
> * Add various Reviewed-by in patches 2,3,4,6,8,11
> * Change error messages to mention IOMMU (Zhenzhong)
> * Better improve the checking of dirty page tracking in
> vfio_migration_realize() to detect per-device IOMMU instead of using
> container dirty_page_supported().
> * Improve various commit messages (Eric)
> * Extract the caps::hw_caps into its own patch as it was misleading to
> be hidden in another patch (new patch 7)
> * Restructure patch 1 helper to be vfio_device_is_mdev() and use
> vfio::mdev directly in rest of patches (Cedric)
> * Improve error messages of set,query dirty tracking (Cedric)
> * Add missing casts to uintptr and uint64_t* (Cedric)
> * Add missing comments to struct doc from aw_bits removal (and hw_caps
> addition) (Eric)
> * Fix the detach flow in auto domains (Eric)
> * Set hwpt to NULL on detach (Eric)
> * Spurious line (Eric)
>
> Changes since v3[5]:
> * Skip HostIOMMUDevice::realize for mdev, and introduce a helper to check if the VFIO
> device is mdev. (Zhenzhong)
> * Skip setting IOMMU device for mdev (Zhenzhong)
> * Add Zhenzhong review tag in patch 3
> * Utilize vbasedev::bcontainer::dirty_pages_supported instead of introducing
> a new HostIOMMUDevice capability and thus remove the cap patch from the series (Zhenzhong)
> * Move the HostIOMMUDevice::realize() to be part of VFIODevice initialization in attach_device()
> while skipping it altogether for mdev. (Cedric)
> * Due to the previous item, had to remove aw_bits because it depends on device attach being
> finished, instead defer it to when get_cap() gets called.
> * Skip auto domains for mdev instead of purposely erroring out (Zhenzhong)
> * Pass errp in all cases, and instead just free the error in case of -EINVAL
> in most of all patches, and also pass Error* in iommufd_backend_alloc_hwpt() and
> set/query dirty. This is made better thanks in part to skipping auto domains for mdev (Cedric)
>
> Changes since RFCv2[4]:
> * Always allocate hwpt with IOMMU_HWPT_ALLOC_DIRTY_TRACKING even if
> we end up not actually toggling dirty tracking. (Avihai)
> * Fix error handling widely in auto domains logic and all patches (Avihai)
> * Reuse iommufd_backend_get_device_info() for capabilities (Zhenzhong)
> * New patches 1 and 2 taking into consideration previous comments.
> * Store hwpt::flags to know if we have dirty tracking (Avihai)
> * New patch 8, that allows to query dirty tracking support after
> provisioning. This is a cleaner way to check IOMMU dirty tracking support
> when vfio::migration is initialized, as opposed to RFCv2 via device caps.
> device caps way is still used because at vfio attach we aren't yet with
> a fully initialized migration state.
> * Adopt error propagation in query,set dirty tracking
> * Misc improvements overall broadly and Avihai
> * Drop hugepages as it's a bit unrelated; I can pursue that patch
> * separately. The main motivation is to provide a way to test
> without hugepages similar to what vfio_type1_iommu.disable_hugepages=1
> does.
>
> Changes since RFCv1[2]:
> * Remove intel/amd dirty tracking emulation enabling
> * Remove the dirtyrate improvement for VF/IOMMU dirty tracking
> [Will pursue these two in separate series]
> * Introduce auto domains support
> * Enforce dirty tracking following the IOMMUFD UAPI for this
> * Add support for toggling hugepages in IOMMUFD
> * Auto enable support when VF supports migration to use IOMMU
> when it doesn't have VF dirty tracking
> * Add a parameter to toggle VF dirty tracking
>
> [0] https://lore.kernel.org/qemu-devel/20240201072818.327930-1-zhenzhong.duan@intel.com/
> [1] https://lore.kernel.org/qemu-devel/20240201072818.327930-10-zhenzhong.duan@intel.com/
> [2] https://lore.kernel.org/qemu-devel/20220428211351.3897-1-joao.m.martins@oracle.com/
> [3] https://lore.kernel.org/qemu-devel/20230622214845.3980-1-joao.m.martins@oracle.com/
> [4] https://lore.kernel.org/qemu-devel/20240212135643.5858-1-joao.m.martins@oracle.com/
> [5] https://lore.kernel.org/qemu-devel/20240708143420.16953-1-joao.m.martins@oracle.com/
> [6] https://lore.kernel.org/qemu-devel/20240719120501.81279-1-joao.m.martins@oracle.com/
>
> Joao Martins (9):
> vfio/iommufd: Introduce auto domain creation
> vfio/{iommufd,container}: Remove caps::aw_bits
> vfio/iommufd: Add hw_caps field to HostIOMMUDeviceCaps
> vfio/{iommufd,container}: Invoke HostIOMMUDevice::realize() during
> attach_device()
> vfio/iommufd: Probe and request hwpt dirty tracking capability
> vfio/iommufd: Implement VFIOIOMMUClass::set_dirty_tracking support
> vfio/iommufd: Implement VFIOIOMMUClass::query_dirty_bitmap support
> vfio/migration: Don't block migration device dirty tracking is
> unsupported
> vfio/common: Allow disabling device dirty page tracking
>
> include/hw/vfio/vfio-common.h | 13 +++
> include/sysemu/host_iommu_device.h | 5 +-
> include/sysemu/iommufd.h | 11 ++
> backends/iommufd.c | 85 ++++++++++++++-
> hw/vfio/common.c | 19 ++--
> hw/vfio/container.c | 9 +-
> hw/vfio/helpers.c | 11 ++
> hw/vfio/iommufd.c | 170 ++++++++++++++++++++++++++++-
> hw/vfio/migration.c | 12 +-
> hw/vfio/pci.c | 3 +
> backends/trace-events | 3 +
> 11 files changed, 318 insertions(+), 23 deletions(-)
>
--
Regards,
Yi Liu
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [PATCH v6 0/9] hw/iommufd: IOMMUFD Dirty Tracking
2024-07-23 14:23 ` Yi Liu
@ 2024-07-23 14:21 ` Joao Martins
2024-07-23 14:24 ` Cédric Le Goater
1 sibling, 0 replies; 51+ messages in thread
From: Joao Martins @ 2024-07-23 14:21 UTC (permalink / raw)
To: Yi Liu, qemu-devel
Cc: Eric Auger, Zhenzhong Duan, Alex Williamson, Cedric Le Goater,
Jason Gunthorpe, Avihai Horon
On 23/07/2024 15:23, Yi Liu wrote:
> On 2024/7/23 05:13, Joao Martins wrote:
>> This small series adds support for IOMMU dirty tracking via the
>> IOMMUFD backend. The hardware capability is available on most recent x86
>> hardware (and on SMMUv3 in the upcoming v6.11). The series is
>> organized as follows:
>>
>> * Patches 1 - 7: IOMMUFD backend support for dirty tracking;
>>
>> Introduce auto domains -- Patch 3 goes into more detail, but the gist is that
>> we will find and attach a device to a compatible IOMMU domain, or allocate a new
>> hardware pagetable *or* rely on kernel IOAS attach (for mdevs). Afterwards the
>> workflow is relatively simple:
>>
>> 1) Probe device and allow dirty tracking in the HWPT
>> 2) Toggling dirty tracking on/off
>> 3) Read-and-clear of Dirty IOVAs
>>
>> The heuristics selected for (1) were to always request the HWPT for
>> dirty tracking if supported, or rely on device dirty page tracking. This
>> is a little simplistic and we aren't necessarily utilizing IOMMU dirty
>> tracking even if we ask during hwpt allocation.
>>
>> The unmap case is deferred until further vIOMMU support with migration
>> is added[3] which will then introduce the usage of
>> IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR in GET_DIRTY_BITMAP ioctl in the
>> dma unmap bitmap flow.
>>
>> * Patches 8 - 9: Don't block live migration where there's no VF dirty
>> tracker, considering that we have IOMMU dirty tracking.
>>
>> Comments and feedback appreciated (on patches 1, 5, 8, 9)
>
> Hi Joao,
>
> Do you have a github branch for this version? :)
>
No, but you can probably use Cedric's vfio-next branch:
https://github.com/legoater/qemu/tree/vfio-next
... Given that he has just submitted his PR with this series.
Joao
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [PATCH v6 0/9] hw/iommufd: IOMMUFD Dirty Tracking
2024-07-23 14:23 ` Yi Liu
2024-07-23 14:21 ` Joao Martins
@ 2024-07-23 14:24 ` Cédric Le Goater
1 sibling, 0 replies; 51+ messages in thread
From: Cédric Le Goater @ 2024-07-23 14:24 UTC (permalink / raw)
To: Yi Liu, Joao Martins, qemu-devel
Cc: Eric Auger, Zhenzhong Duan, Alex Williamson, Jason Gunthorpe,
Avihai Horon
Hello
On 7/23/24 16:23, Yi Liu wrote:
> On 2024/7/23 05:13, Joao Martins wrote:
>> This small series adds support for IOMMU dirty tracking via the
>> IOMMUFD backend. The hardware capability is available on most recent x86
>> hardware (and on SMMUv3 in the upcoming v6.11). The series is
>> organized as follows:
>>
>> * Patches 1 - 7: IOMMUFD backend support for dirty tracking;
>>
>> Introduce auto domains -- Patch 3 goes into more detail, but the gist is that
>> we will find and attach a device to a compatible IOMMU domain, or allocate a new
>> hardware pagetable *or* rely on kernel IOAS attach (for mdevs). Afterwards the
>> workflow is relatively simple:
>>
>> 1) Probe device and allow dirty tracking in the HWPT
>> 2) Toggling dirty tracking on/off
>> 3) Read-and-clear of Dirty IOVAs
>>
>> The heuristics selected for (1) were to always request the HWPT for
>> dirty tracking if supported, or rely on device dirty page tracking. This
>> is a little simplistic and we aren't necessarily utilizing IOMMU dirty
>> tracking even if we ask during hwpt allocation.
>>
>> The unmap case is deferred until further vIOMMU support with migration
>> is added[3] which will then introduce the usage of
>> IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR in GET_DIRTY_BITMAP ioctl in the
>> dma unmap bitmap flow.
>>
>> * Patches 8 - 9: Don't block live migration where there's no VF dirty
>> tracker, considering that we have IOMMU dirty tracking.
>>
>> Comments and feedback appreciated (on patches 1, 5, 8, 9)
>
> Hi Joao,
>
> Do you have a github branch for this version? :)
There have been some updates since. Please use:
https://github.com/legoater/qemu/commits/vfio-next
or you could wait for the PR to be merged :
https://lore.kernel.org/qemu-devel/20240723140019.387786-1-clg@redhat.com/
Thanks,
C.
>
>> Cheers,
>> Joao
>>
>> P.S. Suggest v6.11-rc as hypervisor kernel as there's
>> some bugs fixed there with regards to IOMMU hugepage dirty tracking.
>>
>> Changes since v5[6]:
>> * Remove patches 1-4 as these were committed to vfio-next
>> * Add the Rb by Cedric and Zhenzhong (previously patches 7, 8, 10, 11)
>> * Introduce VFIODevice::iommu_dirty_tracking and use it on patch 5, 8
>> to store whether we can use IOMMU dirty tracking.
>>
>> Changes since v4[5]:
>> * Add various Reviewed-by in patches 2,3,4,6,8,11
>> * Change error messages to mention IOMMU (Zhenzhong)
>> * Better improve the checking of dirty page tracking in
>> vfio_migration_realize() to detect per-device IOMMU instead of using
>> container dirty_page_supported().
>> * Improve various commit messages (Eric)
>> * Extract the caps::hw_caps into its own patch as it was misleading to
>> be hidden in another patch (new patch 7)
>> * Restructure patch 1 helper to be vfio_device_is_mdev() and use
>> vfio::mdev directly in rest of patches (Cedric)
>> * Improve error messages of set,query dirty tracking (Cedric)
>> * Add missing casts to uintptr and uint64_t* (Cedric)
>> * Add missing comments to struct doc from aw_bits removal (and hw_caps
>> addition) (Eric)
>> * Fix the detach flow in auto domains (Eric)
>> * Set hwpt to NULL on detach (Eric)
>> * Spurious line (Eric)
>>
>> Changes since v3[5]:
>> * Skip HostIOMMUDevice::realize for mdev, and introduce a helper to check if the VFIO
>> device is mdev. (Zhenzhong)
>> * Skip setting IOMMU device for mdev (Zhenzhong)
>> * Add Zhenzhong review tag in patch 3
>> * Utilize vbasedev::bcontainer::dirty_pages_supported instead of introducing
>> a new HostIOMMUDevice capability and thus remove the cap patch from the series (Zhenzhong)
>> * Move the HostIOMMUDevice::realize() to be part of VFIODevice initialization in attach_device()
>> while skipping it altogether for mdev. (Cedric)
>> * Due to the previous item, had to remove aw_bits because it depends on device attach being
>> finished, instead defer it to when get_cap() gets called.
>> * Skip auto domains for mdev instead of purposely erroring out (Zhenzhong)
>> * Pass errp in all cases, and instead just free the error in case of -EINVAL
>> in most of all patches, and also pass Error* in iommufd_backend_alloc_hwpt() and
>> set/query dirty. This is made better thanks in part to skipping auto domains for mdev (Cedric)
>>
>> Changes since RFCv2[4]:
>> * Always allocate hwpt with IOMMU_HWPT_ALLOC_DIRTY_TRACKING even if
>> we end up not actually toggling dirty tracking. (Avihai)
>> * Fix error handling widely in auto domains logic and all patches (Avihai)
>> * Reuse iommufd_backend_get_device_info() for capabilities (Zhenzhong)
>> * New patches 1 and 2 taking into consideration previous comments.
>> * Store hwpt::flags to know if we have dirty tracking (Avihai)
>> * New patch 8, that allows to query dirty tracking support after
>> provisioning. This is a cleaner way to check IOMMU dirty tracking support
>> when vfio::migration is initialized, as opposed to RFCv2 via device caps.
>> device caps way is still used because at vfio attach we aren't yet with
>> a fully initialized migration state.
>> * Adopt error propagation in query,set dirty tracking
>> * Misc improvements overall broadly and Avihai
>> * Drop hugepages as it's a bit unrelated; I can pursue that patch
>> * separately. The main motivation is to provide a way to test
>> without hugepages similar to what vfio_type1_iommu.disable_hugepages=1
>> does.
>>
>> Changes since RFCv1[2]:
>> * Remove intel/amd dirty tracking emulation enabling
>> * Remove the dirtyrate improvement for VF/IOMMU dirty tracking
>> [Will pursue these two in separate series]
>> * Introduce auto domains support
>> * Enforce dirty tracking following the IOMMUFD UAPI for this
>> * Add support for toggling hugepages in IOMMUFD
>> * Auto enable support when VF supports migration to use IOMMU
>> when it doesn't have VF dirty tracking
>> * Add a parameter to toggle VF dirty tracking
>>
>> [0] https://lore.kernel.org/qemu-devel/20240201072818.327930-1-zhenzhong.duan@intel.com/
>> [1] https://lore.kernel.org/qemu-devel/20240201072818.327930-10-zhenzhong.duan@intel.com/
>> [2] https://lore.kernel.org/qemu-devel/20220428211351.3897-1-joao.m.martins@oracle.com/
>> [3] https://lore.kernel.org/qemu-devel/20230622214845.3980-1-joao.m.martins@oracle.com/
>> [4] https://lore.kernel.org/qemu-devel/20240212135643.5858-1-joao.m.martins@oracle.com/
>> [5] https://lore.kernel.org/qemu-devel/20240708143420.16953-1-joao.m.martins@oracle.com/
>> [6] https://lore.kernel.org/qemu-devel/20240719120501.81279-1-joao.m.martins@oracle.com/
>>
>> Joao Martins (9):
>> vfio/iommufd: Introduce auto domain creation
>> vfio/{iommufd,container}: Remove caps::aw_bits
>> vfio/iommufd: Add hw_caps field to HostIOMMUDeviceCaps
>> vfio/{iommufd,container}: Invoke HostIOMMUDevice::realize() during
>> attach_device()
>> vfio/iommufd: Probe and request hwpt dirty tracking capability
>> vfio/iommufd: Implement VFIOIOMMUClass::set_dirty_tracking support
>> vfio/iommufd: Implement VFIOIOMMUClass::query_dirty_bitmap support
>> vfio/migration: Don't block migration device dirty tracking is
>> unsupported
>> vfio/common: Allow disabling device dirty page tracking
>>
>> include/hw/vfio/vfio-common.h | 13 +++
>> include/sysemu/host_iommu_device.h | 5 +-
>> include/sysemu/iommufd.h | 11 ++
>> backends/iommufd.c | 85 ++++++++++++++-
>> hw/vfio/common.c | 19 ++--
>> hw/vfio/container.c | 9 +-
>> hw/vfio/helpers.c | 11 ++
>> hw/vfio/iommufd.c | 170 ++++++++++++++++++++++++++++-
>> hw/vfio/migration.c | 12 +-
>> hw/vfio/pci.c | 3 +
>> backends/trace-events | 3 +
>> 11 files changed, 318 insertions(+), 23 deletions(-)
>>
>
^ permalink raw reply [flat|nested] 51+ messages in thread