* [PULL 01/16] vfio/igd: OpRegion not found fix error typo
2025-06-05 8:42 [PULL 00/16] vfio queue Cédric Le Goater
@ 2025-06-05 8:42 ` Cédric Le Goater
2025-06-05 8:42 ` [PULL 02/16] vfio: add more VFIOIOMMUClass docs Cédric Le Goater
` (15 subsequent siblings)
16 siblings, 0 replies; 18+ messages in thread
From: Cédric Le Goater @ 2025-06-05 8:42 UTC (permalink / raw)
To: qemu-devel
Cc: Alex Williamson, Edmund Raile, Tomita Moeko,
Cédric Le Goater
From: Edmund Raile <edmund.raile@protonmail.com>
Signed-off-by: Edmund Raile <edmund.raile@protonmail.com>
Reviewed-by: Tomita Moeko <tomitamoeko@gmail.com>
Reviewed-by: Cédric Le Goater <clg@redhat.com>
Link: https://lore.kernel.org/qemu-devel/MFFbQoTpea_CK5ELq8oJ-a3Q57wo7ywQlrIqDvdIDKhUuCm59VUz2QzvdojO5r_wb_7SHifU0Kym3loj4eASPhdzYpjtiMCTePzyg1zrroo=@protonmail.com
Signed-off-by: Cédric Le Goater <clg@redhat.com>
---
hw/vfio/igd.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c
index e7952d15a014f28fe302cab312ab8e60a414c679..5b6341c5bfeda5c3f5210ea080aba144d9edc90b 100644
--- a/hw/vfio/igd.c
+++ b/hw/vfio/igd.c
@@ -203,7 +203,7 @@ static bool vfio_pci_igd_opregion_detect(VFIOPCIDevice *vdev,
VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, opregion);
if (ret) {
error_setg_errno(errp, -ret,
- "Device does not supports IGD OpRegion feature");
+ "Device does not support IGD OpRegion feature");
return false;
}
--
2.49.0
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PULL 02/16] vfio: add more VFIOIOMMUClass docs
2025-06-05 8:42 [PULL 00/16] vfio queue Cédric Le Goater
2025-06-05 8:42 ` [PULL 01/16] vfio/igd: OpRegion not found fix error typo Cédric Le Goater
@ 2025-06-05 8:42 ` Cédric Le Goater
2025-06-05 8:42 ` [PULL 03/16] vfio: move more cleanup into vfio_pci_put_device() Cédric Le Goater
` (14 subsequent siblings)
16 siblings, 0 replies; 18+ messages in thread
From: Cédric Le Goater @ 2025-06-05 8:42 UTC (permalink / raw)
To: qemu-devel; +Cc: Alex Williamson, John Levon, Cédric Le Goater
From: John Levon <john.levon@nutanix.com>
Add some additional doc comments for these class methods.
Signed-off-by: John Levon <john.levon@nutanix.com>
Reviewed-by: Cédric Le Goater <clg@redhat.com>
Link: https://lore.kernel.org/qemu-devel/20250520162530.2194548-1-john.levon@nutanix.com
Signed-off-by: Cédric Le Goater <clg@redhat.com>
---
include/hw/vfio/vfio-container-base.h | 75 +++++++++++++++++++++++++--
1 file changed, 72 insertions(+), 3 deletions(-)
diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h
index 3d392b0fd8bca9de1480d75ccfac8f3625e82121..f9e561cb081b361291e72ffe228845bd6b157f92 100644
--- a/include/hw/vfio/vfio-container-base.h
+++ b/include/hw/vfio/vfio-container-base.h
@@ -115,13 +115,56 @@ OBJECT_DECLARE_TYPE(VFIOContainerBase, VFIOIOMMUClass, VFIO_IOMMU)
struct VFIOIOMMUClass {
ObjectClass parent_class;
- /* basic feature */
+ /**
+ * @setup
+ *
+ * Perform basic setup of the container, including configuring IOMMU
+ * capabilities, IOVA ranges, supported page sizes, etc.
+ *
+ * @bcontainer: #VFIOContainerBase
+ * @errp: pointer to Error*, to store an error if it happens.
+ *
+ * Returns true to indicate success and false for error.
+ */
bool (*setup)(VFIOContainerBase *bcontainer, Error **errp);
+
+ /**
+ * @listener_begin
+ *
+ * Called at the beginning of an address space update transaction.
+ * See #MemoryListener.
+ *
+ * @bcontainer: #VFIOContainerBase
+ */
void (*listener_begin)(VFIOContainerBase *bcontainer);
+
+ /**
+ * @listener_commit
+ *
+ * Called at the end of an address space update transaction,
+ * See #MemoryListener.
+ *
+ * @bcontainer: #VFIOContainerBase
+ */
void (*listener_commit)(VFIOContainerBase *bcontainer);
+
+ /**
+ * @dma_map
+ *
+ * Map an address range into the container.
+ *
+ * @bcontainer: #VFIOContainerBase to use
+ * @iova: start address to map
+ * @size: size of the range to map
+ * @vaddr: process virtual address of mapping
+ * @readonly: true if mapping should be readonly
+ *
+ * Returns 0 to indicate success and -errno otherwise.
+ */
int (*dma_map)(const VFIOContainerBase *bcontainer,
hwaddr iova, ram_addr_t size,
void *vaddr, bool readonly);
+
/**
* @dma_unmap
*
@@ -132,12 +175,38 @@ struct VFIOIOMMUClass {
* @size: size of the range to unmap
* @iotlb: The IOMMU TLB mapping entry (or NULL)
* @unmap_all: if set, unmap the entire address space
+ *
+ * Returns 0 to indicate success and -errno otherwise.
*/
int (*dma_unmap)(const VFIOContainerBase *bcontainer,
hwaddr iova, ram_addr_t size,
IOMMUTLBEntry *iotlb, bool unmap_all);
+
+
+ /**
+ * @attach_device
+ *
+ * Associate the given device with a container and do some related
+ * initialization of the device context.
+ *
+ * @name: name of the device
+ * @vbasedev: the device
+ * @as: address space to use
+ * @errp: pointer to Error*, to store an error if it happens.
+ *
+ * Returns true to indicate success and false for error.
+ */
bool (*attach_device)(const char *name, VFIODevice *vbasedev,
AddressSpace *as, Error **errp);
+
+ /*
+ * @detach_device
+ *
+ * Detach the given device from its container and clean up any necessary
+ * state.
+ *
+ * @vbasedev: the device to disassociate
+ */
void (*detach_device)(VFIODevice *vbasedev);
/* migration feature */
@@ -152,7 +221,7 @@ struct VFIOIOMMUClass {
* @start: indicates whether to start or stop dirty pages tracking
* @errp: pointer to Error*, to store an error if it happens.
*
- * Returns zero to indicate success and negative for error
+ * Returns zero to indicate success and negative for error.
*/
int (*set_dirty_page_tracking)(const VFIOContainerBase *bcontainer,
bool start, Error **errp);
@@ -167,7 +236,7 @@ struct VFIOIOMMUClass {
* @size: size of iova range
* @errp: pointer to Error*, to store an error if it happens.
*
- * Returns zero to indicate success and negative for error
+ * Returns zero to indicate success and negative for error.
*/
int (*query_dirty_bitmap)(const VFIOContainerBase *bcontainer,
VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error **errp);
--
2.49.0
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PULL 03/16] vfio: move more cleanup into vfio_pci_put_device()
2025-06-05 8:42 [PULL 00/16] vfio queue Cédric Le Goater
2025-06-05 8:42 ` [PULL 01/16] vfio/igd: OpRegion not found fix error typo Cédric Le Goater
2025-06-05 8:42 ` [PULL 02/16] vfio: add more VFIOIOMMUClass docs Cédric Le Goater
@ 2025-06-05 8:42 ` Cédric Le Goater
2025-06-05 8:42 ` [PULL 04/16] vfio: move config space read into vfio_pci_config_setup() Cédric Le Goater
` (13 subsequent siblings)
16 siblings, 0 replies; 18+ messages in thread
From: Cédric Le Goater @ 2025-06-05 8:42 UTC (permalink / raw)
To: qemu-devel; +Cc: Alex Williamson, John Levon, Cédric Le Goater
From: John Levon <john.levon@nutanix.com>
All of the cleanup can be done in the same place, and vfio-user will
want to do the same.
Signed-off-by: John Levon <john.levon@nutanix.com>
Reviewed-by: Cédric Le Goater <clg@redhat.com>
Link: https://lore.kernel.org/qemu-devel/20250520150419.2172078-3-john.levon@nutanix.com
Signed-off-by: Cédric Le Goater <clg@redhat.com>
---
hw/vfio/pci.c | 23 ++++++++++++-----------
1 file changed, 12 insertions(+), 11 deletions(-)
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index a1bfdfe3751fdf171b0b96d78d0b51853d8e7838..d96b55f80c0aa315d46aaa417c54569d71cbed1f 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -2854,6 +2854,18 @@ static bool vfio_populate_device(VFIOPCIDevice *vdev, Error **errp)
static void vfio_pci_put_device(VFIOPCIDevice *vdev)
{
+ vfio_display_finalize(vdev);
+ vfio_bars_finalize(vdev);
+ g_free(vdev->emulated_config_bits);
+ g_free(vdev->rom);
+ /*
+ * XXX Leaking igd_opregion is not an oversight, we can't remove the
+ * fw_cfg entry therefore leaking this allocation seems like the safest
+ * option.
+ *
+ * g_free(vdev->igd_opregion);
+ */
+
vfio_device_detach(&vdev->vbasedev);
g_free(vdev->vbasedev.name);
@@ -3302,17 +3314,6 @@ static void vfio_instance_finalize(Object *obj)
{
VFIOPCIDevice *vdev = VFIO_PCI_BASE(obj);
- vfio_display_finalize(vdev);
- vfio_bars_finalize(vdev);
- g_free(vdev->emulated_config_bits);
- g_free(vdev->rom);
- /*
- * XXX Leaking igd_opregion is not an oversight, we can't remove the
- * fw_cfg entry therefore leaking this allocation seems like the safest
- * option.
- *
- * g_free(vdev->igd_opregion);
- */
vfio_pci_put_device(vdev);
}
--
2.49.0
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PULL 04/16] vfio: move config space read into vfio_pci_config_setup()
2025-06-05 8:42 [PULL 00/16] vfio queue Cédric Le Goater
` (2 preceding siblings ...)
2025-06-05 8:42 ` [PULL 03/16] vfio: move more cleanup into vfio_pci_put_device() Cédric Le Goater
@ 2025-06-05 8:42 ` Cédric Le Goater
2025-06-05 8:42 ` [PULL 05/16] vfio: refactor out IRQ signalling setup Cédric Le Goater
` (12 subsequent siblings)
16 siblings, 0 replies; 18+ messages in thread
From: Cédric Le Goater @ 2025-06-05 8:42 UTC (permalink / raw)
To: qemu-devel; +Cc: Alex Williamson, John Levon, Cédric Le Goater
From: John Levon <john.levon@nutanix.com>
Small cleanup that reduces duplicate code for vfio-user and reduces the
size of vfio_realize(); while we're here, correct that name to
vfio_pci_realize().
Signed-off-by: John Levon <john.levon@nutanix.com>
Reviewed-by: Cédric Le Goater <clg@redhat.com>
Link: https://lore.kernel.org/qemu-devel/20250520150419.2172078-4-john.levon@nutanix.com
Signed-off-by: Cédric Le Goater <clg@redhat.com>
---
hw/vfio/pci.c | 31 ++++++++++++++++---------------
1 file changed, 16 insertions(+), 15 deletions(-)
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index d96b55f80c0aa315d46aaa417c54569d71cbed1f..a873f82aeb355cfef68033e43b4829f64145d606 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -3017,6 +3017,19 @@ static bool vfio_pci_config_setup(VFIOPCIDevice *vdev, Error **errp)
{
PCIDevice *pdev = &vdev->pdev;
VFIODevice *vbasedev = &vdev->vbasedev;
+ uint32_t config_space_size;
+ int ret;
+
+ config_space_size = MIN(pci_config_size(&vdev->pdev), vdev->config_size);
+
+ /* Get a copy of config space */
+ ret = vfio_pci_config_space_read(vdev, 0, config_space_size,
+ vdev->pdev.config);
+ if (ret < (int)config_space_size) {
+ ret = ret < 0 ? -ret : EFAULT;
+ error_setg_errno(errp, ret, "failed to read device config space");
+ return false;
+ }
/* vfio emulates a lot for us, but some bits need extra love */
vdev->emulated_config_bits = g_malloc0(vdev->config_size);
@@ -3138,15 +3151,14 @@ static bool vfio_interrupt_setup(VFIOPCIDevice *vdev, Error **errp)
return true;
}
-static void vfio_realize(PCIDevice *pdev, Error **errp)
+static void vfio_pci_realize(PCIDevice *pdev, Error **errp)
{
ERRP_GUARD();
VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev);
VFIODevice *vbasedev = &vdev->vbasedev;
- int i, ret;
+ int i;
char uuid[UUID_STR_LEN];
g_autofree char *name = NULL;
- uint32_t config_space_size;
if (vbasedev->fd < 0 && !vbasedev->sysfsdev) {
if (!(~vdev->host.domain || ~vdev->host.bus ||
@@ -3201,17 +3213,6 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
goto error;
}
- config_space_size = MIN(pci_config_size(&vdev->pdev), vdev->config_size);
-
- /* Get a copy of config space */
- ret = vfio_pci_config_space_read(vdev, 0, config_space_size,
- vdev->pdev.config);
- if (ret < (int)config_space_size) {
- ret = ret < 0 ? -ret : EFAULT;
- error_setg_errno(errp, ret, "failed to read device config space");
- goto error;
- }
-
if (!vfio_pci_config_setup(vdev, errp)) {
goto error;
}
@@ -3515,7 +3516,7 @@ static void vfio_pci_dev_class_init(ObjectClass *klass, const void *data)
object_class_property_add_str(klass, "fd", NULL, vfio_pci_set_fd);
#endif
dc->desc = "VFIO-based PCI device assignment";
- pdc->realize = vfio_realize;
+ pdc->realize = vfio_pci_realize;
object_class_property_set_description(klass, /* 1.3 */
"host",
--
2.49.0
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PULL 05/16] vfio: refactor out IRQ signalling setup
2025-06-05 8:42 [PULL 00/16] vfio queue Cédric Le Goater
` (3 preceding siblings ...)
2025-06-05 8:42 ` [PULL 04/16] vfio: move config space read into vfio_pci_config_setup() Cédric Le Goater
@ 2025-06-05 8:42 ` Cédric Le Goater
2025-06-05 8:42 ` [PULL 06/16] vfio/iommufd: Add comment emphasizing no movement of hiod->realize() call Cédric Le Goater
` (11 subsequent siblings)
16 siblings, 0 replies; 18+ messages in thread
From: Cédric Le Goater @ 2025-06-05 8:42 UTC (permalink / raw)
To: qemu-devel; +Cc: Alex Williamson, John Levon, Cédric Le Goater
From: John Levon <john.levon@nutanix.com>
This makes for a slightly more readable vfio_msix_vector_do_use()
implementation, and we will rely on this shortly.
Signed-off-by: John Levon <john.levon@nutanix.com>
Reviewed-by: Cédric Le Goater <clg@redhat.com>
Link: https://lore.kernel.org/qemu-devel/20250520150419.2172078-5-john.levon@nutanix.com
Signed-off-by: Cédric Le Goater <clg@redhat.com>
---
hw/vfio/pci.c | 35 ++++++++++++++++++++---------------
1 file changed, 20 insertions(+), 15 deletions(-)
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index a873f82aeb355cfef68033e43b4829f64145d606..b1250d85bf0124a22e3ba3859a47d2b176ade648 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -511,6 +511,25 @@ static void vfio_update_kvm_msi_virq(VFIOMSIVector *vector, MSIMessage msg,
kvm_irqchip_commit_routes(kvm_state);
}
+static void set_irq_signalling(VFIODevice *vbasedev, VFIOMSIVector *vector,
+ unsigned int nr)
+{
+ Error *err = NULL;
+ int32_t fd;
+
+ if (vector->virq >= 0) {
+ fd = event_notifier_get_fd(&vector->kvm_interrupt);
+ } else {
+ fd = event_notifier_get_fd(&vector->interrupt);
+ }
+
+ if (!vfio_device_irq_set_signaling(vbasedev, VFIO_PCI_MSIX_IRQ_INDEX, nr,
+ VFIO_IRQ_SET_ACTION_TRIGGER,
+ fd, &err)) {
+ error_reportf_err(err, VFIO_MSG_PREFIX, vbasedev->name);
+ }
+}
+
static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
MSIMessage *msg, IOHandler *handler)
{
@@ -583,21 +602,7 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
strerror(-ret));
}
} else {
- Error *err = NULL;
- int32_t fd;
-
- if (vector->virq >= 0) {
- fd = event_notifier_get_fd(&vector->kvm_interrupt);
- } else {
- fd = event_notifier_get_fd(&vector->interrupt);
- }
-
- if (!vfio_device_irq_set_signaling(&vdev->vbasedev,
- VFIO_PCI_MSIX_IRQ_INDEX, nr,
- VFIO_IRQ_SET_ACTION_TRIGGER, fd,
- &err)) {
- error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
- }
+ set_irq_signalling(&vdev->vbasedev, vector, nr);
}
}
--
2.49.0
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PULL 06/16] vfio/iommufd: Add comment emphasizing no movement of hiod->realize() call
2025-06-05 8:42 [PULL 00/16] vfio queue Cédric Le Goater
` (4 preceding siblings ...)
2025-06-05 8:42 ` [PULL 05/16] vfio: refactor out IRQ signalling setup Cédric Le Goater
@ 2025-06-05 8:42 ` Cédric Le Goater
2025-06-05 8:42 ` [PULL 07/16] vfio/igd: Fix incorrect error propagation in vfio_pci_igd_opregion_detect() Cédric Le Goater
` (10 subsequent siblings)
16 siblings, 0 replies; 18+ messages in thread
From: Cédric Le Goater @ 2025-06-05 8:42 UTC (permalink / raw)
To: qemu-devel; +Cc: Alex Williamson, Zhenzhong Duan, Cédric Le Goater
From: Zhenzhong Duan <zhenzhong.duan@intel.com>
The nested IOMMU support needs device and hwpt id which are generated
only after attachment. Hiod encapsulates these information in realize()
and passes to vIOMMU.
Suggested-by: Cédric Le Goater <clg@redhat.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Reviewed-by: Cédric Le Goater <clg@redhat.com>
Link: https://lore.kernel.org/qemu-devel/20250521110301.3313877-1-zhenzhong.duan@intel.com
Signed-off-by: Cédric Le Goater <clg@redhat.com>
---
hw/vfio/iommufd.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
index af1c7ab10a090a2a4a2913231fc50903be9fb0f1..6b2696793f82fd68c306b85407b5f98201b9f5c6 100644
--- a/hw/vfio/iommufd.c
+++ b/hw/vfio/iommufd.c
@@ -592,6 +592,10 @@ found_container:
goto err_listener_register;
}
+ /*
+ * Do not move this code before attachment! The nested IOMMU support
+ * needs device and hwpt id which are generated only after attachment.
+ */
if (!vfio_device_hiod_create_and_realize(vbasedev,
TYPE_HOST_IOMMU_DEVICE_IOMMUFD_VFIO, errp)) {
goto err_listener_register;
--
2.49.0
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PULL 07/16] vfio/igd: Fix incorrect error propagation in vfio_pci_igd_opregion_detect()
2025-06-05 8:42 [PULL 00/16] vfio queue Cédric Le Goater
` (5 preceding siblings ...)
2025-06-05 8:42 ` [PULL 06/16] vfio/iommufd: Add comment emphasizing no movement of hiod->realize() call Cédric Le Goater
@ 2025-06-05 8:42 ` Cédric Le Goater
2025-06-05 8:42 ` [PULL 08/16] vfio: return mr from vfio_get_xlat_addr Cédric Le Goater
` (9 subsequent siblings)
16 siblings, 0 replies; 18+ messages in thread
From: Cédric Le Goater @ 2025-06-05 8:42 UTC (permalink / raw)
To: qemu-devel
Cc: Alex Williamson, Tomita Moeko, Edmund Raile, Corvin Köhne,
Cédric Le Goater
From: Tomita Moeko <tomitamoeko@gmail.com>
In vfio_pci_igd_opregion_detect(), errp will be set when the device does
not have OpRegion or is hotplugged. This errp will be propagated to
pci_qdev_realize(), which interprets it as failure, causing unexpected
termination on devices without OpRegion like SR-IOV VFs or discrete
GPUs. Fix it by not setting errp in vfio_pci_igd_opregion_detect().
This patch also checks if the device has OpRegion before hotplug status
to prevent unwanted warning messages on non-IGD devices.
Fixes: c0273e77f2d7 ("vfio/igd: Detect IGD device by OpRegion")
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2968
Reported-by: Edmund Raile <edmund.raile@protonmail.com>
Link: https://lore.kernel.org/qemu-devel/30044d14-17ec-46e3-b9c3-63d27a5bde27@gmail.com
Tested-by: Edmund Raile <edmund.raile@protonmail.com>
Signed-off-by: Tomita Moeko <tomitamoeko@gmail.com>
Reviewed-by: Alex Williamson <alex.williamson@redhat.com>
Reviewed-by: Corvin Köhne <c.koehne@beckhoff.com>
Link: https://lore.kernel.org/qemu-devel/20250522151636.20001-1-tomitamoeko@gmail.com
Signed-off-by: Cédric Le Goater <clg@redhat.com>
---
hw/vfio/igd.c | 22 ++++++++++------------
1 file changed, 10 insertions(+), 12 deletions(-)
diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c
index 5b6341c5bfeda5c3f5210ea080aba144d9edc90b..e7a9d1ffc13044dd01ba83b88b2b3a1c24cfbf98 100644
--- a/hw/vfio/igd.c
+++ b/hw/vfio/igd.c
@@ -187,23 +187,21 @@ static bool vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev,
}
static bool vfio_pci_igd_opregion_detect(VFIOPCIDevice *vdev,
- struct vfio_region_info **opregion,
- Error **errp)
+ struct vfio_region_info **opregion)
{
int ret;
- /* Hotplugging is not supported for opregion access */
- if (vdev->pdev.qdev.hotplugged) {
- error_setg(errp, "IGD OpRegion is not supported on hotplugged device");
- return false;
- }
-
ret = vfio_device_get_region_info_type(&vdev->vbasedev,
VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL,
VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, opregion);
if (ret) {
- error_setg_errno(errp, -ret,
- "Device does not support IGD OpRegion feature");
+ return false;
+ }
+
+ /* Hotplugging is not supported for opregion access */
+ if (vdev->pdev.qdev.hotplugged) {
+ warn_report("IGD device detected, but OpRegion is not supported "
+ "on hotplugged device.");
return false;
}
@@ -524,7 +522,7 @@ static bool vfio_pci_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp)
}
/* IGD device always comes with OpRegion */
- if (!vfio_pci_igd_opregion_detect(vdev, &opregion, errp)) {
+ if (!vfio_pci_igd_opregion_detect(vdev, &opregion)) {
return true;
}
info_report("OpRegion detected on Intel display %x.", vdev->device_id);
@@ -695,7 +693,7 @@ static bool vfio_pci_kvmgt_config_quirk(VFIOPCIDevice *vdev, Error **errp)
return true;
}
- if (!vfio_pci_igd_opregion_detect(vdev, &opregion, errp)) {
+ if (!vfio_pci_igd_opregion_detect(vdev, &opregion)) {
/* Should never reach here, KVMGT always emulates OpRegion */
return false;
}
--
2.49.0
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PULL 08/16] vfio: return mr from vfio_get_xlat_addr
2025-06-05 8:42 [PULL 00/16] vfio queue Cédric Le Goater
` (6 preceding siblings ...)
2025-06-05 8:42 ` [PULL 07/16] vfio/igd: Fix incorrect error propagation in vfio_pci_igd_opregion_detect() Cédric Le Goater
@ 2025-06-05 8:42 ` Cédric Le Goater
2025-06-05 8:42 ` [PULL 09/16] vfio/container: pass MemoryRegion to DMA operations Cédric Le Goater
` (8 subsequent siblings)
16 siblings, 0 replies; 18+ messages in thread
From: Cédric Le Goater @ 2025-06-05 8:42 UTC (permalink / raw)
To: qemu-devel
Cc: Alex Williamson, Steve Sistare, David Hildenbrand,
Cédric Le Goater, John Levon, Michael S. Tsirkin,
Zhenzhong Duan
From: Steve Sistare <steven.sistare@oracle.com>
Modify memory_get_xlat_addr and vfio_get_xlat_addr to return the memory
region that the translated address is found in. This will be needed by
CPR in a subsequent patch to map blocks using IOMMU_IOAS_MAP_FILE.
Also return the xlat offset, so we can simplify the interface by removing
the out parameters that can be trivially derived from mr and xlat.
Lastly, rename the functions to to memory_translate_iotlb() and
vfio_translate_iotlb().
Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
Acked-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Cédric Le Goater <clg@redhat.com>
Reviewed-by: John Levon <john.levon@nutanix.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Link: https://lore.kernel.org/qemu-devel/1747661203-136490-1-git-send-email-steven.sistare@oracle.com
Signed-off-by: Cédric Le Goater <clg@redhat.com>
---
include/system/memory.h | 19 +++++++++----------
hw/vfio/listener.c | 33 ++++++++++++++++++++++-----------
hw/virtio/vhost-vdpa.c | 9 +++++++--
system/memory.c | 32 +++++++-------------------------
4 files changed, 45 insertions(+), 48 deletions(-)
diff --git a/include/system/memory.h b/include/system/memory.h
index fc35a0dcad5600dcf9c0699d7765d2a56a3d78a7..0848690ea45dd414d9543be9b24f0c8016657778 100644
--- a/include/system/memory.h
+++ b/include/system/memory.h
@@ -739,21 +739,20 @@ void ram_discard_manager_unregister_listener(RamDiscardManager *rdm,
RamDiscardListener *rdl);
/**
- * memory_get_xlat_addr: Extract addresses from a TLB entry
+ * memory_translate_iotlb: Extract addresses from a TLB entry.
+ * Called with rcu_read_lock held.
*
* @iotlb: pointer to an #IOMMUTLBEntry
- * @vaddr: virtual address
- * @ram_addr: RAM address
- * @read_only: indicates if writes are allowed
- * @mr_has_discard_manager: indicates memory is controlled by a
- * RamDiscardManager
+ * @xlat_p: return the offset of the entry from the start of the returned
+ * MemoryRegion.
* @errp: pointer to Error*, to store an error if it happens.
*
- * Return: true on success, else false setting @errp with error.
+ * Return: On success, return the MemoryRegion containing the @iotlb translated
+ * addr. The MemoryRegion must not be accessed after rcu_read_unlock.
+ * On failure, return NULL, setting @errp with error.
*/
-bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
- ram_addr_t *ram_addr, bool *read_only,
- bool *mr_has_discard_manager, Error **errp);
+MemoryRegion *memory_translate_iotlb(IOMMUTLBEntry *iotlb, hwaddr *xlat_p,
+ Error **errp);
typedef struct CoalescedMemoryRange CoalescedMemoryRange;
typedef struct MemoryRegionIoeventfd MemoryRegionIoeventfd;
diff --git a/hw/vfio/listener.c b/hw/vfio/listener.c
index bfacb3d8d9cd5b7764a1678142eedf6d553cc07a..38e3dc82cf3a3c46e4319e6245f6143749b3cccf 100644
--- a/hw/vfio/listener.c
+++ b/hw/vfio/listener.c
@@ -90,16 +90,17 @@ static bool vfio_listener_skipped_section(MemoryRegionSection *section)
section->offset_within_address_space & (1ULL << 63);
}
-/* Called with rcu_read_lock held. */
-static bool vfio_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
- ram_addr_t *ram_addr, bool *read_only,
- Error **errp)
+/*
+ * Called with rcu_read_lock held.
+ * The returned MemoryRegion must not be accessed after calling rcu_read_unlock.
+ */
+static MemoryRegion *vfio_translate_iotlb(IOMMUTLBEntry *iotlb, hwaddr *xlat_p,
+ Error **errp)
{
- bool ret, mr_has_discard_manager;
+ MemoryRegion *mr;
- ret = memory_get_xlat_addr(iotlb, vaddr, ram_addr, read_only,
- &mr_has_discard_manager, errp);
- if (ret && mr_has_discard_manager) {
+ mr = memory_translate_iotlb(iotlb, xlat_p, errp);
+ if (mr && memory_region_has_ram_discard_manager(mr)) {
/*
* Malicious VMs might trigger discarding of IOMMU-mapped memory. The
* pages will remain pinned inside vfio until unmapped, resulting in a
@@ -118,7 +119,7 @@ static bool vfio_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
" intended via an IOMMU. It's possible to mitigate "
" by setting/adjusting RLIMIT_MEMLOCK.");
}
- return ret;
+ return mr;
}
static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
@@ -126,6 +127,8 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n);
VFIOContainerBase *bcontainer = giommu->bcontainer;
hwaddr iova = iotlb->iova + giommu->iommu_offset;
+ MemoryRegion *mr;
+ hwaddr xlat;
void *vaddr;
int ret;
Error *local_err = NULL;
@@ -150,10 +153,14 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) {
bool read_only;
- if (!vfio_get_xlat_addr(iotlb, &vaddr, NULL, &read_only, &local_err)) {
+ mr = vfio_translate_iotlb(iotlb, &xlat, &local_err);
+ if (!mr) {
error_report_err(local_err);
goto out;
}
+ vaddr = memory_region_get_ram_ptr(mr) + xlat;
+ read_only = !(iotlb->perm & IOMMU_WO) || mr->readonly;
+
/*
* vaddr is only valid until rcu_read_unlock(). But after
* vfio_dma_map has set up the mapping the pages will be
@@ -1010,6 +1017,8 @@ static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
ram_addr_t translated_addr;
Error *local_err = NULL;
int ret = -EINVAL;
+ MemoryRegion *mr;
+ hwaddr xlat;
trace_vfio_iommu_map_dirty_notify(iova, iova + iotlb->addr_mask);
@@ -1021,9 +1030,11 @@ static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
}
rcu_read_lock();
- if (!vfio_get_xlat_addr(iotlb, NULL, &translated_addr, NULL, &local_err)) {
+ mr = vfio_translate_iotlb(iotlb, &xlat, &local_err);
+ if (!mr) {
goto out_unlock;
}
+ translated_addr = memory_region_get_ram_addr(mr) + xlat;
ret = vfio_container_query_dirty_bitmap(bcontainer, iova, iotlb->addr_mask + 1,
translated_addr, &local_err);
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index e20da95f30368ac05b3a22f13bca1cfd09adb3d1..7061b6e1a3863e7090577214dc0bfa8e2185aebe 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -209,6 +209,8 @@ static void vhost_vdpa_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
int ret;
Int128 llend;
Error *local_err = NULL;
+ MemoryRegion *mr;
+ hwaddr xlat;
if (iotlb->target_as != &address_space_memory) {
error_report("Wrong target AS \"%s\", only system memory is allowed",
@@ -228,11 +230,14 @@ static void vhost_vdpa_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) {
bool read_only;
- if (!memory_get_xlat_addr(iotlb, &vaddr, NULL, &read_only, NULL,
- &local_err)) {
+ mr = memory_translate_iotlb(iotlb, &xlat, &local_err);
+ if (!mr) {
error_report_err(local_err);
return;
}
+ vaddr = memory_region_get_ram_ptr(mr) + xlat;
+ read_only = !(iotlb->perm & IOMMU_WO) || mr->readonly;
+
ret = vhost_vdpa_dma_map(s, VHOST_VDPA_GUEST_PA_ASID, iova,
iotlb->addr_mask + 1, vaddr, read_only);
if (ret) {
diff --git a/system/memory.c b/system/memory.c
index 63b983efcdb98f27de87a86fa467328a8e0a0c41..306e9ff9ebc58a2f17a23fdd7f8c60b5d9c683fb 100644
--- a/system/memory.c
+++ b/system/memory.c
@@ -2174,18 +2174,14 @@ void ram_discard_manager_unregister_listener(RamDiscardManager *rdm,
}
/* Called with rcu_read_lock held. */
-bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
- ram_addr_t *ram_addr, bool *read_only,
- bool *mr_has_discard_manager, Error **errp)
+MemoryRegion *memory_translate_iotlb(IOMMUTLBEntry *iotlb, hwaddr *xlat_p,
+ Error **errp)
{
MemoryRegion *mr;
hwaddr xlat;
hwaddr len = iotlb->addr_mask + 1;
bool writable = iotlb->perm & IOMMU_WO;
- if (mr_has_discard_manager) {
- *mr_has_discard_manager = false;
- }
/*
* The IOMMU TLB entry we have just covers translation through
* this IOMMU to its immediate target. We need to translate
@@ -2195,7 +2191,7 @@ bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
&xlat, &len, writable, MEMTXATTRS_UNSPECIFIED);
if (!memory_region_is_ram(mr)) {
error_setg(errp, "iommu map to non memory area %" HWADDR_PRIx "", xlat);
- return false;
+ return NULL;
} else if (memory_region_has_ram_discard_manager(mr)) {
RamDiscardManager *rdm = memory_region_get_ram_discard_manager(mr);
MemoryRegionSection tmp = {
@@ -2203,9 +2199,6 @@ bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
.offset_within_region = xlat,
.size = int128_make64(len),
};
- if (mr_has_discard_manager) {
- *mr_has_discard_manager = true;
- }
/*
* Malicious VMs can map memory into the IOMMU, which is expected
* to remain discarded. vfio will pin all pages, populating memory.
@@ -2216,7 +2209,7 @@ bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
error_setg(errp, "iommu map to discarded memory (e.g., unplugged"
" via virtio-mem): %" HWADDR_PRIx "",
iotlb->translated_addr);
- return false;
+ return NULL;
}
}
@@ -2226,22 +2219,11 @@ bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
*/
if (len & iotlb->addr_mask) {
error_setg(errp, "iommu has granularity incompatible with target AS");
- return false;
- }
-
- if (vaddr) {
- *vaddr = memory_region_get_ram_ptr(mr) + xlat;
- }
-
- if (ram_addr) {
- *ram_addr = memory_region_get_ram_addr(mr) + xlat;
- }
-
- if (read_only) {
- *read_only = !writable || mr->readonly;
+ return NULL;
}
- return true;
+ *xlat_p = xlat;
+ return mr;
}
void memory_region_set_log(MemoryRegion *mr, bool log, unsigned client)
--
2.49.0
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PULL 09/16] vfio/container: pass MemoryRegion to DMA operations
2025-06-05 8:42 [PULL 00/16] vfio queue Cédric Le Goater
` (7 preceding siblings ...)
2025-06-05 8:42 ` [PULL 08/16] vfio: return mr from vfio_get_xlat_addr Cédric Le Goater
@ 2025-06-05 8:42 ` Cédric Le Goater
2025-06-05 8:42 ` [PULL 10/16] backends/iommufd: Add a helper to invalidate user-managed HWPT Cédric Le Goater
` (7 subsequent siblings)
16 siblings, 0 replies; 18+ messages in thread
From: Cédric Le Goater @ 2025-06-05 8:42 UTC (permalink / raw)
To: qemu-devel
Cc: Alex Williamson, John Levon, John Johnson, Jagannathan Raman,
Elena Ufimtseva, Cédric Le Goater, Steve Sistare
From: John Levon <john.levon@nutanix.com>
Pass through the MemoryRegion to DMA operation handlers of vfio
containers. The vfio-user container will need this later, to translate
the vaddr into an offset for the dma map vfio-user message; CPR will
also will need this.
Originally-by: John Johnson <john.g.johnson@oracle.com>
Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
Signed-off-by: John Levon <john.levon@nutanix.com>
Reviewed-by: Cédric Le Goater <clg@redhat.com>
Reviewed-by: Steve Sistare <steven.sistare@oracle.com>
Link: https://lore.kernel.org/qemu-devel/20250521215534.2688540-1-john.levon@nutanix.com
Signed-off-by: Cédric Le Goater <clg@redhat.com>
---
include/hw/vfio/vfio-container-base.h | 9 +++++----
hw/vfio/container-base.c | 4 ++--
hw/vfio/container.c | 3 ++-
hw/vfio/iommufd.c | 3 ++-
hw/vfio/listener.c | 6 +++---
5 files changed, 14 insertions(+), 11 deletions(-)
diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h
index f9e561cb081b361291e72ffe228845bd6b157f92..3feb773e5f41706a310b676c088ff95a85304325 100644
--- a/include/hw/vfio/vfio-container-base.h
+++ b/include/hw/vfio/vfio-container-base.h
@@ -78,7 +78,7 @@ void vfio_address_space_insert(VFIOAddressSpace *space,
int vfio_container_dma_map(VFIOContainerBase *bcontainer,
hwaddr iova, ram_addr_t size,
- void *vaddr, bool readonly);
+ void *vaddr, bool readonly, MemoryRegion *mr);
int vfio_container_dma_unmap(VFIOContainerBase *bcontainer,
hwaddr iova, ram_addr_t size,
IOMMUTLBEntry *iotlb, bool unmap_all);
@@ -151,20 +151,21 @@ struct VFIOIOMMUClass {
/**
* @dma_map
*
- * Map an address range into the container.
+ * Map an address range into the container. Note that the memory region is
+ * referenced within an RCU read lock region across this call.
*
* @bcontainer: #VFIOContainerBase to use
* @iova: start address to map
* @size: size of the range to map
* @vaddr: process virtual address of mapping
* @readonly: true if mapping should be readonly
+ * @mr: the memory region for this mapping
*
* Returns 0 to indicate success and -errno otherwise.
*/
int (*dma_map)(const VFIOContainerBase *bcontainer,
hwaddr iova, ram_addr_t size,
- void *vaddr, bool readonly);
-
+ void *vaddr, bool readonly, MemoryRegion *mr);
/**
* @dma_unmap
*
diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c
index 1c6ca94b6040813a63bb8fca07b25189a8d8cb76..d834bd482290a8b195f94c07832b7f8020504c3a 100644
--- a/hw/vfio/container-base.c
+++ b/hw/vfio/container-base.c
@@ -75,12 +75,12 @@ void vfio_address_space_insert(VFIOAddressSpace *space,
int vfio_container_dma_map(VFIOContainerBase *bcontainer,
hwaddr iova, ram_addr_t size,
- void *vaddr, bool readonly)
+ void *vaddr, bool readonly, MemoryRegion *mr)
{
VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
g_assert(vioc->dma_map);
- return vioc->dma_map(bcontainer, iova, size, vaddr, readonly);
+ return vioc->dma_map(bcontainer, iova, size, vaddr, readonly, mr);
}
int vfio_container_dma_unmap(VFIOContainerBase *bcontainer,
diff --git a/hw/vfio/container.c b/hw/vfio/container.c
index a9f0dbaec4c8d83450048d9bc53b34d859731ebb..a8c76eb48165405e2b41186a266dbc69ae1b782a 100644
--- a/hw/vfio/container.c
+++ b/hw/vfio/container.c
@@ -207,7 +207,8 @@ static int vfio_legacy_dma_unmap(const VFIOContainerBase *bcontainer,
}
static int vfio_legacy_dma_map(const VFIOContainerBase *bcontainer, hwaddr iova,
- ram_addr_t size, void *vaddr, bool readonly)
+ ram_addr_t size, void *vaddr, bool readonly,
+ MemoryRegion *mr)
{
const VFIOContainer *container = container_of(bcontainer, VFIOContainer,
bcontainer);
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
index 6b2696793f82fd68c306b85407b5f98201b9f5c6..46a3b36301e9a690e04347005e64552ed0abede5 100644
--- a/hw/vfio/iommufd.c
+++ b/hw/vfio/iommufd.c
@@ -34,7 +34,8 @@
TYPE_HOST_IOMMU_DEVICE_IOMMUFD "-vfio"
static int iommufd_cdev_map(const VFIOContainerBase *bcontainer, hwaddr iova,
- ram_addr_t size, void *vaddr, bool readonly)
+ ram_addr_t size, void *vaddr, bool readonly,
+ MemoryRegion *mr)
{
const VFIOIOMMUFDContainer *container =
container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer);
diff --git a/hw/vfio/listener.c b/hw/vfio/listener.c
index 38e3dc82cf3a3c46e4319e6245f6143749b3cccf..74958661236e230751e52dbfc7857426358dcaac 100644
--- a/hw/vfio/listener.c
+++ b/hw/vfio/listener.c
@@ -170,7 +170,7 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
*/
ret = vfio_container_dma_map(bcontainer, iova,
iotlb->addr_mask + 1, vaddr,
- read_only);
+ read_only, mr);
if (ret) {
error_report("vfio_container_dma_map(%p, 0x%"HWADDR_PRIx", "
"0x%"HWADDR_PRIx", %p) = %d (%s)",
@@ -240,7 +240,7 @@ static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl,
vaddr = memory_region_get_ram_ptr(section->mr) + start;
ret = vfio_container_dma_map(bcontainer, iova, next - start,
- vaddr, section->readonly);
+ vaddr, section->readonly, section->mr);
if (ret) {
/* Rollback */
vfio_ram_discard_notify_discard(rdl, section);
@@ -564,7 +564,7 @@ static void vfio_listener_region_add(MemoryListener *listener,
}
ret = vfio_container_dma_map(bcontainer, iova, int128_get64(llsize),
- vaddr, section->readonly);
+ vaddr, section->readonly, section->mr);
if (ret) {
error_setg(&err, "vfio_container_dma_map(%p, 0x%"HWADDR_PRIx", "
"0x%"HWADDR_PRIx", %p) = %d (%s)",
--
2.49.0
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PULL 10/16] backends/iommufd: Add a helper to invalidate user-managed HWPT
2025-06-05 8:42 [PULL 00/16] vfio queue Cédric Le Goater
` (8 preceding siblings ...)
2025-06-05 8:42 ` [PULL 09/16] vfio/container: pass MemoryRegion to DMA operations Cédric Le Goater
@ 2025-06-05 8:42 ` Cédric Le Goater
2025-06-05 8:42 ` [PULL 11/16] vfio/iommufd: Add properties and handlers to TYPE_HOST_IOMMU_DEVICE_IOMMUFD Cédric Le Goater
` (6 subsequent siblings)
16 siblings, 0 replies; 18+ messages in thread
From: Cédric Le Goater @ 2025-06-05 8:42 UTC (permalink / raw)
To: qemu-devel
Cc: Alex Williamson, Zhenzhong Duan, Nicolin Chen,
Cédric Le Goater, Eric Auger
From: Zhenzhong Duan <zhenzhong.duan@intel.com>
This helper passes cache invalidation request from guest to invalidate
stage-1 page table cache in host hardware.
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Reviewed-by: Cédric Le Goater <clg@redhat.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Link: https://lore.kernel.org/qemu-devel/20250604062115.4004200-2-zhenzhong.duan@intel.com
Signed-off-by: Cédric Le Goater <clg@redhat.com>
---
include/system/iommufd.h | 4 ++++
backends/iommufd.c | 36 ++++++++++++++++++++++++++++++++++++
backends/trace-events | 1 +
3 files changed, 41 insertions(+)
diff --git a/include/system/iommufd.h b/include/system/iommufd.h
index cbab75bfbf67fe75e97dffbbbd2d98caca7f7c5e..83ab8e1e4ca034ecad07e08d55ef3cb5183b1fe2 100644
--- a/include/system/iommufd.h
+++ b/include/system/iommufd.h
@@ -61,6 +61,10 @@ bool iommufd_backend_get_dirty_bitmap(IOMMUFDBackend *be, uint32_t hwpt_id,
uint64_t iova, ram_addr_t size,
uint64_t page_size, uint64_t *data,
Error **errp);
+bool iommufd_backend_invalidate_cache(IOMMUFDBackend *be, uint32_t id,
+ uint32_t data_type, uint32_t entry_len,
+ uint32_t *entry_num, void *data,
+ Error **errp);
#define TYPE_HOST_IOMMU_DEVICE_IOMMUFD TYPE_HOST_IOMMU_DEVICE "-iommufd"
#endif
diff --git a/backends/iommufd.c b/backends/iommufd.c
index b73f75cd0bca694267e8c54af9750f08cd305a50..8bcdb60fe7ee439503f0fcc4d65d50c5916dcf1f 100644
--- a/backends/iommufd.c
+++ b/backends/iommufd.c
@@ -311,6 +311,42 @@ bool iommufd_backend_get_device_info(IOMMUFDBackend *be, uint32_t devid,
return true;
}
+bool iommufd_backend_invalidate_cache(IOMMUFDBackend *be, uint32_t id,
+ uint32_t data_type, uint32_t entry_len,
+ uint32_t *entry_num, void *data,
+ Error **errp)
+{
+ int ret, fd = be->fd;
+ uint32_t total_entries = *entry_num;
+ struct iommu_hwpt_invalidate cache = {
+ .size = sizeof(cache),
+ .hwpt_id = id,
+ .data_type = data_type,
+ .entry_len = entry_len,
+ .entry_num = total_entries,
+ .data_uptr = (uintptr_t)data,
+ };
+
+ ret = ioctl(fd, IOMMU_HWPT_INVALIDATE, &cache);
+ trace_iommufd_backend_invalidate_cache(fd, id, data_type, entry_len,
+ total_entries, cache.entry_num,
+ (uintptr_t)data, ret ? errno : 0);
+ *entry_num = cache.entry_num;
+
+ if (ret) {
+ error_setg_errno(errp, errno, "IOMMU_HWPT_INVALIDATE failed:"
+ " total %d entries, processed %d entries",
+ total_entries, cache.entry_num);
+ } else if (total_entries != cache.entry_num) {
+ error_setg(errp, "IOMMU_HWPT_INVALIDATE succeed but with unprocessed"
+ " entries: total %d entries, processed %d entries."
+ " Kernel BUG?!", total_entries, cache.entry_num);
+ return false;
+ }
+
+ return !ret;
+}
+
static int hiod_iommufd_get_cap(HostIOMMUDevice *hiod, int cap, Error **errp)
{
HostIOMMUDeviceCaps *caps = &hiod->caps;
diff --git a/backends/trace-events b/backends/trace-events
index 40811a316215bad7b8554cedf3b8ba1968a6fbfe..7278214ea51144b7d7b36d64908567c9467036e8 100644
--- a/backends/trace-events
+++ b/backends/trace-events
@@ -18,3 +18,4 @@ iommufd_backend_alloc_hwpt(int iommufd, uint32_t dev_id, uint32_t pt_id, uint32_
iommufd_backend_free_id(int iommufd, uint32_t id, int ret) " iommufd=%d id=%d (%d)"
iommufd_backend_set_dirty(int iommufd, uint32_t hwpt_id, bool start, int ret) " iommufd=%d hwpt=%u enable=%d (%d)"
iommufd_backend_get_dirty_bitmap(int iommufd, uint32_t hwpt_id, uint64_t iova, uint64_t size, uint64_t page_size, int ret) " iommufd=%d hwpt=%u iova=0x%"PRIx64" size=0x%"PRIx64" page_size=0x%"PRIx64" (%d)"
+iommufd_backend_invalidate_cache(int iommufd, uint32_t id, uint32_t data_type, uint32_t entry_len, uint32_t entry_num, uint32_t done_num, uint64_t data_ptr, int ret) " iommufd=%d id=%u data_type=%u entry_len=%u entry_num=%u done_num=%u data_ptr=0x%"PRIx64" (%d)"
--
2.49.0
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PULL 11/16] vfio/iommufd: Add properties and handlers to TYPE_HOST_IOMMU_DEVICE_IOMMUFD
2025-06-05 8:42 [PULL 00/16] vfio queue Cédric Le Goater
` (9 preceding siblings ...)
2025-06-05 8:42 ` [PULL 10/16] backends/iommufd: Add a helper to invalidate user-managed HWPT Cédric Le Goater
@ 2025-06-05 8:42 ` Cédric Le Goater
2025-06-05 8:42 ` [PULL 12/16] vfio/iommufd: Implement [at|de]tach_hwpt handlers Cédric Le Goater
` (5 subsequent siblings)
16 siblings, 0 replies; 18+ messages in thread
From: Cédric Le Goater @ 2025-06-05 8:42 UTC (permalink / raw)
To: qemu-devel
Cc: Alex Williamson, Zhenzhong Duan, Cédric Le Goater,
Eric Auger
From: Zhenzhong Duan <zhenzhong.duan@intel.com>
Enhance HostIOMMUDeviceIOMMUFD object with 3 new members, specific
to the iommufd BE + 2 new class functions.
IOMMUFD BE includes IOMMUFD handle, devid and hwpt_id. IOMMUFD handle
and devid are used to allocate/free ioas and hwpt. hwpt_id is used to
re-attach IOMMUFD backed device to its default VFIO sub-system created
hwpt, i.e., when vIOMMU is disabled by guest. These properties are
initialized in hiod::realize() after attachment.
2 new class functions are [at|de]tach_hwpt(). They are used to
attach/detach hwpt. VFIO and VDPA can have different implementions,
so implementation will be in sub-class instead of HostIOMMUDeviceIOMMUFD,
e.g., in HostIOMMUDeviceIOMMUFDVFIO.
Add two wrappers host_iommu_device_iommufd_[at|de]tach_hwpt to wrap the
two functions.
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Reviewed-by: Cédric Le Goater <clg@redhat.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Link: https://lore.kernel.org/qemu-devel/20250604062115.4004200-3-zhenzhong.duan@intel.com
Signed-off-by: Cédric Le Goater <clg@redhat.com>
---
include/system/iommufd.h | 50 ++++++++++++++++++++++++++++++++++++++++
backends/iommufd.c | 22 ++++++++++++++++++
hw/vfio/iommufd.c | 6 +++++
3 files changed, 78 insertions(+)
diff --git a/include/system/iommufd.h b/include/system/iommufd.h
index 83ab8e1e4ca034ecad07e08d55ef3cb5183b1fe2..283861b92432a1297e3e7465bdf86c3816393dcc 100644
--- a/include/system/iommufd.h
+++ b/include/system/iommufd.h
@@ -67,4 +67,54 @@ bool iommufd_backend_invalidate_cache(IOMMUFDBackend *be, uint32_t id,
Error **errp);
#define TYPE_HOST_IOMMU_DEVICE_IOMMUFD TYPE_HOST_IOMMU_DEVICE "-iommufd"
+OBJECT_DECLARE_TYPE(HostIOMMUDeviceIOMMUFD, HostIOMMUDeviceIOMMUFDClass,
+ HOST_IOMMU_DEVICE_IOMMUFD)
+
+/* Overload of the host IOMMU device for the iommufd backend */
+struct HostIOMMUDeviceIOMMUFD {
+ HostIOMMUDevice parent_obj;
+
+ IOMMUFDBackend *iommufd;
+ uint32_t devid;
+ uint32_t hwpt_id;
+};
+
+struct HostIOMMUDeviceIOMMUFDClass {
+ HostIOMMUDeviceClass parent_class;
+
+ /**
+ * @attach_hwpt: attach host IOMMU device to IOMMUFD hardware page table.
+ * VFIO and VDPA device can have different implementation.
+ *
+ * Mandatory callback.
+ *
+ * @idev: host IOMMU device backed by IOMMUFD backend.
+ *
+ * @hwpt_id: ID of IOMMUFD hardware page table.
+ *
+ * @errp: pass an Error out when attachment fails.
+ *
+ * Returns: true on success, false on failure.
+ */
+ bool (*attach_hwpt)(HostIOMMUDeviceIOMMUFD *idev, uint32_t hwpt_id,
+ Error **errp);
+ /**
+ * @detach_hwpt: detach host IOMMU device from IOMMUFD hardware page table.
+ * VFIO and VDPA device can have different implementation.
+ *
+ * Mandatory callback.
+ *
+ * @idev: host IOMMU device backed by IOMMUFD backend.
+ *
+ * @errp: pass an Error out when attachment fails.
+ *
+ * Returns: true on success, false on failure.
+ */
+ bool (*detach_hwpt)(HostIOMMUDeviceIOMMUFD *idev, Error **errp);
+};
+
+bool host_iommu_device_iommufd_attach_hwpt(HostIOMMUDeviceIOMMUFD *idev,
+ uint32_t hwpt_id, Error **errp);
+bool host_iommu_device_iommufd_detach_hwpt(HostIOMMUDeviceIOMMUFD *idev,
+ Error **errp);
#endif
diff --git a/backends/iommufd.c b/backends/iommufd.c
index 8bcdb60fe7ee439503f0fcc4d65d50c5916dcf1f..c2c47abf7ed533c0512b89e58605b6b20ac103e8 100644
--- a/backends/iommufd.c
+++ b/backends/iommufd.c
@@ -347,6 +347,26 @@ bool iommufd_backend_invalidate_cache(IOMMUFDBackend *be, uint32_t id,
return !ret;
}
+bool host_iommu_device_iommufd_attach_hwpt(HostIOMMUDeviceIOMMUFD *idev,
+ uint32_t hwpt_id, Error **errp)
+{
+ HostIOMMUDeviceIOMMUFDClass *idevc =
+ HOST_IOMMU_DEVICE_IOMMUFD_GET_CLASS(idev);
+
+ g_assert(idevc->attach_hwpt);
+ return idevc->attach_hwpt(idev, hwpt_id, errp);
+}
+
+bool host_iommu_device_iommufd_detach_hwpt(HostIOMMUDeviceIOMMUFD *idev,
+ Error **errp)
+{
+ HostIOMMUDeviceIOMMUFDClass *idevc =
+ HOST_IOMMU_DEVICE_IOMMUFD_GET_CLASS(idev);
+
+ g_assert(idevc->detach_hwpt);
+ return idevc->detach_hwpt(idev, errp);
+}
+
static int hiod_iommufd_get_cap(HostIOMMUDevice *hiod, int cap, Error **errp)
{
HostIOMMUDeviceCaps *caps = &hiod->caps;
@@ -385,6 +405,8 @@ static const TypeInfo types[] = {
}, {
.name = TYPE_HOST_IOMMU_DEVICE_IOMMUFD,
.parent = TYPE_HOST_IOMMU_DEVICE,
+ .instance_size = sizeof(HostIOMMUDeviceIOMMUFD),
+ .class_size = sizeof(HostIOMMUDeviceIOMMUFDClass),
.class_init = hiod_iommufd_class_init,
.abstract = true,
}
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
index 46a3b36301e9a690e04347005e64552ed0abede5..625b1747a29648918298e3a6cb3af68c5459ead5 100644
--- a/hw/vfio/iommufd.c
+++ b/hw/vfio/iommufd.c
@@ -819,6 +819,7 @@ static bool hiod_iommufd_vfio_realize(HostIOMMUDevice *hiod, void *opaque,
Error **errp)
{
VFIODevice *vdev = opaque;
+ HostIOMMUDeviceIOMMUFD *idev;
HostIOMMUDeviceCaps *caps = &hiod->caps;
enum iommu_hw_info_type type;
union {
@@ -838,6 +839,11 @@ static bool hiod_iommufd_vfio_realize(HostIOMMUDevice *hiod, void *opaque,
caps->type = type;
caps->hw_caps = hw_caps;
+ idev = HOST_IOMMU_DEVICE_IOMMUFD(hiod);
+ idev->iommufd = vdev->iommufd;
+ idev->devid = vdev->devid;
+ idev->hwpt_id = vdev->hwpt->hwpt_id;
+
return true;
}
--
2.49.0
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PULL 12/16] vfio/iommufd: Implement [at|de]tach_hwpt handlers
2025-06-05 8:42 [PULL 00/16] vfio queue Cédric Le Goater
` (10 preceding siblings ...)
2025-06-05 8:42 ` [PULL 11/16] vfio/iommufd: Add properties and handlers to TYPE_HOST_IOMMU_DEVICE_IOMMUFD Cédric Le Goater
@ 2025-06-05 8:42 ` Cédric Le Goater
2025-06-05 8:42 ` [PULL 13/16] vfio/iommufd: Save vendor specific device info Cédric Le Goater
` (4 subsequent siblings)
16 siblings, 0 replies; 18+ messages in thread
From: Cédric Le Goater @ 2025-06-05 8:42 UTC (permalink / raw)
To: qemu-devel
Cc: Alex Williamson, Zhenzhong Duan, Yi Liu, Cédric Le Goater,
Eric Auger, Nicolin Chen
From: Zhenzhong Duan <zhenzhong.duan@intel.com>
Implement [at|de]tach_hwpt handlers in VFIO subsystem. vIOMMU
utilizes them to attach to or detach from hwpt on host side.
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Reviewed-by: Cédric Le Goater <clg@redhat.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Nicolin Chen <nicolinc@nvidia.com>
Link: https://lore.kernel.org/qemu-devel/20250604062115.4004200-4-zhenzhong.duan@intel.com
Signed-off-by: Cédric Le Goater <clg@redhat.com>
---
hw/vfio/iommufd.c | 22 ++++++++++++++++++++++
1 file changed, 22 insertions(+)
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
index 625b1747a29648918298e3a6cb3af68c5459ead5..59033028373c82a22b1aaf092503d037a6d7e9b6 100644
--- a/hw/vfio/iommufd.c
+++ b/hw/vfio/iommufd.c
@@ -815,6 +815,24 @@ static void vfio_iommu_iommufd_class_init(ObjectClass *klass, const void *data)
vioc->query_dirty_bitmap = iommufd_query_dirty_bitmap;
};
+static bool
+host_iommu_device_iommufd_vfio_attach_hwpt(HostIOMMUDeviceIOMMUFD *idev,
+ uint32_t hwpt_id, Error **errp)
+{
+ VFIODevice *vbasedev = HOST_IOMMU_DEVICE(idev)->agent;
+
+ return !iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt_id, errp);
+}
+
+static bool
+host_iommu_device_iommufd_vfio_detach_hwpt(HostIOMMUDeviceIOMMUFD *idev,
+ Error **errp)
+{
+ VFIODevice *vbasedev = HOST_IOMMU_DEVICE(idev)->agent;
+
+ return iommufd_cdev_detach_ioas_hwpt(vbasedev, errp);
+}
+
static bool hiod_iommufd_vfio_realize(HostIOMMUDevice *hiod, void *opaque,
Error **errp)
{
@@ -869,10 +887,14 @@ hiod_iommufd_vfio_get_page_size_mask(HostIOMMUDevice *hiod)
static void hiod_iommufd_vfio_class_init(ObjectClass *oc, const void *data)
{
HostIOMMUDeviceClass *hiodc = HOST_IOMMU_DEVICE_CLASS(oc);
+ HostIOMMUDeviceIOMMUFDClass *idevc = HOST_IOMMU_DEVICE_IOMMUFD_CLASS(oc);
hiodc->realize = hiod_iommufd_vfio_realize;
hiodc->get_iova_ranges = hiod_iommufd_vfio_get_iova_ranges;
hiodc->get_page_size_mask = hiod_iommufd_vfio_get_page_size_mask;
+
+ idevc->attach_hwpt = host_iommu_device_iommufd_vfio_attach_hwpt;
+ idevc->detach_hwpt = host_iommu_device_iommufd_vfio_detach_hwpt;
};
static const TypeInfo types[] = {
--
2.49.0
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PULL 13/16] vfio/iommufd: Save vendor specific device info
2025-06-05 8:42 [PULL 00/16] vfio queue Cédric Le Goater
` (11 preceding siblings ...)
2025-06-05 8:42 ` [PULL 12/16] vfio/iommufd: Implement [at|de]tach_hwpt handlers Cédric Le Goater
@ 2025-06-05 8:42 ` Cédric Le Goater
2025-06-05 8:42 ` [PULL 14/16] MAINTAINERS: Add reviewer for CPR Cédric Le Goater
` (3 subsequent siblings)
16 siblings, 0 replies; 18+ messages in thread
From: Cédric Le Goater @ 2025-06-05 8:42 UTC (permalink / raw)
To: qemu-devel
Cc: Alex Williamson, Zhenzhong Duan, Eric Auger, Nicolin Chen,
Cédric Le Goater
From: Zhenzhong Duan <zhenzhong.duan@intel.com>
Some device information returned by ioctl(IOMMU_GET_HW_INFO) are vendor
specific. Save them as raw data in a union supporting different vendors,
then vendor IOMMU can query the raw data with its fixed format for
capability directly.
Because IOMMU_GET_HW_INFO is only supported in linux, so declare those
capability related structures with CONFIG_LINUX.
Suggested-by: Eric Auger <eric.auger@redhat.com>
Suggested-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Reviewed-by: Nicolin Chen <nicolinc@nvidia.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Link: https://lore.kernel.org/qemu-devel/20250604062115.4004200-5-zhenzhong.duan@intel.com
Signed-off-by: Cédric Le Goater <clg@redhat.com>
---
include/system/host_iommu_device.h | 15 +++++++++++++++
hw/vfio/iommufd.c | 8 +++-----
2 files changed, 18 insertions(+), 5 deletions(-)
diff --git a/include/system/host_iommu_device.h b/include/system/host_iommu_device.h
index 809cced4ba5c56263132b474a382e4bd0ffdd3cd..ab849a4a82d5f2a2a09c7924791b93c26d9ff902 100644
--- a/include/system/host_iommu_device.h
+++ b/include/system/host_iommu_device.h
@@ -14,6 +14,13 @@
#include "qom/object.h"
#include "qapi/error.h"
+#ifdef CONFIG_LINUX
+#include "linux/iommufd.h"
+
+typedef union VendorCaps {
+ struct iommu_hw_info_vtd vtd;
+ struct iommu_hw_info_arm_smmuv3 smmuv3;
+} VendorCaps;
/**
* struct HostIOMMUDeviceCaps - Define host IOMMU device capabilities.
@@ -22,11 +29,17 @@
*
* @hw_caps: host platform IOMMU capabilities (e.g. on IOMMUFD this represents
* the @out_capabilities value returned from IOMMU_GET_HW_INFO ioctl)
+ *
+ * @vendor_caps: host platform IOMMU vendor specific capabilities (e.g. on
+ * IOMMUFD this represents a user-space buffer filled by kernel
+ * with host IOMMU @type specific hardware information data)
*/
typedef struct HostIOMMUDeviceCaps {
uint32_t type;
uint64_t hw_caps;
+ VendorCaps vendor_caps;
} HostIOMMUDeviceCaps;
+#endif
#define TYPE_HOST_IOMMU_DEVICE "host-iommu-device"
OBJECT_DECLARE_TYPE(HostIOMMUDevice, HostIOMMUDeviceClass, HOST_IOMMU_DEVICE)
@@ -38,7 +51,9 @@ struct HostIOMMUDevice {
void *agent; /* pointer to agent device, ie. VFIO or VDPA device */
PCIBus *aliased_bus;
int aliased_devfn;
+#ifdef CONFIG_LINUX
HostIOMMUDeviceCaps caps;
+#endif
};
/**
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
index 59033028373c82a22b1aaf092503d037a6d7e9b6..c4bbf36241d3ee5c46bca9601688afd742e471f7 100644
--- a/hw/vfio/iommufd.c
+++ b/hw/vfio/iommufd.c
@@ -839,16 +839,14 @@ static bool hiod_iommufd_vfio_realize(HostIOMMUDevice *hiod, void *opaque,
VFIODevice *vdev = opaque;
HostIOMMUDeviceIOMMUFD *idev;
HostIOMMUDeviceCaps *caps = &hiod->caps;
+ VendorCaps *vendor_caps = &caps->vendor_caps;
enum iommu_hw_info_type type;
- union {
- struct iommu_hw_info_vtd vtd;
- } data;
uint64_t hw_caps;
hiod->agent = opaque;
- if (!iommufd_backend_get_device_info(vdev->iommufd, vdev->devid,
- &type, &data, sizeof(data),
+ if (!iommufd_backend_get_device_info(vdev->iommufd, vdev->devid, &type,
+ vendor_caps, sizeof(*vendor_caps),
&hw_caps, errp)) {
return false;
}
--
2.49.0
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PULL 14/16] MAINTAINERS: Add reviewer for CPR
2025-06-05 8:42 [PULL 00/16] vfio queue Cédric Le Goater
` (12 preceding siblings ...)
2025-06-05 8:42 ` [PULL 13/16] vfio/iommufd: Save vendor specific device info Cédric Le Goater
@ 2025-06-05 8:42 ` Cédric Le Goater
2025-06-05 8:42 ` [PULL 15/16] vfio: vfio_find_ram_discard_listener Cédric Le Goater
` (2 subsequent siblings)
16 siblings, 0 replies; 18+ messages in thread
From: Cédric Le Goater @ 2025-06-05 8:42 UTC (permalink / raw)
To: qemu-devel; +Cc: Alex Williamson, Steve Sistare, Cédric Le Goater
From: Steve Sistare <steven.sistare@oracle.com>
CPR is integrated with live migration, and has the same maintainers.
But, add a CPR section to add a reviewer.
Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
Reviewed-by: Cédric Le Goater <clg@redhat.com>
Link: https://lore.kernel.org/qemu-devel/1748546679-154091-2-git-send-email-steven.sistare@oracle.com
Signed-off-by: Cédric Le Goater <clg@redhat.com>
---
MAINTAINERS | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/MAINTAINERS b/MAINTAINERS
index 16af37986a4d4d3c1700bc4e56163cb85c05f987..b189701357ccf1010d60c88916a8e1be42d2a615 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3032,6 +3032,15 @@ F: include/qemu/co-shared-resource.h
T: git https://gitlab.com/jsnow/qemu.git jobs
T: git https://gitlab.com/vsementsov/qemu.git block
+CheckPoint and Restart (CPR)
+R: Steve Sistare <steven.sistare@oracle.com>
+S: Supported
+F: hw/vfio/cpr*
+F: include/migration/cpr.h
+F: migration/cpr*
+F: tests/qtest/migration/cpr*
+F: docs/devel/migration/CPR.rst
+
Compute Express Link
M: Jonathan Cameron <jonathan.cameron@huawei.com>
R: Fan Ni <fan.ni@samsung.com>
--
2.49.0
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PULL 15/16] vfio: vfio_find_ram_discard_listener
2025-06-05 8:42 [PULL 00/16] vfio queue Cédric Le Goater
` (13 preceding siblings ...)
2025-06-05 8:42 ` [PULL 14/16] MAINTAINERS: Add reviewer for CPR Cédric Le Goater
@ 2025-06-05 8:42 ` Cédric Le Goater
2025-06-05 8:42 ` [PULL 16/16] vfio: move vfio-cpr.h Cédric Le Goater
2025-06-05 19:00 ` [PULL 00/16] vfio queue Stefan Hajnoczi
16 siblings, 0 replies; 18+ messages in thread
From: Cédric Le Goater @ 2025-06-05 8:42 UTC (permalink / raw)
To: qemu-devel
Cc: Alex Williamson, Steve Sistare, Cédric Le Goater,
Zhenzhong Duan
From: Steve Sistare <steven.sistare@oracle.com>
Define vfio_find_ram_discard_listener as a subroutine so additional calls to
it may be added in a subsequent patch.
Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
Reviewed-by: Cédric Le Goater <clg@redhat.com>
Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Link: https://lore.kernel.org/qemu-devel/1748546679-154091-8-git-send-email-steven.sistare@oracle.com
Signed-off-by: Cédric Le Goater <clg@redhat.com>
---
include/hw/vfio/vfio-container-base.h | 3 +++
hw/vfio/listener.c | 35 +++++++++++++++++----------
2 files changed, 25 insertions(+), 13 deletions(-)
diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h
index 3feb773e5f41706a310b676c088ff95a85304325..9d37f86115a873eb164ae90c5ebaf2acd2c6a5d8 100644
--- a/include/hw/vfio/vfio-container-base.h
+++ b/include/hw/vfio/vfio-container-base.h
@@ -253,4 +253,7 @@ struct VFIOIOMMUClass {
void (*release)(VFIOContainerBase *bcontainer);
};
+VFIORamDiscardListener *vfio_find_ram_discard_listener(
+ VFIOContainerBase *bcontainer, MemoryRegionSection *section);
+
#endif /* HW_VFIO_VFIO_CONTAINER_BASE_H */
diff --git a/hw/vfio/listener.c b/hw/vfio/listener.c
index 74958661236e230751e52dbfc7857426358dcaac..203ed0314ec46225651a51d91df9c2e7c76168ed 100644
--- a/hw/vfio/listener.c
+++ b/hw/vfio/listener.c
@@ -456,6 +456,26 @@ static void vfio_device_error_append(VFIODevice *vbasedev, Error **errp)
}
}
+VFIORamDiscardListener *vfio_find_ram_discard_listener(
+ VFIOContainerBase *bcontainer, MemoryRegionSection *section)
+{
+ VFIORamDiscardListener *vrdl = NULL;
+
+ QLIST_FOREACH(vrdl, &bcontainer->vrdl_list, next) {
+ if (vrdl->mr == section->mr &&
+ vrdl->offset_within_address_space ==
+ section->offset_within_address_space) {
+ break;
+ }
+ }
+
+ if (!vrdl) {
+ hw_error("vfio: Trying to sync missing RAM discard listener");
+ /* does not return */
+ }
+ return vrdl;
+}
+
static void vfio_listener_region_add(MemoryListener *listener,
MemoryRegionSection *section)
{
@@ -1086,19 +1106,8 @@ vfio_sync_ram_discard_listener_dirty_bitmap(VFIOContainerBase *bcontainer,
MemoryRegionSection *section)
{
RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr);
- VFIORamDiscardListener *vrdl = NULL;
-
- QLIST_FOREACH(vrdl, &bcontainer->vrdl_list, next) {
- if (vrdl->mr == section->mr &&
- vrdl->offset_within_address_space ==
- section->offset_within_address_space) {
- break;
- }
- }
-
- if (!vrdl) {
- hw_error("vfio: Trying to sync missing RAM discard listener");
- }
+ VFIORamDiscardListener *vrdl =
+ vfio_find_ram_discard_listener(bcontainer, section);
/*
* We only want/can synchronize the bitmap for actually mapped parts -
--
2.49.0
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PULL 16/16] vfio: move vfio-cpr.h
2025-06-05 8:42 [PULL 00/16] vfio queue Cédric Le Goater
` (14 preceding siblings ...)
2025-06-05 8:42 ` [PULL 15/16] vfio: vfio_find_ram_discard_listener Cédric Le Goater
@ 2025-06-05 8:42 ` Cédric Le Goater
2025-06-05 19:00 ` [PULL 00/16] vfio queue Stefan Hajnoczi
16 siblings, 0 replies; 18+ messages in thread
From: Cédric Le Goater @ 2025-06-05 8:42 UTC (permalink / raw)
To: qemu-devel
Cc: Alex Williamson, Steve Sistare, Cédric Le Goater,
Zhenzhong Duan
From: Steve Sistare <steven.sistare@oracle.com>
Move vfio-cpr.h to include/hw/vfio, because it will need to be included by
other files there.
Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
Reviewed-by: Cédric Le Goater <clg@redhat.com>
Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Link: https://lore.kernel.org/qemu-devel/1748546679-154091-9-git-send-email-steven.sistare@oracle.com
Signed-off-by: Cédric Le Goater <clg@redhat.com>
---
MAINTAINERS | 1 +
hw/vfio/vfio-cpr.h | 15 ---------------
include/hw/vfio/vfio-cpr.h | 18 ++++++++++++++++++
hw/vfio/container.c | 2 +-
hw/vfio/cpr.c | 2 +-
hw/vfio/iommufd.c | 2 +-
6 files changed, 22 insertions(+), 18 deletions(-)
delete mode 100644 hw/vfio/vfio-cpr.h
create mode 100644 include/hw/vfio/vfio-cpr.h
diff --git a/MAINTAINERS b/MAINTAINERS
index b189701357ccf1010d60c88916a8e1be42d2a615..aa6763077ea87af0d527319f1cb13e4468b1f299 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3036,6 +3036,7 @@ CheckPoint and Restart (CPR)
R: Steve Sistare <steven.sistare@oracle.com>
S: Supported
F: hw/vfio/cpr*
+F: include/hw/vfio/vfio-cpr.h
F: include/migration/cpr.h
F: migration/cpr*
F: tests/qtest/migration/cpr*
diff --git a/hw/vfio/vfio-cpr.h b/hw/vfio/vfio-cpr.h
deleted file mode 100644
index 134b83a62461cbe9409bbe42057fb4134d84dcd1..0000000000000000000000000000000000000000
--- a/hw/vfio/vfio-cpr.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * VFIO CPR
- *
- * Copyright (c) 2025 Oracle and/or its affiliates.
- *
- * SPDX-License-Identifier: GPL-2.0-or-later
- */
-
-#ifndef HW_VFIO_CPR_H
-#define HW_VFIO_CPR_H
-
-bool vfio_cpr_register_container(VFIOContainerBase *bcontainer, Error **errp);
-void vfio_cpr_unregister_container(VFIOContainerBase *bcontainer);
-
-#endif /* HW_VFIO_CPR_H */
diff --git a/include/hw/vfio/vfio-cpr.h b/include/hw/vfio/vfio-cpr.h
new file mode 100644
index 0000000000000000000000000000000000000000..750ea5bcea38a953c781817f7e76a64422b79d4c
--- /dev/null
+++ b/include/hw/vfio/vfio-cpr.h
@@ -0,0 +1,18 @@
+/*
+ * VFIO CPR
+ *
+ * Copyright (c) 2025 Oracle and/or its affiliates.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef HW_VFIO_VFIO_CPR_H
+#define HW_VFIO_VFIO_CPR_H
+
+struct VFIOContainerBase;
+
+bool vfio_cpr_register_container(struct VFIOContainerBase *bcontainer,
+ Error **errp);
+void vfio_cpr_unregister_container(struct VFIOContainerBase *bcontainer);
+
+#endif /* HW_VFIO_VFIO_CPR_H */
diff --git a/hw/vfio/container.c b/hw/vfio/container.c
index a8c76eb48165405e2b41186a266dbc69ae1b782a..0f948d024711062e2facf44b4d4694a0f1e59b83 100644
--- a/hw/vfio/container.c
+++ b/hw/vfio/container.c
@@ -33,8 +33,8 @@
#include "qapi/error.h"
#include "pci.h"
#include "hw/vfio/vfio-container.h"
+#include "hw/vfio/vfio-cpr.h"
#include "vfio-helpers.h"
-#include "vfio-cpr.h"
#include "vfio-listener.h"
#define TYPE_HOST_IOMMU_DEVICE_LEGACY_VFIO TYPE_HOST_IOMMU_DEVICE "-legacy-vfio"
diff --git a/hw/vfio/cpr.c b/hw/vfio/cpr.c
index 3214184f97421a17b4d981facdcadde3a1fcec9a..0210e76a10993e0c01403fec51918cc9fca7b636 100644
--- a/hw/vfio/cpr.c
+++ b/hw/vfio/cpr.c
@@ -8,9 +8,9 @@
#include "qemu/osdep.h"
#include "hw/vfio/vfio-device.h"
#include "migration/misc.h"
+#include "hw/vfio/vfio-cpr.h"
#include "qapi/error.h"
#include "system/runstate.h"
-#include "vfio-cpr.h"
static int vfio_cpr_reboot_notifier(NotifierWithReturn *notifier,
MigrationEvent *e, Error **errp)
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
index c4bbf36241d3ee5c46bca9601688afd742e471f7..d3efef71afd4fd9e68455cefe7666c9f70e8c61e 100644
--- a/hw/vfio/iommufd.c
+++ b/hw/vfio/iommufd.c
@@ -21,13 +21,13 @@
#include "qapi/error.h"
#include "system/iommufd.h"
#include "hw/qdev-core.h"
+#include "hw/vfio/vfio-cpr.h"
#include "system/reset.h"
#include "qemu/cutils.h"
#include "qemu/chardev_open.h"
#include "pci.h"
#include "vfio-iommufd.h"
#include "vfio-helpers.h"
-#include "vfio-cpr.h"
#include "vfio-listener.h"
#define TYPE_HOST_IOMMU_DEVICE_IOMMUFD_VFIO \
--
2.49.0
^ permalink raw reply related [flat|nested] 18+ messages in thread
* Re: [PULL 00/16] vfio queue
2025-06-05 8:42 [PULL 00/16] vfio queue Cédric Le Goater
` (15 preceding siblings ...)
2025-06-05 8:42 ` [PULL 16/16] vfio: move vfio-cpr.h Cédric Le Goater
@ 2025-06-05 19:00 ` Stefan Hajnoczi
16 siblings, 0 replies; 18+ messages in thread
From: Stefan Hajnoczi @ 2025-06-05 19:00 UTC (permalink / raw)
To: Cédric Le Goater; +Cc: qemu-devel, Alex Williamson, Cédric Le Goater
[-- Attachment #1: Type: text/plain, Size: 116 bytes --]
Applied, thanks.
Please update the changelog at https://wiki.qemu.org/ChangeLog/10.1 for any user-visible changes.
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 488 bytes --]
^ permalink raw reply [flat|nested] 18+ messages in thread