* [PATCH v5 01/15] vfio/iommufd: Extend attach/detach_hwpt callback implementations with pasid
2026-05-09 4:07 [PATCH v5 00/15] intel_iommu: Enable PASID support for passthrough device Zhenzhong Duan
@ 2026-05-09 4:07 ` Zhenzhong Duan
2026-05-09 4:07 ` [PATCH v5 02/15] iommufd: Extend attach/detach_hwpt callbacks to support pasid Zhenzhong Duan
` (13 subsequent siblings)
14 siblings, 0 replies; 24+ messages in thread
From: Zhenzhong Duan @ 2026-05-09 4:07 UTC (permalink / raw)
To: qemu-devel
Cc: alex, clg, eric.auger, mst, jasowang, jgg, nicolinc, skolothumtho,
joao.m.martins, clement.mathieu--drif, kevin.tian, yi.l.liu,
xudong.hao, Zhenzhong Duan
For attachment with a pasid, the pasid must be passed in together with the
flag VFIO_DEVICE_ATTACH_PASID.
Define IOMMU_NO_PASID to represent device attachment without a pasid, the
same as in the kernel.
Detachment is implemented similarly, using the flag VFIO_DEVICE_DETACH_PASID.
Suggested-by: Shameer Kolothum Thodi <skolothumtho@nvidia.com>
Suggested-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Reviewed-by: Yi Liu <yi.l.liu@intel.com>
Reviewed-by: Nicolin Chen <nicolinc@nvidia.com>
Reviewed-by: Cédric Le Goater <clg@redhat.com>
Reviewed-by: Shameer Kolothum <skolothumtho@nvidia.com>
Tested-by: Xudong Hao <xudong.hao@intel.com>
---
include/hw/core/iommu.h | 2 ++
hw/vfio/iommufd.c | 44 +++++++++++++++++++++++++----------------
hw/vfio/trace-events | 4 ++--
3 files changed, 31 insertions(+), 19 deletions(-)
diff --git a/include/hw/core/iommu.h b/include/hw/core/iommu.h
index cd59a367ce..77739d4214 100644
--- a/include/hw/core/iommu.h
+++ b/include/hw/core/iommu.h
@@ -30,4 +30,6 @@ enum host_iommu_quirks {
HOST_IOMMU_QUIRK_NESTING_PARENT_BYPASS_RO = BIT_ULL(0),
};
+#define IOMMU_NO_PASID 0
+
#endif /* HW_IOMMU_H */
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
index df148a49a7..f86f6f0d7b 100644
--- a/hw/vfio/iommufd.c
+++ b/hw/vfio/iommufd.c
@@ -20,6 +20,7 @@
#include "trace.h"
#include "qapi/error.h"
#include "system/iommufd.h"
+#include "hw/core/iommu.h"
#include "hw/core/qdev.h"
#include "hw/vfio/vfio-cpr.h"
#include "system/reset.h"
@@ -305,43 +306,48 @@ out:
return ret;
}
-static int iommufd_cdev_attach_ioas_hwpt(VFIODevice *vbasedev, uint32_t id,
- Error **errp)
+static int iommufd_cdev_pasid_attach_ioas_hwpt(VFIODevice *vbasedev,
+ uint32_t pasid, uint32_t id,
+ Error **errp)
{
int iommufd = vbasedev->iommufd->fd;
struct vfio_device_attach_iommufd_pt attach_data = {
.argsz = sizeof(attach_data),
- .flags = 0,
+ .flags = pasid == IOMMU_NO_PASID ? 0 : VFIO_DEVICE_ATTACH_PASID,
+ .pasid = pasid,
.pt_id = id,
};
/* Attach device to an IOAS or hwpt within iommufd */
if (ioctl(vbasedev->fd, VFIO_DEVICE_ATTACH_IOMMUFD_PT, &attach_data)) {
error_setg_errno(errp, errno,
- "[iommufd=%d] error attach %s (%d) to id=%d",
- iommufd, vbasedev->name, vbasedev->fd, id);
+ "[iommufd=%d] error attach %s (%d) pasid %d to id=%d",
+ iommufd, vbasedev->name, vbasedev->fd, pasid, id);
return -errno;
}
- trace_iommufd_cdev_attach_ioas_hwpt(iommufd, vbasedev->name,
- vbasedev->fd, id);
+ trace_iommufd_cdev_pasid_attach_ioas_hwpt(iommufd, vbasedev->name,
+ vbasedev->fd, pasid, id);
return 0;
}
-static bool iommufd_cdev_detach_ioas_hwpt(VFIODevice *vbasedev, Error **errp)
+static bool iommufd_cdev_pasid_detach_ioas_hwpt(VFIODevice *vbasedev,
+ uint32_t pasid, Error **errp)
{
int iommufd = vbasedev->iommufd->fd;
struct vfio_device_detach_iommufd_pt detach_data = {
.argsz = sizeof(detach_data),
- .flags = 0,
+ .flags = pasid == IOMMU_NO_PASID ? 0 : VFIO_DEVICE_DETACH_PASID,
+ .pasid = pasid,
};
if (ioctl(vbasedev->fd, VFIO_DEVICE_DETACH_IOMMUFD_PT, &detach_data)) {
- error_setg_errno(errp, errno, "detach %s failed", vbasedev->name);
+ error_setg_errno(errp, errno, "detach %s pasid %d failed",
+ vbasedev->name, pasid);
return false;
}
- trace_iommufd_cdev_detach_ioas_hwpt(iommufd, vbasedev->name);
+ trace_iommufd_cdev_pasid_detach_ioas_hwpt(iommufd, vbasedev->name, pasid);
return true;
}
@@ -363,7 +369,8 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
/* Try to find a domain */
QLIST_FOREACH(hwpt, &container->hwpt_list, next) {
if (!cpr_is_incoming()) {
- ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt->hwpt_id, errp);
+ ret = iommufd_cdev_pasid_attach_ioas_hwpt(vbasedev, IOMMU_NO_PASID,
+ hwpt->hwpt_id, errp);
} else if (vbasedev->cpr.hwpt_id == hwpt->hwpt_id) {
ret = 0;
} else {
@@ -442,7 +449,8 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
return false;
}
- ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt_id, errp);
+ ret = iommufd_cdev_pasid_attach_ioas_hwpt(vbasedev, IOMMU_NO_PASID, hwpt_id,
+ errp);
if (ret) {
iommufd_backend_free_id(container->be, hwpt_id);
return false;
@@ -495,7 +503,8 @@ static bool iommufd_cdev_attach_container(VFIODevice *vbasedev,
/* If CPR, we are already attached to ioas_id. */
return cpr_is_incoming() ||
- !iommufd_cdev_attach_ioas_hwpt(vbasedev, container->ioas_id, errp);
+ !iommufd_cdev_pasid_attach_ioas_hwpt(vbasedev, IOMMU_NO_PASID,
+ container->ioas_id, errp);
}
static void iommufd_cdev_detach_container(VFIODevice *vbasedev,
@@ -503,7 +512,7 @@ static void iommufd_cdev_detach_container(VFIODevice *vbasedev,
{
Error *err = NULL;
- if (!iommufd_cdev_detach_ioas_hwpt(vbasedev, &err)) {
+ if (!iommufd_cdev_pasid_detach_ioas_hwpt(vbasedev, IOMMU_NO_PASID, &err)) {
error_report_err(err);
}
@@ -929,7 +938,8 @@ host_iommu_device_iommufd_vfio_attach_hwpt(HostIOMMUDeviceIOMMUFD *hiodi,
{
VFIODevice *vbasedev = HOST_IOMMU_DEVICE(hiodi)->agent;
- return !iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt_id, errp);
+ return !iommufd_cdev_pasid_attach_ioas_hwpt(vbasedev, IOMMU_NO_PASID,
+ hwpt_id, errp);
}
static bool
@@ -938,7 +948,7 @@ host_iommu_device_iommufd_vfio_detach_hwpt(HostIOMMUDeviceIOMMUFD *hiodi,
{
VFIODevice *vbasedev = HOST_IOMMU_DEVICE(hiodi)->agent;
- return iommufd_cdev_detach_ioas_hwpt(vbasedev, errp);
+ return iommufd_cdev_pasid_detach_ioas_hwpt(vbasedev, IOMMU_NO_PASID, errp);
}
static bool hiod_iommufd_vfio_realize(HostIOMMUDevice *hiod, void *opaque,
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
index 2049159015..8dbb477298 100644
--- a/hw/vfio/trace-events
+++ b/hw/vfio/trace-events
@@ -183,8 +183,8 @@ vfio_vmstate_change_prepare(const char *name, int running, const char *reason, c
iommufd_cdev_connect_and_bind(int iommufd, const char *name, int devfd, int devid) " [iommufd=%d] Successfully bound device %s (fd=%d): output devid=%d"
iommufd_cdev_getfd(const char *dev, int devfd) " %s (fd=%d)"
-iommufd_cdev_attach_ioas_hwpt(int iommufd, const char *name, int devfd, int id) " [iommufd=%d] Successfully attached device %s (%d) to id=%d"
-iommufd_cdev_detach_ioas_hwpt(int iommufd, const char *name) " [iommufd=%d] Successfully detached %s"
+iommufd_cdev_pasid_attach_ioas_hwpt(int iommufd, const char *name, int devfd, uint32_t pasid, int id) " [iommufd=%d] Successfully attached device %s (%d) pasid %u to id=%d"
+iommufd_cdev_pasid_detach_ioas_hwpt(int iommufd, const char *name, uint32_t pasid) " [iommufd=%d] Successfully detached %s pasid %u"
iommufd_cdev_fail_attach_existing_container(const char *msg) " %s"
iommufd_cdev_alloc_ioas(int iommufd, int ioas_id) " [iommufd=%d] new IOMMUFD container with ioasid=%d"
iommufd_cdev_device_info(char *name, int devfd, int num_irqs, int num_regions, int flags) " %s (%d) num_irqs=%d num_regions=%d flags=%d"
--
2.47.3
^ permalink raw reply related [flat|nested] 24+ messages in thread
* [PATCH v5 02/15] iommufd: Extend attach/detach_hwpt callbacks to support pasid
2026-05-09 4:07 [PATCH v5 00/15] intel_iommu: Enable PASID support for passthrough device Zhenzhong Duan
2026-05-09 4:07 ` [PATCH v5 01/15] vfio/iommufd: Extend attach/detach_hwpt callback implementations with pasid Zhenzhong Duan
@ 2026-05-09 4:07 ` Zhenzhong Duan
2026-05-09 4:07 ` [PATCH v5 03/15] vfio/iommufd: Create nesting parent hwpt with IOMMU_HWPT_ALLOC_PASID flag Zhenzhong Duan
` (12 subsequent siblings)
14 siblings, 0 replies; 24+ messages in thread
From: Zhenzhong Duan @ 2026-05-09 4:07 UTC (permalink / raw)
To: qemu-devel
Cc: alex, clg, eric.auger, mst, jasowang, jgg, nicolinc, skolothumtho,
joao.m.martins, clement.mathieu--drif, kevin.tian, yi.l.liu,
xudong.hao, Zhenzhong Duan, qemu-arm
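Add a pasid parameter to the attach_hwpt/detach_hwpt callbacks, and do the
same for the two wrappers and their call sites. Existing call sites pass
IOMMU_NO_PASID.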
Suggested-by: Shameer Kolothum Thodi <skolothumtho@nvidia.com>
Suggested-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Reviewed-by: Yi Liu <yi.l.liu@intel.com>
Reviewed-by: Nicolin Chen <nicolinc@nvidia.com>
Reviewed-by: Cédric Le Goater <clg@redhat.com>
Reviewed-by: Shameer Kolothum <skolothumtho@nvidia.com>
Tested-by: Xudong Hao <xudong.hao@intel.com>
---
include/system/iommufd.h | 18 ++++++++++++------
backends/iommufd.c | 9 +++++----
hw/arm/smmuv3-accel.c | 12 ++++++++----
hw/i386/intel_iommu_accel.c | 19 ++++++++++---------
hw/vfio/iommufd.c | 10 +++++-----
5 files changed, 40 insertions(+), 28 deletions(-)
diff --git a/include/system/iommufd.h b/include/system/iommufd.h
index 2925d116ac..4257e50f62 100644
--- a/include/system/iommufd.h
+++ b/include/system/iommufd.h
@@ -138,14 +138,16 @@ struct HostIOMMUDeviceIOMMUFDClass {
*
* @hiodi: host IOMMU device backed by IOMMUFD backend.
*
+ * @pasid: target pasid of the device to be attached.
+ *
* @hwpt_id: ID of IOMMUFD hardware page table.
*
* @errp: pass an Error out when attachment fails.
*
* Returns: true on success, false on failure.
*/
- bool (*attach_hwpt)(HostIOMMUDeviceIOMMUFD *hiodi, uint32_t hwpt_id,
- Error **errp);
+ bool (*attach_hwpt)(HostIOMMUDeviceIOMMUFD *hiodi, uint32_t pasid,
+ uint32_t hwpt_id, Error **errp);
/**
* @detach_hwpt: detach host IOMMU device from IOMMUFD hardware page table.
* VFIO and VDPA device can have different implementation.
@@ -154,15 +156,19 @@ struct HostIOMMUDeviceIOMMUFDClass {
*
* @hiodi: host IOMMU device backed by IOMMUFD backend.
*
- * @errp: pass an Error out when attachment fails.
+ * @pasid: target pasid of the device to be detached.
+ *
+ * @errp: pass an Error out when detachment fails.
*
* Returns: true on success, false on failure.
*/
- bool (*detach_hwpt)(HostIOMMUDeviceIOMMUFD *hiodi, Error **errp);
+ bool (*detach_hwpt)(HostIOMMUDeviceIOMMUFD *hiodi, uint32_t pasid,
+ Error **errp);
};
bool host_iommu_device_iommufd_attach_hwpt(HostIOMMUDeviceIOMMUFD *hiodi,
- uint32_t hwpt_id, Error **errp);
-bool host_iommu_device_iommufd_detach_hwpt(HostIOMMUDeviceIOMMUFD *hiodi,
+ uint32_t pasid, uint32_t hwpt_id,
Error **errp);
+bool host_iommu_device_iommufd_detach_hwpt(HostIOMMUDeviceIOMMUFD *hiodi,
+ uint32_t pasid, Error **errp);
#endif
diff --git a/backends/iommufd.c b/backends/iommufd.c
index 410b044370..cfde6f2b2c 100644
--- a/backends/iommufd.c
+++ b/backends/iommufd.c
@@ -539,23 +539,24 @@ bool iommufd_backend_alloc_veventq(IOMMUFDBackend *be, uint32_t viommu_id,
}
bool host_iommu_device_iommufd_attach_hwpt(HostIOMMUDeviceIOMMUFD *hiodi,
- uint32_t hwpt_id, Error **errp)
+ uint32_t pasid, uint32_t hwpt_id,
+ Error **errp)
{
HostIOMMUDeviceIOMMUFDClass *hiodic =
HOST_IOMMU_DEVICE_IOMMUFD_GET_CLASS(hiodi);
g_assert(hiodic->attach_hwpt);
- return hiodic->attach_hwpt(hiodi, hwpt_id, errp);
+ return hiodic->attach_hwpt(hiodi, pasid, hwpt_id, errp);
}
bool host_iommu_device_iommufd_detach_hwpt(HostIOMMUDeviceIOMMUFD *hiodi,
- Error **errp)
+ uint32_t pasid, Error **errp)
{
HostIOMMUDeviceIOMMUFDClass *hiodic =
HOST_IOMMU_DEVICE_IOMMUFD_GET_CLASS(hiodi);
g_assert(hiodic->detach_hwpt);
- return hiodic->detach_hwpt(hiodi, errp);
+ return hiodic->detach_hwpt(hiodi, pasid, errp);
}
static int hiod_iommufd_get_cap(HostIOMMUDevice *hiod, int cap, Error **errp)
diff --git a/hw/arm/smmuv3-accel.c b/hw/arm/smmuv3-accel.c
index 862be814a0..2947e2e5dc 100644
--- a/hw/arm/smmuv3-accel.c
+++ b/hw/arm/smmuv3-accel.c
@@ -300,7 +300,8 @@ bool smmuv3_accel_install_ste(SMMUv3State *s, SMMUDevice *sdev, int sid,
return false;
}
- if (!host_iommu_device_iommufd_attach_hwpt(hiodi, hwpt_id, errp)) {
+ if (!host_iommu_device_iommufd_attach_hwpt(hiodi, IOMMU_NO_PASID, hwpt_id,
+ errp)) {
if (s1_hwpt) {
iommufd_backend_free_id(hiodi->iommufd, s1_hwpt->hwpt_id);
g_free(s1_hwpt);
@@ -575,7 +576,8 @@ smmuv3_accel_alloc_viommu(SMMUv3State *s, HostIOMMUDeviceIOMMUFD *hiodi,
/* Attach a HWPT based on SMMUv3 GBPA.ABORT value */
hwpt_id = smmuv3_accel_gbpa_hwpt(s, accel);
- if (!host_iommu_device_iommufd_attach_hwpt(hiodi, hwpt_id, errp)) {
+ if (!host_iommu_device_iommufd_attach_hwpt(hiodi, IOMMU_NO_PASID, hwpt_id,
+ errp)) {
goto free_veventq;
}
return true;
@@ -665,7 +667,8 @@ static void smmuv3_accel_unset_iommu_device(PCIBus *bus, void *opaque,
hiodi = accel_dev->hiodi;
accel = accel_dev->s_accel;
/* Re-attach the default s2 hwpt id */
- if (!host_iommu_device_iommufd_attach_hwpt(hiodi, hiodi->hwpt_id, NULL)) {
+ if (!host_iommu_device_iommufd_attach_hwpt(hiodi, IOMMU_NO_PASID,
+ hiodi->hwpt_id, NULL)) {
error_report("Unable to attach the default HW pagetable: hiodi devid "
"0x%x", hiodi->devid);
}
@@ -879,7 +882,8 @@ bool smmuv3_accel_attach_gbpa_hwpt(SMMUv3State *s, Error **errp)
hwpt_id = smmuv3_accel_gbpa_hwpt(s, accel);
QLIST_FOREACH(accel_dev, &accel->device_list, next) {
- if (!host_iommu_device_iommufd_attach_hwpt(accel_dev->hiodi, hwpt_id,
+ if (!host_iommu_device_iommufd_attach_hwpt(accel_dev->hiodi,
+ IOMMU_NO_PASID, hwpt_id,
&local_err)) {
error_append_hint(&local_err, "Failed to attach GBPA hwpt %u for "
"hiodi devid %u", hwpt_id,
diff --git a/hw/i386/intel_iommu_accel.c b/hw/i386/intel_iommu_accel.c
index ed3793602b..3217a2afac 100644
--- a/hw/i386/intel_iommu_accel.c
+++ b/hw/i386/intel_iommu_accel.c
@@ -121,8 +121,9 @@ static bool vtd_device_attach_iommufd(VTDHostIOMMUDevice *vtd_hiod,
}
}
- ret = host_iommu_device_iommufd_attach_hwpt(hiodi, hwpt_id, errp);
- trace_vtd_device_attach_hwpt(hiodi->devid, vtd_as->pasid, hwpt_id, ret);
+ ret = host_iommu_device_iommufd_attach_hwpt(hiodi, IOMMU_NO_PASID, hwpt_id,
+ errp);
+ trace_vtd_device_attach_hwpt(hiodi->devid, IOMMU_NO_PASID, hwpt_id, ret);
if (ret) {
/* Destroy old fs_hwpt if it's a replacement */
vtd_destroy_old_fs_hwpt(hiodi, vtd_as);
@@ -141,22 +142,22 @@ static bool vtd_device_detach_iommufd(VTDHostIOMMUDevice *vtd_hiod,
{
HostIOMMUDeviceIOMMUFD *hiodi = HOST_IOMMU_DEVICE_IOMMUFD(vtd_hiod->hiod);
IntelIOMMUState *s = vtd_as->iommu_state;
- uint32_t pasid = vtd_as->pasid;
bool ret;
if (s->dmar_enabled && s->root_scalable) {
- ret = host_iommu_device_iommufd_detach_hwpt(hiodi, errp);
- trace_vtd_device_detach_hwpt(hiodi->devid, pasid, ret);
+ ret = host_iommu_device_iommufd_detach_hwpt(hiodi, IOMMU_NO_PASID,
+ errp);
+ trace_vtd_device_detach_hwpt(hiodi->devid, IOMMU_NO_PASID, ret);
} else {
/*
* If DMAR remapping is disabled or guest switches to legacy mode,
* we fallback to the default HWPT which contains shadow page table.
* So guest DMA could still work.
*/
- ret = host_iommu_device_iommufd_attach_hwpt(hiodi, hiodi->hwpt_id,
- errp);
- trace_vtd_device_reattach_def_hwpt(hiodi->devid, pasid, hiodi->hwpt_id,
- ret);
+ ret = host_iommu_device_iommufd_attach_hwpt(hiodi, IOMMU_NO_PASID,
+ hiodi->hwpt_id, errp);
+ trace_vtd_device_reattach_def_hwpt(hiodi->devid, IOMMU_NO_PASID,
+ hiodi->hwpt_id, ret);
}
if (ret) {
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
index f86f6f0d7b..78e7b6a045 100644
--- a/hw/vfio/iommufd.c
+++ b/hw/vfio/iommufd.c
@@ -934,21 +934,21 @@ static void vfio_iommu_iommufd_class_init(ObjectClass *klass, const void *data)
static bool
host_iommu_device_iommufd_vfio_attach_hwpt(HostIOMMUDeviceIOMMUFD *hiodi,
- uint32_t hwpt_id, Error **errp)
+ uint32_t pasid, uint32_t hwpt_id,
+ Error **errp)
{
VFIODevice *vbasedev = HOST_IOMMU_DEVICE(hiodi)->agent;
- return !iommufd_cdev_pasid_attach_ioas_hwpt(vbasedev, IOMMU_NO_PASID,
- hwpt_id, errp);
+ return !iommufd_cdev_pasid_attach_ioas_hwpt(vbasedev, pasid, hwpt_id, errp);
}
static bool
host_iommu_device_iommufd_vfio_detach_hwpt(HostIOMMUDeviceIOMMUFD *hiodi,
- Error **errp)
+ uint32_t pasid, Error **errp)
{
VFIODevice *vbasedev = HOST_IOMMU_DEVICE(hiodi)->agent;
- return iommufd_cdev_pasid_detach_ioas_hwpt(vbasedev, IOMMU_NO_PASID, errp);
+ return iommufd_cdev_pasid_detach_ioas_hwpt(vbasedev, pasid, errp);
}
static bool hiod_iommufd_vfio_realize(HostIOMMUDevice *hiod, void *opaque,
--
2.47.3
^ permalink raw reply related [flat|nested] 24+ messages in thread
* [PATCH v5 03/15] vfio/iommufd: Create nesting parent hwpt with IOMMU_HWPT_ALLOC_PASID flag
2026-05-09 4:07 [PATCH v5 00/15] intel_iommu: Enable PASID support for passthrough device Zhenzhong Duan
2026-05-09 4:07 ` [PATCH v5 01/15] vfio/iommufd: Extend attach/detach_hwpt callback implementations with pasid Zhenzhong Duan
2026-05-09 4:07 ` [PATCH v5 02/15] iommufd: Extend attach/detach_hwpt callbacks to support pasid Zhenzhong Duan
@ 2026-05-09 4:07 ` Zhenzhong Duan
2026-05-09 4:07 ` [PATCH v5 04/15] intel_iommu: Create the nested " Zhenzhong Duan
` (11 subsequent siblings)
14 siblings, 0 replies; 24+ messages in thread
From: Zhenzhong Duan @ 2026-05-09 4:07 UTC (permalink / raw)
To: qemu-devel
Cc: alex, clg, eric.auger, mst, jasowang, jgg, nicolinc, skolothumtho,
joao.m.martins, clement.mathieu--drif, kevin.tian, yi.l.liu,
xudong.hao, Zhenzhong Duan
When both the device and the vIOMMU have PASID enabled, the guest may set
up PASID usages such as SVM.
VFIO needs to be aware of potential PASID usage and should attach the
non-PASID part of a PASID-capable device to a hwpt flagged with
IOMMU_HWPT_ALLOC_PASID.
ARM SMMU doesn't support IOMMU_HWPT_ALLOC_PASID; only VTD needs it. So
we can't check the existing vIOMMU flag VIOMMU_FLAG_PASID_SUPPORTED to
determine whether to set the flag IOMMU_HWPT_ALLOC_PASID. Instead,
introduce a new flag VIOMMU_FLAG_WANT_PASID_ATTACH which will only be
exposed by VTD.
Opportunistically add documentation for VIOMMU_FLAG_PASID_SUPPORTED
and explain how it differs from VIOMMU_FLAG_WANT_PASID_ATTACH.
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Reviewed-by: Yi Liu <yi.l.liu@intel.com>
Tested-by: Xudong Hao <xudong.hao@intel.com>
---
include/hw/core/iommu.h | 11 +++++++++++
include/hw/vfio/vfio-device.h | 1 +
hw/vfio/device.c | 11 +++++++++++
hw/vfio/iommufd.c | 8 +++++++-
4 files changed, 30 insertions(+), 1 deletion(-)
diff --git a/include/hw/core/iommu.h b/include/hw/core/iommu.h
index 77739d4214..20d6d79062 100644
--- a/include/hw/core/iommu.h
+++ b/include/hw/core/iommu.h
@@ -20,9 +20,20 @@
enum viommu_flags {
/* vIOMMU needs nesting parent HWPT to create nested HWPT */
VIOMMU_FLAG_WANT_NESTING_PARENT = BIT_ULL(0),
+ /*
+ * vIOMMU supports PASID capability, VFIO checks this flag and synthesize
+ * a PASID capability.
+ */
VIOMMU_FLAG_PASID_SUPPORTED = BIT_ULL(1),
/* vIOMMU needs dirty tracking on the nesting parent HWPT for nested use */
VIOMMU_FLAG_WANT_NESTING_DIRTY_TRACKING = BIT_ULL(2),
+ /*
+ * vIOMMU requests other sub-system like VFIO to create a HWPT that can be
+ * used with PASID attachment. VIOMMU_FLAG_PASID_SUPPORTED can't be used
+ * for this purpose as PASID attachment is needed by VTD IOMMU but not ARM
+ * SMMU.
+ */
+ VIOMMU_FLAG_WANT_PASID_ATTACH = BIT_ULL(3),
};
/* Host IOMMU quirks. Extracted from host IOMMU capabilities */
diff --git a/include/hw/vfio/vfio-device.h b/include/hw/vfio/vfio-device.h
index 380a55d6e5..8472420d3f 100644
--- a/include/hw/vfio/vfio-device.h
+++ b/include/hw/vfio/vfio-device.h
@@ -282,6 +282,7 @@ void vfio_device_unprepare(VFIODevice *vbasedev);
bool vfio_device_get_viommu_flags_want_nesting(VFIODevice *vbasedev);
bool vfio_device_get_viommu_flags_want_nesting_dirty(VFIODevice *vbasedev);
+bool vfio_device_get_viommu_flags_want_pasid_attach(VFIODevice *vbasedev);
bool vfio_device_get_host_iommu_quirk_bypass_ro(VFIODevice *vbasedev,
uint32_t type, void *caps,
uint32_t size);
diff --git a/hw/vfio/device.c b/hw/vfio/device.c
index 3ffd69a579..b954b44d31 100644
--- a/hw/vfio/device.c
+++ b/hw/vfio/device.c
@@ -544,6 +544,17 @@ bool vfio_device_get_viommu_flags_want_nesting(VFIODevice *vbasedev)
return false;
}
+bool vfio_device_get_viommu_flags_want_pasid_attach(VFIODevice *vbasedev)
+{
+ VFIOPCIDevice *vdev = vfio_pci_from_vfio_device(vbasedev);
+
+ if (vdev) {
+ return !!(pci_device_get_viommu_flags(PCI_DEVICE(vdev)) &
+ VIOMMU_FLAG_WANT_PASID_ATTACH);
+ }
+ return false;
+}
+
bool vfio_device_get_host_iommu_quirk_bypass_ro(VFIODevice *vbasedev,
uint32_t type, void *caps,
uint32_t size)
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
index 78e7b6a045..0718f029ca 100644
--- a/hw/vfio/iommufd.c
+++ b/hw/vfio/iommufd.c
@@ -364,6 +364,7 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
VendorCaps caps;
VFIOIOASHwpt *hwpt;
uint32_t hwpt_id;
+ uint8_t max_pasid_log2 = 0;
int ret;
/* Try to find a domain */
@@ -409,7 +410,7 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
*/
if (!iommufd_backend_get_device_info(vbasedev->iommufd, vbasedev->devid,
&type, &caps, sizeof(caps), &hw_caps,
- NULL, errp)) {
+ &max_pasid_log2, errp)) {
return false;
}
@@ -437,6 +438,11 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
}
}
+ if (max_pasid_log2 &&
+ vfio_device_get_viommu_flags_want_pasid_attach(vbasedev)) {
+ flags |= IOMMU_HWPT_ALLOC_PASID;
+ }
+
if (cpr_is_incoming()) {
hwpt_id = vbasedev->cpr.hwpt_id;
goto skip_alloc;
--
2.47.3
^ permalink raw reply related [flat|nested] 24+ messages in thread
* [PATCH v5 04/15] intel_iommu: Create the nested hwpt with IOMMU_HWPT_ALLOC_PASID flag
2026-05-09 4:07 [PATCH v5 00/15] intel_iommu: Enable PASID support for passthrough device Zhenzhong Duan
` (2 preceding siblings ...)
2026-05-09 4:07 ` [PATCH v5 03/15] vfio/iommufd: Create nesting parent hwpt with IOMMU_HWPT_ALLOC_PASID flag Zhenzhong Duan
@ 2026-05-09 4:07 ` Zhenzhong Duan
2026-05-09 4:07 ` [PATCH v5 05/15] intel_iommu: Rename pasid property to "pasid-bits" and define it as type uint8 Zhenzhong Duan
` (10 subsequent siblings)
14 siblings, 0 replies; 24+ messages in thread
From: Zhenzhong Duan @ 2026-05-09 4:07 UTC (permalink / raw)
To: qemu-devel
Cc: alex, clg, eric.auger, mst, jasowang, jgg, nicolinc, skolothumtho,
joao.m.martins, clement.mathieu--drif, kevin.tian, yi.l.liu,
xudong.hao, Zhenzhong Duan
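When PASID is enabled, any hwpt attached to either the non-PASID part or a
PASID of the device must be allocated with the IOMMU_HWPT_ALLOC_PASID flag,
or else the attachment fails.
Pass 'VTDHostIOMMUDevice *' to vtd_create_fs_hwpt() so that it can read the
vIOMMU's pasid setting, and change vtd_destroy_old_fs_hwpt() to take
'VTDHostIOMMUDevice *' as well for consistency.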
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Reviewed-by: Clement Mathieu--Drif <clement.mathieu--drif@bull.com>
Reviewed-by: Yi Liu <yi.l.liu@intel.com>
Tested-by: Xudong Hao <xudong.hao@intel.com>
---
hw/i386/intel_iommu_accel.c | 19 ++++++++++++-------
1 file changed, 12 insertions(+), 7 deletions(-)
diff --git a/hw/i386/intel_iommu_accel.c b/hw/i386/intel_iommu_accel.c
index 3217a2afac..bd1236c070 100644
--- a/hw/i386/intel_iommu_accel.c
+++ b/hw/i386/intel_iommu_accel.c
@@ -69,11 +69,13 @@ VTDHostIOMMUDevice *vtd_find_hiod_iommufd(VTDAddressSpace *as)
return NULL;
}
-static bool vtd_create_fs_hwpt(HostIOMMUDeviceIOMMUFD *hiodi,
+static bool vtd_create_fs_hwpt(VTDHostIOMMUDevice *vtd_hiod,
VTDPASIDEntry *pe, uint32_t *fs_hwpt_id,
Error **errp)
{
+ HostIOMMUDeviceIOMMUFD *hiodi = HOST_IOMMU_DEVICE_IOMMUFD(vtd_hiod->hiod);
struct iommu_hwpt_vtd_s1 vtd = {};
+ uint32_t flags = vtd_hiod->iommu_state->pasid ? IOMMU_HWPT_ALLOC_PASID : 0;
vtd.flags = (VTD_SM_PASID_ENTRY_SRE(pe) ? IOMMU_VTD_S1_SRE : 0) |
(VTD_SM_PASID_ENTRY_WPE(pe) ? IOMMU_VTD_S1_WPE : 0) |
@@ -82,13 +84,16 @@ static bool vtd_create_fs_hwpt(HostIOMMUDeviceIOMMUFD *hiodi,
vtd.pgtbl_addr = (uint64_t)vtd_pe_get_fspt_base(pe);
return iommufd_backend_alloc_hwpt(hiodi->iommufd, hiodi->devid,
- hiodi->hwpt_id, 0, IOMMU_HWPT_DATA_VTD_S1,
- sizeof(vtd), &vtd, fs_hwpt_id, errp);
+ hiodi->hwpt_id, flags,
+ IOMMU_HWPT_DATA_VTD_S1, sizeof(vtd), &vtd,
+ fs_hwpt_id, errp);
}
-static void vtd_destroy_old_fs_hwpt(HostIOMMUDeviceIOMMUFD *hiodi,
+static void vtd_destroy_old_fs_hwpt(VTDHostIOMMUDevice *vtd_hiod,
VTDAddressSpace *vtd_as)
{
+ HostIOMMUDeviceIOMMUFD *hiodi = HOST_IOMMU_DEVICE_IOMMUFD(vtd_hiod->hiod);
+
if (!vtd_as->fs_hwpt_id) {
return;
}
@@ -116,7 +121,7 @@ static bool vtd_device_attach_iommufd(VTDHostIOMMUDevice *vtd_hiod,
}
if (vtd_pe_pgtt_is_fst(pe)) {
- if (!vtd_create_fs_hwpt(hiodi, pe, &hwpt_id, errp)) {
+ if (!vtd_create_fs_hwpt(vtd_hiod, pe, &hwpt_id, errp)) {
return false;
}
}
@@ -126,7 +131,7 @@ static bool vtd_device_attach_iommufd(VTDHostIOMMUDevice *vtd_hiod,
trace_vtd_device_attach_hwpt(hiodi->devid, IOMMU_NO_PASID, hwpt_id, ret);
if (ret) {
/* Destroy old fs_hwpt if it's a replacement */
- vtd_destroy_old_fs_hwpt(hiodi, vtd_as);
+ vtd_destroy_old_fs_hwpt(vtd_hiod, vtd_as);
if (vtd_pe_pgtt_is_fst(pe)) {
vtd_as->fs_hwpt_id = hwpt_id;
}
@@ -161,7 +166,7 @@ static bool vtd_device_detach_iommufd(VTDHostIOMMUDevice *vtd_hiod,
}
if (ret) {
- vtd_destroy_old_fs_hwpt(hiodi, vtd_as);
+ vtd_destroy_old_fs_hwpt(vtd_hiod, vtd_as);
}
return ret;
--
2.47.3
^ permalink raw reply related [flat|nested] 24+ messages in thread
* [PATCH v5 05/15] intel_iommu: Rename pasid property to "pasid-bits" and define it as type uint8
2026-05-09 4:07 [PATCH v5 00/15] intel_iommu: Enable PASID support for passthrough device Zhenzhong Duan
` (3 preceding siblings ...)
2026-05-09 4:07 ` [PATCH v5 04/15] intel_iommu: Create the nested " Zhenzhong Duan
@ 2026-05-09 4:07 ` Zhenzhong Duan
2026-05-14 11:30 ` Yi Liu
2026-05-09 4:07 ` [PATCH v5 06/15] intel_iommu: Export some functions Zhenzhong Duan
` (9 subsequent siblings)
14 siblings, 1 reply; 24+ messages in thread
From: Zhenzhong Duan @ 2026-05-09 4:07 UTC (permalink / raw)
To: qemu-devel
Cc: alex, clg, eric.auger, mst, jasowang, jgg, nicolinc, skolothumtho,
joao.m.martins, clement.mathieu--drif, kevin.tian, yi.l.liu,
xudong.hao, Zhenzhong Duan
'x-pasid-mode' is a bool property, so we would need an extra 'pss' property
to represent the PASID size supported. Because no device in QEMU supports
the PASID capability yet, no guest has been able to use the PASID feature
so far, and 'x-pasid-mode' has had no effect.
So instead of adding an extra 'pss' property, we can use a single property
of uint8 type named 'pasid-bits' to represent both whether PASID is
supported and the PASID width in bits. A value of N > 0 means PASID is
supported, and N - 1 is the value of the PSS field in the ECAP register.
The PASID width must also be no more than 20 bits according to the PCI spec.
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Reviewed-by: Clement Mathieu--Drif <clement.mathieu--drif@bull.com>
Tested-by: Xudong Hao <xudong.hao@intel.com>
---
hw/i386/intel_iommu_internal.h | 2 +-
include/hw/i386/intel_iommu.h | 2 +-
hw/i386/intel_iommu.c | 11 +++++++++--
3 files changed, 11 insertions(+), 4 deletions(-)
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index 11a53aa369..db4f186a3e 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -195,7 +195,7 @@
#define VTD_ECAP_MHMV (15ULL << 20)
#define VTD_ECAP_SRS (1ULL << 31)
#define VTD_ECAP_NWFS (1ULL << 33)
-#define VTD_ECAP_PSS (7ULL << 35) /* limit: MemTxAttrs::pid */
+#define VTD_ECAP_SET_PSS(x, v) ((x)->ecap = deposit64((x)->ecap, 35, 5, v))
#define VTD_ECAP_PASID (1ULL << 40)
#define VTD_ECAP_PDS (1ULL << 42)
#define VTD_ECAP_SMTS (1ULL << 43)
diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
index e44ce31841..95c76015e4 100644
--- a/include/hw/i386/intel_iommu.h
+++ b/include/hw/i386/intel_iommu.h
@@ -314,7 +314,7 @@ struct IntelIOMMUState {
bool intr_eime; /* Extended interrupt mode enabled */
OnOffAuto intr_eim; /* Toggle for EIM cabability */
uint8_t aw_bits; /* Host/IOVA address width (in bits) */
- bool pasid; /* Whether to support PASID */
+ uint8_t pasid; /* PASID supported in bits, 0 if not */
bool fs1gp; /* First Stage 1-GByte Page Support */
/* Transient Mapping, Reserved(0) since VTD spec revision 3.2 */
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index b784c5f10a..cf275b496e 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -4203,7 +4203,7 @@ static const Property vtd_properties[] = {
DEFINE_PROP_BOOL("x-scalable-mode", IntelIOMMUState, scalable_mode, FALSE),
DEFINE_PROP_BOOL("x-flts", IntelIOMMUState, fsts, FALSE),
DEFINE_PROP_BOOL("snoop-control", IntelIOMMUState, snoop_control, false),
- DEFINE_PROP_BOOL("x-pasid-mode", IntelIOMMUState, pasid, false),
+ DEFINE_PROP_UINT8("pasid-bits", IntelIOMMUState, pasid, 0),
DEFINE_PROP_BOOL("svm", IntelIOMMUState, svm, false),
DEFINE_PROP_BOOL("stale-tm", IntelIOMMUState, stale_tm, false),
DEFINE_PROP_BOOL("fs1gp", IntelIOMMUState, fs1gp, true),
@@ -5045,7 +5045,8 @@ static void vtd_cap_init(IntelIOMMUState *s)
}
if (s->pasid) {
- s->ecap |= VTD_ECAP_PASID | VTD_ECAP_PSS;
+ VTD_ECAP_SET_PSS(s, s->pasid - 1);
+ s->ecap |= VTD_ECAP_PASID;
}
}
@@ -5586,6 +5587,12 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error **errp)
return false;
}
+ if (s->pasid > PCI_EXT_CAP_PASID_MAX_WIDTH) {
+ error_setg(errp, "PASID width %d exceeds Max PASID Width %d allowed "
+ "in PCI spec", s->pasid, PCI_EXT_CAP_PASID_MAX_WIDTH);
+ return false;
+ }
+
if (s->svm) {
if (!x86_iommu->dt_supported) {
error_setg(errp, "Need to set device IOTLB for svm");
--
2.47.3
^ permalink raw reply related [flat|nested] 24+ messages in thread
* Re: [PATCH v5 05/15] intel_iommu: Rename pasid property to "pasid-bits" and define it as type uint8
2026-05-09 4:07 ` [PATCH v5 05/15] intel_iommu: Rename pasid property to "pasid-bits" and define it as type uint8 Zhenzhong Duan
@ 2026-05-14 11:30 ` Yi Liu
0 siblings, 0 replies; 24+ messages in thread
From: Yi Liu @ 2026-05-14 11:30 UTC (permalink / raw)
To: Zhenzhong Duan, qemu-devel
Cc: alex, clg, eric.auger, mst, jasowang, jgg, nicolinc, skolothumtho,
joao.m.martins, clement.mathieu--drif, kevin.tian, xudong.hao
On 5/9/26 12:07, Zhenzhong Duan wrote:
> 'x-pasid-mode' is a bool property, we need an extra 'pss' property to
> represent PASID size supported. Because there is no any device in QEMU
> supporting pasid capability yet, no guest could use the pasid feature
> until now, 'x-pasid-mode' takes no effect.
>
> So instead of an extra 'pss' property we can use a single property of
> uint8 type and named 'pasid-bits' to represent if pasid is supported
> and the PASID bits size. A value of N > 0 means pasid is supported and
> N - 1 is the value in PSS field in ECAP register.
>
> PASID bits size should also be no more than 20 bits according to PCI spec.
>
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> Reviewed-by: Clement Mathieu--Drif <clement.mathieu--drif@bull.com>
> Tested-by: Xudong Hao <xudong.hao@intel.com>
> ---
LGTM.
Reviewed-by: Yi Liu <yi.l.liu@intel.com>
> hw/i386/intel_iommu_internal.h | 2 +-
> include/hw/i386/intel_iommu.h | 2 +-
> hw/i386/intel_iommu.c | 11 +++++++++--
> 3 files changed, 11 insertions(+), 4 deletions(-)
>
> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
> index 11a53aa369..db4f186a3e 100644
> --- a/hw/i386/intel_iommu_internal.h
> +++ b/hw/i386/intel_iommu_internal.h
> @@ -195,7 +195,7 @@
> #define VTD_ECAP_MHMV (15ULL << 20)
> #define VTD_ECAP_SRS (1ULL << 31)
> #define VTD_ECAP_NWFS (1ULL << 33)
> -#define VTD_ECAP_PSS (7ULL << 35) /* limit: MemTxAttrs::pid */
> +#define VTD_ECAP_SET_PSS(x, v) ((x)->ecap = deposit64((x)->ecap, 35, 5, v))
> #define VTD_ECAP_PASID (1ULL << 40)
> #define VTD_ECAP_PDS (1ULL << 42)
> #define VTD_ECAP_SMTS (1ULL << 43)
> diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
> index e44ce31841..95c76015e4 100644
> --- a/include/hw/i386/intel_iommu.h
> +++ b/include/hw/i386/intel_iommu.h
> @@ -314,7 +314,7 @@ struct IntelIOMMUState {
> bool intr_eime; /* Extended interrupt mode enabled */
> OnOffAuto intr_eim; /* Toggle for EIM cabability */
> uint8_t aw_bits; /* Host/IOVA address width (in bits) */
> - bool pasid; /* Whether to support PASID */
> + uint8_t pasid; /* PASID supported in bits, 0 if not */
> bool fs1gp; /* First Stage 1-GByte Page Support */
>
> /* Transient Mapping, Reserved(0) since VTD spec revision 3.2 */
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index b784c5f10a..cf275b496e 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -4203,7 +4203,7 @@ static const Property vtd_properties[] = {
> DEFINE_PROP_BOOL("x-scalable-mode", IntelIOMMUState, scalable_mode, FALSE),
> DEFINE_PROP_BOOL("x-flts", IntelIOMMUState, fsts, FALSE),
> DEFINE_PROP_BOOL("snoop-control", IntelIOMMUState, snoop_control, false),
> - DEFINE_PROP_BOOL("x-pasid-mode", IntelIOMMUState, pasid, false),
> + DEFINE_PROP_UINT8("pasid-bits", IntelIOMMUState, pasid, 0),
> DEFINE_PROP_BOOL("svm", IntelIOMMUState, svm, false),
> DEFINE_PROP_BOOL("stale-tm", IntelIOMMUState, stale_tm, false),
> DEFINE_PROP_BOOL("fs1gp", IntelIOMMUState, fs1gp, true),
> @@ -5045,7 +5045,8 @@ static void vtd_cap_init(IntelIOMMUState *s)
> }
>
> if (s->pasid) {
> - s->ecap |= VTD_ECAP_PASID | VTD_ECAP_PSS;
> + VTD_ECAP_SET_PSS(s, s->pasid - 1);
> + s->ecap |= VTD_ECAP_PASID;
> }
> }
>
> @@ -5586,6 +5587,12 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error **errp)
> return false;
> }
>
> + if (s->pasid > PCI_EXT_CAP_PASID_MAX_WIDTH) {
> + error_setg(errp, "PASID width %d exceeds Max PASID Width %d allowed "
> + "in PCI spec", s->pasid, PCI_EXT_CAP_PASID_MAX_WIDTH);
> + return false;
> + }
> +
> if (s->svm) {
> if (!x86_iommu->dt_supported) {
> error_setg(errp, "Need to set device IOTLB for svm");
^ permalink raw reply [flat|nested] 24+ messages in thread
* [PATCH v5 06/15] intel_iommu: Export some functions
2026-05-09 4:07 [PATCH v5 00/15] intel_iommu: Enable PASID support for passthrough device Zhenzhong Duan
` (4 preceding siblings ...)
2026-05-09 4:07 ` [PATCH v5 05/15] intel_iommu: Rename pasid property to "pasid-bits" and define it as type uint8 Zhenzhong Duan
@ 2026-05-09 4:07 ` Zhenzhong Duan
2026-05-09 4:08 ` [PATCH v5 07/15] intel_iommu: Use IOMMU_NO_PASID and delete PASID_0 Zhenzhong Duan
` (8 subsequent siblings)
14 siblings, 0 replies; 24+ messages in thread
From: Zhenzhong Duan @ 2026-05-09 4:07 UTC (permalink / raw)
To: qemu-devel
Cc: alex, clg, eric.auger, mst, jasowang, jgg, nicolinc, skolothumtho,
joao.m.martins, clement.mathieu--drif, kevin.tian, yi.l.liu,
xudong.hao, Zhenzhong Duan, Clement Mathieu--Drif
Export some functions for use by accel code. Inline functions and macros
are moved to the internal header file so that accel code in the following
patches can access them.
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Reviewed-by: Clement Mathieu--Drif <clement.mathieu--drif@eviden.com>
Reviewed-by: Yi Liu <yi.l.liu@intel.com>
Tested-by: Xudong Hao <xudong.hao@intel.com>
---
hw/i386/intel_iommu_internal.h | 31 +++++++++++++++++++++++++
hw/i386/intel_iommu.c | 42 ++++++++--------------------------
2 files changed, 40 insertions(+), 33 deletions(-)
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index db4f186a3e..c7e107fe87 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -620,6 +620,12 @@ typedef struct VTDRootEntry VTDRootEntry;
#define VTD_SM_CONTEXT_ENTRY_RSVD_VAL1 0xffffffffffe00000ULL
#define VTD_SM_CONTEXT_ENTRY_PRE 0x10ULL
+/* context entry operations */
+#define VTD_CE_GET_PASID_DIR_TABLE(ce) \
+ ((ce)->val[0] & VTD_PASID_DIR_BASE_ADDR_MASK)
+#define VTD_CE_GET_PRE(ce) \
+ ((ce)->val[0] & VTD_SM_CONTEXT_ENTRY_PRE)
+
typedef struct VTDPASIDCacheInfo {
uint8_t type;
uint16_t did;
@@ -746,4 +752,29 @@ static inline bool vtd_pe_pgtt_is_fst(VTDPASIDEntry *pe)
{
return (VTD_SM_PASID_ENTRY_PGTT(pe) == VTD_SM_PASID_ENTRY_FST);
}
+
+static inline bool vtd_pdire_present(VTDPASIDDirEntry *pdire)
+{
+ return pdire->val & 1;
+}
+
+static inline bool vtd_pe_present(VTDPASIDEntry *pe)
+{
+ return pe->val[0] & VTD_PASID_ENTRY_P;
+}
+
+static inline int vtd_pasid_entry_compare(VTDPASIDEntry *p1, VTDPASIDEntry *p2)
+{
+ return memcmp(p1, p2, sizeof(*p1));
+}
+
+int vtd_get_pdire_from_pdir_table(dma_addr_t pasid_dir_base, uint32_t pasid,
+ VTDPASIDDirEntry *pdire);
+int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s, uint32_t pasid,
+ dma_addr_t addr, VTDPASIDEntry *pe);
+int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
+ uint8_t devfn, VTDContextEntry *ce);
+int vtd_ce_get_pasid_entry(IntelIOMMUState *s, VTDContextEntry *ce,
+ VTDPASIDEntry *pe, uint32_t pasid);
+VTDAddressSpace *vtd_get_as_by_sid(IntelIOMMUState *s, uint16_t sid);
#endif
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index cf275b496e..36af13cee3 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -42,12 +42,6 @@
#include "migration/vmstate.h"
#include "trace.h"
-/* context entry operations */
-#define VTD_CE_GET_PASID_DIR_TABLE(ce) \
- ((ce)->val[0] & VTD_PASID_DIR_BASE_ADDR_MASK)
-#define VTD_CE_GET_PRE(ce) \
- ((ce)->val[0] & VTD_SM_CONTEXT_ENTRY_PRE)
-
/*
* Paging mode for first-stage translation (VTD spec Figure 9-6)
* 00: 4-level paging, 01: 5-level paging
@@ -831,18 +825,12 @@ static inline bool vtd_pe_type_check(IntelIOMMUState *s, VTDPASIDEntry *pe)
}
}
-static inline bool vtd_pdire_present(VTDPASIDDirEntry *pdire)
-{
- return pdire->val & 1;
-}
-
/**
* Caller of this function should check present bit if wants
* to use pdir entry for further usage except for fpd bit check.
*/
-static int vtd_get_pdire_from_pdir_table(dma_addr_t pasid_dir_base,
- uint32_t pasid,
- VTDPASIDDirEntry *pdire)
+int vtd_get_pdire_from_pdir_table(dma_addr_t pasid_dir_base, uint32_t pasid,
+ VTDPASIDDirEntry *pdire)
{
uint32_t index;
dma_addr_t addr, entry_size;
@@ -860,15 +848,8 @@ static int vtd_get_pdire_from_pdir_table(dma_addr_t pasid_dir_base,
return 0;
}
-static inline bool vtd_pe_present(VTDPASIDEntry *pe)
-{
- return pe->val[0] & VTD_PASID_ENTRY_P;
-}
-
-static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s,
- uint32_t pasid,
- dma_addr_t addr,
- VTDPASIDEntry *pe)
+int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s, uint32_t pasid,
+ dma_addr_t addr, VTDPASIDEntry *pe)
{
uint8_t pgtt;
uint32_t index;
@@ -954,8 +935,8 @@ static int vtd_get_pe_from_pasid_table(IntelIOMMUState *s,
return 0;
}
-static int vtd_ce_get_pasid_entry(IntelIOMMUState *s, VTDContextEntry *ce,
- VTDPASIDEntry *pe, uint32_t pasid)
+int vtd_ce_get_pasid_entry(IntelIOMMUState *s, VTDContextEntry *ce,
+ VTDPASIDEntry *pe, uint32_t pasid)
{
dma_addr_t pasid_dir_base;
@@ -1526,8 +1507,8 @@ static int vtd_ce_pasid_0_check(IntelIOMMUState *s, VTDContextEntry *ce)
}
/* Map a device to its corresponding domain (context-entry) */
-static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
- uint8_t devfn, VTDContextEntry *ce)
+int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
+ uint8_t devfn, VTDContextEntry *ce)
{
VTDRootEntry re;
int ret_fr;
@@ -1909,7 +1890,7 @@ static VTDAddressSpace *vtd_get_as_by_sid_and_pasid(IntelIOMMUState *s,
vtd_find_as_by_sid_and_pasid, &key);
}
-static VTDAddressSpace *vtd_get_as_by_sid(IntelIOMMUState *s, uint16_t sid)
+VTDAddressSpace *vtd_get_as_by_sid(IntelIOMMUState *s, uint16_t sid)
{
return vtd_get_as_by_sid_and_pasid(s, sid, PCI_NO_PASID);
}
@@ -3133,11 +3114,6 @@ static inline int vtd_dev_get_pe_from_pasid(VTDAddressSpace *vtd_as,
return vtd_ce_get_pasid_entry(s, &ce, pe, vtd_as->pasid);
}
-static int vtd_pasid_entry_compare(VTDPASIDEntry *p1, VTDPASIDEntry *p2)
-{
- return memcmp(p1, p2, sizeof(*p1));
-}
-
/* Update or invalidate pasid cache based on the pasid entry in guest memory. */
static void vtd_pasid_cache_sync_locked(gpointer key, gpointer value,
gpointer user_data)
--
2.47.3
^ permalink raw reply related [flat|nested] 24+ messages in thread
* [PATCH v5 07/15] intel_iommu: Use IOMMU_NO_PASID and delete PASID_0
2026-05-09 4:07 [PATCH v5 00/15] intel_iommu: Enable PASID support for passthrough device Zhenzhong Duan
` (5 preceding siblings ...)
2026-05-09 4:07 ` [PATCH v5 06/15] intel_iommu: Export some functions Zhenzhong Duan
@ 2026-05-09 4:08 ` Zhenzhong Duan
2026-05-14 11:24 ` Yi Liu
2026-05-09 4:08 ` [PATCH v5 08/15] intel_iommu: Refactor PASID processing to use IOMMU_NO_PASID internally Zhenzhong Duan
` (7 subsequent siblings)
14 siblings, 1 reply; 24+ messages in thread
From: Zhenzhong Duan @ 2026-05-09 4:08 UTC (permalink / raw)
To: qemu-devel
Cc: alex, clg, eric.auger, mst, jasowang, jgg, nicolinc, skolothumtho,
joao.m.martins, clement.mathieu--drif, kevin.tian, yi.l.liu,
xudong.hao, Zhenzhong Duan
An earlier patch in this series introduced a global macro IOMMU_NO_PASID (0)
for RID attachment, which makes the local macro PASID_0 redundant.
Delete PASID_0 and use IOMMU_NO_PASID instead.
No functional changes intended.
Suggested-by: Yi Liu <yi.l.liu@intel.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Tested-by: Xudong Hao <xudong.hao@intel.com>
---
hw/i386/intel_iommu_internal.h | 1 -
hw/i386/intel_iommu.c | 22 +++++++++++-----------
hw/i386/intel_iommu_accel.c | 2 +-
3 files changed, 12 insertions(+), 13 deletions(-)
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index c7e107fe87..0141316f83 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -615,7 +615,6 @@ typedef struct VTDRootEntry VTDRootEntry;
#define VTD_CTX_ENTRY_LEGACY_SIZE 16
#define VTD_CTX_ENTRY_SCALABLE_SIZE 32
-#define PASID_0 0
#define VTD_SM_CONTEXT_ENTRY_RSVD_VAL0(aw) (0x1e0ULL | ~VTD_HAW_MASK(aw))
#define VTD_SM_CONTEXT_ENTRY_RSVD_VAL1 0xffffffffffe00000ULL
#define VTD_SM_CONTEXT_ENTRY_PRE 0x10ULL
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 36af13cee3..5e5dcdc274 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -941,7 +941,7 @@ int vtd_ce_get_pasid_entry(IntelIOMMUState *s, VTDContextEntry *ce,
dma_addr_t pasid_dir_base;
if (pasid == PCI_NO_PASID) {
- pasid = PASID_0;
+ pasid = IOMMU_NO_PASID;
}
pasid_dir_base = VTD_CE_GET_PASID_DIR_TABLE(ce);
return vtd_get_pe_from_pasid_table(s, pasid_dir_base, pasid, pe);
@@ -958,7 +958,7 @@ static int vtd_ce_get_pasid_fpd(IntelIOMMUState *s,
VTDPASIDEntry pe;
if (pasid == PCI_NO_PASID) {
- pasid = PASID_0;
+ pasid = IOMMU_NO_PASID;
}
pasid_dir_base = VTD_CE_GET_PASID_DIR_TABLE(ce);
@@ -1501,9 +1501,9 @@ static int vtd_ce_pasid_0_check(IntelIOMMUState *s, VTDContextEntry *ce)
/*
* Make sure in Scalable Mode, a present context entry
- * has valid pasid entry setting at PASID_0.
+ * has valid pasid entry setting at IOMMU_NO_PASID.
*/
- return vtd_ce_get_pasid_entry(s, ce, &pe, PASID_0);
+ return vtd_ce_get_pasid_entry(s, ce, &pe, IOMMU_NO_PASID);
}
/* Map a device to its corresponding domain (context-entry) */
@@ -1564,7 +1564,7 @@ int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
}
} else {
/*
- * Check if the programming of pasid setting of PASID_0
+ * Check if the programming of pasid setting of IOMMU_NO_PASID
* is valid, and thus avoids to check pasid entry fetching
* result in future helper function calling.
*/
@@ -2122,7 +2122,7 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
vtd_iommu_lock(s);
if (pasid == PCI_NO_PASID && s->root_scalable) {
- pasid = PASID_0;
+ pasid = IOMMU_NO_PASID;
}
/* Try to fetch pte from IOTLB */
@@ -2487,7 +2487,7 @@ static void vtd_iotlb_domain_invalidate(IntelIOMMUState *s, uint16_t domain_id)
}
/*
- * There is no pasid field in iotlb invalidation descriptor, so PCI_NO_PASID
+ * There is no pasid field in iotlb invalidation descriptor, so IOMMU_NO_PASID
* is passed as parameter. Piotlb invalidation supports pasid, pasid in its
* descriptor is passed which should not be PCI_NO_PASID.
*/
@@ -2508,10 +2508,10 @@ static void vtd_iotlb_page_invalidate_notify(IntelIOMMUState *s,
* In legacy mode, vtd_as->pasid == pasid is always true.
* In scalable mode, for vtd address space backing a PCI
* device without pasid, needs to compare pasid with
- * PASID_0 of this device.
+ * IOMMU_NO_PASID of this device.
*/
if (!(vtd_as->pasid == pasid ||
- (vtd_as->pasid == PCI_NO_PASID && pasid == PASID_0))) {
+ (vtd_as->pasid == PCI_NO_PASID && pasid == IOMMU_NO_PASID))) {
continue;
}
@@ -2563,7 +2563,7 @@ static void vtd_iotlb_page_invalidate(IntelIOMMUState *s, uint16_t domain_id,
vtd_iommu_lock(s);
g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_page, &info);
vtd_iommu_unlock(s);
- vtd_iotlb_page_invalidate_notify(s, domain_id, addr, am, PCI_NO_PASID);
+ vtd_iotlb_page_invalidate_notify(s, domain_id, addr, am, IOMMU_NO_PASID);
}
/* Flush IOTLB
@@ -3022,7 +3022,7 @@ static void vtd_piotlb_pasid_invalidate(IntelIOMMUState *s,
if (!vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus),
vtd_as->devfn, &ce) &&
domain_id == vtd_get_domain_id(s, &ce, vtd_as->pasid)) {
- if ((vtd_as->pasid != PCI_NO_PASID || pasid != PASID_0) &&
+ if ((vtd_as->pasid != PCI_NO_PASID || pasid != IOMMU_NO_PASID) &&
vtd_as->pasid != pasid) {
continue;
}
diff --git a/hw/i386/intel_iommu_accel.c b/hw/i386/intel_iommu_accel.c
index bd1236c070..8940d240a1 100644
--- a/hw/i386/intel_iommu_accel.c
+++ b/hw/i386/intel_iommu_accel.c
@@ -217,7 +217,7 @@ static void vtd_flush_host_piotlb_locked(gpointer key, gpointer value,
did = VTD_SM_PASID_ENTRY_DID(&pc_entry->pasid_entry);
- if (piotlb_info->domain_id == did && piotlb_info->pasid == PASID_0) {
+ if (piotlb_info->domain_id == did && piotlb_info->pasid == IOMMU_NO_PASID) {
HostIOMMUDeviceIOMMUFD *hiodi =
HOST_IOMMU_DEVICE_IOMMUFD(vtd_hiod->hiod);
uint32_t entry_num = 1; /* Only implement one request for simplicity */
--
2.47.3
^ permalink raw reply related [flat|nested] 24+ messages in thread
* Re: [PATCH v5 07/15] intel_iommu: Use IOMMU_NO_PASID and delete PASID_0
2026-05-09 4:08 ` [PATCH v5 07/15] intel_iommu: Use IOMMU_NO_PASID and delete PASID_0 Zhenzhong Duan
@ 2026-05-14 11:24 ` Yi Liu
0 siblings, 0 replies; 24+ messages in thread
From: Yi Liu @ 2026-05-14 11:24 UTC (permalink / raw)
To: Zhenzhong Duan, qemu-devel
Cc: alex, clg, eric.auger, mst, jasowang, jgg, nicolinc, skolothumtho,
joao.m.martins, clement.mathieu--drif, kevin.tian, xudong.hao
On 5/9/26 12:08, Zhenzhong Duan wrote:
> In previous patch we introduced a global macro IOMMU_NO_PASID(0) for
> the RID attachment, this makes the local macro PASID_0 redundant.
> Delete it and use IOMMU_NO_PASID instead.
>
> No functional changes intended.
>
> Suggested-by: Yi Liu <yi.l.liu@intel.com>
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> Tested-by: Xudong Hao <xudong.hao@intel.com>
> ---
> hw/i386/intel_iommu_internal.h | 1 -
> hw/i386/intel_iommu.c | 22 +++++++++++-----------
> hw/i386/intel_iommu_accel.c | 2 +-
> 3 files changed, 12 insertions(+), 13 deletions(-)
Reviewed-by: Yi Liu <yi.l.liu@intel.com>
> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
> index c7e107fe87..0141316f83 100644
> --- a/hw/i386/intel_iommu_internal.h
> +++ b/hw/i386/intel_iommu_internal.h
> @@ -615,7 +615,6 @@ typedef struct VTDRootEntry VTDRootEntry;
> #define VTD_CTX_ENTRY_LEGACY_SIZE 16
> #define VTD_CTX_ENTRY_SCALABLE_SIZE 32
>
> -#define PASID_0 0
> #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL0(aw) (0x1e0ULL | ~VTD_HAW_MASK(aw))
> #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL1 0xffffffffffe00000ULL
> #define VTD_SM_CONTEXT_ENTRY_PRE 0x10ULL
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index 36af13cee3..5e5dcdc274 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -941,7 +941,7 @@ int vtd_ce_get_pasid_entry(IntelIOMMUState *s, VTDContextEntry *ce,
> dma_addr_t pasid_dir_base;
>
> if (pasid == PCI_NO_PASID) {
> - pasid = PASID_0;
> + pasid = IOMMU_NO_PASID;
> }
> pasid_dir_base = VTD_CE_GET_PASID_DIR_TABLE(ce);
> return vtd_get_pe_from_pasid_table(s, pasid_dir_base, pasid, pe);
> @@ -958,7 +958,7 @@ static int vtd_ce_get_pasid_fpd(IntelIOMMUState *s,
> VTDPASIDEntry pe;
>
> if (pasid == PCI_NO_PASID) {
> - pasid = PASID_0;
> + pasid = IOMMU_NO_PASID;
> }
> pasid_dir_base = VTD_CE_GET_PASID_DIR_TABLE(ce);
>
> @@ -1501,9 +1501,9 @@ static int vtd_ce_pasid_0_check(IntelIOMMUState *s, VTDContextEntry *ce)
>
> /*
> * Make sure in Scalable Mode, a present context entry
> - * has valid pasid entry setting at PASID_0.
> + * has valid pasid entry setting at IOMMU_NO_PASID.
> */
> - return vtd_ce_get_pasid_entry(s, ce, &pe, PASID_0);
> + return vtd_ce_get_pasid_entry(s, ce, &pe, IOMMU_NO_PASID);
> }
>
> /* Map a device to its corresponding domain (context-entry) */
> @@ -1564,7 +1564,7 @@ int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
> }
> } else {
> /*
> - * Check if the programming of pasid setting of PASID_0
> + * Check if the programming of pasid setting of IOMMU_NO_PASID
> * is valid, and thus avoids to check pasid entry fetching
> * result in future helper function calling.
> */
> @@ -2122,7 +2122,7 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
> vtd_iommu_lock(s);
>
> if (pasid == PCI_NO_PASID && s->root_scalable) {
> - pasid = PASID_0;
> + pasid = IOMMU_NO_PASID;
> }
>
> /* Try to fetch pte from IOTLB */
> @@ -2487,7 +2487,7 @@ static void vtd_iotlb_domain_invalidate(IntelIOMMUState *s, uint16_t domain_id)
> }
>
> /*
> - * There is no pasid field in iotlb invalidation descriptor, so PCI_NO_PASID
> + * There is no pasid field in iotlb invalidation descriptor, so IOMMU_NO_PASID
> * is passed as parameter. Piotlb invalidation supports pasid, pasid in its
> * descriptor is passed which should not be PCI_NO_PASID.
> */
> @@ -2508,10 +2508,10 @@ static void vtd_iotlb_page_invalidate_notify(IntelIOMMUState *s,
> * In legacy mode, vtd_as->pasid == pasid is always true.
> * In scalable mode, for vtd address space backing a PCI
> * device without pasid, needs to compare pasid with
> - * PASID_0 of this device.
> + * IOMMU_NO_PASID of this device.
> */
> if (!(vtd_as->pasid == pasid ||
> - (vtd_as->pasid == PCI_NO_PASID && pasid == PASID_0))) {
> + (vtd_as->pasid == PCI_NO_PASID && pasid == IOMMU_NO_PASID))) {
> continue;
> }
>
> @@ -2563,7 +2563,7 @@ static void vtd_iotlb_page_invalidate(IntelIOMMUState *s, uint16_t domain_id,
> vtd_iommu_lock(s);
> g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_page, &info);
> vtd_iommu_unlock(s);
> - vtd_iotlb_page_invalidate_notify(s, domain_id, addr, am, PCI_NO_PASID);
> + vtd_iotlb_page_invalidate_notify(s, domain_id, addr, am, IOMMU_NO_PASID);
> }
>
> /* Flush IOTLB
> @@ -3022,7 +3022,7 @@ static void vtd_piotlb_pasid_invalidate(IntelIOMMUState *s,
> if (!vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus),
> vtd_as->devfn, &ce) &&
> domain_id == vtd_get_domain_id(s, &ce, vtd_as->pasid)) {
> - if ((vtd_as->pasid != PCI_NO_PASID || pasid != PASID_0) &&
> + if ((vtd_as->pasid != PCI_NO_PASID || pasid != IOMMU_NO_PASID) &&
> vtd_as->pasid != pasid) {
> continue;
> }
> diff --git a/hw/i386/intel_iommu_accel.c b/hw/i386/intel_iommu_accel.c
> index bd1236c070..8940d240a1 100644
> --- a/hw/i386/intel_iommu_accel.c
> +++ b/hw/i386/intel_iommu_accel.c
> @@ -217,7 +217,7 @@ static void vtd_flush_host_piotlb_locked(gpointer key, gpointer value,
>
> did = VTD_SM_PASID_ENTRY_DID(&pc_entry->pasid_entry);
>
> - if (piotlb_info->domain_id == did && piotlb_info->pasid == PASID_0) {
> + if (piotlb_info->domain_id == did && piotlb_info->pasid == IOMMU_NO_PASID) {
> HostIOMMUDeviceIOMMUFD *hiodi =
> HOST_IOMMU_DEVICE_IOMMUFD(vtd_hiod->hiod);
> uint32_t entry_num = 1; /* Only implement one request for simplicity */
^ permalink raw reply [flat|nested] 24+ messages in thread
* [PATCH v5 08/15] intel_iommu: Refactor PASID processing to use IOMMU_NO_PASID internally
2026-05-09 4:07 [PATCH v5 00/15] intel_iommu: Enable PASID support for passthrough device Zhenzhong Duan
` (6 preceding siblings ...)
2026-05-09 4:08 ` [PATCH v5 07/15] intel_iommu: Use IOMMU_NO_PASID and delete PASID_0 Zhenzhong Duan
@ 2026-05-09 4:08 ` Zhenzhong Duan
2026-05-14 11:25 ` Yi Liu
2026-05-09 4:08 ` [PATCH v5 09/15] intel_iommu_accel: Handle PASID entry addition for pc_inv_dsc request Zhenzhong Duan
` (6 subsequent siblings)
14 siblings, 1 reply; 24+ messages in thread
From: Zhenzhong Duan @ 2026-05-09 4:08 UTC (permalink / raw)
To: qemu-devel
Cc: alex, clg, eric.auger, mst, jasowang, jgg, nicolinc, skolothumtho,
joao.m.martins, clement.mathieu--drif, kevin.tian, yi.l.liu,
xudong.hao, Zhenzhong Duan, Philippe Mathieu-Daudé
The PCI subsystem uses PCI_NO_PASID for requests-without-PASID, but VT-d
uses IOMMU_NO_PASID internally. This leads to conversion and checking code
between PCI_NO_PASID and IOMMU_NO_PASID throughout the implementation.
Refactor to use IOMMU PASID consistently within Intel IOMMU by storing
IOMMU PASID value in vtd_as->pasid. After this change, PCI_NO_PASID is
only used at three boundary points:
1. PCI_NO_PASID -> IOMMU_NO_PASID: Convert PCI PASID to IOMMU PASID in
vtd_find_add_as() and cache it in vtd_as->pasid.
2. IOMMU_NO_PASID -> PCI_NO_PASID: Convert when notifying UNMAP events
via memory_region_notify_iommu().
3. IOMMU_NO_PASID -> PCI_NO_PASID: Convert when returning the
IOMMUTLBEntry in vtd_iommu_translate().
This eliminates conversion/checks in PASID table lookups, simplifies
invalidation logic with consistent PASID values, and improves code
readability. The PCI subsystem interface remains unchanged to maintain
compatibility with other IOMMU implementations that may not use PASID 0
for requests-without-PASID.
Suggested-by: Clement Mathieu--Drif <clement.mathieu--drif@bull.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
---
include/system/memory.h | 2 +-
hw/i386/intel_iommu.c | 164 +++++++++++++++++-------------------
hw/i386/intel_iommu_accel.c | 2 +-
3 files changed, 80 insertions(+), 88 deletions(-)
diff --git a/include/system/memory.h b/include/system/memory.h
index 1417132f6d..1edb38b07d 100644
--- a/include/system/memory.h
+++ b/include/system/memory.h
@@ -150,7 +150,7 @@ struct IOMMUTLBEntry {
hwaddr translated_addr;
hwaddr addr_mask; /* 0xfff = 4k translation */
IOMMUAccessFlags perm;
- uint32_t pasid;
+ uint32_t pasid; /* PCI pasid */
};
/*
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 5e5dcdc274..b50c556c40 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -938,12 +938,8 @@ static int vtd_get_pe_from_pasid_table(IntelIOMMUState *s,
int vtd_ce_get_pasid_entry(IntelIOMMUState *s, VTDContextEntry *ce,
VTDPASIDEntry *pe, uint32_t pasid)
{
- dma_addr_t pasid_dir_base;
+ dma_addr_t pasid_dir_base = VTD_CE_GET_PASID_DIR_TABLE(ce);
- if (pasid == PCI_NO_PASID) {
- pasid = IOMMU_NO_PASID;
- }
- pasid_dir_base = VTD_CE_GET_PASID_DIR_TABLE(ce);
return vtd_get_pe_from_pasid_table(s, pasid_dir_base, pasid, pe);
}
@@ -953,15 +949,10 @@ static int vtd_ce_get_pasid_fpd(IntelIOMMUState *s,
uint32_t pasid)
{
int ret;
- dma_addr_t pasid_dir_base;
+ dma_addr_t pasid_dir_base = VTD_CE_GET_PASID_DIR_TABLE(ce);
VTDPASIDDirEntry pdire;
VTDPASIDEntry pe;
- if (pasid == PCI_NO_PASID) {
- pasid = IOMMU_NO_PASID;
- }
- pasid_dir_base = VTD_CE_GET_PASID_DIR_TABLE(ce);
-
/*
* No present bit check since fpd is meaningful even
* if the present bit is clear.
@@ -1750,7 +1741,7 @@ static bool vtd_switch_address_space(VTDAddressSpace *as)
*
* Need to disable ir for as with PASID.
*/
- if (as->pasid != PCI_NO_PASID) {
+ if (as->pasid != IOMMU_NO_PASID) {
memory_region_set_enabled(&as->iommu_ir, false);
} else {
memory_region_set_enabled(&as->iommu_ir, true);
@@ -1780,7 +1771,7 @@ static bool vtd_switch_address_space(VTDAddressSpace *as)
* We enable per as memory region (iommu_ir_fault) for catching
* the translation for interrupt range through PASID + PT.
*/
- if (pt && as->pasid != PCI_NO_PASID) {
+ if (pt && as->pasid != IOMMU_NO_PASID) {
memory_region_set_enabled(&as->iommu_ir_fault, true);
} else {
memory_region_set_enabled(&as->iommu_ir_fault, false);
@@ -1892,7 +1883,7 @@ static VTDAddressSpace *vtd_get_as_by_sid_and_pasid(IntelIOMMUState *s,
VTDAddressSpace *vtd_get_as_by_sid(IntelIOMMUState *s, uint16_t sid)
{
- return vtd_get_as_by_sid_and_pasid(s, sid, PCI_NO_PASID);
+ return vtd_get_as_by_sid_and_pasid(s, sid, IOMMU_NO_PASID);
}
static void vtd_pt_enable_fast_path(IntelIOMMUState *s, uint16_t source_id)
@@ -2121,10 +2112,6 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
vtd_iommu_lock(s);
- if (pasid == PCI_NO_PASID && s->root_scalable) {
- pasid = IOMMU_NO_PASID;
- }
-
/* Try to fetch pte from IOTLB */
iotlb_entry = vtd_lookup_iotlb(s, source_id, pasid, addr);
if (iotlb_entry) {
@@ -2235,7 +2222,7 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
if (ret_fr) {
if (!vtd_is_recoverable_fault(-ret_fr, iommu_idx)) {
vtd_report_fault(s, -ret_fr, is_fpd_set, source_id,
- addr, is_write, pasid != PCI_NO_PASID, pasid);
+ addr, is_write, s->root_scalable, pasid);
}
goto error;
}
@@ -2489,7 +2476,7 @@ static void vtd_iotlb_domain_invalidate(IntelIOMMUState *s, uint16_t domain_id)
/*
* There is no pasid field in iotlb invalidation descriptor, so IOMMU_NO_PASID
* is passed as parameter. Piotlb invalidation supports pasid, pasid in its
- * descriptor is passed which should not be PCI_NO_PASID.
+ * descriptor is passed.
*/
static void vtd_iotlb_page_invalidate_notify(IntelIOMMUState *s,
uint16_t domain_id, hwaddr addr,
@@ -2503,48 +2490,41 @@ static void vtd_iotlb_page_invalidate_notify(IntelIOMMUState *s,
QLIST_FOREACH(vtd_as, &(s->vtd_as_with_notifiers), next) {
ret = vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus),
vtd_as->devfn, &ce);
- if (!ret && domain_id == vtd_get_domain_id(s, &ce, vtd_as->pasid)) {
+ if (ret || vtd_as->pasid != pasid ||
+ domain_id != vtd_get_domain_id(s, &ce, pasid)) {
+ continue;
+ }
+
+ if (vtd_as_has_map_notifier(vtd_as)) {
/*
- * In legacy mode, vtd_as->pasid == pasid is always true.
- * In scalable mode, for vtd address space backing a PCI
- * device without pasid, needs to compare pasid with
- * IOMMU_NO_PASID of this device.
+ * When first stage translation is off, as long as we have MAP
+ * notifications registered in any of our IOMMU notifiers,
+ * we need to sync the shadow page table. Otherwise VFIO
+ * device attaches to nested page table instead of shadow
+ * page table, so no need to sync.
*/
- if (!(vtd_as->pasid == pasid ||
- (vtd_as->pasid == PCI_NO_PASID && pasid == IOMMU_NO_PASID))) {
- continue;
- }
-
- if (vtd_as_has_map_notifier(vtd_as)) {
- /*
- * When first stage translation is off, as long as we have MAP
- * notifications registered in any of our IOMMU notifiers,
- * we need to sync the shadow page table. Otherwise VFIO
- * device attaches to nested page table instead of shadow
- * page table, so no need to sync.
- */
- if (!s->fsts || !s->root_scalable) {
- vtd_sync_shadow_page_table_range(vtd_as, &ce, addr, size);
- }
- } else {
- /*
- * For UNMAP-only notifiers, we don't need to walk the
- * page tables. We just deliver the PSI down to
- * invalidate caches.
- */
- const IOMMUTLBEvent event = {
- .type = IOMMU_NOTIFIER_UNMAP,
- .entry = {
- .target_as = &address_space_memory,
- .iova = addr,
- .translated_addr = 0,
- .addr_mask = size - 1,
- .perm = IOMMU_NONE,
- .pasid = vtd_as->pasid,
- },
- };
- memory_region_notify_iommu(&vtd_as->iommu, 0, event);
+ if (!s->fsts || !s->root_scalable) {
+ vtd_sync_shadow_page_table_range(vtd_as, &ce, addr, size);
}
+ } else {
+ /*
+ * For UNMAP-only notifiers, we don't need to walk the
+ * page tables. We just deliver the PSI down to
+ * invalidate caches.
+ */
+ const IOMMUTLBEvent event = {
+ .type = IOMMU_NOTIFIER_UNMAP,
+ .entry = {
+ .target_as = &address_space_memory,
+ .iova = addr,
+ .translated_addr = 0,
+ .addr_mask = size - 1,
+ .perm = IOMMU_NONE,
+ /* Other sub-systems use PCI pasid */
+ .pasid = pasid == IOMMU_NO_PASID ? PCI_NO_PASID : pasid,
+ },
+ };
+ memory_region_notify_iommu(&vtd_as->iommu, 0, event);
}
}
}
@@ -3007,6 +2987,7 @@ static void vtd_piotlb_pasid_invalidate(IntelIOMMUState *s,
VTDIOTLBPageInvInfo info;
VTDAddressSpace *vtd_as;
VTDContextEntry ce;
+ int ret;
info.domain_id = domain_id;
info.pasid = pasid;
@@ -3019,17 +3000,15 @@ static void vtd_piotlb_pasid_invalidate(IntelIOMMUState *s,
vtd_iommu_unlock(s);
QLIST_FOREACH(vtd_as, &s->vtd_as_with_notifiers, next) {
- if (!vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus),
- vtd_as->devfn, &ce) &&
- domain_id == vtd_get_domain_id(s, &ce, vtd_as->pasid)) {
- if ((vtd_as->pasid != PCI_NO_PASID || pasid != IOMMU_NO_PASID) &&
- vtd_as->pasid != pasid) {
- continue;
- }
+ ret = vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus),
+ vtd_as->devfn, &ce);
+ if (ret || vtd_as->pasid != pasid ||
+ domain_id != vtd_get_domain_id(s, &ce, pasid)) {
+ continue;
+ }
- if (!s->fsts || !vtd_as_has_map_notifier(vtd_as)) {
- vtd_address_space_sync(vtd_as);
- }
+ if (!s->fsts || !vtd_as_has_map_notifier(vtd_as)) {
+ vtd_address_space_sync(vtd_as);
}
}
}
@@ -3239,7 +3218,7 @@ static bool vtd_process_pasid_desc(IntelIOMMUState *s,
/* PASID selective implies a DID selective */
trace_vtd_inv_desc_pasid_cache_psi(did, pasid);
pc_info.did = did;
- pc_info.pasid = pasid ?: PCI_NO_PASID;
+ pc_info.pasid = pasid;
break;
case VTD_INV_DESC_PASIDC_G_GLOBAL:
@@ -3291,6 +3270,7 @@ static void do_invalidate_device_tlb(VTDAddressSpace *vtd_dev_as,
* ...
*/
+ uint32_t pasid = vtd_dev_as->pasid;
IOMMUTLBEvent event;
uint64_t sz;
@@ -3307,7 +3287,8 @@ static void do_invalidate_device_tlb(VTDAddressSpace *vtd_dev_as,
event.entry.iova = addr;
event.entry.perm = IOMMU_NONE;
event.entry.translated_addr = 0;
- event.entry.pasid = vtd_dev_as->pasid;
+ /* Other sub-systems use PCI pasid */
+ event.entry.pasid = pasid == IOMMU_NO_PASID ? PCI_NO_PASID : pasid;
memory_region_notify_iommu(&vtd_dev_as->iommu, 0, event);
}
@@ -3335,7 +3316,7 @@ static bool vtd_process_device_piotlb_desc(IntelIOMMUState *s,
sid = VTD_INV_DESC_PASID_DEVICE_IOTLB_SID(inv_desc->lo);
if (global) {
QLIST_FOREACH(vtd_dev_as, &s->vtd_as_with_notifiers, next) {
- if ((vtd_dev_as->pasid != PCI_NO_PASID) &&
+ if ((vtd_dev_as->pasid != IOMMU_NO_PASID) &&
(PCI_BUILD_BDF(pci_bus_num(vtd_dev_as->bus),
vtd_dev_as->devfn) == sid)) {
do_invalidate_device_tlb(vtd_dev_as, size, addr);
@@ -3983,13 +3964,12 @@ static void vtd_mem_write(void *opaque, hwaddr addr,
}
static void vtd_prepare_identity_entry(hwaddr addr, IOMMUAccessFlags perm,
- uint32_t pasid, IOMMUTLBEntry *iotlb)
+ IOMMUTLBEntry *iotlb)
{
iotlb->iova = addr & VTD_PAGE_MASK_4K;
iotlb->translated_addr = addr & VTD_PAGE_MASK_4K;
iotlb->addr_mask = ~VTD_PAGE_MASK_4K;
iotlb->perm = perm;
- iotlb->pasid = pasid;
}
static inline void vtd_prepare_error_entry(IOMMUTLBEntry *entry)
@@ -4001,6 +3981,10 @@ static inline void vtd_prepare_error_entry(IOMMUTLBEntry *entry)
entry->pasid = PCI_NO_PASID;
}
+/*
+ * This function returns translation result to other sub-system such as PCI,
+ * so iommu pasid is converted to PCI pasid and returned in IOMMUTLBEntry.
+ */
static IOMMUTLBEntry vtd_iommu_translate(IOMMUMemoryRegion *iommu, hwaddr addr,
IOMMUAccessFlags flag, int iommu_idx)
{
@@ -4009,7 +3993,7 @@ static IOMMUTLBEntry vtd_iommu_translate(IOMMUMemoryRegion *iommu, hwaddr addr,
IOMMUTLBEntry iotlb = {
/* We'll fill in the rest later. */
.target_as = &address_space_memory,
- .pasid = vtd_as->pasid,
+ .pasid = vtd_as->pasid == IOMMU_NO_PASID ? PCI_NO_PASID : vtd_as->pasid,
};
bool success;
bool is_write = flag & IOMMU_WO;
@@ -4017,9 +4001,8 @@ static IOMMUTLBEntry vtd_iommu_translate(IOMMUMemoryRegion *iommu, hwaddr addr,
if (likely(s->dmar_enabled)) {
/* Only support translated requests in scalable mode */
if (iommu_idx == VTD_IDX_TRANSLATED && s->root_scalable) {
- if (vtd_as->pasid == PCI_NO_PASID) {
- vtd_prepare_identity_entry(addr, IOMMU_RW, PCI_NO_PASID,
- &iotlb);
+ if (vtd_as->pasid == IOMMU_NO_PASID) {
+ vtd_prepare_identity_entry(addr, IOMMU_RW, &iotlb);
success = true;
} else {
vtd_prepare_error_entry(&iotlb);
@@ -4034,7 +4017,7 @@ static IOMMUTLBEntry vtd_iommu_translate(IOMMUMemoryRegion *iommu, hwaddr addr,
}
} else {
/* DMAR disabled, passthrough, use 4k-page*/
- vtd_prepare_identity_entry(addr, IOMMU_RW, vtd_as->pasid, &iotlb);
+ vtd_prepare_identity_entry(addr, IOMMU_RW, &iotlb);
success = true;
}
@@ -4460,7 +4443,7 @@ static void vtd_report_sid_ir_illegal_access(IntelIOMMUState *s, uint16_t sid,
}
vtd_report_fault(s, VTD_FR_SM_INTERRUPT_ADDR, is_fpd_set, sid, addr,
- is_write, pasid != PCI_NO_PASID, pasid);
+ is_write, pasid != IOMMU_NO_PASID, pasid);
}
static void vtd_report_ir_illegal_access(VTDAddressSpace *vtd_as,
@@ -4488,7 +4471,6 @@ static MemTxResult vtd_mem_ir_write(void *opaque, hwaddr addr,
int ret = 0;
MSIMessage from = {}, to = {};
uint16_t sid = X86_IOMMU_SID_INVALID;
- uint32_t pasid;
from.address = (uint64_t) addr + VTD_INTERRUPT_ADDR_FIRST;
from.data = (uint32_t) value;
@@ -4496,11 +4478,11 @@ static MemTxResult vtd_mem_ir_write(void *opaque, hwaddr addr,
if (!attrs.unspecified) {
/* We have explicit Source ID */
sid = attrs.requester_id;
- pasid = attrs.pid != 0 ? attrs.pid : PCI_NO_PASID;
if (attrs.address_type == PCI_AT_TRANSLATED &&
sid != X86_IOMMU_SID_INVALID) {
- vtd_report_sid_ir_illegal_access(s, sid, pasid, from.address, true);
+ vtd_report_sid_ir_illegal_access(s, sid, attrs.pid, from.address,
+ true);
return MEMTX_ERROR;
}
}
@@ -4562,9 +4544,19 @@ static const MemoryRegionOps vtd_mem_ir_fault_ops = {
},
};
+/*
+ * This function is called by many PCIIOMMUOps callbacks to get
+ * VTDAddressSpace or create one if non-exist. Those callbacks are
+ * used by PCI sub-system and are passed in a PCI pasid value.
+ *
+ * VTD honors iommu pasid, so the first thing is to convert PCI
+ * pasid to iommu pasid.
+ */
VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus,
int devfn, unsigned int pasid)
{
+ pasid = pasid == PCI_NO_PASID ? IOMMU_NO_PASID : pasid;
+
/*
* We can't simply use sid here since the bus number might not be
* initialized by the guest.
@@ -4606,7 +4598,7 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus,
new_key->devfn = devfn;
new_key->pasid = pasid;
- if (pasid == PCI_NO_PASID) {
+ if (pasid == IOMMU_NO_PASID) {
snprintf(name, sizeof(name), "vtd-%02x.%x", PCI_SLOT(devfn),
PCI_FUNC(devfn));
} else {
@@ -5290,7 +5282,7 @@ error_get_fpd_and_report:
vtd_ce_get_pasid_fpd(s, &ce, &is_fpd_set, vtd_as->pasid);
error_report:
vtd_report_fault(s, -ret, is_fpd_set, sid, addr, is_write,
- vtd_as->pasid != PCI_NO_PASID, vtd_as->pasid);
+ vtd_as->pasid != IOMMU_NO_PASID, vtd_as->pasid);
return false;
}
@@ -5381,7 +5373,7 @@ static int vtd_pri_request_page(PCIBus *bus, void *opaque, int devfn,
*/
/* We do not support PRI without PASID */
- if (vtd_as->pasid == PCI_NO_PASID) {
+ if (vtd_as->pasid == IOMMU_NO_PASID) {
return -EPERM;
}
if (exec_req && !is_read) {
diff --git a/hw/i386/intel_iommu_accel.c b/hw/i386/intel_iommu_accel.c
index 8940d240a1..10bdbba632 100644
--- a/hw/i386/intel_iommu_accel.c
+++ b/hw/i386/intel_iommu_accel.c
@@ -207,7 +207,7 @@ static void vtd_flush_host_piotlb_locked(gpointer key, gpointer value,
return;
}
- assert(vtd_as->pasid == PCI_NO_PASID);
+ assert(vtd_as->pasid == IOMMU_NO_PASID);
/* Nothing to do if there is no first stage HWPT attached */
if (!pc_entry->valid ||
--
2.47.3
^ permalink raw reply related [flat|nested] 24+ messages in thread
* Re: [PATCH v5 08/15] intel_iommu: Refactor PASID processing to use IOMMU_NO_PASID internally
2026-05-09 4:08 ` [PATCH v5 08/15] intel_iommu: Refactor PASID processing to use IOMMU_NO_PASID internally Zhenzhong Duan
@ 2026-05-14 11:25 ` Yi Liu
0 siblings, 0 replies; 24+ messages in thread
From: Yi Liu @ 2026-05-14 11:25 UTC (permalink / raw)
To: Zhenzhong Duan, qemu-devel
Cc: alex, clg, eric.auger, mst, jasowang, jgg, nicolinc, skolothumtho,
joao.m.martins, clement.mathieu--drif, kevin.tian, xudong.hao,
Philippe Mathieu-Daudé
On 5/9/26 12:08, Zhenzhong Duan wrote:
> The PCI subsystem uses PCI_NO_PASID for requests-without-PASID, but VT-d
> uses IOMMU_NO_PASID internally. This leads to conversion and checking code
s/VT-d uses IOMMU_NO_PASID internally/VT-d emulation uses IOMMU_NO_PASID
internally (ecap.RPS==0)/
> between PCI_NO_PASID and IOMMU_NO_PASID throughout the implementation.
>
> Refactor to use IOMMU PASID consistently within Intel IOMMU by storing
> IOMMU PASID value in vtd_as->pasid. After this change, PCI_NO_PASID is
> only used at three boundary points:
Is "three" a typo, or is the third boundary point missing from the list below?
>
> 1. PCI_NO_PASID -> IOMMU_NO_PASID: Convert PCI PASID to IOMMU PASID in
> vtd_find_add_as() and cache in vtd_as->pasid.
> 2. IOMMU_NO_PASID -> PCI_NO_PASID: Convert when notifying UNMAP events
> via memory_region_notify_iommu() and returning IOMMUTLBEntry in
> vtd_iommu_translate().
>
> This eliminates conversion/checks in PASID table lookups, simplifies
> invalidation logic with consistent PASID values, and improves code
> readability. The PCI subsystem interface remains unchanged to maintain
> compatibility with other IOMMU implementations that may not use PASID 0
> for requests-without-PASID.
>
> Suggested-by: Clement Mathieu--Drif <clement.mathieu--drif@bull.com>
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> ---
> include/system/memory.h | 2 +-
> hw/i386/intel_iommu.c | 164 +++++++++++++++++-------------------
> hw/i386/intel_iommu_accel.c | 2 +-
> 3 files changed, 80 insertions(+), 88 deletions(-)
>
> diff --git a/include/system/memory.h b/include/system/memory.h
> index 1417132f6d..1edb38b07d 100644
> --- a/include/system/memory.h
> +++ b/include/system/memory.h
> @@ -150,7 +150,7 @@ struct IOMMUTLBEntry {
> hwaddr translated_addr;
> hwaddr addr_mask; /* 0xfff = 4k translation */
> IOMMUAccessFlags perm;
> - uint32_t pasid;
> + uint32_t pasid; /* PCI pasid */
> };
>
> /*
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index 5e5dcdc274..b50c556c40 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -938,12 +938,8 @@ static int vtd_get_pe_from_pasid_table(IntelIOMMUState *s,
> int vtd_ce_get_pasid_entry(IntelIOMMUState *s, VTDContextEntry *ce,
> VTDPASIDEntry *pe, uint32_t pasid)
> {
> - dma_addr_t pasid_dir_base;
> + dma_addr_t pasid_dir_base = VTD_CE_GET_PASID_DIR_TABLE(ce);
>
> - if (pasid == PCI_NO_PASID) {
> - pasid = IOMMU_NO_PASID;
> - }
> - pasid_dir_base = VTD_CE_GET_PASID_DIR_TABLE(ce);
> return vtd_get_pe_from_pasid_table(s, pasid_dir_base, pasid, pe);
> }
>
> @@ -953,15 +949,10 @@ static int vtd_ce_get_pasid_fpd(IntelIOMMUState *s,
> uint32_t pasid)
> {
> int ret;
> - dma_addr_t pasid_dir_base;
> + dma_addr_t pasid_dir_base = VTD_CE_GET_PASID_DIR_TABLE(ce);
> VTDPASIDDirEntry pdire;
> VTDPASIDEntry pe;
>
> - if (pasid == PCI_NO_PASID) {
> - pasid = IOMMU_NO_PASID;
> - }
> - pasid_dir_base = VTD_CE_GET_PASID_DIR_TABLE(ce);
> -
> /*
> * No present bit check since fpd is meaningful even
> * if the present bit is clear.
> @@ -1750,7 +1741,7 @@ static bool vtd_switch_address_space(VTDAddressSpace *as)
> *
> * Need to disable ir for as with PASID.
> */
> - if (as->pasid != PCI_NO_PASID) {
> + if (as->pasid != IOMMU_NO_PASID) {
> memory_region_set_enabled(&as->iommu_ir, false);
> } else {
> memory_region_set_enabled(&as->iommu_ir, true);
> @@ -1780,7 +1771,7 @@ static bool vtd_switch_address_space(VTDAddressSpace *as)
> * We enable per as memory region (iommu_ir_fault) for catching
> * the translation for interrupt range through PASID + PT.
> */
> - if (pt && as->pasid != PCI_NO_PASID) {
> + if (pt && as->pasid != IOMMU_NO_PASID) {
> memory_region_set_enabled(&as->iommu_ir_fault, true);
> } else {
> memory_region_set_enabled(&as->iommu_ir_fault, false);
> @@ -1892,7 +1883,7 @@ static VTDAddressSpace *vtd_get_as_by_sid_and_pasid(IntelIOMMUState *s,
>
> VTDAddressSpace *vtd_get_as_by_sid(IntelIOMMUState *s, uint16_t sid)
> {
> - return vtd_get_as_by_sid_and_pasid(s, sid, PCI_NO_PASID);
> + return vtd_get_as_by_sid_and_pasid(s, sid, IOMMU_NO_PASID);
> }
>
> static void vtd_pt_enable_fast_path(IntelIOMMUState *s, uint16_t source_id)
> @@ -2121,10 +2112,6 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
>
> vtd_iommu_lock(s);
>
> - if (pasid == PCI_NO_PASID && s->root_scalable) {
> - pasid = IOMMU_NO_PASID;
> - }
> -
> /* Try to fetch pte from IOTLB */
> iotlb_entry = vtd_lookup_iotlb(s, source_id, pasid, addr);
> if (iotlb_entry) {
> @@ -2235,7 +2222,7 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
> if (ret_fr) {
> if (!vtd_is_recoverable_fault(-ret_fr, iommu_idx)) {
> vtd_report_fault(s, -ret_fr, is_fpd_set, source_id,
> - addr, is_write, pasid != PCI_NO_PASID, pasid);
> + addr, is_write, s->root_scalable, pasid);
a typo here? s->root_scalable should be "pasid != IOMMU_NO_PASID"?
Other part LGTM.
Regards,
Yi Liu
> }
> goto error;
> }
> @@ -2489,7 +2476,7 @@ static void vtd_iotlb_domain_invalidate(IntelIOMMUState *s, uint16_t domain_id)
> /*
> * There is no pasid field in iotlb invalidation descriptor, so IOMMU_NO_PASID
> * is passed as parameter. Piotlb invalidation supports pasid, pasid in its
> - * descriptor is passed which should not be PCI_NO_PASID.
> + * descriptor is passed.
> */
> static void vtd_iotlb_page_invalidate_notify(IntelIOMMUState *s,
> uint16_t domain_id, hwaddr addr,
> @@ -2503,48 +2490,41 @@ static void vtd_iotlb_page_invalidate_notify(IntelIOMMUState *s,
> QLIST_FOREACH(vtd_as, &(s->vtd_as_with_notifiers), next) {
> ret = vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus),
> vtd_as->devfn, &ce);
> - if (!ret && domain_id == vtd_get_domain_id(s, &ce, vtd_as->pasid)) {
> + if (ret || vtd_as->pasid != pasid ||
> + domain_id != vtd_get_domain_id(s, &ce, pasid)) {
> + continue;
> + }
> +
> + if (vtd_as_has_map_notifier(vtd_as)) {
> /*
> - * In legacy mode, vtd_as->pasid == pasid is always true.
> - * In scalable mode, for vtd address space backing a PCI
> - * device without pasid, needs to compare pasid with
> - * IOMMU_NO_PASID of this device.
> + * When first stage translation is off, as long as we have MAP
> + * notifications registered in any of our IOMMU notifiers,
> + * we need to sync the shadow page table. Otherwise VFIO
> + * device attaches to nested page table instead of shadow
> + * page table, so no need to sync.
> */
> - if (!(vtd_as->pasid == pasid ||
> - (vtd_as->pasid == PCI_NO_PASID && pasid == IOMMU_NO_PASID))) {
> - continue;
> - }
> -
> - if (vtd_as_has_map_notifier(vtd_as)) {
> - /*
> - * When first stage translation is off, as long as we have MAP
> - * notifications registered in any of our IOMMU notifiers,
> - * we need to sync the shadow page table. Otherwise VFIO
> - * device attaches to nested page table instead of shadow
> - * page table, so no need to sync.
> - */
> - if (!s->fsts || !s->root_scalable) {
> - vtd_sync_shadow_page_table_range(vtd_as, &ce, addr, size);
> - }
> - } else {
> - /*
> - * For UNMAP-only notifiers, we don't need to walk the
> - * page tables. We just deliver the PSI down to
> - * invalidate caches.
> - */
> - const IOMMUTLBEvent event = {
> - .type = IOMMU_NOTIFIER_UNMAP,
> - .entry = {
> - .target_as = &address_space_memory,
> - .iova = addr,
> - .translated_addr = 0,
> - .addr_mask = size - 1,
> - .perm = IOMMU_NONE,
> - .pasid = vtd_as->pasid,
> - },
> - };
> - memory_region_notify_iommu(&vtd_as->iommu, 0, event);
> + if (!s->fsts || !s->root_scalable) {
> + vtd_sync_shadow_page_table_range(vtd_as, &ce, addr, size);
> }
> + } else {
> + /*
> + * For UNMAP-only notifiers, we don't need to walk the
> + * page tables. We just deliver the PSI down to
> + * invalidate caches.
> + */
> + const IOMMUTLBEvent event = {
> + .type = IOMMU_NOTIFIER_UNMAP,
> + .entry = {
> + .target_as = &address_space_memory,
> + .iova = addr,
> + .translated_addr = 0,
> + .addr_mask = size - 1,
> + .perm = IOMMU_NONE,
> + /* Other sub-systems use PCI pasid */
> + .pasid = pasid == IOMMU_NO_PASID ? PCI_NO_PASID : pasid,
> + },
> + };
> + memory_region_notify_iommu(&vtd_as->iommu, 0, event);
> }
> }
> }
> @@ -3007,6 +2987,7 @@ static void vtd_piotlb_pasid_invalidate(IntelIOMMUState *s,
> VTDIOTLBPageInvInfo info;
> VTDAddressSpace *vtd_as;
> VTDContextEntry ce;
> + int ret;
>
> info.domain_id = domain_id;
> info.pasid = pasid;
> @@ -3019,17 +3000,15 @@ static void vtd_piotlb_pasid_invalidate(IntelIOMMUState *s,
> vtd_iommu_unlock(s);
>
> QLIST_FOREACH(vtd_as, &s->vtd_as_with_notifiers, next) {
> - if (!vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus),
> - vtd_as->devfn, &ce) &&
> - domain_id == vtd_get_domain_id(s, &ce, vtd_as->pasid)) {
> - if ((vtd_as->pasid != PCI_NO_PASID || pasid != IOMMU_NO_PASID) &&
> - vtd_as->pasid != pasid) {
> - continue;
> - }
> + ret = vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus),
> + vtd_as->devfn, &ce);
> + if (ret || vtd_as->pasid != pasid ||
> + domain_id != vtd_get_domain_id(s, &ce, pasid)) {
> + continue;
> + }
>
> - if (!s->fsts || !vtd_as_has_map_notifier(vtd_as)) {
> - vtd_address_space_sync(vtd_as);
> - }
> + if (!s->fsts || !vtd_as_has_map_notifier(vtd_as)) {
> + vtd_address_space_sync(vtd_as);
> }
> }
> }
> @@ -3239,7 +3218,7 @@ static bool vtd_process_pasid_desc(IntelIOMMUState *s,
> /* PASID selective implies a DID selective */
> trace_vtd_inv_desc_pasid_cache_psi(did, pasid);
> pc_info.did = did;
> - pc_info.pasid = pasid ?: PCI_NO_PASID;
> + pc_info.pasid = pasid;
> break;
>
> case VTD_INV_DESC_PASIDC_G_GLOBAL:
> @@ -3291,6 +3270,7 @@ static void do_invalidate_device_tlb(VTDAddressSpace *vtd_dev_as,
> * ...
> */
>
> + uint32_t pasid = vtd_dev_as->pasid;
> IOMMUTLBEvent event;
> uint64_t sz;
>
> @@ -3307,7 +3287,8 @@ static void do_invalidate_device_tlb(VTDAddressSpace *vtd_dev_as,
> event.entry.iova = addr;
> event.entry.perm = IOMMU_NONE;
> event.entry.translated_addr = 0;
> - event.entry.pasid = vtd_dev_as->pasid;
> + /* Other sub-systems use PCI pasid */
> + event.entry.pasid = pasid == IOMMU_NO_PASID ? PCI_NO_PASID : pasid;
> memory_region_notify_iommu(&vtd_dev_as->iommu, 0, event);
> }
>
> @@ -3335,7 +3316,7 @@ static bool vtd_process_device_piotlb_desc(IntelIOMMUState *s,
> sid = VTD_INV_DESC_PASID_DEVICE_IOTLB_SID(inv_desc->lo);
> if (global) {
> QLIST_FOREACH(vtd_dev_as, &s->vtd_as_with_notifiers, next) {
> - if ((vtd_dev_as->pasid != PCI_NO_PASID) &&
> + if ((vtd_dev_as->pasid != IOMMU_NO_PASID) &&
> (PCI_BUILD_BDF(pci_bus_num(vtd_dev_as->bus),
> vtd_dev_as->devfn) == sid)) {
> do_invalidate_device_tlb(vtd_dev_as, size, addr);
> @@ -3983,13 +3964,12 @@ static void vtd_mem_write(void *opaque, hwaddr addr,
> }
>
> static void vtd_prepare_identity_entry(hwaddr addr, IOMMUAccessFlags perm,
> - uint32_t pasid, IOMMUTLBEntry *iotlb)
> + IOMMUTLBEntry *iotlb)
> {
> iotlb->iova = addr & VTD_PAGE_MASK_4K;
> iotlb->translated_addr = addr & VTD_PAGE_MASK_4K;
> iotlb->addr_mask = ~VTD_PAGE_MASK_4K;
> iotlb->perm = perm;
> - iotlb->pasid = pasid;
> }
>
> static inline void vtd_prepare_error_entry(IOMMUTLBEntry *entry)
> @@ -4001,6 +3981,10 @@ static inline void vtd_prepare_error_entry(IOMMUTLBEntry *entry)
> entry->pasid = PCI_NO_PASID;
> }
>
> +/*
> + * This function returns translation result to other sub-system such as PCI,
> + * so iommu pasid is converted to PCI pasid and returned in IOMMUTLBEntry.
> + */
> static IOMMUTLBEntry vtd_iommu_translate(IOMMUMemoryRegion *iommu, hwaddr addr,
> IOMMUAccessFlags flag, int iommu_idx)
> {
> @@ -4009,7 +3993,7 @@ static IOMMUTLBEntry vtd_iommu_translate(IOMMUMemoryRegion *iommu, hwaddr addr,
> IOMMUTLBEntry iotlb = {
> /* We'll fill in the rest later. */
> .target_as = &address_space_memory,
> - .pasid = vtd_as->pasid,
> + .pasid = vtd_as->pasid == IOMMU_NO_PASID ? PCI_NO_PASID : vtd_as->pasid,
> };
> bool success;
> bool is_write = flag & IOMMU_WO;
> @@ -4017,9 +4001,8 @@ static IOMMUTLBEntry vtd_iommu_translate(IOMMUMemoryRegion *iommu, hwaddr addr,
> if (likely(s->dmar_enabled)) {
> /* Only support translated requests in scalable mode */
> if (iommu_idx == VTD_IDX_TRANSLATED && s->root_scalable) {
> - if (vtd_as->pasid == PCI_NO_PASID) {
> - vtd_prepare_identity_entry(addr, IOMMU_RW, PCI_NO_PASID,
> - &iotlb);
> + if (vtd_as->pasid == IOMMU_NO_PASID) {
> + vtd_prepare_identity_entry(addr, IOMMU_RW, &iotlb);
> success = true;
> } else {
> vtd_prepare_error_entry(&iotlb);
> @@ -4034,7 +4017,7 @@ static IOMMUTLBEntry vtd_iommu_translate(IOMMUMemoryRegion *iommu, hwaddr addr,
> }
> } else {
> /* DMAR disabled, passthrough, use 4k-page*/
> - vtd_prepare_identity_entry(addr, IOMMU_RW, vtd_as->pasid, &iotlb);
> + vtd_prepare_identity_entry(addr, IOMMU_RW, &iotlb);
> success = true;
> }
>
> @@ -4460,7 +4443,7 @@ static void vtd_report_sid_ir_illegal_access(IntelIOMMUState *s, uint16_t sid,
> }
>
> vtd_report_fault(s, VTD_FR_SM_INTERRUPT_ADDR, is_fpd_set, sid, addr,
> - is_write, pasid != PCI_NO_PASID, pasid);
> + is_write, pasid != IOMMU_NO_PASID, pasid);
> }
>
> static void vtd_report_ir_illegal_access(VTDAddressSpace *vtd_as,
> @@ -4488,7 +4471,6 @@ static MemTxResult vtd_mem_ir_write(void *opaque, hwaddr addr,
> int ret = 0;
> MSIMessage from = {}, to = {};
> uint16_t sid = X86_IOMMU_SID_INVALID;
> - uint32_t pasid;
>
> from.address = (uint64_t) addr + VTD_INTERRUPT_ADDR_FIRST;
> from.data = (uint32_t) value;
> @@ -4496,11 +4478,11 @@ static MemTxResult vtd_mem_ir_write(void *opaque, hwaddr addr,
> if (!attrs.unspecified) {
> /* We have explicit Source ID */
> sid = attrs.requester_id;
> - pasid = attrs.pid != 0 ? attrs.pid : PCI_NO_PASID;
>
> if (attrs.address_type == PCI_AT_TRANSLATED &&
> sid != X86_IOMMU_SID_INVALID) {
> - vtd_report_sid_ir_illegal_access(s, sid, pasid, from.address, true);
> + vtd_report_sid_ir_illegal_access(s, sid, attrs.pid, from.address,
> + true);
> return MEMTX_ERROR;
> }
> }
> @@ -4562,9 +4544,19 @@ static const MemoryRegionOps vtd_mem_ir_fault_ops = {
> },
> };
>
> +/*
> + * This function is called by many PCIIOMMUOps callbacks to get
> + * VTDAddressSpace or create one if non-exist. Those callbacks are
> + * used by PCI sub-system and are passed in a PCI pasid value.
> + *
> + * VTD honors iommu pasid, so the first thing is to convert PCI
> + * pasid to iommu pasid.
> + */
> VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus,
> int devfn, unsigned int pasid)
> {
> + pasid = pasid == PCI_NO_PASID ? IOMMU_NO_PASID : pasid;
> +
> /*
> * We can't simply use sid here since the bus number might not be
> * initialized by the guest.
> @@ -4606,7 +4598,7 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus,
> new_key->devfn = devfn;
> new_key->pasid = pasid;
>
> - if (pasid == PCI_NO_PASID) {
> + if (pasid == IOMMU_NO_PASID) {
> snprintf(name, sizeof(name), "vtd-%02x.%x", PCI_SLOT(devfn),
> PCI_FUNC(devfn));
> } else {
> @@ -5290,7 +5282,7 @@ error_get_fpd_and_report:
> vtd_ce_get_pasid_fpd(s, &ce, &is_fpd_set, vtd_as->pasid);
> error_report:
> vtd_report_fault(s, -ret, is_fpd_set, sid, addr, is_write,
> - vtd_as->pasid != PCI_NO_PASID, vtd_as->pasid);
> + vtd_as->pasid != IOMMU_NO_PASID, vtd_as->pasid);
> return false;
> }
>
> @@ -5381,7 +5373,7 @@ static int vtd_pri_request_page(PCIBus *bus, void *opaque, int devfn,
> */
>
> /* We do not support PRI without PASID */
> - if (vtd_as->pasid == PCI_NO_PASID) {
> + if (vtd_as->pasid == IOMMU_NO_PASID) {
> return -EPERM;
> }
> if (exec_req && !is_read) {
> diff --git a/hw/i386/intel_iommu_accel.c b/hw/i386/intel_iommu_accel.c
> index 8940d240a1..10bdbba632 100644
> --- a/hw/i386/intel_iommu_accel.c
> +++ b/hw/i386/intel_iommu_accel.c
> @@ -207,7 +207,7 @@ static void vtd_flush_host_piotlb_locked(gpointer key, gpointer value,
> return;
> }
>
> - assert(vtd_as->pasid == PCI_NO_PASID);
> + assert(vtd_as->pasid == IOMMU_NO_PASID);
>
> /* Nothing to do if there is no first stage HWPT attached */
> if (!pc_entry->valid ||
^ permalink raw reply [flat|nested] 24+ messages in thread
* [PATCH v5 09/15] intel_iommu_accel: Handle PASID entry addition for pc_inv_dsc request
2026-05-09 4:07 [PATCH v5 00/15] intel_iommu: Enable PASID support for passthrough device Zhenzhong Duan
` (7 preceding siblings ...)
2026-05-09 4:08 ` [PATCH v5 08/15] intel_iommu: Refactor PASID processing to use IOMMU_NO_PASID internally Zhenzhong Duan
@ 2026-05-09 4:08 ` Zhenzhong Duan
2026-05-14 11:25 ` Yi Liu
2026-05-09 4:08 ` [PATCH v5 10/15] intel_iommu_accel: Handle PASID entry removal " Zhenzhong Duan
` (5 subsequent siblings)
14 siblings, 1 reply; 24+ messages in thread
From: Zhenzhong Duan @ 2026-05-09 4:08 UTC (permalink / raw)
To: qemu-devel
Cc: alex, clg, eric.auger, mst, jasowang, jgg, nicolinc, skolothumtho,
joao.m.martins, clement.mathieu--drif, kevin.tian, yi.l.liu,
xudong.hao, Zhenzhong Duan
Structure VTDAddressSpace includes some elements suitable for emulated
devices and passthrough devices without PASID, e.g., address space,
different memory regions, etc. It is also protected by the vtd iommu lock.
All of these are useless and become a burden for passthrough devices with
PASID.
When there are lots of PASIDs used in one device, the AS and MRs are
all registered to memory core and impact the whole system performance.
So instead of using VTDAddressSpace to cache pasid entry for each pasid
of a passthrough device, we define a light weight structure
VTDAccelPASIDCacheEntry with only necessary elements for each pasid. We
will use this struct as a parameter to conduct binding/unbinding to
nested hwpt and to record the current bound nested hwpt. It's also
designed to support IOMMU_NO_PASID.
VTDAccelPASIDCacheEntry is designed to only be used in intel_iommu_accel.c;
similarly, VTDPASIDCacheEntry should only be used in hw/i386/intel_iommu.c.
When guest creates new PASID entries, QEMU will capture the pc_inv_dsc
(pasid cache invalidation) request, walk through each pasid in each
passthrough device for valid pasid entries, create a new
VTDAccelPASIDCacheEntry if not existing yet.
IOMMU_NO_PASID of a passthrough device still needs to register MRs in case
the guest does not operate in scalable mode. So for IOMMU_NO_PASID, we have
both VTDPASIDCacheEntry and VTDAccelPASIDCacheEntry.
Co-developed-by: Yi Liu <yi.l.liu@intel.com>
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Tested-by: Xudong Hao <xudong.hao@intel.com>
---
hw/i386/intel_iommu_accel.h | 13 +++
hw/i386/intel_iommu_internal.h | 8 ++
hw/i386/intel_iommu.c | 3 +
hw/i386/intel_iommu_accel.c | 156 +++++++++++++++++++++++++++++++++
4 files changed, 180 insertions(+)
diff --git a/hw/i386/intel_iommu_accel.h b/hw/i386/intel_iommu_accel.h
index e5f0b077b4..c9b1823745 100644
--- a/hw/i386/intel_iommu_accel.h
+++ b/hw/i386/intel_iommu_accel.h
@@ -12,6 +12,13 @@
#define HW_I386_INTEL_IOMMU_ACCEL_H
#include CONFIG_DEVICES
+typedef struct VTDAccelPASIDCacheEntry {
+ VTDHostIOMMUDevice *vtd_hiod;
+ VTDPASIDEntry pasid_entry;
+ uint32_t pasid;
+ QLIST_ENTRY(VTDAccelPASIDCacheEntry) next;
+} VTDAccelPASIDCacheEntry;
+
#ifdef CONFIG_VTD_ACCEL
bool vtd_check_hiod_accel(IntelIOMMUState *s, VTDHostIOMMUDevice *vtd_hiod,
Error **errp);
@@ -20,6 +27,7 @@ bool vtd_propagate_guest_pasid(VTDAddressSpace *vtd_as, Error **errp);
void vtd_flush_host_piotlb_all_locked(IntelIOMMUState *s, uint16_t domain_id,
uint32_t pasid, hwaddr addr,
uint64_t npages, bool ih);
+void vtd_accel_pasid_cache_sync(IntelIOMMUState *s, VTDPASIDCacheInfo *pc_info);
void vtd_iommu_ops_update_accel(PCIIOMMUOps *ops);
#else
static inline bool vtd_check_hiod_accel(IntelIOMMUState *s,
@@ -49,6 +57,11 @@ static inline void vtd_flush_host_piotlb_all_locked(IntelIOMMUState *s,
{
}
+static inline void vtd_accel_pasid_cache_sync(IntelIOMMUState *s,
+ VTDPASIDCacheInfo *pc_info)
+{
+}
+
static inline void vtd_iommu_ops_update_accel(PCIIOMMUOps *ops)
{
}
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index 0141316f83..623dc24760 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -615,6 +615,7 @@ typedef struct VTDRootEntry VTDRootEntry;
#define VTD_CTX_ENTRY_LEGACY_SIZE 16
#define VTD_CTX_ENTRY_SCALABLE_SIZE 32
+#define VTD_SM_CONTEXT_ENTRY_PDTS(x) extract64((x)->val[0], 9, 3)
#define VTD_SM_CONTEXT_ENTRY_RSVD_VAL0(aw) (0x1e0ULL | ~VTD_HAW_MASK(aw))
#define VTD_SM_CONTEXT_ENTRY_RSVD_VAL1 0xffffffffffe00000ULL
#define VTD_SM_CONTEXT_ENTRY_PRE 0x10ULL
@@ -645,6 +646,7 @@ typedef struct VTDPIOTLBInvInfo {
#define VTD_PASID_DIR_BITS_MASK (0x3fffULL)
#define VTD_PASID_DIR_INDEX(pasid) (((pasid) >> 6) & VTD_PASID_DIR_BITS_MASK)
#define VTD_PASID_DIR_FPD (1ULL << 1) /* Fault Processing Disable */
+#define VTD_PASID_TABLE_ENTRY_NUM (1ULL << 6)
#define VTD_PASID_TABLE_BITS_MASK (0x3fULL)
#define VTD_PASID_TABLE_INDEX(pasid) ((pasid) & VTD_PASID_TABLE_BITS_MASK)
#define VTD_PASID_ENTRY_FPD (1ULL << 1) /* Fault Processing Disable */
@@ -710,6 +712,7 @@ typedef struct VTDHostIOMMUDevice {
PCIBus *bus;
uint8_t devfn;
HostIOMMUDevice *hiod;
+ QLIST_HEAD(, VTDAccelPASIDCacheEntry) pasid_cache_list;
} VTDHostIOMMUDevice;
/*
@@ -767,6 +770,11 @@ static inline int vtd_pasid_entry_compare(VTDPASIDEntry *p1, VTDPASIDEntry *p2)
return memcmp(p1, p2, sizeof(*p1));
}
+static inline uint32_t vtd_sm_ce_get_pdt_entry_num(VTDContextEntry *ce)
+{
+ return 1U << (VTD_SM_CONTEXT_ENTRY_PDTS(ce) + 7);
+}
+
int vtd_get_pdire_from_pdir_table(dma_addr_t pasid_dir_base, uint32_t pasid,
VTDPASIDDirEntry *pdire);
int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s, uint32_t pasid,
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index b50c556c40..e1e32959d3 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -3181,6 +3181,8 @@ static void vtd_pasid_cache_sync(IntelIOMMUState *s, VTDPASIDCacheInfo *pc_info)
g_hash_table_foreach(s->vtd_address_spaces, vtd_pasid_cache_sync_locked,
pc_info);
vtd_iommu_unlock(s);
+
+ vtd_accel_pasid_cache_sync(s, pc_info);
}
static void vtd_replay_pasid_bindings_all(IntelIOMMUState *s)
@@ -4751,6 +4753,7 @@ static bool vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int devfn,
vtd_hiod->devfn = (uint8_t)devfn;
vtd_hiod->iommu_state = s;
vtd_hiod->hiod = hiod;
+ QLIST_INIT(&vtd_hiod->pasid_cache_list);
if (!vtd_check_hiod(s, vtd_hiod, errp)) {
g_free(vtd_hiod);
diff --git a/hw/i386/intel_iommu_accel.c b/hw/i386/intel_iommu_accel.c
index 10bdbba632..a66d63b4c8 100644
--- a/hw/i386/intel_iommu_accel.c
+++ b/hw/i386/intel_iommu_accel.c
@@ -259,6 +259,162 @@ void vtd_flush_host_piotlb_all_locked(IntelIOMMUState *s, uint16_t domain_id,
vtd_flush_host_piotlb_locked, &piotlb_info);
}
+static void vtd_accel_fill_pc(VTDHostIOMMUDevice *vtd_hiod, uint32_t pasid,
+ VTDPASIDEntry *pe)
+{
+ VTDAccelPASIDCacheEntry *vtd_pce;
+
+ QLIST_FOREACH(vtd_pce, &vtd_hiod->pasid_cache_list, next) {
+ if (vtd_pce->pasid == pasid) {
+ if (vtd_pasid_entry_compare(pe, &vtd_pce->pasid_entry)) {
+ vtd_pce->pasid_entry = *pe;
+ }
+ return;
+ }
+ }
+
+ vtd_pce = g_malloc0(sizeof(VTDAccelPASIDCacheEntry));
+ vtd_pce->vtd_hiod = vtd_hiod;
+ vtd_pce->pasid = pasid;
+ vtd_pce->pasid_entry = *pe;
+ QLIST_INSERT_HEAD(&vtd_hiod->pasid_cache_list, vtd_pce, next);
+}
+
+/*
+ * This function walks over PASID range within [start, end) in a single
+ * PASID table for entries matching @info type/did, then create
+ * VTDAccelPASIDCacheEntry if not exist yet.
+ */
+static void vtd_sm_pasid_table_walk_one(VTDHostIOMMUDevice *vtd_hiod,
+ dma_addr_t pt_base, int start, int end,
+ VTDPASIDCacheInfo *info)
+{
+ IntelIOMMUState *s = vtd_hiod->iommu_state;
+ VTDPASIDEntry pe;
+ int pasid;
+
+ for (pasid = start; pasid < end; pasid++) {
+ if (vtd_get_pe_in_pasid_leaf_table(s, pasid, pt_base, &pe) ||
+ !vtd_pe_present(&pe)) {
+ continue;
+ }
+
+ if ((info->type == VTD_INV_DESC_PASIDC_G_DSI ||
+ info->type == VTD_INV_DESC_PASIDC_G_PASID_SI) &&
+ (info->did != VTD_SM_PASID_ENTRY_DID(&pe))) {
+ /*
+ * VTD_PASID_CACHE_DOMSI and VTD_PASID_CACHE_PASIDSI
+ * requires domain id check. If domain id check fail,
+ * go to next pasid.
+ */
+ continue;
+ }
+
+ vtd_accel_fill_pc(vtd_hiod, pasid, &pe);
+ }
+}
+
+/*
+ * In VT-d scalable mode translation, PASID dir + PASID table is used.
+ * This function aims at looping over a range of PASIDs in the given
+ * two level table to identify the pasid config in guest.
+ */
+static void vtd_sm_pasid_table_walk(VTDHostIOMMUDevice *vtd_hiod,
+ dma_addr_t pdt_base, int start, int end,
+ VTDPASIDCacheInfo *info)
+{
+ VTDPASIDDirEntry pdire;
+ int pasid = start;
+ int pasid_next;
+ dma_addr_t pt_base;
+
+ while (pasid < end) {
+ pasid_next = (pasid + VTD_PASID_TABLE_ENTRY_NUM) &
+ ~(VTD_PASID_TABLE_ENTRY_NUM - 1);
+ pasid_next = pasid_next < end ? pasid_next : end;
+
+ if (!vtd_get_pdire_from_pdir_table(pdt_base, pasid, &pdire)
+ && vtd_pdire_present(&pdire)) {
+ pt_base = pdire.val & VTD_PASID_TABLE_BASE_ADDR_MASK;
+ vtd_sm_pasid_table_walk_one(vtd_hiod, pt_base, pasid, pasid_next,
+ info);
+ }
+ pasid = pasid_next;
+ }
+}
+
+static void vtd_accel_replay_pasid_bind_for_dev(VTDHostIOMMUDevice *vtd_hiod,
+ int start, int end,
+ VTDPASIDCacheInfo *pc_info)
+{
+ IntelIOMMUState *s = vtd_hiod->iommu_state;
+ VTDContextEntry ce;
+ int dev_max_pasid = 1 << vtd_hiod->hiod->caps.max_pasid_log2;
+
+ if (!vtd_dev_to_context_entry(s, pci_bus_num(vtd_hiod->bus),
+ vtd_hiod->devfn, &ce)) {
+ VTDPASIDCacheInfo walk_info = *pc_info;
+ uint32_t ce_max_pasid = vtd_sm_ce_get_pdt_entry_num(&ce) *
+ VTD_PASID_TABLE_ENTRY_NUM;
+
+ end = MIN(end, MIN(dev_max_pasid, ce_max_pasid));
+
+ vtd_sm_pasid_table_walk(vtd_hiod, VTD_CE_GET_PASID_DIR_TABLE(&ce),
+ start, end, &walk_info);
+ }
+}
+
+/*
+ * This function replays the guest pasid bindings by walking the two level
+ * guest PASID table. For each valid pasid entry, it creates an entry
+ * VTDAccelPASIDCacheEntry dynamically if not exist yet. This entry holds
+ * info specific to a pasid
+ */
+void vtd_accel_pasid_cache_sync(IntelIOMMUState *s, VTDPASIDCacheInfo *pc_info)
+{
+ int start = IOMMU_NO_PASID, end = 1 << s->pasid;
+ VTDHostIOMMUDevice *vtd_hiod;
+ GHashTableIter hiod_it;
+
+ if (!s->fsts) {
+ return;
+ }
+
+ switch (pc_info->type) {
+ case VTD_INV_DESC_PASIDC_G_PASID_SI:
+ start = pc_info->pasid;
+ end = pc_info->pasid + 1;
+ /* fall through */
+ case VTD_INV_DESC_PASIDC_G_DSI:
+ /*
+ * loop all assigned devices, do domain id check in
+ * vtd_sm_pasid_table_walk_one() after get pasid entry.
+ */
+ break;
+ case VTD_INV_DESC_PASIDC_G_GLOBAL:
+ /* loop all assigned devices */
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ /*
+ * Loop all the vtd_hiod instances to sync the "pasid cache" per the
+ * guest pasid configuration.
+ *
+ * VTD translation callback never accesses vtd_hiod and its corresponding
+ * cached pasid entry, so no iommu lock needed here.
+ */
+ g_hash_table_iter_init(&hiod_it, s->vtd_host_iommu_dev);
+ while (g_hash_table_iter_next(&hiod_it, NULL, (void **)&vtd_hiod)) {
+ if (!object_dynamic_cast(OBJECT(vtd_hiod->hiod),
+ TYPE_HOST_IOMMU_DEVICE_IOMMUFD)) {
+ continue;
+ }
+ vtd_accel_replay_pasid_bind_for_dev(vtd_hiod, start, end, pc_info);
+ }
+}
+
static uint64_t vtd_get_host_iommu_quirks(uint32_t type,
void *caps, uint32_t size)
{
--
2.47.3
^ permalink raw reply related [flat|nested] 24+ messages in thread
* Re: [PATCH v5 09/15] intel_iommu_accel: Handle PASID entry addition for pc_inv_dsc request
2026-05-09 4:08 ` [PATCH v5 09/15] intel_iommu_accel: Handle PASID entry addition for pc_inv_dsc request Zhenzhong Duan
@ 2026-05-14 11:25 ` Yi Liu
0 siblings, 0 replies; 24+ messages in thread
From: Yi Liu @ 2026-05-14 11:25 UTC (permalink / raw)
To: Zhenzhong Duan, qemu-devel
Cc: alex, clg, eric.auger, mst, jasowang, jgg, nicolinc, skolothumtho,
joao.m.martins, clement.mathieu--drif, kevin.tian, xudong.hao
On 5/9/26 12:08, Zhenzhong Duan wrote:
> Structure VTDAddressSpace includes some elements suitable for emulated
> device and passthrough device without PASID, e.g., address space,
> different memory regions, etc, it is also protected by vtd iommu lock,
> all these are useless and become a burden for passthrough device with
> PASID.
>
> When there are lots of PASIDs used in one device, the AS and MRs are
> all registered to memory core and impact the whole system performance.
>
> So instead of using VTDAddressSpace to cache pasid entry for each pasid
> of a passthrough device, we define a light weight structure
> VTDAccelPASIDCacheEntry with only necessary elements for each pasid. We
> will use this struct as a parameter to conduct binding/unbinding to
> nested hwpt and to record the current bound nested hwpt. It's also
> designed to support IOMMU_NO_PASID.
>
> VTDAccelPASIDCacheEntry is designed to only be used in intel_iommu_accel.c,
> similarly VTDPASIDCacheEntry should only be used in hw/i386/intel_iommu.c
>
> When guest creates new PASID entries, QEMU will capture the pc_inv_dsc
> (pasid cache invalidation) request, walk through each pasid in each
> passthrough device for valid pasid entries, create a new
> VTDAccelPASIDCacheEntry if not existing yet.
>
> IOMMU_NO_PASID of passthrough device still needs to register MRs in case
> guest does not operate in scalable mode. So for IOMMU_NO_PASID, we have
> both VTDPASIDCacheEntry and VTDAccelPASIDCacheEntry.
The implementation LGTM, but I have a question to ask here.
VTDPASIDCacheEntry is cached in VTDAddressSpace, while
VTDAccelPASIDCacheEntry is cached in VTDHostIOMMUDevice. A natural
question is why VTDHostIOMMUDevice is used instead of VTDAddressSpace. I
think it would be valuable to document the reason for this choice. This
would help with maintaining it in the future, as we might forget the reason. :)
> Co-developed-by: Yi Liu <yi.l.liu@intel.com>
> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> Tested-by: Xudong Hao <xudong.hao@intel.com>
> ---
> hw/i386/intel_iommu_accel.h | 13 +++
> hw/i386/intel_iommu_internal.h | 8 ++
> hw/i386/intel_iommu.c | 3 +
> hw/i386/intel_iommu_accel.c | 156 +++++++++++++++++++++++++++++++++
> 4 files changed, 180 insertions(+)
>
> diff --git a/hw/i386/intel_iommu_accel.h b/hw/i386/intel_iommu_accel.h
> index e5f0b077b4..c9b1823745 100644
> --- a/hw/i386/intel_iommu_accel.h
> +++ b/hw/i386/intel_iommu_accel.h
> @@ -12,6 +12,13 @@
> #define HW_I386_INTEL_IOMMU_ACCEL_H
> #include CONFIG_DEVICES
>
> +typedef struct VTDAccelPASIDCacheEntry {
> + VTDHostIOMMUDevice *vtd_hiod;
> + VTDPASIDEntry pasid_entry;
> + uint32_t pasid;
> + QLIST_ENTRY(VTDAccelPASIDCacheEntry) next;
> +} VTDAccelPASIDCacheEntry;
> +
> #ifdef CONFIG_VTD_ACCEL
> bool vtd_check_hiod_accel(IntelIOMMUState *s, VTDHostIOMMUDevice *vtd_hiod,
> Error **errp);
> @@ -20,6 +27,7 @@ bool vtd_propagate_guest_pasid(VTDAddressSpace *vtd_as, Error **errp);
> void vtd_flush_host_piotlb_all_locked(IntelIOMMUState *s, uint16_t domain_id,
> uint32_t pasid, hwaddr addr,
> uint64_t npages, bool ih);
> +void vtd_accel_pasid_cache_sync(IntelIOMMUState *s, VTDPASIDCacheInfo *pc_info);
> void vtd_iommu_ops_update_accel(PCIIOMMUOps *ops);
> #else
> static inline bool vtd_check_hiod_accel(IntelIOMMUState *s,
> @@ -49,6 +57,11 @@ static inline void vtd_flush_host_piotlb_all_locked(IntelIOMMUState *s,
> {
> }
>
> +static inline void vtd_accel_pasid_cache_sync(IntelIOMMUState *s,
> + VTDPASIDCacheInfo *pc_info)
> +{
> +}
> +
> static inline void vtd_iommu_ops_update_accel(PCIIOMMUOps *ops)
> {
> }
> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
> index 0141316f83..623dc24760 100644
> --- a/hw/i386/intel_iommu_internal.h
> +++ b/hw/i386/intel_iommu_internal.h
> @@ -615,6 +615,7 @@ typedef struct VTDRootEntry VTDRootEntry;
> #define VTD_CTX_ENTRY_LEGACY_SIZE 16
> #define VTD_CTX_ENTRY_SCALABLE_SIZE 32
>
> +#define VTD_SM_CONTEXT_ENTRY_PDTS(x) extract64((x)->val[0], 9, 3)
> #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL0(aw) (0x1e0ULL | ~VTD_HAW_MASK(aw))
> #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL1 0xffffffffffe00000ULL
> #define VTD_SM_CONTEXT_ENTRY_PRE 0x10ULL
> @@ -645,6 +646,7 @@ typedef struct VTDPIOTLBInvInfo {
> #define VTD_PASID_DIR_BITS_MASK (0x3fffULL)
> #define VTD_PASID_DIR_INDEX(pasid) (((pasid) >> 6) & VTD_PASID_DIR_BITS_MASK)
> #define VTD_PASID_DIR_FPD (1ULL << 1) /* Fault Processing Disable */
> +#define VTD_PASID_TABLE_ENTRY_NUM (1ULL << 6)
> #define VTD_PASID_TABLE_BITS_MASK (0x3fULL)
> #define VTD_PASID_TABLE_INDEX(pasid) ((pasid) & VTD_PASID_TABLE_BITS_MASK)
> #define VTD_PASID_ENTRY_FPD (1ULL << 1) /* Fault Processing Disable */
> @@ -710,6 +712,7 @@ typedef struct VTDHostIOMMUDevice {
> PCIBus *bus;
> uint8_t devfn;
> HostIOMMUDevice *hiod;
> + QLIST_HEAD(, VTDAccelPASIDCacheEntry) pasid_cache_list;
> } VTDHostIOMMUDevice;
>
> /*
> @@ -767,6 +770,11 @@ static inline int vtd_pasid_entry_compare(VTDPASIDEntry *p1, VTDPASIDEntry *p2)
> return memcmp(p1, p2, sizeof(*p1));
> }
>
> +static inline uint32_t vtd_sm_ce_get_pdt_entry_num(VTDContextEntry *ce)
> +{
> + return 1U << (VTD_SM_CONTEXT_ENTRY_PDTS(ce) + 7);
> +}
> +
> int vtd_get_pdire_from_pdir_table(dma_addr_t pasid_dir_base, uint32_t pasid,
> VTDPASIDDirEntry *pdire);
> int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s, uint32_t pasid,
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index b50c556c40..e1e32959d3 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -3181,6 +3181,8 @@ static void vtd_pasid_cache_sync(IntelIOMMUState *s, VTDPASIDCacheInfo *pc_info)
> g_hash_table_foreach(s->vtd_address_spaces, vtd_pasid_cache_sync_locked,
> pc_info);
> vtd_iommu_unlock(s);
> +
> + vtd_accel_pasid_cache_sync(s, pc_info);
> }
>
> static void vtd_replay_pasid_bindings_all(IntelIOMMUState *s)
> @@ -4751,6 +4753,7 @@ static bool vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int devfn,
> vtd_hiod->devfn = (uint8_t)devfn;
> vtd_hiod->iommu_state = s;
> vtd_hiod->hiod = hiod;
> + QLIST_INIT(&vtd_hiod->pasid_cache_list);
>
> if (!vtd_check_hiod(s, vtd_hiod, errp)) {
> g_free(vtd_hiod);
> diff --git a/hw/i386/intel_iommu_accel.c b/hw/i386/intel_iommu_accel.c
> index 10bdbba632..a66d63b4c8 100644
> --- a/hw/i386/intel_iommu_accel.c
> +++ b/hw/i386/intel_iommu_accel.c
> @@ -259,6 +259,162 @@ void vtd_flush_host_piotlb_all_locked(IntelIOMMUState *s, uint16_t domain_id,
> vtd_flush_host_piotlb_locked, &piotlb_info);
> }
>
> +static void vtd_accel_fill_pc(VTDHostIOMMUDevice *vtd_hiod, uint32_t pasid,
> + VTDPASIDEntry *pe)
> +{
> + VTDAccelPASIDCacheEntry *vtd_pce;
> +
> + QLIST_FOREACH(vtd_pce, &vtd_hiod->pasid_cache_list, next) {
> + if (vtd_pce->pasid == pasid) {
> + if (vtd_pasid_entry_compare(pe, &vtd_pce->pasid_entry)) {
> + vtd_pce->pasid_entry = *pe;
> + }
> + return;
> + }
> + }
> +
> + vtd_pce = g_malloc0(sizeof(VTDAccelPASIDCacheEntry));
> + vtd_pce->vtd_hiod = vtd_hiod;
> + vtd_pce->pasid = pasid;
> + vtd_pce->pasid_entry = *pe;
> + QLIST_INSERT_HEAD(&vtd_hiod->pasid_cache_list, vtd_pce, next);
> +}
> +
> +/*
> + * This function walks over PASID range within [start, end) in a single
> + * PASID table for entries matching @info type/did, then create
> + * VTDAccelPASIDCacheEntry if not exist yet.
> + */
> +static void vtd_sm_pasid_table_walk_one(VTDHostIOMMUDevice *vtd_hiod,
> + dma_addr_t pt_base, int start, int end,
> + VTDPASIDCacheInfo *info)
> +{
> + IntelIOMMUState *s = vtd_hiod->iommu_state;
> + VTDPASIDEntry pe;
> + int pasid;
> +
> + for (pasid = start; pasid < end; pasid++) {
> + if (vtd_get_pe_in_pasid_leaf_table(s, pasid, pt_base, &pe) ||
> + !vtd_pe_present(&pe)) {
> + continue;
> + }
> +
> + if ((info->type == VTD_INV_DESC_PASIDC_G_DSI ||
> + info->type == VTD_INV_DESC_PASIDC_G_PASID_SI) &&
> + (info->did != VTD_SM_PASID_ENTRY_DID(&pe))) {
> + /*
> + * VTD_PASID_CACHE_DOMSI and VTD_PASID_CACHE_PASIDSI
> + * requires domain id check. If domain id check fail,
> + * go to next pasid.
> + */
> + continue;
> + }
> +
> + vtd_accel_fill_pc(vtd_hiod, pasid, &pe);
> + }
> +}
> +
> +/*
> + * In VT-d scalable mode translation, PASID dir + PASID table is used.
> + * This function aims at looping over a range of PASIDs in the given
> + * two level table to identify the pasid config in guest.
> + */
> +static void vtd_sm_pasid_table_walk(VTDHostIOMMUDevice *vtd_hiod,
> + dma_addr_t pdt_base, int start, int end,
> + VTDPASIDCacheInfo *info)
> +{
> + VTDPASIDDirEntry pdire;
> + int pasid = start;
> + int pasid_next;
> + dma_addr_t pt_base;
> +
> + while (pasid < end) {
> + pasid_next = (pasid + VTD_PASID_TABLE_ENTRY_NUM) &
> + ~(VTD_PASID_TABLE_ENTRY_NUM - 1);
> + pasid_next = pasid_next < end ? pasid_next : end;
> +
> + if (!vtd_get_pdire_from_pdir_table(pdt_base, pasid, &pdire)
> + && vtd_pdire_present(&pdire)) {
> + pt_base = pdire.val & VTD_PASID_TABLE_BASE_ADDR_MASK;
> + vtd_sm_pasid_table_walk_one(vtd_hiod, pt_base, pasid, pasid_next,
> + info);
> + }
> + pasid = pasid_next;
> + }
> +}
> +
> +static void vtd_accel_replay_pasid_bind_for_dev(VTDHostIOMMUDevice *vtd_hiod,
> + int start, int end,
> + VTDPASIDCacheInfo *pc_info)
> +{
> + IntelIOMMUState *s = vtd_hiod->iommu_state;
> + VTDContextEntry ce;
> + int dev_max_pasid = 1 << vtd_hiod->hiod->caps.max_pasid_log2;
> +
> + if (!vtd_dev_to_context_entry(s, pci_bus_num(vtd_hiod->bus),
> + vtd_hiod->devfn, &ce)) {
> + VTDPASIDCacheInfo walk_info = *pc_info;
> + uint32_t ce_max_pasid = vtd_sm_ce_get_pdt_entry_num(&ce) *
> + VTD_PASID_TABLE_ENTRY_NUM;
> +
> + end = MIN(end, MIN(dev_max_pasid, ce_max_pasid));
> +
> + vtd_sm_pasid_table_walk(vtd_hiod, VTD_CE_GET_PASID_DIR_TABLE(&ce),
> + start, end, &walk_info);
> + }
> +}
> +
> +/*
> + * This function replays the guest pasid bindings by walking the two level
> + * guest PASID table. For each valid pasid entry, it creates an entry
> + * VTDAccelPASIDCacheEntry dynamically if not exist yet. This entry holds
> + * info specific to a pasid
> + */
> +void vtd_accel_pasid_cache_sync(IntelIOMMUState *s, VTDPASIDCacheInfo *pc_info)
> +{
> + int start = IOMMU_NO_PASID, end = 1 << s->pasid;
> + VTDHostIOMMUDevice *vtd_hiod;
> + GHashTableIter hiod_it;
> +
> + if (!s->fsts) {
> + return;
> + }
> +
> + switch (pc_info->type) {
> + case VTD_INV_DESC_PASIDC_G_PASID_SI:
> + start = pc_info->pasid;
> + end = pc_info->pasid + 1;
> + /* fall through */
> + case VTD_INV_DESC_PASIDC_G_DSI:
> + /*
> + * loop all assigned devices, do domain id check in
> + * vtd_sm_pasid_table_walk_one() after get pasid entry.
> + */
> + break;
> + case VTD_INV_DESC_PASIDC_G_GLOBAL:
> + /* loop all assigned devices */
> + break;
> + default:
> + g_assert_not_reached();
> + }
> +
> + /*
> + * Loop all the vtd_hiod instances to sync the "pasid cache" per the
> + * guest pasid configuration.
> + *
> + * VTD translation callback never accesses vtd_hiod and its corresponding
> + * cached pasid entry, so no iommu lock needed here.
> + */
> + g_hash_table_iter_init(&hiod_it, s->vtd_host_iommu_dev);
> + while (g_hash_table_iter_next(&hiod_it, NULL, (void **)&vtd_hiod)) {
> + if (!object_dynamic_cast(OBJECT(vtd_hiod->hiod),
> + TYPE_HOST_IOMMU_DEVICE_IOMMUFD)) {
> + continue;
> + }
> + vtd_accel_replay_pasid_bind_for_dev(vtd_hiod, start, end, pc_info);
> + }
> +}
> +
> static uint64_t vtd_get_host_iommu_quirks(uint32_t type,
> void *caps, uint32_t size)
> {
^ permalink raw reply [flat|nested] 24+ messages in thread
* [PATCH v5 10/15] intel_iommu_accel: Handle PASID entry removal for pc_inv_dsc request
2026-05-09 4:07 [PATCH v5 00/15] intel_iommu: Enable PASID support for passthrough device Zhenzhong Duan
` (8 preceding siblings ...)
2026-05-09 4:08 ` [PATCH v5 09/15] intel_iommu_accel: Handle PASID entry addition for pc_inv_dsc request Zhenzhong Duan
@ 2026-05-09 4:08 ` Zhenzhong Duan
2026-05-14 11:25 ` Yi Liu
2026-05-09 4:08 ` [PATCH v5 11/15] intel_iommu_accel: Bypass PASID entry addition for just deleted entry Zhenzhong Duan
` (4 subsequent siblings)
14 siblings, 1 reply; 24+ messages in thread
From: Zhenzhong Duan @ 2026-05-09 4:08 UTC (permalink / raw)
To: qemu-devel
Cc: alex, clg, eric.auger, mst, jasowang, jgg, nicolinc, skolothumtho,
joao.m.martins, clement.mathieu--drif, kevin.tian, yi.l.liu,
xudong.hao, Zhenzhong Duan
When guest deletes PASID entries, QEMU will capture the pasid cache
invalidation request, walk through pasid_cache_list in each passthrough
device to find stale VTDAccelPASIDCacheEntry and delete them.
Co-developed-by: Yi Liu <yi.l.liu@intel.com>
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Tested-by: Xudong Hao <xudong.hao@intel.com>
---
hw/i386/intel_iommu_accel.c | 80 +++++++++++++++++++++++++++++++++++++
1 file changed, 80 insertions(+)
diff --git a/hw/i386/intel_iommu_accel.c b/hw/i386/intel_iommu_accel.c
index a66d63b4c8..82bfbdf484 100644
--- a/hw/i386/intel_iommu_accel.c
+++ b/hw/i386/intel_iommu_accel.c
@@ -16,6 +16,28 @@
#include "hw/pci/pci_bus.h"
#include "trace.h"
+static int vtd_hiod_get_pe_from_pasid(VTDAccelPASIDCacheEntry *vtd_pce,
+ VTDPASIDEntry *pe)
+{
+ VTDHostIOMMUDevice *vtd_hiod = vtd_pce->vtd_hiod;
+ IntelIOMMUState *s = vtd_hiod->iommu_state;
+ uint32_t pasid = vtd_pce->pasid;
+ VTDContextEntry ce;
+ int ret;
+
+ if (!s->dmar_enabled || !s->root_scalable) {
+ return -VTD_FR_RTADDR_INV_TTM;
+ }
+
+ ret = vtd_dev_to_context_entry(s, pci_bus_num(vtd_hiod->bus),
+ vtd_hiod->devfn, &ce);
+ if (ret) {
+ return ret;
+ }
+
+ return vtd_ce_get_pasid_entry(s, &ce, pe, pasid);
+}
+
bool vtd_check_hiod_accel(IntelIOMMUState *s, VTDHostIOMMUDevice *vtd_hiod,
Error **errp)
{
@@ -280,6 +302,57 @@ static void vtd_accel_fill_pc(VTDHostIOMMUDevice *vtd_hiod, uint32_t pasid,
QLIST_INSERT_HEAD(&vtd_hiod->pasid_cache_list, vtd_pce, next);
}
+static void vtd_accel_delete_pc(VTDAccelPASIDCacheEntry *vtd_pce)
+{
+ QLIST_REMOVE(vtd_pce, next);
+ g_free(vtd_pce);
+}
+
+static void
+vtd_accel_pasid_cache_invalidate_one(VTDAccelPASIDCacheEntry *vtd_pce,
+ VTDPASIDCacheInfo *pc_info)
+{
+ VTDPASIDEntry pe;
+ uint16_t did;
+
+ /*
+ * VTD_INV_DESC_PASIDC_G_DSI and VTD_INV_DESC_PASIDC_G_PASID_SI require
+ * DID check. If DID doesn't match the value in cache or memory, then
+ * it's not a pasid entry we want to invalidate.
+ */
+ switch (pc_info->type) {
+ case VTD_INV_DESC_PASIDC_G_PASID_SI:
+ if (pc_info->pasid != vtd_pce->pasid) {
+ return;
+ }
+ /* Fall through */
+ case VTD_INV_DESC_PASIDC_G_DSI:
+ did = VTD_SM_PASID_ENTRY_DID(&vtd_pce->pasid_entry);
+ if (pc_info->did != did) {
+ return;
+ }
+ }
+
+ if (vtd_hiod_get_pe_from_pasid(vtd_pce, &pe)) {
+ /*
+ * No valid pasid entry in guest memory. e.g. pasid entry was modified
+ * to be either all-zero or non-present. Either case means existing
+ * pasid cache should be invalidated.
+ */
+ vtd_accel_delete_pc(vtd_pce);
+ }
+}
+
+static void vtd_accel_pasid_cache_invalidate(VTDHostIOMMUDevice *vtd_hiod,
+ VTDPASIDCacheInfo *pc_info)
+{
+ VTDAccelPASIDCacheEntry *vtd_pce, *next;
+
+ QLIST_FOREACH_SAFE(vtd_pce, &vtd_hiod->pasid_cache_list, next, next) {
+ vtd_accel_pasid_cache_invalidate_one(vtd_pce, pc_info);
+ }
+}
+
/*
* This function walks over PASID range within [start, end) in a single
* PASID table for entries matching @info type/did, then create
@@ -411,6 +484,13 @@ void vtd_accel_pasid_cache_sync(IntelIOMMUState *s, VTDPASIDCacheInfo *pc_info)
TYPE_HOST_IOMMU_DEVICE_IOMMUFD)) {
continue;
}
+
+ /*
+ * PASID entry removal is handled before addition intentionally,
+ * because it's unnecessary to iterate on an entry that will be
+ * removed.
+ */
+ vtd_accel_pasid_cache_invalidate(vtd_hiod, pc_info);
vtd_accel_replay_pasid_bind_for_dev(vtd_hiod, start, end, pc_info);
}
}
--
2.47.3
^ permalink raw reply related [flat|nested] 24+ messages in thread
* Re: [PATCH v5 10/15] intel_iommu_accel: Handle PASID entry removal for pc_inv_dsc request
2026-05-09 4:08 ` [PATCH v5 10/15] intel_iommu_accel: Handle PASID entry removal " Zhenzhong Duan
@ 2026-05-14 11:25 ` Yi Liu
0 siblings, 0 replies; 24+ messages in thread
From: Yi Liu @ 2026-05-14 11:25 UTC (permalink / raw)
To: Zhenzhong Duan, qemu-devel
Cc: alex, clg, eric.auger, mst, jasowang, jgg, nicolinc, skolothumtho,
joao.m.martins, clement.mathieu--drif, kevin.tian, xudong.hao
On 5/9/26 12:08, Zhenzhong Duan wrote:
> When guest deletes PASID entries, QEMU will capture the pasid cache
> invalidation request, walk through pasid_cache_list in each passthrough
> device to find stale VTDAccelPASIDCacheEntry and delete them.
>
> Co-developed-by: Yi Liu <yi.l.liu@intel.com>
> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> Tested-by: Xudong Hao <xudong.hao@intel.com>
> ---
> hw/i386/intel_iommu_accel.c | 80 +++++++++++++++++++++++++++++++++++++
> 1 file changed, 80 insertions(+)
>
> diff --git a/hw/i386/intel_iommu_accel.c b/hw/i386/intel_iommu_accel.c
> index a66d63b4c8..82bfbdf484 100644
> --- a/hw/i386/intel_iommu_accel.c
> +++ b/hw/i386/intel_iommu_accel.c
> @@ -16,6 +16,28 @@
> #include "hw/pci/pci_bus.h"
> #include "trace.h"
>
> +static int vtd_hiod_get_pe_from_pasid(VTDAccelPASIDCacheEntry *vtd_pce,
This helper is tricky. vtd_pce already has a pe cached, so why is this
needed? I think it's better to have a helper that receives s, devfn, and
pasid instead of accepting vtd_pce. BTW, I think intel_iommu.c
should already have such a helper, doesn't it?
> + VTDPASIDEntry *pe)
> +{
> + VTDHostIOMMUDevice *vtd_hiod = vtd_pce->vtd_hiod;
> + IntelIOMMUState *s = vtd_hiod->iommu_state;
> + uint32_t pasid = vtd_pce->pasid;
> + VTDContextEntry ce;
> + int ret;
> +
> + if (!s->dmar_enabled || !s->root_scalable) {
> + return -VTD_FR_RTADDR_INV_TTM;
> + }
> +
> + ret = vtd_dev_to_context_entry(s, pci_bus_num(vtd_hiod->bus),
> + vtd_hiod->devfn, &ce);
> + if (ret) {
> + return ret;
> + }
> +
> + return vtd_ce_get_pasid_entry(s, &ce, pe, pasid);
> +}
> +
> bool vtd_check_hiod_accel(IntelIOMMUState *s, VTDHostIOMMUDevice *vtd_hiod,
> Error **errp)
> {
> @@ -280,6 +302,57 @@ static void vtd_accel_fill_pc(VTDHostIOMMUDevice *vtd_hiod, uint32_t pasid,
> QLIST_INSERT_HEAD(&vtd_hiod->pasid_cache_list, vtd_pce, next);
> }
>
> +static void vtd_accel_delete_pc(VTDAccelPASIDCacheEntry *vtd_pce)
> +{
> + QLIST_REMOVE(vtd_pce, next);
> + g_free(vtd_pce);
> +}
> +
> +static void
> +vtd_accel_pasid_cache_invalidate_one(VTDAccelPASIDCacheEntry *vtd_pce,
> + VTDPASIDCacheInfo *pc_info)
> +{
> + VTDPASIDEntry pe;
> + uint16_t did;
> +
> + /*
> + * VTD_INV_DESC_PASIDC_G_DSI and VTD_INV_DESC_PASIDC_G_PASID_SI require
> + * DID check. If DID doesn't match the value in cache or memory, then
> + * it's not a pasid entry we want to invalidate.
> + */
> + switch (pc_info->type) {
> + case VTD_INV_DESC_PASIDC_G_PASID_SI:
> + if (pc_info->pasid != vtd_pce->pasid) {
> + return;
> + }
> + /* Fall through */
> + case VTD_INV_DESC_PASIDC_G_DSI:
> + did = VTD_SM_PASID_ENTRY_DID(&vtd_pce->pasid_entry);
> + if (pc_info->did != did) {
> + return;
> + }
> + }
> +
> + if (vtd_hiod_get_pe_from_pasid(vtd_pce, &pe)) {
> + /*
> + * No valid pasid entry in guest memory. e.g. pasid entry was modified
> + * to be either all-zero or non-present. Either case means existing
> + * pasid cache should be invalidated.
> + */
> + vtd_accel_delete_pc(vtd_pce);
> + }
> +}
> +
> +static void vtd_accel_pasid_cache_invalidate(VTDHostIOMMUDevice *vtd_hiod,
> + VTDPASIDCacheInfo *pc_info)
> +{
> + VTDAccelPASIDCacheEntry *vtd_pce, *next;
> +
> + QLIST_FOREACH_SAFE(vtd_pce, &vtd_hiod->pasid_cache_list, next, next) {
> + vtd_accel_pasid_cache_invalidate_one(vtd_pce, pc_info);
> + }
> +}
> +
> /*
> * This function walks over PASID range within [start, end) in a single
> * PASID table for entries matching @info type/did, then create
> @@ -411,6 +484,13 @@ void vtd_accel_pasid_cache_sync(IntelIOMMUState *s, VTDPASIDCacheInfo *pc_info)
> TYPE_HOST_IOMMU_DEVICE_IOMMUFD)) {
> continue;
> }
> +
> + /*
> + * PASID entry removal is handled before addition intentionally,
> + * because it's unnecessary to iterate on an entry that will be
> + * removed.
how about below? :)
/*
* The replay path inevitably needs to iterate through existing
* PASID cache entries. Since cached PASID entries that are marked
* for removal don't need to be iterated, we intentionally handle
* removals before additions to optimize the replay process.
*/
> + */
> + vtd_accel_pasid_cache_invalidate(vtd_hiod, pc_info);
> vtd_accel_replay_pasid_bind_for_dev(vtd_hiod, start, end, pc_info);
> }
> }
^ permalink raw reply [flat|nested] 24+ messages in thread
* [PATCH v5 11/15] intel_iommu_accel: Bypass PASID entry addition for just deleted entry
2026-05-09 4:07 [PATCH v5 00/15] intel_iommu: Enable PASID support for passthrough device Zhenzhong Duan
` (9 preceding siblings ...)
2026-05-09 4:08 ` [PATCH v5 10/15] intel_iommu_accel: Handle PASID entry removal " Zhenzhong Duan
@ 2026-05-09 4:08 ` Zhenzhong Duan
2026-05-14 11:28 ` Yi Liu
2026-05-09 4:08 ` [PATCH v5 12/15] intel_iommu_accel: Handle PASID entry removal for system reset Zhenzhong Duan
` (3 subsequent siblings)
14 siblings, 1 reply; 24+ messages in thread
From: Zhenzhong Duan @ 2026-05-09 4:08 UTC (permalink / raw)
To: qemu-devel
Cc: alex, clg, eric.auger, mst, jasowang, jgg, nicolinc, skolothumtho,
joao.m.martins, clement.mathieu--drif, kevin.tian, yi.l.liu,
xudong.hao, Zhenzhong Duan
For VTD_INV_DESC_PASIDC_G_PASID_SI typed pc_inv_dsc invalidation, if a
pasid entry is just removed, it can never be a new entry to add. So
calling vtd_accel_replay_pasid_bind_for_dev() is unnecessary.
Introduce a new field accel_pce_deleted in VTDPASIDCacheInfo to mark
this case and to do the bypassing.
Suggested-by: Yi Liu <yi.l.liu@intel.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Tested-by: Xudong Hao <xudong.hao@intel.com>
---
hw/i386/intel_iommu_internal.h | 1 +
hw/i386/intel_iommu_accel.c | 16 +++++++++++++---
2 files changed, 14 insertions(+), 3 deletions(-)
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index 623dc24760..2c716c5297 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -630,6 +630,7 @@ typedef struct VTDPASIDCacheInfo {
uint8_t type;
uint16_t did;
uint32_t pasid;
+ bool accel_pce_deleted;
} VTDPASIDCacheInfo;
typedef struct VTDPIOTLBInvInfo {
diff --git a/hw/i386/intel_iommu_accel.c b/hw/i386/intel_iommu_accel.c
index 82bfbdf484..07a1e41a95 100644
--- a/hw/i386/intel_iommu_accel.c
+++ b/hw/i386/intel_iommu_accel.c
@@ -302,10 +302,15 @@ static void vtd_accel_fill_pc(VTDHostIOMMUDevice *vtd_hiod, uint32_t pasid,
QLIST_INSERT_HEAD(&vtd_hiod->pasid_cache_list, vtd_pce, next);
}
-static void vtd_accel_delete_pc(VTDAccelPASIDCacheEntry *vtd_pce)
+static void vtd_accel_delete_pc(VTDAccelPASIDCacheEntry *vtd_pce,
+ VTDPASIDCacheInfo *pc_info)
{
QLIST_REMOVE(vtd_pce, next);
g_free(vtd_pce);
+
+ if (pc_info->type == VTD_INV_DESC_PASIDC_G_PASID_SI) {
+ pc_info->accel_pce_deleted = true;
+ }
}
static void
@@ -339,7 +344,7 @@ vtd_accel_pasid_cache_invalidate_one(VTDAccelPASIDCacheEntry *vtd_pce,
* to be either all-zero or non-present. Either case means existing
* pasid cache should be invalidated.
*/
- vtd_accel_delete_pc(vtd_pce);
+ vtd_accel_delete_pc(vtd_pce, pc_info);
}
}
@@ -491,7 +496,12 @@ void vtd_accel_pasid_cache_sync(IntelIOMMUState *s, VTDPASIDCacheInfo *pc_info)
* removed.
*/
vtd_accel_pasid_cache_invalidate(vtd_hiod, pc_info);
- vtd_accel_replay_pasid_bind_for_dev(vtd_hiod, start, end, pc_info);
+
+ if (pc_info->accel_pce_deleted) {
+ pc_info->accel_pce_deleted = false;
+ } else {
+ vtd_accel_replay_pasid_bind_for_dev(vtd_hiod, start, end, pc_info);
+ }
}
}
--
2.47.3
^ permalink raw reply related [flat|nested] 24+ messages in thread
* Re: [PATCH v5 11/15] intel_iommu_accel: Bypass PASID entry addition for just deleted entry
2026-05-09 4:08 ` [PATCH v5 11/15] intel_iommu_accel: Bypass PASID entry addition for just deleted entry Zhenzhong Duan
@ 2026-05-14 11:28 ` Yi Liu
0 siblings, 0 replies; 24+ messages in thread
From: Yi Liu @ 2026-05-14 11:28 UTC (permalink / raw)
To: Zhenzhong Duan, qemu-devel
Cc: alex, clg, eric.auger, mst, jasowang, jgg, nicolinc, skolothumtho,
joao.m.martins, clement.mathieu--drif, kevin.tian, xudong.hao
On 5/9/26 12:08, Zhenzhong Duan wrote:
> For VTD_INV_DESC_PASIDC_G_PASID_SI typed pc_inv_dsc invalidation, if a
> pasid entry is just removed, it can never be a new entry to add. So
> calling vtd_accel_replay_pasid_bind_for_dev() is unnecessary.
>
> Introduce a new field accel_pce_deleted in VTDPASIDCacheInfo to mark
> this case and to do the bypassing.
>
> Suggested-by: Yi Liu <yi.l.liu@intel.com>
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> Tested-by: Xudong Hao <xudong.hao@intel.com>
> ---
> hw/i386/intel_iommu_internal.h | 1 +
> hw/i386/intel_iommu_accel.c | 16 +++++++++++++---
> 2 files changed, 14 insertions(+), 3 deletions(-)
>
> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
> index 623dc24760..2c716c5297 100644
> --- a/hw/i386/intel_iommu_internal.h
> +++ b/hw/i386/intel_iommu_internal.h
> @@ -630,6 +630,7 @@ typedef struct VTDPASIDCacheInfo {
> uint8_t type;
> uint16_t did;
> uint32_t pasid;
> + bool accel_pce_deleted;
> } VTDPASIDCacheInfo;
>
> typedef struct VTDPIOTLBInvInfo {
> diff --git a/hw/i386/intel_iommu_accel.c b/hw/i386/intel_iommu_accel.c
> index 82bfbdf484..07a1e41a95 100644
> --- a/hw/i386/intel_iommu_accel.c
> +++ b/hw/i386/intel_iommu_accel.c
> @@ -302,10 +302,15 @@ static void vtd_accel_fill_pc(VTDHostIOMMUDevice *vtd_hiod, uint32_t pasid,
> QLIST_INSERT_HEAD(&vtd_hiod->pasid_cache_list, vtd_pce, next);
> }
>
> -static void vtd_accel_delete_pc(VTDAccelPASIDCacheEntry *vtd_pce)
> +static void vtd_accel_delete_pc(VTDAccelPASIDCacheEntry *vtd_pce,
> + VTDPASIDCacheInfo *pc_info)
> {
> QLIST_REMOVE(vtd_pce, next);
> g_free(vtd_pce);
> +
> + if (pc_info->type == VTD_INV_DESC_PASIDC_G_PASID_SI) {
> + pc_info->accel_pce_deleted = true;
> + }
> }
>
> static void
> @@ -339,7 +344,7 @@ vtd_accel_pasid_cache_invalidate_one(VTDAccelPASIDCacheEntry *vtd_pce,
> * to be either all-zero or non-present. Either case means existing
> * pasid cache should be invalidated.
> */
> - vtd_accel_delete_pc(vtd_pce);
> + vtd_accel_delete_pc(vtd_pce, pc_info);
> }
> }
>
> @@ -491,7 +496,12 @@ void vtd_accel_pasid_cache_sync(IntelIOMMUState *s, VTDPASIDCacheInfo *pc_info)
> * removed.
> */
> vtd_accel_pasid_cache_invalidate(vtd_hiod, pc_info);
> - vtd_accel_replay_pasid_bind_for_dev(vtd_hiod, start, end, pc_info);
> +
> + if (pc_info->accel_pce_deleted) {
> + pc_info->accel_pce_deleted = false;
> + } else {
> + vtd_accel_replay_pasid_bind_for_dev(vtd_hiod, start, end, pc_info);
> + }
> }
> }
>
LGTM.
Reviewed-by: Yi Liu <yi.l.liu@intel.com>
^ permalink raw reply [flat|nested] 24+ messages in thread
* [PATCH v5 12/15] intel_iommu_accel: Handle PASID entry removal for system reset
2026-05-09 4:07 [PATCH v5 00/15] intel_iommu: Enable PASID support for passthrough device Zhenzhong Duan
` (10 preceding siblings ...)
2026-05-09 4:08 ` [PATCH v5 11/15] intel_iommu_accel: Bypass PASID entry addition for just deleted entry Zhenzhong Duan
@ 2026-05-09 4:08 ` Zhenzhong Duan
2026-05-09 4:08 ` [PATCH v5 13/15] intel_iommu_accel: Switch to VTDAccelPASIDCacheEntry for PASID bind/unbind and PIOTLB invalidation Zhenzhong Duan
` (2 subsequent siblings)
14 siblings, 0 replies; 24+ messages in thread
From: Zhenzhong Duan @ 2026-05-09 4:08 UTC (permalink / raw)
To: qemu-devel
Cc: alex, clg, eric.auger, mst, jasowang, jgg, nicolinc, skolothumtho,
joao.m.martins, clement.mathieu--drif, kevin.tian, yi.l.liu,
xudong.hao, Zhenzhong Duan
On a system-level reset, DMA translation is turned off, so all PASID
entries become stale and should be deleted.
The vtd_hiod list is never accessed without the BQL, so there is no need
to guard it with the iommu lock.
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Reviewed-by: Yi Liu <yi.l.liu@intel.com>
Tested-by: Xudong Hao <xudong.hao@intel.com>
---
hw/i386/intel_iommu_accel.h | 5 +++++
hw/i386/intel_iommu.c | 2 ++
hw/i386/intel_iommu_accel.c | 13 +++++++++++++
3 files changed, 20 insertions(+)
diff --git a/hw/i386/intel_iommu_accel.h b/hw/i386/intel_iommu_accel.h
index c9b1823745..a2226b28b6 100644
--- a/hw/i386/intel_iommu_accel.h
+++ b/hw/i386/intel_iommu_accel.h
@@ -28,6 +28,7 @@ void vtd_flush_host_piotlb_all_locked(IntelIOMMUState *s, uint16_t domain_id,
uint32_t pasid, hwaddr addr,
uint64_t npages, bool ih);
void vtd_accel_pasid_cache_sync(IntelIOMMUState *s, VTDPASIDCacheInfo *pc_info);
+void vtd_accel_pasid_cache_reset(IntelIOMMUState *s);
void vtd_iommu_ops_update_accel(PCIIOMMUOps *ops);
#else
static inline bool vtd_check_hiod_accel(IntelIOMMUState *s,
@@ -62,6 +63,10 @@ static inline void vtd_accel_pasid_cache_sync(IntelIOMMUState *s,
{
}
+static inline void vtd_accel_pasid_cache_reset(IntelIOMMUState *s)
+{
+}
+
static inline void vtd_iommu_ops_update_accel(PCIIOMMUOps *ops)
{
}
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index e1e32959d3..ac1b5f4d79 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -391,6 +391,8 @@ static void vtd_reset_caches(IntelIOMMUState *s)
vtd_reset_context_cache_locked(s);
vtd_pasid_cache_reset_locked(s);
vtd_iommu_unlock(s);
+
+ vtd_accel_pasid_cache_reset(s);
}
static uint64_t vtd_get_iotlb_gfn(hwaddr addr, uint32_t level)
diff --git a/hw/i386/intel_iommu_accel.c b/hw/i386/intel_iommu_accel.c
index 07a1e41a95..b1c1cd719a 100644
--- a/hw/i386/intel_iommu_accel.c
+++ b/hw/i386/intel_iommu_accel.c
@@ -505,6 +505,19 @@ void vtd_accel_pasid_cache_sync(IntelIOMMUState *s, VTDPASIDCacheInfo *pc_info)
}
}
+/* Fake a global pasid cache invalidation to remove all pasid cache entries */
+void vtd_accel_pasid_cache_reset(IntelIOMMUState *s)
+{
+ VTDPASIDCacheInfo pc_info = { .type = VTD_INV_DESC_PASIDC_G_GLOBAL };
+ VTDHostIOMMUDevice *vtd_hiod;
+ GHashTableIter hiod_it;
+
+ g_hash_table_iter_init(&hiod_it, s->vtd_host_iommu_dev);
+ while (g_hash_table_iter_next(&hiod_it, NULL, (void **)&vtd_hiod)) {
+ vtd_accel_pasid_cache_invalidate(vtd_hiod, &pc_info);
+ }
+}
+
static uint64_t vtd_get_host_iommu_quirks(uint32_t type,
void *caps, uint32_t size)
{
--
2.47.3
^ permalink raw reply related [flat|nested] 24+ messages in thread
* [PATCH v5 13/15] intel_iommu_accel: Switch to VTDAccelPASIDCacheEntry for PASID bind/unbind and PIOTLB invalidation
2026-05-09 4:07 [PATCH v5 00/15] intel_iommu: Enable PASID support for passthrough device Zhenzhong Duan
` (11 preceding siblings ...)
2026-05-09 4:08 ` [PATCH v5 12/15] intel_iommu_accel: Handle PASID entry removal for system reset Zhenzhong Duan
@ 2026-05-09 4:08 ` Zhenzhong Duan
2026-05-09 4:08 ` [PATCH v5 14/15] intel_iommu_accel: Add pasid bits size check Zhenzhong Duan
2026-05-09 4:08 ` [PATCH v5 15/15] intel_iommu: Expose flag VIOMMU_FLAG_PASID_SUPPORTED and VIOMMU_FLAG_WANT_PASID_ATTACH Zhenzhong Duan
14 siblings, 0 replies; 24+ messages in thread
From: Zhenzhong Duan @ 2026-05-09 4:08 UTC (permalink / raw)
To: qemu-devel
Cc: alex, clg, eric.auger, mst, jasowang, jgg, nicolinc, skolothumtho,
joao.m.martins, clement.mathieu--drif, kevin.tian, yi.l.liu,
xudong.hao, Zhenzhong Duan
This patch switches from VTDAddressSpace to VTDAccelPASIDCacheEntry for
handling PASID bind/unbind operations and PIOTLB invalidations in
passthrough scenarios. VTDAccelPASIDCacheEntry was introduced to cache
PASID entries for passthrough devices and is now ready to propagate
PASID bind/unbind operations and PIOTLB invalidations to the host.
Unlike the previous approach, VTDAccelPASIDCacheEntry supports both
IOMMU_NO_PASID (rid_pasid) and other valid PASIDs, so this switch drops
IOMMU_NO_PASID limitations that existed in the prior PASID bind/unbind
and PIOTLB invalidation path. For IOMMU_NO_PASID of passthrough devices,
VTDAddressSpace continues to handle shadow page modifications to the
host, but no longer manages PASID bind/unbind operations or PIOTLB
invalidations for passthrough scenarios.
Co-developed-by: Yi Liu <yi.l.liu@intel.com>
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Tested-by: Xudong Hao <xudong.hao@intel.com>
---
hw/i386/intel_iommu_accel.h | 2 +-
include/hw/i386/intel_iommu.h | 2 -
hw/i386/intel_iommu.c | 17 +----
hw/i386/intel_iommu_accel.c | 131 +++++++++++++++++-----------------
4 files changed, 68 insertions(+), 84 deletions(-)
diff --git a/hw/i386/intel_iommu_accel.h b/hw/i386/intel_iommu_accel.h
index a2226b28b6..4a9003c92d 100644
--- a/hw/i386/intel_iommu_accel.h
+++ b/hw/i386/intel_iommu_accel.h
@@ -16,6 +16,7 @@ typedef struct VTDAccelPASIDCacheEntry {
VTDHostIOMMUDevice *vtd_hiod;
VTDPASIDEntry pasid_entry;
uint32_t pasid;
+ uint32_t fs_hwpt_id;
QLIST_ENTRY(VTDAccelPASIDCacheEntry) next;
} VTDAccelPASIDCacheEntry;
@@ -23,7 +24,6 @@ typedef struct VTDAccelPASIDCacheEntry {
bool vtd_check_hiod_accel(IntelIOMMUState *s, VTDHostIOMMUDevice *vtd_hiod,
Error **errp);
VTDHostIOMMUDevice *vtd_find_hiod_iommufd(VTDAddressSpace *as);
-bool vtd_propagate_guest_pasid(VTDAddressSpace *vtd_as, Error **errp);
void vtd_flush_host_piotlb_all_locked(IntelIOMMUState *s, uint16_t domain_id,
uint32_t pasid, hwaddr addr,
uint64_t npages, bool ih);
diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
index 95c76015e4..1842ba5840 100644
--- a/include/hw/i386/intel_iommu.h
+++ b/include/hw/i386/intel_iommu.h
@@ -154,8 +154,6 @@ struct VTDAddressSpace {
* with the guest IOMMU pgtables for a device.
*/
IOVATree *iova_tree;
-
- uint32_t fs_hwpt_id;
};
struct VTDIOTLBEntry {
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index ac1b5f4d79..6067069e02 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -86,8 +86,6 @@ static void vtd_pasid_cache_reset_locked(IntelIOMMUState *s)
VTDPASIDCacheEntry *pc_entry = &vtd_as->pasid_cache_entry;
if (pc_entry->valid) {
pc_entry->valid = false;
- /* It's fatal to get failure during reset */
- vtd_propagate_guest_pasid(vtd_as, &error_fatal);
}
}
}
@@ -3105,8 +3103,6 @@ static void vtd_pasid_cache_sync_locked(gpointer key, gpointer value,
VTDPASIDEntry pe;
IOMMUNotifier *n;
uint16_t did;
- const char *err_prefix = "Attaching to HWPT failed: ";
- Error *local_err = NULL;
if (vtd_dev_get_pe_from_pasid(vtd_as, &pe)) {
if (!pc_entry->valid) {
@@ -3127,9 +3123,6 @@ static void vtd_pasid_cache_sync_locked(gpointer key, gpointer value,
vtd_address_space_unmap(vtd_as, n);
}
vtd_switch_address_space(vtd_as);
-
- err_prefix = "Detaching from HWPT failed: ";
- goto do_bind_unbind;
}
/*
@@ -3157,20 +3150,12 @@ static void vtd_pasid_cache_sync_locked(gpointer key, gpointer value,
if (!pc_entry->valid) {
pc_entry->pasid_entry = pe;
pc_entry->valid = true;
- } else if (vtd_pasid_entry_compare(&pe, &pc_entry->pasid_entry)) {
- err_prefix = "Replacing HWPT attachment failed: ";
- } else {
+ } else if (!vtd_pasid_entry_compare(&pe, &pc_entry->pasid_entry)) {
return;
}
vtd_switch_address_space(vtd_as);
vtd_address_space_sync(vtd_as);
-
-do_bind_unbind:
- /* TODO: Fault event injection into guest, report error to QEMU for now */
- if (!vtd_propagate_guest_pasid(vtd_as, &local_err)) {
- error_reportf_err(local_err, "%s", err_prefix);
- }
}
static void vtd_pasid_cache_sync(IntelIOMMUState *s, VTDPASIDCacheInfo *pc_info)
diff --git a/hw/i386/intel_iommu_accel.c b/hw/i386/intel_iommu_accel.c
index b1c1cd719a..4ddf66262c 100644
--- a/hw/i386/intel_iommu_accel.c
+++ b/hw/i386/intel_iommu_accel.c
@@ -111,24 +111,25 @@ static bool vtd_create_fs_hwpt(VTDHostIOMMUDevice *vtd_hiod,
fs_hwpt_id, errp);
}
-static void vtd_destroy_old_fs_hwpt(VTDHostIOMMUDevice *vtd_hiod,
- VTDAddressSpace *vtd_as)
+static void vtd_destroy_old_fs_hwpt(VTDAccelPASIDCacheEntry *vtd_pce)
{
- HostIOMMUDeviceIOMMUFD *hiodi = HOST_IOMMU_DEVICE_IOMMUFD(vtd_hiod->hiod);
+ HostIOMMUDeviceIOMMUFD *hiodi =
+ HOST_IOMMU_DEVICE_IOMMUFD(vtd_pce->vtd_hiod->hiod);
- if (!vtd_as->fs_hwpt_id) {
+ if (!vtd_pce->fs_hwpt_id) {
return;
}
- iommufd_backend_free_id(hiodi->iommufd, vtd_as->fs_hwpt_id);
- vtd_as->fs_hwpt_id = 0;
+ iommufd_backend_free_id(hiodi->iommufd, vtd_pce->fs_hwpt_id);
+ vtd_pce->fs_hwpt_id = 0;
}
-static bool vtd_device_attach_iommufd(VTDHostIOMMUDevice *vtd_hiod,
- VTDAddressSpace *vtd_as, Error **errp)
+static bool vtd_device_attach_iommufd(VTDAccelPASIDCacheEntry *vtd_pce,
+ Error **errp)
{
+ VTDHostIOMMUDevice *vtd_hiod = vtd_pce->vtd_hiod;
HostIOMMUDeviceIOMMUFD *hiodi = HOST_IOMMU_DEVICE_IOMMUFD(vtd_hiod->hiod);
- VTDPASIDEntry *pe = &vtd_as->pasid_cache_entry.pasid_entry;
- uint32_t hwpt_id = hiodi->hwpt_id;
+ VTDPASIDEntry *pe = &vtd_pce->pasid_entry;
+ uint32_t hwpt_id = hiodi->hwpt_id, pasid = vtd_pce->pasid;
bool ret;
/*
@@ -148,14 +149,13 @@ static bool vtd_device_attach_iommufd(VTDHostIOMMUDevice *vtd_hiod,
}
}
- ret = host_iommu_device_iommufd_attach_hwpt(hiodi, IOMMU_NO_PASID, hwpt_id,
- errp);
- trace_vtd_device_attach_hwpt(hiodi->devid, IOMMU_NO_PASID, hwpt_id, ret);
+ ret = host_iommu_device_iommufd_attach_hwpt(hiodi, pasid, hwpt_id, errp);
+ trace_vtd_device_attach_hwpt(hiodi->devid, pasid, hwpt_id, ret);
if (ret) {
/* Destroy old fs_hwpt if it's a replacement */
- vtd_destroy_old_fs_hwpt(vtd_hiod, vtd_as);
+ vtd_destroy_old_fs_hwpt(vtd_pce);
if (vtd_pe_pgtt_is_fst(pe)) {
- vtd_as->fs_hwpt_id = hwpt_id;
+ vtd_pce->fs_hwpt_id = hwpt_id;
}
} else if (vtd_pe_pgtt_is_fst(pe)) {
iommufd_backend_free_id(hiodi->iommufd, hwpt_id);
@@ -164,17 +164,19 @@ static bool vtd_device_attach_iommufd(VTDHostIOMMUDevice *vtd_hiod,
return ret;
}
-static bool vtd_device_detach_iommufd(VTDHostIOMMUDevice *vtd_hiod,
- VTDAddressSpace *vtd_as, Error **errp)
+static bool vtd_device_detach_iommufd(VTDAccelPASIDCacheEntry *vtd_pce,
+ Error **errp)
{
+ VTDHostIOMMUDevice *vtd_hiod = vtd_pce->vtd_hiod;
HostIOMMUDeviceIOMMUFD *hiodi = HOST_IOMMU_DEVICE_IOMMUFD(vtd_hiod->hiod);
- IntelIOMMUState *s = vtd_as->iommu_state;
+
+ IntelIOMMUState *s = vtd_hiod->iommu_state;
+ uint32_t pasid = vtd_pce->pasid;
bool ret;
- if (s->dmar_enabled && s->root_scalable) {
- ret = host_iommu_device_iommufd_detach_hwpt(hiodi, IOMMU_NO_PASID,
- errp);
- trace_vtd_device_detach_hwpt(hiodi->devid, IOMMU_NO_PASID, ret);
+ if (pasid != IOMMU_NO_PASID || (s->dmar_enabled && s->root_scalable)) {
+ ret = host_iommu_device_iommufd_detach_hwpt(hiodi, pasid, errp);
+ trace_vtd_device_detach_hwpt(hiodi->devid, pasid, ret);
} else {
/*
* If DMAR remapping is disabled or guest switches to legacy mode,
@@ -188,58 +190,32 @@ static bool vtd_device_detach_iommufd(VTDHostIOMMUDevice *vtd_hiod,
}
if (ret) {
- vtd_destroy_old_fs_hwpt(vtd_hiod, vtd_as);
+ vtd_destroy_old_fs_hwpt(vtd_pce);
}
return ret;
}
-bool vtd_propagate_guest_pasid(VTDAddressSpace *vtd_as, Error **errp)
-{
- VTDPASIDCacheEntry *pc_entry = &vtd_as->pasid_cache_entry;
- VTDHostIOMMUDevice *vtd_hiod = vtd_find_hiod_iommufd(vtd_as);
-
- /* Ignore emulated device or legacy VFIO backed device */
- if (!vtd_as->iommu_state->fsts || !vtd_hiod) {
- return true;
- }
-
- if (pc_entry->valid) {
- return vtd_device_attach_iommufd(vtd_hiod, vtd_as, errp);
- }
-
- return vtd_device_detach_iommufd(vtd_hiod, vtd_as, errp);
-}
-
/*
- * This function is a loop function for the s->vtd_address_spaces
- * list with VTDPIOTLBInvInfo as execution filter. It propagates
- * the piotlb invalidation to host.
+ * This function is a loop function for the s->vtd_host_iommu_dev
+ * and vtd_hiod->pasid_cache_list lists with VTDPIOTLBInvInfo as
+ * execution filter. It propagates the piotlb invalidation to host.
*/
-static void vtd_flush_host_piotlb_locked(gpointer key, gpointer value,
- gpointer user_data)
+static void vtd_flush_host_piotlb_locked(VTDAccelPASIDCacheEntry *vtd_pce,
+ VTDPIOTLBInvInfo *piotlb_info)
{
- VTDPIOTLBInvInfo *piotlb_info = user_data;
- VTDAddressSpace *vtd_as = value;
- VTDHostIOMMUDevice *vtd_hiod = vtd_find_hiod_iommufd(vtd_as);
- VTDPASIDCacheEntry *pc_entry = &vtd_as->pasid_cache_entry;
+ VTDHostIOMMUDevice *vtd_hiod = vtd_pce->vtd_hiod;
+ VTDPASIDEntry *pe = &vtd_pce->pasid_entry;
uint16_t did;
- if (!vtd_hiod) {
- return;
- }
-
- assert(vtd_as->pasid == IOMMU_NO_PASID);
-
/* Nothing to do if there is no first stage HWPT attached */
- if (!pc_entry->valid ||
- !vtd_pe_pgtt_is_fst(&pc_entry->pasid_entry)) {
+ if (!vtd_pe_pgtt_is_fst(pe)) {
return;
}
- did = VTD_SM_PASID_ENTRY_DID(&pc_entry->pasid_entry);
+ did = VTD_SM_PASID_ENTRY_DID(pe);
- if (piotlb_info->domain_id == did && piotlb_info->pasid == IOMMU_NO_PASID) {
+ if (piotlb_info->domain_id == did && piotlb_info->pasid == vtd_pce->pasid) {
HostIOMMUDeviceIOMMUFD *hiodi =
HOST_IOMMU_DEVICE_IOMMUFD(vtd_hiod->hiod);
uint32_t entry_num = 1; /* Only implement one request for simplicity */
@@ -247,7 +223,7 @@ static void vtd_flush_host_piotlb_locked(gpointer key, gpointer value,
struct iommu_hwpt_vtd_s1_invalidate *cache = piotlb_info->inv_data;
if (!iommufd_backend_invalidate_cache(hiodi->iommufd,
- vtd_as->fs_hwpt_id,
+ vtd_pce->fs_hwpt_id,
IOMMU_HWPT_INVALIDATE_DATA_VTD_S1,
sizeof(*cache), &entry_num, cache,
&local_err)) {
@@ -263,6 +239,8 @@ void vtd_flush_host_piotlb_all_locked(IntelIOMMUState *s, uint16_t domain_id,
{
struct iommu_hwpt_vtd_s1_invalidate cache_info = { 0 };
VTDPIOTLBInvInfo piotlb_info;
+ VTDHostIOMMUDevice *vtd_hiod;
+ GHashTableIter hiod_it;
cache_info.addr = addr;
cache_info.npages = npages;
@@ -273,23 +251,36 @@ void vtd_flush_host_piotlb_all_locked(IntelIOMMUState *s, uint16_t domain_id,
piotlb_info.inv_data = &cache_info;
/*
- * Go through each vtd_as instance in s->vtd_address_spaces, find out
- * affected host devices which need host piotlb invalidation. Piotlb
- * invalidation should check pasid cache per architecture point of view.
+ * Go through each vtd_pce in vtd_hiod->pasid_cache_list for each host
+ * device, find out affected host device pasid which need host piotlb
+ * invalidation. Piotlb invalidation should check pasid cache per
+ * architecture point of view.
*/
- g_hash_table_foreach(s->vtd_address_spaces,
- vtd_flush_host_piotlb_locked, &piotlb_info);
+ g_hash_table_iter_init(&hiod_it, s->vtd_host_iommu_dev);
+ while (g_hash_table_iter_next(&hiod_it, NULL, (void **)&vtd_hiod)) {
+ VTDAccelPASIDCacheEntry *vtd_pce;
+
+ QLIST_FOREACH(vtd_pce, &vtd_hiod->pasid_cache_list, next) {
+ vtd_flush_host_piotlb_locked(vtd_pce, &piotlb_info);
+ }
+ }
}
static void vtd_accel_fill_pc(VTDHostIOMMUDevice *vtd_hiod, uint32_t pasid,
VTDPASIDEntry *pe)
{
VTDAccelPASIDCacheEntry *vtd_pce;
+ Error *local_err = NULL;
QLIST_FOREACH(vtd_pce, &vtd_hiod->pasid_cache_list, next) {
if (vtd_pce->pasid == pasid) {
if (vtd_pasid_entry_compare(pe, &vtd_pce->pasid_entry)) {
vtd_pce->pasid_entry = *pe;
+
+ if (!vtd_device_attach_iommufd(vtd_pce, &local_err)) {
+ error_reportf_err(local_err, "%s",
+ "Replacing HWPT attachment failed: ");
+ }
}
return;
}
@@ -300,11 +291,21 @@ static void vtd_accel_fill_pc(VTDHostIOMMUDevice *vtd_hiod, uint32_t pasid,
vtd_pce->pasid = pasid;
vtd_pce->pasid_entry = *pe;
QLIST_INSERT_HEAD(&vtd_hiod->pasid_cache_list, vtd_pce, next);
+
+ if (!vtd_device_attach_iommufd(vtd_pce, &local_err)) {
+ error_reportf_err(local_err, "%s", "Attaching to HWPT failed: ");
+ }
}
static void vtd_accel_delete_pc(VTDAccelPASIDCacheEntry *vtd_pce,
VTDPASIDCacheInfo *pc_info)
{
+ Error *local_err = NULL;
+
+ if (!vtd_device_detach_iommufd(vtd_pce, &local_err)) {
+ error_reportf_err(local_err, "%s", "Detaching from HWPT failed: ");
+ }
+
QLIST_REMOVE(vtd_pce, next);
g_free(vtd_pce);
--
2.47.3
^ permalink raw reply related [flat|nested] 24+ messages in thread* [PATCH v5 14/15] intel_iommu_accel: Add pasid bits size check
2026-05-09 4:07 [PATCH v5 00/15] intel_iommu: Enable PASID support for passthrough device Zhenzhong Duan
` (12 preceding siblings ...)
2026-05-09 4:08 ` [PATCH v5 13/15] intel_iommu_accel: Switch to VTDAccelPASIDCacheEntry for PASID bind/unbind and PIOTLB invalidation Zhenzhong Duan
@ 2026-05-09 4:08 ` Zhenzhong Duan
2026-05-14 11:25 ` Yi Liu
2026-05-09 4:08 ` [PATCH v5 15/15] intel_iommu: Expose flag VIOMMU_FLAG_PASID_SUPPORTED and VIOMMU_FLAG_WANT_PASID_ATTACH Zhenzhong Duan
14 siblings, 1 reply; 24+ messages in thread
From: Zhenzhong Duan @ 2026-05-09 4:08 UTC (permalink / raw)
To: qemu-devel
Cc: alex, clg, eric.auger, mst, jasowang, jgg, nicolinc, skolothumtho,
joao.m.martins, clement.mathieu--drif, kevin.tian, yi.l.liu,
xudong.hao, Zhenzhong Duan
If the vIOMMU's PASID bits size is bigger than the host IOMMU's, the host
could fail to emulate all bindings in the guest. Add a check to fail device
plug early.
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Tested-by: Xudong Hao <xudong.hao@intel.com>
Reviewed-by: Clement Mathieu--Drif <clement.mathieu--drif@bull.com>
---
hw/i386/intel_iommu_internal.h | 1 +
hw/i386/intel_iommu_accel.c | 8 ++++++++
2 files changed, 9 insertions(+)
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index 2c716c5297..519af3fa90 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -196,6 +196,7 @@
#define VTD_ECAP_SRS (1ULL << 31)
#define VTD_ECAP_NWFS (1ULL << 33)
#define VTD_ECAP_SET_PSS(x, v) ((x)->ecap = deposit64((x)->ecap, 35, 5, v))
+#define VTD_ECAP_GET_PSS(ecap) extract64(ecap, 35, 5)
#define VTD_ECAP_PASID (1ULL << 40)
#define VTD_ECAP_PDS (1ULL << 42)
#define VTD_ECAP_SMTS (1ULL << 43)
diff --git a/hw/i386/intel_iommu_accel.c b/hw/i386/intel_iommu_accel.c
index 4ddf66262c..a0dd6b0ee0 100644
--- a/hw/i386/intel_iommu_accel.c
+++ b/hw/i386/intel_iommu_accel.c
@@ -44,6 +44,7 @@ bool vtd_check_hiod_accel(IntelIOMMUState *s, VTDHostIOMMUDevice *vtd_hiod,
HostIOMMUDevice *hiod = vtd_hiod->hiod;
struct HostIOMMUDeviceCaps *caps = &hiod->caps;
struct iommu_hw_info_vtd *vtd = &caps->vendor_caps.vtd;
+ uint8_t hpasid = VTD_ECAP_GET_PSS(vtd->ecap_reg) + 1;
PCIBus *bus = vtd_hiod->bus;
PCIDevice *pdev = bus->devices[vtd_hiod->devfn];
@@ -64,6 +65,13 @@ bool vtd_check_hiod_accel(IntelIOMMUState *s, VTDHostIOMMUDevice *vtd_hiod,
return false;
}
+ /* Only do the check when host device support PASIDs */
+ if (caps->max_pasid_log2 && s->pasid > hpasid) {
+ error_setg(errp, "PASID bits size %d > host IOMMU PASID bits size %d",
+ s->pasid, hpasid);
+ return false;
+ }
+
if (pci_device_get_iommu_bus_devfn(pdev, &bus, NULL, NULL)) {
error_setg(errp, "Host device downstream to a PCI bridge is "
"unsupported when x-flts=on");
--
2.47.3
^ permalink raw reply related [flat|nested] 24+ messages in thread* Re: [PATCH v5 14/15] intel_iommu_accel: Add pasid bits size check
2026-05-09 4:08 ` [PATCH v5 14/15] intel_iommu_accel: Add pasid bits size check Zhenzhong Duan
@ 2026-05-14 11:25 ` Yi Liu
0 siblings, 0 replies; 24+ messages in thread
From: Yi Liu @ 2026-05-14 11:25 UTC (permalink / raw)
To: Zhenzhong Duan, qemu-devel
Cc: alex, clg, eric.auger, mst, jasowang, jgg, nicolinc, skolothumtho,
joao.m.martins, clement.mathieu--drif, kevin.tian, xudong.hao
On 5/9/26 12:08, Zhenzhong Duan wrote:
> If pasid bits size is bigger than host side, host could fail to emulate
> all bindings in guest. Add a check to fail device plug early.
>
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> Tested-by: Xudong Hao <xudong.hao@intel.com>
> Reviewed-by: Clement Mathieu--Drif <clement.mathieu--drif@bull.com>
> ---
> hw/i386/intel_iommu_internal.h | 1 +
> hw/i386/intel_iommu_accel.c | 8 ++++++++
> 2 files changed, 9 insertions(+)
>
> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
> index 2c716c5297..519af3fa90 100644
> --- a/hw/i386/intel_iommu_internal.h
> +++ b/hw/i386/intel_iommu_internal.h
> @@ -196,6 +196,7 @@
> #define VTD_ECAP_SRS (1ULL << 31)
> #define VTD_ECAP_NWFS (1ULL << 33)
> #define VTD_ECAP_SET_PSS(x, v) ((x)->ecap = deposit64((x)->ecap, 35, 5, v))
> +#define VTD_ECAP_GET_PSS(ecap) extract64(ecap, 35, 5)
> #define VTD_ECAP_PASID (1ULL << 40)
> #define VTD_ECAP_PDS (1ULL << 42)
> #define VTD_ECAP_SMTS (1ULL << 43)
> diff --git a/hw/i386/intel_iommu_accel.c b/hw/i386/intel_iommu_accel.c
> index 4ddf66262c..a0dd6b0ee0 100644
> --- a/hw/i386/intel_iommu_accel.c
> +++ b/hw/i386/intel_iommu_accel.c
> @@ -44,6 +44,7 @@ bool vtd_check_hiod_accel(IntelIOMMUState *s, VTDHostIOMMUDevice *vtd_hiod,
> HostIOMMUDevice *hiod = vtd_hiod->hiod;
> struct HostIOMMUDeviceCaps *caps = &hiod->caps;
> struct iommu_hw_info_vtd *vtd = &caps->vendor_caps.vtd;
> + uint8_t hpasid = VTD_ECAP_GET_PSS(vtd->ecap_reg) + 1;
> PCIBus *bus = vtd_hiod->bus;
> PCIDevice *pdev = bus->devices[vtd_hiod->devfn];
>
> @@ -64,6 +65,13 @@ bool vtd_check_hiod_accel(IntelIOMMUState *s, VTDHostIOMMUDevice *vtd_hiod,
> return false;
> }
>
> + /* Only do the check when host device support PASIDs */
> + if (caps->max_pasid_log2 && s->pasid > hpasid) {
The second comparison looks strange. hpasid is derived from ecap_reg,
while ecap_reg is from s->pasid... Is there any place that changes
the PSS field of ecap_reg afterward? I think this check should be
against caps->max_pasid_log2, as this is the value from hardware. Right?
> + error_setg(errp, "PASID bits size %d > host IOMMU PASID bits size %d",
> + s->pasid, hpasid);
> + return false;
> + }
> +
> if (pci_device_get_iommu_bus_devfn(pdev, &bus, NULL, NULL)) {
> error_setg(errp, "Host device downstream to a PCI bridge is "
> "unsupported when x-flts=on");
^ permalink raw reply [flat|nested] 24+ messages in thread
* [PATCH v5 15/15] intel_iommu: Expose flag VIOMMU_FLAG_PASID_SUPPORTED and VIOMMU_FLAG_WANT_PASID_ATTACH
2026-05-09 4:07 [PATCH v5 00/15] intel_iommu: Enable PASID support for passthrough device Zhenzhong Duan
` (13 preceding siblings ...)
2026-05-09 4:08 ` [PATCH v5 14/15] intel_iommu_accel: Add pasid bits size check Zhenzhong Duan
@ 2026-05-09 4:08 ` Zhenzhong Duan
2026-05-14 11:25 ` Yi Liu
14 siblings, 1 reply; 24+ messages in thread
From: Zhenzhong Duan @ 2026-05-09 4:08 UTC (permalink / raw)
To: qemu-devel
Cc: alex, clg, eric.auger, mst, jasowang, jgg, nicolinc, skolothumtho,
joao.m.martins, clement.mathieu--drif, kevin.tian, yi.l.liu,
xudong.hao, Zhenzhong Duan
A VFIO device checks the flag VIOMMU_FLAG_PASID_SUPPORTED to expose the PASID
capability, and checks VIOMMU_FLAG_WANT_PASID_ATTACH to enable PASID
attachment. Without these flags, the guest cannot enable PASID on the device
even if the vIOMMU's pasid is configured.
We don't expose the two flags when first stage is not configured as we
don't support shadow page table on a PASID.
This is the final knob to enable PASID.
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Tested-by: Xudong Hao <xudong.hao@intel.com>
---
hw/i386/intel_iommu.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 6067069e02..70546e91d4 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -4788,6 +4788,11 @@ static uint64_t vtd_get_viommu_flags(void *opaque)
if (s->fsts) {
flags = VIOMMU_FLAG_WANT_NESTING_PARENT |
VIOMMU_FLAG_WANT_NESTING_DIRTY_TRACKING;
+
+ if (s->pasid) {
+ flags |= VIOMMU_FLAG_PASID_SUPPORTED |
+ VIOMMU_FLAG_WANT_PASID_ATTACH;
+ }
}
return flags;
--
2.47.3
^ permalink raw reply related [flat|nested] 24+ messages in thread* Re: [PATCH v5 15/15] intel_iommu: Expose flag VIOMMU_FLAG_PASID_SUPPORTED and VIOMMU_FLAG_WANT_PASID_ATTACH
2026-05-09 4:08 ` [PATCH v5 15/15] intel_iommu: Expose flag VIOMMU_FLAG_PASID_SUPPORTED and VIOMMU_FLAG_WANT_PASID_ATTACH Zhenzhong Duan
@ 2026-05-14 11:25 ` Yi Liu
0 siblings, 0 replies; 24+ messages in thread
From: Yi Liu @ 2026-05-14 11:25 UTC (permalink / raw)
To: Zhenzhong Duan, qemu-devel
Cc: alex, clg, eric.auger, mst, jasowang, jgg, nicolinc, skolothumtho,
joao.m.martins, clement.mathieu--drif, kevin.tian, xudong.hao
On 5/9/26 12:08, Zhenzhong Duan wrote:
> VFIO device will check flag VIOMMU_FLAG_PASID_SUPPORTED and expose PASID
> capability, also check VIOMMU_FLAG_WANT_PASID_ATTACH to enable pasid
> attachment, without those guest could not enable PASID of this device even
> if vIOMMU's pasid is configured.
>
> We don't expose the two flags when first stage is not configured as we
nit: s/first stage/first stage translation/
> don't support shadow page table on a PASID.
>
> This is the final knob to enable PASID.
>
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> Tested-by: Xudong Hao <xudong.hao@intel.com>
> ---
> hw/i386/intel_iommu.c | 5 +++++
> 1 file changed, 5 insertions(+)
>
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index 6067069e02..70546e91d4 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -4788,6 +4788,11 @@ static uint64_t vtd_get_viommu_flags(void *opaque)
> if (s->fsts) {
> flags = VIOMMU_FLAG_WANT_NESTING_PARENT |
> VIOMMU_FLAG_WANT_NESTING_DIRTY_TRACKING;
> +
> + if (s->pasid) {
> + flags |= VIOMMU_FLAG_PASID_SUPPORTED |
> + VIOMMU_FLAG_WANT_PASID_ATTACH;
> + }
> }
>
> return flags;
Reviewed-by: Yi Liu <yi.l.liu@intel.com>
^ permalink raw reply [flat|nested] 24+ messages in thread