* [PATCH v1 01/13] vfio/iommufd: Extend attach/detach_hwpt callback implementations with pasid
2026-03-06 3:43 [PATCH v1 00/13] intel_iommu: Enable PASID support for passthrough device Zhenzhong Duan
@ 2026-03-06 3:43 ` Zhenzhong Duan
2026-03-18 11:55 ` Yi Liu
2026-03-06 3:43 ` [PATCH v1 02/13] iommufd: Extend attach/detach_hwpt callbacks to support pasid Zhenzhong Duan
` (11 subsequent siblings)
12 siblings, 1 reply; 45+ messages in thread
From: Zhenzhong Duan @ 2026-03-06 3:43 UTC (permalink / raw)
To: qemu-devel
Cc: alex, clg, eric.auger, mst, jasowang, jgg, nicolinc, skolothumtho,
joao.m.martins, clement.mathieu--drif, kevin.tian, yi.l.liu,
xudong.hao, Zhenzhong Duan
To attach a device with a pasid, the pasid must be passed in together with the
VFIO_DEVICE_ATTACH_PASID flag.
Define IOMMU_NO_PASID to represent device attachment without a pasid, the same
as in the kernel.
Detachment is implemented similarly, using the VFIO_DEVICE_DETACH_PASID flag.
Suggested-by: Shameer Kolothum Thodi <skolothumtho@nvidia.com>
Suggested-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
---
include/hw/core/iommu.h | 2 ++
hw/vfio/iommufd.c | 44 +++++++++++++++++++++++++----------------
hw/vfio/trace-events | 4 ++--
3 files changed, 31 insertions(+), 19 deletions(-)
diff --git a/include/hw/core/iommu.h b/include/hw/core/iommu.h
index 86af315c15..bfcd511013 100644
--- a/include/hw/core/iommu.h
+++ b/include/hw/core/iommu.h
@@ -28,4 +28,6 @@ enum host_iommu_quirks {
HOST_IOMMU_QUIRK_NESTING_PARENT_BYPASS_RO = BIT_ULL(0),
};
+#define IOMMU_NO_PASID 0
+
#endif /* HW_IOMMU_H */
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
index 131612eb83..b4c5e81b1d 100644
--- a/hw/vfio/iommufd.c
+++ b/hw/vfio/iommufd.c
@@ -21,6 +21,7 @@
#include "qapi/error.h"
#include "system/iommufd.h"
#include "hw/core/qdev.h"
+#include "hw/core/iommu.h"
#include "hw/vfio/vfio-cpr.h"
#include "system/reset.h"
#include "qemu/cutils.h"
@@ -302,43 +303,48 @@ out:
return ret;
}
-static int iommufd_cdev_attach_ioas_hwpt(VFIODevice *vbasedev, uint32_t id,
- Error **errp)
+static int iommufd_cdev_pasid_attach_ioas_hwpt(VFIODevice *vbasedev,
+ uint32_t pasid, uint32_t id,
+ Error **errp)
{
int iommufd = vbasedev->iommufd->fd;
struct vfio_device_attach_iommufd_pt attach_data = {
.argsz = sizeof(attach_data),
- .flags = 0,
+ .flags = pasid == IOMMU_NO_PASID ? 0 : VFIO_DEVICE_ATTACH_PASID,
+ .pasid = pasid,
.pt_id = id,
};
/* Attach device to an IOAS or hwpt within iommufd */
if (ioctl(vbasedev->fd, VFIO_DEVICE_ATTACH_IOMMUFD_PT, &attach_data)) {
error_setg_errno(errp, errno,
- "[iommufd=%d] error attach %s (%d) to id=%d",
- iommufd, vbasedev->name, vbasedev->fd, id);
+ "[iommufd=%d] error attach %s (%d) pasid %d to id=%d",
+ iommufd, vbasedev->name, vbasedev->fd, pasid, id);
return -errno;
}
- trace_iommufd_cdev_attach_ioas_hwpt(iommufd, vbasedev->name,
- vbasedev->fd, id);
+ trace_iommufd_cdev_pasid_attach_ioas_hwpt(iommufd, vbasedev->name,
+ vbasedev->fd, pasid, id);
return 0;
}
-static bool iommufd_cdev_detach_ioas_hwpt(VFIODevice *vbasedev, Error **errp)
+static bool iommufd_cdev_pasid_detach_ioas_hwpt(VFIODevice *vbasedev,
+ uint32_t pasid, Error **errp)
{
int iommufd = vbasedev->iommufd->fd;
struct vfio_device_detach_iommufd_pt detach_data = {
.argsz = sizeof(detach_data),
- .flags = 0,
+ .flags = pasid == IOMMU_NO_PASID ? 0 : VFIO_DEVICE_DETACH_PASID,
+ .pasid = pasid,
};
if (ioctl(vbasedev->fd, VFIO_DEVICE_DETACH_IOMMUFD_PT, &detach_data)) {
- error_setg_errno(errp, errno, "detach %s failed", vbasedev->name);
+ error_setg_errno(errp, errno, "detach %s pasid %d failed",
+ vbasedev->name, pasid);
return false;
}
- trace_iommufd_cdev_detach_ioas_hwpt(iommufd, vbasedev->name);
+ trace_iommufd_cdev_pasid_detach_ioas_hwpt(iommufd, vbasedev->name, pasid);
return true;
}
@@ -359,7 +365,8 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
/* Try to find a domain */
QLIST_FOREACH(hwpt, &container->hwpt_list, next) {
if (!cpr_is_incoming()) {
- ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt->hwpt_id, errp);
+ ret = iommufd_cdev_pasid_attach_ioas_hwpt(vbasedev, IOMMU_NO_PASID,
+ hwpt->hwpt_id, errp);
} else if (vbasedev->cpr.hwpt_id == hwpt->hwpt_id) {
ret = 0;
} else {
@@ -432,7 +439,8 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
return false;
}
- ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt_id, errp);
+ ret = iommufd_cdev_pasid_attach_ioas_hwpt(vbasedev, IOMMU_NO_PASID, hwpt_id,
+ errp);
if (ret) {
iommufd_backend_free_id(container->be, hwpt_id);
return false;
@@ -485,7 +493,8 @@ static bool iommufd_cdev_attach_container(VFIODevice *vbasedev,
/* If CPR, we are already attached to ioas_id. */
return cpr_is_incoming() ||
- !iommufd_cdev_attach_ioas_hwpt(vbasedev, container->ioas_id, errp);
+ !iommufd_cdev_pasid_attach_ioas_hwpt(vbasedev, IOMMU_NO_PASID,
+ container->ioas_id, errp);
}
static void iommufd_cdev_detach_container(VFIODevice *vbasedev,
@@ -493,7 +502,7 @@ static void iommufd_cdev_detach_container(VFIODevice *vbasedev,
{
Error *err = NULL;
- if (!iommufd_cdev_detach_ioas_hwpt(vbasedev, &err)) {
+ if (!iommufd_cdev_pasid_detach_ioas_hwpt(vbasedev, IOMMU_NO_PASID, &err)) {
error_report_err(err);
}
@@ -919,7 +928,8 @@ host_iommu_device_iommufd_vfio_attach_hwpt(HostIOMMUDeviceIOMMUFD *idev,
{
VFIODevice *vbasedev = HOST_IOMMU_DEVICE(idev)->agent;
- return !iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt_id, errp);
+ return !iommufd_cdev_pasid_attach_ioas_hwpt(vbasedev, IOMMU_NO_PASID,
+ hwpt_id, errp);
}
static bool
@@ -928,7 +938,7 @@ host_iommu_device_iommufd_vfio_detach_hwpt(HostIOMMUDeviceIOMMUFD *idev,
{
VFIODevice *vbasedev = HOST_IOMMU_DEVICE(idev)->agent;
- return iommufd_cdev_detach_ioas_hwpt(vbasedev, errp);
+ return iommufd_cdev_pasid_detach_ioas_hwpt(vbasedev, IOMMU_NO_PASID, errp);
}
static bool hiod_iommufd_vfio_realize(HostIOMMUDevice *hiod, void *opaque,
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
index 846e3625c5..764a3e4855 100644
--- a/hw/vfio/trace-events
+++ b/hw/vfio/trace-events
@@ -182,8 +182,8 @@ vfio_vmstate_change_prepare(const char *name, int running, const char *reason, c
iommufd_cdev_connect_and_bind(int iommufd, const char *name, int devfd, int devid) " [iommufd=%d] Successfully bound device %s (fd=%d): output devid=%d"
iommufd_cdev_getfd(const char *dev, int devfd) " %s (fd=%d)"
-iommufd_cdev_attach_ioas_hwpt(int iommufd, const char *name, int devfd, int id) " [iommufd=%d] Successfully attached device %s (%d) to id=%d"
-iommufd_cdev_detach_ioas_hwpt(int iommufd, const char *name) " [iommufd=%d] Successfully detached %s"
+iommufd_cdev_pasid_attach_ioas_hwpt(int iommufd, const char *name, int devfd, uint32_t pasid, int id) " [iommufd=%d] Successfully attached device %s (%d) pasid %d to id=%d"
+iommufd_cdev_pasid_detach_ioas_hwpt(int iommufd, const char *name, uint32_t pasid) " [iommufd=%d] Successfully detached %s pasid %d"
iommufd_cdev_fail_attach_existing_container(const char *msg) " %s"
iommufd_cdev_alloc_ioas(int iommufd, int ioas_id) " [iommufd=%d] new IOMMUFD container with ioasid=%d"
iommufd_cdev_device_info(char *name, int devfd, int num_irqs, int num_regions, int flags) " %s (%d) num_irqs=%d num_regions=%d flags=%d"
--
2.47.3
^ permalink raw reply related [flat|nested] 45+ messages in thread
* Re: [PATCH v1 01/13] vfio/iommufd: Extend attach/detach_hwpt callback implementations with pasid
2026-03-06 3:43 ` [PATCH v1 01/13] vfio/iommufd: Extend attach/detach_hwpt callback implementations with pasid Zhenzhong Duan
@ 2026-03-18 11:55 ` Yi Liu
2026-03-19 7:43 ` Duan, Zhenzhong
0 siblings, 1 reply; 45+ messages in thread
From: Yi Liu @ 2026-03-18 11:55 UTC (permalink / raw)
To: Zhenzhong Duan, qemu-devel
Cc: alex, clg, eric.auger, mst, jasowang, jgg, nicolinc, skolothumtho,
joao.m.martins, clement.mathieu--drif, kevin.tian, xudong.hao
On 3/6/26 11:43, Zhenzhong Duan wrote:
> For attachment with pasid, pasid together with flag VFIO_DEVICE_ATTACH_PASID
> should be passed in.
>
> Define IOMMU_NO_PASID to represent device attachment without pasid same as
> in kernel.
>
> The implementation is similar for detachment.
>
> Suggested-by: Shameer Kolothum Thodi <skolothumtho@nvidia.com>
> Suggested-by: Nicolin Chen <nicolinc@nvidia.com>
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> ---
> include/hw/core/iommu.h | 2 ++
> hw/vfio/iommufd.c | 44 +++++++++++++++++++++++++----------------
> hw/vfio/trace-events | 4 ++--
> 3 files changed, 31 insertions(+), 19 deletions(-)
>
> diff --git a/include/hw/core/iommu.h b/include/hw/core/iommu.h
> index 86af315c15..bfcd511013 100644
> --- a/include/hw/core/iommu.h
> +++ b/include/hw/core/iommu.h
> @@ -28,4 +28,6 @@ enum host_iommu_quirks {
> HOST_IOMMU_QUIRK_NESTING_PARENT_BYPASS_RO = BIT_ULL(0),
> };
>
> +#define IOMMU_NO_PASID 0
> +
> #endif /* HW_IOMMU_H */
> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
> index 131612eb83..b4c5e81b1d 100644
> --- a/hw/vfio/iommufd.c
> +++ b/hw/vfio/iommufd.c
> @@ -21,6 +21,7 @@
> #include "qapi/error.h"
> #include "system/iommufd.h"
> #include "hw/core/qdev.h"
> +#include "hw/core/iommu.h"
nit: move this line before qdev.h. Other parts LGTM.
Reviewed-by: Yi Liu <yi.l.liu@intel.com>
> #include "hw/vfio/vfio-cpr.h"
> #include "system/reset.h"
> #include "qemu/cutils.h"
> @@ -302,43 +303,48 @@ out:
> return ret;
> }
>
> -static int iommufd_cdev_attach_ioas_hwpt(VFIODevice *vbasedev, uint32_t id,
> - Error **errp)
> +static int iommufd_cdev_pasid_attach_ioas_hwpt(VFIODevice *vbasedev,
> + uint32_t pasid, uint32_t id,
> + Error **errp)
> {
> int iommufd = vbasedev->iommufd->fd;
> struct vfio_device_attach_iommufd_pt attach_data = {
> .argsz = sizeof(attach_data),
> - .flags = 0,
> + .flags = pasid == IOMMU_NO_PASID ? 0 : VFIO_DEVICE_ATTACH_PASID,
> + .pasid = pasid,
> .pt_id = id,
> };
>
> /* Attach device to an IOAS or hwpt within iommufd */
> if (ioctl(vbasedev->fd, VFIO_DEVICE_ATTACH_IOMMUFD_PT, &attach_data)) {
> error_setg_errno(errp, errno,
> - "[iommufd=%d] error attach %s (%d) to id=%d",
> - iommufd, vbasedev->name, vbasedev->fd, id);
> + "[iommufd=%d] error attach %s (%d) pasid %d to id=%d",
> + iommufd, vbasedev->name, vbasedev->fd, pasid, id);
> return -errno;
> }
>
> - trace_iommufd_cdev_attach_ioas_hwpt(iommufd, vbasedev->name,
> - vbasedev->fd, id);
> + trace_iommufd_cdev_pasid_attach_ioas_hwpt(iommufd, vbasedev->name,
> + vbasedev->fd, pasid, id);
> return 0;
> }
>
> -static bool iommufd_cdev_detach_ioas_hwpt(VFIODevice *vbasedev, Error **errp)
> +static bool iommufd_cdev_pasid_detach_ioas_hwpt(VFIODevice *vbasedev,
> + uint32_t pasid, Error **errp)
> {
> int iommufd = vbasedev->iommufd->fd;
> struct vfio_device_detach_iommufd_pt detach_data = {
> .argsz = sizeof(detach_data),
> - .flags = 0,
> + .flags = pasid == IOMMU_NO_PASID ? 0 : VFIO_DEVICE_DETACH_PASID,
> + .pasid = pasid,
> };
>
> if (ioctl(vbasedev->fd, VFIO_DEVICE_DETACH_IOMMUFD_PT, &detach_data)) {
> - error_setg_errno(errp, errno, "detach %s failed", vbasedev->name);
> + error_setg_errno(errp, errno, "detach %s pasid %d failed",
> + vbasedev->name, pasid);
> return false;
> }
>
> - trace_iommufd_cdev_detach_ioas_hwpt(iommufd, vbasedev->name);
> + trace_iommufd_cdev_pasid_detach_ioas_hwpt(iommufd, vbasedev->name, pasid);
> return true;
> }
>
> @@ -359,7 +365,8 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
> /* Try to find a domain */
> QLIST_FOREACH(hwpt, &container->hwpt_list, next) {
> if (!cpr_is_incoming()) {
> - ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt->hwpt_id, errp);
> + ret = iommufd_cdev_pasid_attach_ioas_hwpt(vbasedev, IOMMU_NO_PASID,
> + hwpt->hwpt_id, errp);
> } else if (vbasedev->cpr.hwpt_id == hwpt->hwpt_id) {
> ret = 0;
> } else {
> @@ -432,7 +439,8 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
> return false;
> }
>
> - ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt_id, errp);
> + ret = iommufd_cdev_pasid_attach_ioas_hwpt(vbasedev, IOMMU_NO_PASID, hwpt_id,
> + errp);
> if (ret) {
> iommufd_backend_free_id(container->be, hwpt_id);
> return false;
> @@ -485,7 +493,8 @@ static bool iommufd_cdev_attach_container(VFIODevice *vbasedev,
>
> /* If CPR, we are already attached to ioas_id. */
> return cpr_is_incoming() ||
> - !iommufd_cdev_attach_ioas_hwpt(vbasedev, container->ioas_id, errp);
> + !iommufd_cdev_pasid_attach_ioas_hwpt(vbasedev, IOMMU_NO_PASID,
> + container->ioas_id, errp);
> }
>
> static void iommufd_cdev_detach_container(VFIODevice *vbasedev,
> @@ -493,7 +502,7 @@ static void iommufd_cdev_detach_container(VFIODevice *vbasedev,
> {
> Error *err = NULL;
>
> - if (!iommufd_cdev_detach_ioas_hwpt(vbasedev, &err)) {
> + if (!iommufd_cdev_pasid_detach_ioas_hwpt(vbasedev, IOMMU_NO_PASID, &err)) {
> error_report_err(err);
> }
>
> @@ -919,7 +928,8 @@ host_iommu_device_iommufd_vfio_attach_hwpt(HostIOMMUDeviceIOMMUFD *idev,
> {
> VFIODevice *vbasedev = HOST_IOMMU_DEVICE(idev)->agent;
>
> - return !iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt_id, errp);
> + return !iommufd_cdev_pasid_attach_ioas_hwpt(vbasedev, IOMMU_NO_PASID,
> + hwpt_id, errp);
> }
>
> static bool
> @@ -928,7 +938,7 @@ host_iommu_device_iommufd_vfio_detach_hwpt(HostIOMMUDeviceIOMMUFD *idev,
> {
> VFIODevice *vbasedev = HOST_IOMMU_DEVICE(idev)->agent;
>
> - return iommufd_cdev_detach_ioas_hwpt(vbasedev, errp);
> + return iommufd_cdev_pasid_detach_ioas_hwpt(vbasedev, IOMMU_NO_PASID, errp);
> }
>
> static bool hiod_iommufd_vfio_realize(HostIOMMUDevice *hiod, void *opaque,
> diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
> index 846e3625c5..764a3e4855 100644
> --- a/hw/vfio/trace-events
> +++ b/hw/vfio/trace-events
> @@ -182,8 +182,8 @@ vfio_vmstate_change_prepare(const char *name, int running, const char *reason, c
>
> iommufd_cdev_connect_and_bind(int iommufd, const char *name, int devfd, int devid) " [iommufd=%d] Successfully bound device %s (fd=%d): output devid=%d"
> iommufd_cdev_getfd(const char *dev, int devfd) " %s (fd=%d)"
> -iommufd_cdev_attach_ioas_hwpt(int iommufd, const char *name, int devfd, int id) " [iommufd=%d] Successfully attached device %s (%d) to id=%d"
> -iommufd_cdev_detach_ioas_hwpt(int iommufd, const char *name) " [iommufd=%d] Successfully detached %s"
> +iommufd_cdev_pasid_attach_ioas_hwpt(int iommufd, const char *name, int devfd, uint32_t pasid, int id) " [iommufd=%d] Successfully attached device %s (%d) pasid %d to id=%d"
> +iommufd_cdev_pasid_detach_ioas_hwpt(int iommufd, const char *name, uint32_t pasid) " [iommufd=%d] Successfully detached %s pasid %d"
> iommufd_cdev_fail_attach_existing_container(const char *msg) " %s"
> iommufd_cdev_alloc_ioas(int iommufd, int ioas_id) " [iommufd=%d] new IOMMUFD container with ioasid=%d"
> iommufd_cdev_device_info(char *name, int devfd, int num_irqs, int num_regions, int flags) " %s (%d) num_irqs=%d num_regions=%d flags=%d"
^ permalink raw reply [flat|nested] 45+ messages in thread
* RE: [PATCH v1 01/13] vfio/iommufd: Extend attach/detach_hwpt callback implementations with pasid
2026-03-18 11:55 ` Yi Liu
@ 2026-03-19 7:43 ` Duan, Zhenzhong
0 siblings, 0 replies; 45+ messages in thread
From: Duan, Zhenzhong @ 2026-03-19 7:43 UTC (permalink / raw)
To: Liu, Yi L, qemu-devel@nongnu.org
Cc: alex@shazbot.org, clg@redhat.com, eric.auger@redhat.com,
mst@redhat.com, jasowang@redhat.com, jgg@nvidia.com,
nicolinc@nvidia.com, skolothumtho@nvidia.com,
joao.m.martins@oracle.com, clement.mathieu--drif@eviden.com,
Tian, Kevin, Hao, Xudong
>-----Original Message-----
>From: Liu, Yi L <yi.l.liu@intel.com>
>Subject: Re: [PATCH v1 01/13] vfio/iommufd: Extend attach/detach_hwpt callback
>implementations with pasid
>
>On 3/6/26 11:43, Zhenzhong Duan wrote:
>> For attachment with pasid, pasid together with flag
>VFIO_DEVICE_ATTACH_PASID
>> should be passed in.
>>
>> Define IOMMU_NO_PASID to represent device attachment without pasid same
>as
>> in kernel.
>>
>> The implementation is similar for detachment.
>>
>> Suggested-by: Shameer Kolothum Thodi <skolothumtho@nvidia.com>
>> Suggested-by: Nicolin Chen <nicolinc@nvidia.com>
>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>> ---
>> include/hw/core/iommu.h | 2 ++
>> hw/vfio/iommufd.c | 44 +++++++++++++++++++++++++----------------
>> hw/vfio/trace-events | 4 ++--
>> 3 files changed, 31 insertions(+), 19 deletions(-)
>>
>> diff --git a/include/hw/core/iommu.h b/include/hw/core/iommu.h
>> index 86af315c15..bfcd511013 100644
>> --- a/include/hw/core/iommu.h
>> +++ b/include/hw/core/iommu.h
>> @@ -28,4 +28,6 @@ enum host_iommu_quirks {
>> HOST_IOMMU_QUIRK_NESTING_PARENT_BYPASS_RO = BIT_ULL(0),
>> };
>>
>> +#define IOMMU_NO_PASID 0
>> +
>> #endif /* HW_IOMMU_H */
>> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
>> index 131612eb83..b4c5e81b1d 100644
>> --- a/hw/vfio/iommufd.c
>> +++ b/hw/vfio/iommufd.c
>> @@ -21,6 +21,7 @@
>> #include "qapi/error.h"
>> #include "system/iommufd.h"
>> #include "hw/core/qdev.h"
>> +#include "hw/core/iommu.h"
>
>nit: move this line before qdev.h. Other parts LGTM.
Will do.
Thanks
Zhenzhong
>
>Reviewed-by: Yi Liu <yi.l.liu@intel.com>
>
>> #include "hw/vfio/vfio-cpr.h"
>> #include "system/reset.h"
>> #include "qemu/cutils.h"
>> @@ -302,43 +303,48 @@ out:
>> return ret;
>> }
>>
>> -static int iommufd_cdev_attach_ioas_hwpt(VFIODevice *vbasedev, uint32_t id,
>> - Error **errp)
>> +static int iommufd_cdev_pasid_attach_ioas_hwpt(VFIODevice *vbasedev,
>> + uint32_t pasid, uint32_t id,
>> + Error **errp)
>> {
>> int iommufd = vbasedev->iommufd->fd;
>> struct vfio_device_attach_iommufd_pt attach_data = {
>> .argsz = sizeof(attach_data),
>> - .flags = 0,
>> + .flags = pasid == IOMMU_NO_PASID ? 0 : VFIO_DEVICE_ATTACH_PASID,
>> + .pasid = pasid,
>> .pt_id = id,
>> };
>>
>> /* Attach device to an IOAS or hwpt within iommufd */
>> if (ioctl(vbasedev->fd, VFIO_DEVICE_ATTACH_IOMMUFD_PT, &attach_data))
>{
>> error_setg_errno(errp, errno,
>> - "[iommufd=%d] error attach %s (%d) to id=%d",
>> - iommufd, vbasedev->name, vbasedev->fd, id);
>> + "[iommufd=%d] error attach %s (%d) pasid %d to id=%d",
>> + iommufd, vbasedev->name, vbasedev->fd, pasid, id);
>> return -errno;
>> }
>>
>> - trace_iommufd_cdev_attach_ioas_hwpt(iommufd, vbasedev->name,
>> - vbasedev->fd, id);
>> + trace_iommufd_cdev_pasid_attach_ioas_hwpt(iommufd, vbasedev->name,
>> + vbasedev->fd, pasid, id);
>> return 0;
>> }
>>
>> -static bool iommufd_cdev_detach_ioas_hwpt(VFIODevice *vbasedev, Error
>**errp)
>> +static bool iommufd_cdev_pasid_detach_ioas_hwpt(VFIODevice *vbasedev,
>> + uint32_t pasid, Error **errp)
>> {
>> int iommufd = vbasedev->iommufd->fd;
>> struct vfio_device_detach_iommufd_pt detach_data = {
>> .argsz = sizeof(detach_data),
>> - .flags = 0,
>> + .flags = pasid == IOMMU_NO_PASID ? 0 : VFIO_DEVICE_DETACH_PASID,
>> + .pasid = pasid,
>> };
>>
>> if (ioctl(vbasedev->fd, VFIO_DEVICE_DETACH_IOMMUFD_PT, &detach_data))
>{
>> - error_setg_errno(errp, errno, "detach %s failed", vbasedev->name);
>> + error_setg_errno(errp, errno, "detach %s pasid %d failed",
>> + vbasedev->name, pasid);
>> return false;
>> }
>>
>> - trace_iommufd_cdev_detach_ioas_hwpt(iommufd, vbasedev->name);
>> + trace_iommufd_cdev_pasid_detach_ioas_hwpt(iommufd, vbasedev->name,
>pasid);
>> return true;
>> }
>>
>> @@ -359,7 +365,8 @@ static bool
>iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
>> /* Try to find a domain */
>> QLIST_FOREACH(hwpt, &container->hwpt_list, next) {
>> if (!cpr_is_incoming()) {
>> - ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt->hwpt_id, errp);
>> + ret = iommufd_cdev_pasid_attach_ioas_hwpt(vbasedev,
>IOMMU_NO_PASID,
>> + hwpt->hwpt_id, errp);
>> } else if (vbasedev->cpr.hwpt_id == hwpt->hwpt_id) {
>> ret = 0;
>> } else {
>> @@ -432,7 +439,8 @@ static bool
>iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
>> return false;
>> }
>>
>> - ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt_id, errp);
>> + ret = iommufd_cdev_pasid_attach_ioas_hwpt(vbasedev, IOMMU_NO_PASID,
>hwpt_id,
>> + errp);
>> if (ret) {
>> iommufd_backend_free_id(container->be, hwpt_id);
>> return false;
>> @@ -485,7 +493,8 @@ static bool iommufd_cdev_attach_container(VFIODevice
>*vbasedev,
>>
>> /* If CPR, we are already attached to ioas_id. */
>> return cpr_is_incoming() ||
>> - !iommufd_cdev_attach_ioas_hwpt(vbasedev, container->ioas_id, errp);
>> + !iommufd_cdev_pasid_attach_ioas_hwpt(vbasedev, IOMMU_NO_PASID,
>> + container->ioas_id, errp);
>> }
>>
>> static void iommufd_cdev_detach_container(VFIODevice *vbasedev,
>> @@ -493,7 +502,7 @@ static void
>iommufd_cdev_detach_container(VFIODevice *vbasedev,
>> {
>> Error *err = NULL;
>>
>> - if (!iommufd_cdev_detach_ioas_hwpt(vbasedev, &err)) {
>> + if (!iommufd_cdev_pasid_detach_ioas_hwpt(vbasedev, IOMMU_NO_PASID,
>&err)) {
>> error_report_err(err);
>> }
>>
>> @@ -919,7 +928,8 @@
>host_iommu_device_iommufd_vfio_attach_hwpt(HostIOMMUDeviceIOMMUFD
>*idev,
>> {
>> VFIODevice *vbasedev = HOST_IOMMU_DEVICE(idev)->agent;
>>
>> - return !iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt_id, errp);
>> + return !iommufd_cdev_pasid_attach_ioas_hwpt(vbasedev,
>IOMMU_NO_PASID,
>> + hwpt_id, errp);
>> }
>>
>> static bool
>> @@ -928,7 +938,7 @@
>host_iommu_device_iommufd_vfio_detach_hwpt(HostIOMMUDeviceIOMMUFD
>*idev,
>> {
>> VFIODevice *vbasedev = HOST_IOMMU_DEVICE(idev)->agent;
>>
>> - return iommufd_cdev_detach_ioas_hwpt(vbasedev, errp);
>> + return iommufd_cdev_pasid_detach_ioas_hwpt(vbasedev,
>IOMMU_NO_PASID, errp);
>> }
>>
>> static bool hiod_iommufd_vfio_realize(HostIOMMUDevice *hiod, void *opaque,
>> diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
>> index 846e3625c5..764a3e4855 100644
>> --- a/hw/vfio/trace-events
>> +++ b/hw/vfio/trace-events
>> @@ -182,8 +182,8 @@ vfio_vmstate_change_prepare(const char *name, int
>running, const char *reason, c
>>
>> iommufd_cdev_connect_and_bind(int iommufd, const char *name, int devfd, int
>devid) " [iommufd=%d] Successfully bound device %s (fd=%d): output devid=%d"
>> iommufd_cdev_getfd(const char *dev, int devfd) " %s (fd=%d)"
>> -iommufd_cdev_attach_ioas_hwpt(int iommufd, const char *name, int devfd, int
>id) " [iommufd=%d] Successfully attached device %s (%d) to id=%d"
>> -iommufd_cdev_detach_ioas_hwpt(int iommufd, const char *name) "
>[iommufd=%d] Successfully detached %s"
>> +iommufd_cdev_pasid_attach_ioas_hwpt(int iommufd, const char *name, int
>devfd, uint32_t pasid, int id) " [iommufd=%d] Successfully attached device %s (%d)
>pasid %d to id=%d"
>> +iommufd_cdev_pasid_detach_ioas_hwpt(int iommufd, const char *name,
>uint32_t pasid) " [iommufd=%d] Successfully detached %s pasid %d"
>> iommufd_cdev_fail_attach_existing_container(const char *msg) " %s"
>> iommufd_cdev_alloc_ioas(int iommufd, int ioas_id) " [iommufd=%d] new
>IOMMUFD container with ioasid=%d"
>> iommufd_cdev_device_info(char *name, int devfd, int num_irqs, int
>num_regions, int flags) " %s (%d) num_irqs=%d num_regions=%d flags=%d"
^ permalink raw reply [flat|nested] 45+ messages in thread
* [PATCH v1 02/13] iommufd: Extend attach/detach_hwpt callbacks to support pasid
2026-03-06 3:43 [PATCH v1 00/13] intel_iommu: Enable PASID support for passthrough device Zhenzhong Duan
2026-03-06 3:43 ` [PATCH v1 01/13] vfio/iommufd: Extend attach/detach_hwpt callback implementations with pasid Zhenzhong Duan
@ 2026-03-06 3:43 ` Zhenzhong Duan
2026-03-18 12:03 ` Yi Liu
2026-03-06 3:43 ` [PATCH v1 03/13] vfio/iommufd: Create nesting parent hwpt with IOMMU_HWPT_ALLOC_PASID flag Zhenzhong Duan
` (10 subsequent siblings)
12 siblings, 1 reply; 45+ messages in thread
From: Zhenzhong Duan @ 2026-03-06 3:43 UTC (permalink / raw)
To: qemu-devel
Cc: alex, clg, eric.auger, mst, jasowang, jgg, nicolinc, skolothumtho,
joao.m.martins, clement.mathieu--drif, kevin.tian, yi.l.liu,
xudong.hao, Zhenzhong Duan, qemu-arm
Add a pasid parameter to the attach_hwpt/detach_hwpt callbacks, and do the
same for the two wrappers and their call sites.
Suggested-by: Shameer Kolothum Thodi <skolothumtho@nvidia.com>
Suggested-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
---
include/system/iommufd.h | 16 +++++++++++-----
backends/iommufd.c | 9 +++++----
hw/arm/smmuv3-accel.c | 12 ++++++++----
hw/i386/intel_iommu_accel.c | 8 +++++---
hw/vfio/iommufd.c | 10 +++++-----
5 files changed, 34 insertions(+), 21 deletions(-)
diff --git a/include/system/iommufd.h b/include/system/iommufd.h
index 80d72469a9..80c8423654 100644
--- a/include/system/iommufd.h
+++ b/include/system/iommufd.h
@@ -124,14 +124,16 @@ struct HostIOMMUDeviceIOMMUFDClass {
*
* @idev: host IOMMU device backed by IOMMUFD backend.
*
+ * @pasid: target pasid of the attachment.
+ *
* @hwpt_id: ID of IOMMUFD hardware page table.
*
* @errp: pass an Error out when attachment fails.
*
* Returns: true on success, false on failure.
*/
- bool (*attach_hwpt)(HostIOMMUDeviceIOMMUFD *idev, uint32_t hwpt_id,
- Error **errp);
+ bool (*attach_hwpt)(HostIOMMUDeviceIOMMUFD *idev, uint32_t pasid,
+ uint32_t hwpt_id, Error **errp);
/**
* @detach_hwpt: detach host IOMMU device from IOMMUFD hardware page table.
* VFIO and VDPA device can have different implementation.
@@ -140,15 +142,19 @@ struct HostIOMMUDeviceIOMMUFDClass {
*
* @idev: host IOMMU device backed by IOMMUFD backend.
*
+ * @pasid: target pasid of the detachment.
+ *
* @errp: pass an Error out when attachment fails.
*
* Returns: true on success, false on failure.
*/
- bool (*detach_hwpt)(HostIOMMUDeviceIOMMUFD *idev, Error **errp);
+ bool (*detach_hwpt)(HostIOMMUDeviceIOMMUFD *idev, uint32_t pasid,
+ Error **errp);
};
bool host_iommu_device_iommufd_attach_hwpt(HostIOMMUDeviceIOMMUFD *idev,
- uint32_t hwpt_id, Error **errp);
-bool host_iommu_device_iommufd_detach_hwpt(HostIOMMUDeviceIOMMUFD *idev,
+ uint32_t pasid, uint32_t hwpt_id,
Error **errp);
+bool host_iommu_device_iommufd_detach_hwpt(HostIOMMUDeviceIOMMUFD *idev,
+ uint32_t pasid, Error **errp);
#endif
diff --git a/backends/iommufd.c b/backends/iommufd.c
index 13822df82f..153edf4f79 100644
--- a/backends/iommufd.c
+++ b/backends/iommufd.c
@@ -505,23 +505,24 @@ bool iommufd_backend_alloc_vdev(IOMMUFDBackend *be, uint32_t dev_id,
}
bool host_iommu_device_iommufd_attach_hwpt(HostIOMMUDeviceIOMMUFD *idev,
- uint32_t hwpt_id, Error **errp)
+ uint32_t pasid, uint32_t hwpt_id,
+ Error **errp)
{
HostIOMMUDeviceIOMMUFDClass *idevc =
HOST_IOMMU_DEVICE_IOMMUFD_GET_CLASS(idev);
g_assert(idevc->attach_hwpt);
- return idevc->attach_hwpt(idev, hwpt_id, errp);
+ return idevc->attach_hwpt(idev, pasid, hwpt_id, errp);
}
bool host_iommu_device_iommufd_detach_hwpt(HostIOMMUDeviceIOMMUFD *idev,
- Error **errp)
+ uint32_t pasid, Error **errp)
{
HostIOMMUDeviceIOMMUFDClass *idevc =
HOST_IOMMU_DEVICE_IOMMUFD_GET_CLASS(idev);
g_assert(idevc->detach_hwpt);
- return idevc->detach_hwpt(idev, errp);
+ return idevc->detach_hwpt(idev, pasid, errp);
}
static int hiod_iommufd_get_cap(HostIOMMUDevice *hiod, int cap, Error **errp)
diff --git a/hw/arm/smmuv3-accel.c b/hw/arm/smmuv3-accel.c
index f5cd4df336..6b04344959 100644
--- a/hw/arm/smmuv3-accel.c
+++ b/hw/arm/smmuv3-accel.c
@@ -294,7 +294,8 @@ bool smmuv3_accel_install_ste(SMMUv3State *s, SMMUDevice *sdev, int sid,
return false;
}
- if (!host_iommu_device_iommufd_attach_hwpt(idev, hwpt_id, errp)) {
+ if (!host_iommu_device_iommufd_attach_hwpt(idev, IOMMU_NO_PASID, hwpt_id,
+ errp)) {
if (s1_hwpt) {
iommufd_backend_free_id(idev->iommufd, s1_hwpt->hwpt_id);
g_free(s1_hwpt);
@@ -436,7 +437,8 @@ smmuv3_accel_alloc_viommu(SMMUv3State *s, HostIOMMUDeviceIOMMUFD *idev,
/* Attach a HWPT based on SMMUv3 GBPA.ABORT value */
hwpt_id = smmuv3_accel_gbpa_hwpt(s, accel);
- if (!host_iommu_device_iommufd_attach_hwpt(idev, hwpt_id, errp)) {
+ if (!host_iommu_device_iommufd_attach_hwpt(idev, IOMMU_NO_PASID, hwpt_id,
+ errp)) {
goto free_bypass_hwpt;
}
accel->viommu = viommu;
@@ -524,7 +526,8 @@ static void smmuv3_accel_unset_iommu_device(PCIBus *bus, void *opaque,
idev = accel_dev->idev;
accel = accel_dev->s_accel;
/* Re-attach the default s2 hwpt id */
- if (!host_iommu_device_iommufd_attach_hwpt(idev, idev->hwpt_id, NULL)) {
+ if (!host_iommu_device_iommufd_attach_hwpt(idev, IOMMU_NO_PASID,
+ idev->hwpt_id, NULL)) {
error_report("Unable to attach the default HW pagetable: idev devid "
"0x%x", idev->devid);
}
@@ -720,7 +723,8 @@ bool smmuv3_accel_attach_gbpa_hwpt(SMMUv3State *s, Error **errp)
hwpt_id = smmuv3_accel_gbpa_hwpt(s, accel);
QLIST_FOREACH(accel_dev, &accel->device_list, next) {
- if (!host_iommu_device_iommufd_attach_hwpt(accel_dev->idev, hwpt_id,
+ if (!host_iommu_device_iommufd_attach_hwpt(accel_dev->idev,
+ IOMMU_NO_PASID, hwpt_id,
&local_err)) {
error_append_hint(&local_err, "Failed to attach GBPA hwpt %u for "
"idev devid %u", hwpt_id, accel_dev->idev->devid);
diff --git a/hw/i386/intel_iommu_accel.c b/hw/i386/intel_iommu_accel.c
index 67d54849f2..45c08c8f6f 100644
--- a/hw/i386/intel_iommu_accel.c
+++ b/hw/i386/intel_iommu_accel.c
@@ -121,7 +121,8 @@ static bool vtd_device_attach_iommufd(VTDHostIOMMUDevice *vtd_hiod,
}
}
- ret = host_iommu_device_iommufd_attach_hwpt(idev, hwpt_id, errp);
+ ret = host_iommu_device_iommufd_attach_hwpt(idev, IOMMU_NO_PASID, hwpt_id,
+ errp);
trace_vtd_device_attach_hwpt(idev->devid, vtd_as->pasid, hwpt_id, ret);
if (ret) {
/* Destroy old fs_hwpt if it's a replacement */
@@ -145,7 +146,7 @@ static bool vtd_device_detach_iommufd(VTDHostIOMMUDevice *vtd_hiod,
bool ret;
if (s->dmar_enabled && s->root_scalable) {
- ret = host_iommu_device_iommufd_detach_hwpt(idev, errp);
+ ret = host_iommu_device_iommufd_detach_hwpt(idev, IOMMU_NO_PASID, errp);
trace_vtd_device_detach_hwpt(idev->devid, pasid, ret);
} else {
/*
@@ -153,7 +154,8 @@ static bool vtd_device_detach_iommufd(VTDHostIOMMUDevice *vtd_hiod,
* we fallback to the default HWPT which contains shadow page table.
* So guest DMA could still work.
*/
- ret = host_iommu_device_iommufd_attach_hwpt(idev, idev->hwpt_id, errp);
+ ret = host_iommu_device_iommufd_attach_hwpt(idev, IOMMU_NO_PASID,
+ idev->hwpt_id, errp);
trace_vtd_device_reattach_def_hwpt(idev->devid, pasid, idev->hwpt_id,
ret);
}
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
index b4c5e81b1d..005f97fe25 100644
--- a/hw/vfio/iommufd.c
+++ b/hw/vfio/iommufd.c
@@ -924,21 +924,21 @@ static void vfio_iommu_iommufd_class_init(ObjectClass *klass, const void *data)
static bool
host_iommu_device_iommufd_vfio_attach_hwpt(HostIOMMUDeviceIOMMUFD *idev,
- uint32_t hwpt_id, Error **errp)
+ uint32_t pasid, uint32_t hwpt_id,
+ Error **errp)
{
VFIODevice *vbasedev = HOST_IOMMU_DEVICE(idev)->agent;
- return !iommufd_cdev_pasid_attach_ioas_hwpt(vbasedev, IOMMU_NO_PASID,
- hwpt_id, errp);
+ return !iommufd_cdev_pasid_attach_ioas_hwpt(vbasedev, pasid, hwpt_id, errp);
}
static bool
host_iommu_device_iommufd_vfio_detach_hwpt(HostIOMMUDeviceIOMMUFD *idev,
- Error **errp)
+ uint32_t pasid, Error **errp)
{
VFIODevice *vbasedev = HOST_IOMMU_DEVICE(idev)->agent;
- return iommufd_cdev_pasid_detach_ioas_hwpt(vbasedev, IOMMU_NO_PASID, errp);
+ return iommufd_cdev_pasid_detach_ioas_hwpt(vbasedev, pasid, errp);
}
static bool hiod_iommufd_vfio_realize(HostIOMMUDevice *hiod, void *opaque,
--
2.47.3
^ permalink raw reply related [flat|nested] 45+ messages in thread
* Re: [PATCH v1 02/13] iommufd: Extend attach/detach_hwpt callbacks to support pasid
2026-03-06 3:43 ` [PATCH v1 02/13] iommufd: Extend attach/detach_hwpt callbacks to support pasid Zhenzhong Duan
@ 2026-03-18 12:03 ` Yi Liu
2026-03-18 12:15 ` Yi Liu
2026-03-19 7:47 ` Duan, Zhenzhong
0 siblings, 2 replies; 45+ messages in thread
From: Yi Liu @ 2026-03-18 12:03 UTC (permalink / raw)
To: Zhenzhong Duan, qemu-devel
Cc: alex, clg, eric.auger, mst, jasowang, jgg, nicolinc, skolothumtho,
joao.m.martins, clement.mathieu--drif, kevin.tian, xudong.hao,
qemu-arm
On 3/6/26 11:43, Zhenzhong Duan wrote:
> Same for the two wrappers and their call sites.
>
> Suggested-by: Shameer Kolothum Thodi <skolothumtho@nvidia.com>
> Suggested-by: Nicolin Chen <nicolinc@nvidia.com>
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> ---
> include/system/iommufd.h | 16 +++++++++++-----
> backends/iommufd.c | 9 +++++----
> hw/arm/smmuv3-accel.c | 12 ++++++++----
> hw/i386/intel_iommu_accel.c | 8 +++++---
> hw/vfio/iommufd.c | 10 +++++-----
> 5 files changed, 34 insertions(+), 21 deletions(-)
>
> diff --git a/include/system/iommufd.h b/include/system/iommufd.h
> index 80d72469a9..80c8423654 100644
> --- a/include/system/iommufd.h
> +++ b/include/system/iommufd.h
> @@ -124,14 +124,16 @@ struct HostIOMMUDeviceIOMMUFDClass {
> *
> * @idev: host IOMMU device backed by IOMMUFD backend.
> *
> + * @pasid: pasid of host IOMMU device.
this comment is a bit misleading although I know what you mean. Perhaps
"pasid of AS" or "target pasid of attach". Other parts LGTM.
Yi Liu <yi.l.liu@intel.com>
> + *
> * @hwpt_id: ID of IOMMUFD hardware page table.
> *
> * @errp: pass an Error out when attachment fails.
> *
> * Returns: true on success, false on failure.
> */
> - bool (*attach_hwpt)(HostIOMMUDeviceIOMMUFD *idev, uint32_t hwpt_id,
> - Error **errp);
> + bool (*attach_hwpt)(HostIOMMUDeviceIOMMUFD *idev, uint32_t pasid,
> + uint32_t hwpt_id, Error **errp);
> /**
> * @detach_hwpt: detach host IOMMU device from IOMMUFD hardware page table.
> * VFIO and VDPA device can have different implementation.
> @@ -140,15 +142,19 @@ struct HostIOMMUDeviceIOMMUFDClass {
> *
> * @idev: host IOMMU device backed by IOMMUFD backend.
> *
> + * @pasid: pasid of host IOMMU device.
> + *
> * @errp: pass an Error out when attachment fails.
> *
> * Returns: true on success, false on failure.
> */
> - bool (*detach_hwpt)(HostIOMMUDeviceIOMMUFD *idev, Error **errp);
> + bool (*detach_hwpt)(HostIOMMUDeviceIOMMUFD *idev, uint32_t pasid,
> + Error **errp);
> };
>
> bool host_iommu_device_iommufd_attach_hwpt(HostIOMMUDeviceIOMMUFD *idev,
> - uint32_t hwpt_id, Error **errp);
> -bool host_iommu_device_iommufd_detach_hwpt(HostIOMMUDeviceIOMMUFD *idev,
> + uint32_t pasid, uint32_t hwpt_id,
> Error **errp);
> +bool host_iommu_device_iommufd_detach_hwpt(HostIOMMUDeviceIOMMUFD *idev,
> + uint32_t pasid, Error **errp);
> #endif
> diff --git a/backends/iommufd.c b/backends/iommufd.c
> index 13822df82f..153edf4f79 100644
> --- a/backends/iommufd.c
> +++ b/backends/iommufd.c
> @@ -505,23 +505,24 @@ bool iommufd_backend_alloc_vdev(IOMMUFDBackend *be, uint32_t dev_id,
> }
>
> bool host_iommu_device_iommufd_attach_hwpt(HostIOMMUDeviceIOMMUFD *idev,
> - uint32_t hwpt_id, Error **errp)
> + uint32_t pasid, uint32_t hwpt_id,
> + Error **errp)
> {
> HostIOMMUDeviceIOMMUFDClass *idevc =
> HOST_IOMMU_DEVICE_IOMMUFD_GET_CLASS(idev);
>
> g_assert(idevc->attach_hwpt);
> - return idevc->attach_hwpt(idev, hwpt_id, errp);
> + return idevc->attach_hwpt(idev, pasid, hwpt_id, errp);
> }
>
> bool host_iommu_device_iommufd_detach_hwpt(HostIOMMUDeviceIOMMUFD *idev,
> - Error **errp)
> + uint32_t pasid, Error **errp)
> {
> HostIOMMUDeviceIOMMUFDClass *idevc =
> HOST_IOMMU_DEVICE_IOMMUFD_GET_CLASS(idev);
>
> g_assert(idevc->detach_hwpt);
> - return idevc->detach_hwpt(idev, errp);
> + return idevc->detach_hwpt(idev, pasid, errp);
> }
>
> static int hiod_iommufd_get_cap(HostIOMMUDevice *hiod, int cap, Error **errp)
> diff --git a/hw/arm/smmuv3-accel.c b/hw/arm/smmuv3-accel.c
> index f5cd4df336..6b04344959 100644
> --- a/hw/arm/smmuv3-accel.c
> +++ b/hw/arm/smmuv3-accel.c
> @@ -294,7 +294,8 @@ bool smmuv3_accel_install_ste(SMMUv3State *s, SMMUDevice *sdev, int sid,
> return false;
> }
>
> - if (!host_iommu_device_iommufd_attach_hwpt(idev, hwpt_id, errp)) {
> + if (!host_iommu_device_iommufd_attach_hwpt(idev, IOMMU_NO_PASID, hwpt_id,
> + errp)) {
> if (s1_hwpt) {
> iommufd_backend_free_id(idev->iommufd, s1_hwpt->hwpt_id);
> g_free(s1_hwpt);
> @@ -436,7 +437,8 @@ smmuv3_accel_alloc_viommu(SMMUv3State *s, HostIOMMUDeviceIOMMUFD *idev,
>
> /* Attach a HWPT based on SMMUv3 GBPA.ABORT value */
> hwpt_id = smmuv3_accel_gbpa_hwpt(s, accel);
> - if (!host_iommu_device_iommufd_attach_hwpt(idev, hwpt_id, errp)) {
> + if (!host_iommu_device_iommufd_attach_hwpt(idev, IOMMU_NO_PASID, hwpt_id,
> + errp)) {
> goto free_bypass_hwpt;
> }
> accel->viommu = viommu;
> @@ -524,7 +526,8 @@ static void smmuv3_accel_unset_iommu_device(PCIBus *bus, void *opaque,
> idev = accel_dev->idev;
> accel = accel_dev->s_accel;
> /* Re-attach the default s2 hwpt id */
> - if (!host_iommu_device_iommufd_attach_hwpt(idev, idev->hwpt_id, NULL)) {
> + if (!host_iommu_device_iommufd_attach_hwpt(idev, IOMMU_NO_PASID,
> + idev->hwpt_id, NULL)) {
> error_report("Unable to attach the default HW pagetable: idev devid "
> "0x%x", idev->devid);
> }
> @@ -720,7 +723,8 @@ bool smmuv3_accel_attach_gbpa_hwpt(SMMUv3State *s, Error **errp)
>
> hwpt_id = smmuv3_accel_gbpa_hwpt(s, accel);
> QLIST_FOREACH(accel_dev, &accel->device_list, next) {
> - if (!host_iommu_device_iommufd_attach_hwpt(accel_dev->idev, hwpt_id,
> + if (!host_iommu_device_iommufd_attach_hwpt(accel_dev->idev,
> + IOMMU_NO_PASID, hwpt_id,
> &local_err)) {
> error_append_hint(&local_err, "Failed to attach GBPA hwpt %u for "
> "idev devid %u", hwpt_id, accel_dev->idev->devid);
> diff --git a/hw/i386/intel_iommu_accel.c b/hw/i386/intel_iommu_accel.c
> index 67d54849f2..45c08c8f6f 100644
> --- a/hw/i386/intel_iommu_accel.c
> +++ b/hw/i386/intel_iommu_accel.c
> @@ -121,7 +121,8 @@ static bool vtd_device_attach_iommufd(VTDHostIOMMUDevice *vtd_hiod,
> }
> }
>
> - ret = host_iommu_device_iommufd_attach_hwpt(idev, hwpt_id, errp);
> + ret = host_iommu_device_iommufd_attach_hwpt(idev, IOMMU_NO_PASID, hwpt_id,
> + errp);
> trace_vtd_device_attach_hwpt(idev->devid, vtd_as->pasid, hwpt_id, ret);
> if (ret) {
> /* Destroy old fs_hwpt if it's a replacement */
> @@ -145,7 +146,7 @@ static bool vtd_device_detach_iommufd(VTDHostIOMMUDevice *vtd_hiod,
> bool ret;
>
> if (s->dmar_enabled && s->root_scalable) {
> - ret = host_iommu_device_iommufd_detach_hwpt(idev, errp);
> + ret = host_iommu_device_iommufd_detach_hwpt(idev, IOMMU_NO_PASID, errp);
> trace_vtd_device_detach_hwpt(idev->devid, pasid, ret);
> } else {
> /*
> @@ -153,7 +154,8 @@ static bool vtd_device_detach_iommufd(VTDHostIOMMUDevice *vtd_hiod,
> * we fallback to the default HWPT which contains shadow page table.
> * So guest DMA could still work.
> */
> - ret = host_iommu_device_iommufd_attach_hwpt(idev, idev->hwpt_id, errp);
> + ret = host_iommu_device_iommufd_attach_hwpt(idev, IOMMU_NO_PASID,
> + idev->hwpt_id, errp);
> trace_vtd_device_reattach_def_hwpt(idev->devid, pasid, idev->hwpt_id,
> ret);
> }
> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
> index b4c5e81b1d..005f97fe25 100644
> --- a/hw/vfio/iommufd.c
> +++ b/hw/vfio/iommufd.c
> @@ -924,21 +924,21 @@ static void vfio_iommu_iommufd_class_init(ObjectClass *klass, const void *data)
>
> static bool
> host_iommu_device_iommufd_vfio_attach_hwpt(HostIOMMUDeviceIOMMUFD *idev,
> - uint32_t hwpt_id, Error **errp)
> + uint32_t pasid, uint32_t hwpt_id,
> + Error **errp)
> {
> VFIODevice *vbasedev = HOST_IOMMU_DEVICE(idev)->agent;
>
> - return !iommufd_cdev_pasid_attach_ioas_hwpt(vbasedev, IOMMU_NO_PASID,
> - hwpt_id, errp);
> + return !iommufd_cdev_pasid_attach_ioas_hwpt(vbasedev, pasid, hwpt_id, errp);
> }
>
> static bool
> host_iommu_device_iommufd_vfio_detach_hwpt(HostIOMMUDeviceIOMMUFD *idev,
> - Error **errp)
> + uint32_t pasid, Error **errp)
> {
> VFIODevice *vbasedev = HOST_IOMMU_DEVICE(idev)->agent;
>
> - return iommufd_cdev_pasid_detach_ioas_hwpt(vbasedev, IOMMU_NO_PASID, errp);
> + return iommufd_cdev_pasid_detach_ioas_hwpt(vbasedev, pasid, errp);
> }
>
> static bool hiod_iommufd_vfio_realize(HostIOMMUDevice *hiod, void *opaque,
^ permalink raw reply [flat|nested] 45+ messages in thread
* Re: [PATCH v1 02/13] iommufd: Extend attach/detach_hwpt callbacks to support pasid
2026-03-18 12:03 ` Yi Liu
@ 2026-03-18 12:15 ` Yi Liu
2026-03-19 7:47 ` Duan, Zhenzhong
1 sibling, 0 replies; 45+ messages in thread
From: Yi Liu @ 2026-03-18 12:15 UTC (permalink / raw)
To: Zhenzhong Duan, qemu-devel
Cc: alex, clg, eric.auger, mst, jasowang, jgg, nicolinc, skolothumtho,
joao.m.martins, clement.mathieu--drif, kevin.tian, xudong.hao,
qemu-arm
On 3/18/26 20:03, Yi Liu wrote:
> On 3/6/26 11:43, Zhenzhong Duan wrote:
>> Same for the two wrappers and their call sites.
>>
>> Suggested-by: Shameer Kolothum Thodi <skolothumtho@nvidia.com>
>> Suggested-by: Nicolin Chen <nicolinc@nvidia.com>
>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>> ---
>> include/system/iommufd.h | 16 +++++++++++-----
>> backends/iommufd.c | 9 +++++----
>> hw/arm/smmuv3-accel.c | 12 ++++++++----
>> hw/i386/intel_iommu_accel.c | 8 +++++---
>> hw/vfio/iommufd.c | 10 +++++-----
>> 5 files changed, 34 insertions(+), 21 deletions(-)
>>
>> diff --git a/include/system/iommufd.h b/include/system/iommufd.h
>> index 80d72469a9..80c8423654 100644
>> --- a/include/system/iommufd.h
>> +++ b/include/system/iommufd.h
>> @@ -124,14 +124,16 @@ struct HostIOMMUDeviceIOMMUFDClass {
>> *
>> * @idev: host IOMMU device backed by IOMMUFD backend.
>> *
>> + * @pasid: pasid of host IOMMU device.
>
> this comment is a bit misleading although I know what you mean. Perhaps
> "pasid of AS" or "target pasid of attach". Other parts LGTM.
>
Reviewed-by: Yi Liu <yi.l.liu@intel.com>
^ permalink raw reply [flat|nested] 45+ messages in thread
* RE: [PATCH v1 02/13] iommufd: Extend attach/detach_hwpt callbacks to support pasid
2026-03-18 12:03 ` Yi Liu
2026-03-18 12:15 ` Yi Liu
@ 2026-03-19 7:47 ` Duan, Zhenzhong
1 sibling, 0 replies; 45+ messages in thread
From: Duan, Zhenzhong @ 2026-03-19 7:47 UTC (permalink / raw)
To: Liu, Yi L, qemu-devel@nongnu.org
Cc: alex@shazbot.org, clg@redhat.com, eric.auger@redhat.com,
mst@redhat.com, jasowang@redhat.com, jgg@nvidia.com,
nicolinc@nvidia.com, skolothumtho@nvidia.com,
joao.m.martins@oracle.com, clement.mathieu--drif@eviden.com,
Tian, Kevin, Hao, Xudong, qemu-arm@nongnu.org
>-----Original Message-----
>From: Liu, Yi L <yi.l.liu@intel.com>
>Subject: Re: [PATCH v1 02/13] iommufd: Extend attach/detach_hwpt callbacks to
>support pasid
>
>On 3/6/26 11:43, Zhenzhong Duan wrote:
>> Same for the two wrappers and their call sites.
>>
>> Suggested-by: Shameer Kolothum Thodi <skolothumtho@nvidia.com>
>> Suggested-by: Nicolin Chen <nicolinc@nvidia.com>
>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>> ---
>> include/system/iommufd.h | 16 +++++++++++-----
>> backends/iommufd.c | 9 +++++----
>> hw/arm/smmuv3-accel.c | 12 ++++++++----
>> hw/i386/intel_iommu_accel.c | 8 +++++---
>> hw/vfio/iommufd.c | 10 +++++-----
>> 5 files changed, 34 insertions(+), 21 deletions(-)
>>
>> diff --git a/include/system/iommufd.h b/include/system/iommufd.h
>> index 80d72469a9..80c8423654 100644
>> --- a/include/system/iommufd.h
>> +++ b/include/system/iommufd.h
>> @@ -124,14 +124,16 @@ struct HostIOMMUDeviceIOMMUFDClass {
>> *
>> * @idev: host IOMMU device backed by IOMMUFD backend.
>> *
>> + * @pasid: pasid of host IOMMU device.
>
>this comment is a bit misleading although I know what you mean. Perhaps
>"pasid of AS" or "target pasid of attach". Other parts LGTM.
Will use "target pasid of attach"
>
>Yi Liu <yi.l.liu@intel.com>
I guess you mean Reviewed-by: Yi Liu <yi.l.liu@intel.com>😊
Thanks
Zhenzhong
>
>> + *
>> * @hwpt_id: ID of IOMMUFD hardware page table.
>> *
>> * @errp: pass an Error out when attachment fails.
>> *
>> * Returns: true on success, false on failure.
>> */
>> - bool (*attach_hwpt)(HostIOMMUDeviceIOMMUFD *idev, uint32_t hwpt_id,
>> - Error **errp);
>> + bool (*attach_hwpt)(HostIOMMUDeviceIOMMUFD *idev, uint32_t pasid,
>> + uint32_t hwpt_id, Error **errp);
>> /**
>> * @detach_hwpt: detach host IOMMU device from IOMMUFD hardware
>page table.
>> * VFIO and VDPA device can have different implementation.
>> @@ -140,15 +142,19 @@ struct HostIOMMUDeviceIOMMUFDClass {
>> *
>> * @idev: host IOMMU device backed by IOMMUFD backend.
>> *
>> + * @pasid: pasid of host IOMMU device.
>> + *
>> * @errp: pass an Error out when attachment fails.
>> *
>> * Returns: true on success, false on failure.
>> */
>> - bool (*detach_hwpt)(HostIOMMUDeviceIOMMUFD *idev, Error **errp);
>> + bool (*detach_hwpt)(HostIOMMUDeviceIOMMUFD *idev, uint32_t pasid,
>> + Error **errp);
>> };
>>
>> bool
>host_iommu_device_iommufd_attach_hwpt(HostIOMMUDeviceIOMMUFD *idev,
>> - uint32_t hwpt_id, Error **errp);
>> -bool
>host_iommu_device_iommufd_detach_hwpt(HostIOMMUDeviceIOMMUFD *idev,
>> + uint32_t pasid, uint32_t hwpt_id,
>> Error **errp);
>> +bool
>host_iommu_device_iommufd_detach_hwpt(HostIOMMUDeviceIOMMUFD *idev,
>> + uint32_t pasid, Error **errp);
>> #endif
>> diff --git a/backends/iommufd.c b/backends/iommufd.c
>> index 13822df82f..153edf4f79 100644
>> --- a/backends/iommufd.c
>> +++ b/backends/iommufd.c
>> @@ -505,23 +505,24 @@ bool
>iommufd_backend_alloc_vdev(IOMMUFDBackend *be, uint32_t dev_id,
>> }
>>
>> bool
>host_iommu_device_iommufd_attach_hwpt(HostIOMMUDeviceIOMMUFD *idev,
>> - uint32_t hwpt_id, Error **errp)
>> + uint32_t pasid, uint32_t hwpt_id,
>> + Error **errp)
>> {
>> HostIOMMUDeviceIOMMUFDClass *idevc =
>> HOST_IOMMU_DEVICE_IOMMUFD_GET_CLASS(idev);
>>
>> g_assert(idevc->attach_hwpt);
>> - return idevc->attach_hwpt(idev, hwpt_id, errp);
>> + return idevc->attach_hwpt(idev, pasid, hwpt_id, errp);
>> }
>>
>> bool
>host_iommu_device_iommufd_detach_hwpt(HostIOMMUDeviceIOMMUFD *idev,
>> - Error **errp)
>> + uint32_t pasid, Error **errp)
>> {
>> HostIOMMUDeviceIOMMUFDClass *idevc =
>> HOST_IOMMU_DEVICE_IOMMUFD_GET_CLASS(idev);
>>
>> g_assert(idevc->detach_hwpt);
>> - return idevc->detach_hwpt(idev, errp);
>> + return idevc->detach_hwpt(idev, pasid, errp);
>> }
>>
>> static int hiod_iommufd_get_cap(HostIOMMUDevice *hiod, int cap, Error
>**errp)
>> diff --git a/hw/arm/smmuv3-accel.c b/hw/arm/smmuv3-accel.c
>> index f5cd4df336..6b04344959 100644
>> --- a/hw/arm/smmuv3-accel.c
>> +++ b/hw/arm/smmuv3-accel.c
>> @@ -294,7 +294,8 @@ bool smmuv3_accel_install_ste(SMMUv3State *s,
>SMMUDevice *sdev, int sid,
>> return false;
>> }
>>
>> - if (!host_iommu_device_iommufd_attach_hwpt(idev, hwpt_id, errp)) {
>> + if (!host_iommu_device_iommufd_attach_hwpt(idev, IOMMU_NO_PASID,
>hwpt_id,
>> + errp)) {
>> if (s1_hwpt) {
>> iommufd_backend_free_id(idev->iommufd, s1_hwpt->hwpt_id);
>> g_free(s1_hwpt);
>> @@ -436,7 +437,8 @@ smmuv3_accel_alloc_viommu(SMMUv3State *s,
>HostIOMMUDeviceIOMMUFD *idev,
>>
>> /* Attach a HWPT based on SMMUv3 GBPA.ABORT value */
>> hwpt_id = smmuv3_accel_gbpa_hwpt(s, accel);
>> - if (!host_iommu_device_iommufd_attach_hwpt(idev, hwpt_id, errp)) {
>> + if (!host_iommu_device_iommufd_attach_hwpt(idev, IOMMU_NO_PASID,
>hwpt_id,
>> + errp)) {
>> goto free_bypass_hwpt;
>> }
>> accel->viommu = viommu;
>> @@ -524,7 +526,8 @@ static void smmuv3_accel_unset_iommu_device(PCIBus
>*bus, void *opaque,
>> idev = accel_dev->idev;
>> accel = accel_dev->s_accel;
>> /* Re-attach the default s2 hwpt id */
>> - if (!host_iommu_device_iommufd_attach_hwpt(idev, idev->hwpt_id, NULL)) {
>> + if (!host_iommu_device_iommufd_attach_hwpt(idev, IOMMU_NO_PASID,
>> + idev->hwpt_id, NULL)) {
>> error_report("Unable to attach the default HW pagetable: idev devid "
>> "0x%x", idev->devid);
>> }
>> @@ -720,7 +723,8 @@ bool smmuv3_accel_attach_gbpa_hwpt(SMMUv3State
>*s, Error **errp)
>>
>> hwpt_id = smmuv3_accel_gbpa_hwpt(s, accel);
>> QLIST_FOREACH(accel_dev, &accel->device_list, next) {
>> - if (!host_iommu_device_iommufd_attach_hwpt(accel_dev->idev, hwpt_id,
>> + if (!host_iommu_device_iommufd_attach_hwpt(accel_dev->idev,
>> + IOMMU_NO_PASID, hwpt_id,
>> &local_err)) {
>> error_append_hint(&local_err, "Failed to attach GBPA hwpt %u for "
>> "idev devid %u", hwpt_id, accel_dev->idev->devid);
>> diff --git a/hw/i386/intel_iommu_accel.c b/hw/i386/intel_iommu_accel.c
>> index 67d54849f2..45c08c8f6f 100644
>> --- a/hw/i386/intel_iommu_accel.c
>> +++ b/hw/i386/intel_iommu_accel.c
>> @@ -121,7 +121,8 @@ static bool
>vtd_device_attach_iommufd(VTDHostIOMMUDevice *vtd_hiod,
>> }
>> }
>>
>> - ret = host_iommu_device_iommufd_attach_hwpt(idev, hwpt_id, errp);
>> + ret = host_iommu_device_iommufd_attach_hwpt(idev, IOMMU_NO_PASID,
>hwpt_id,
>> + errp);
>> trace_vtd_device_attach_hwpt(idev->devid, vtd_as->pasid, hwpt_id, ret);
>> if (ret) {
>> /* Destroy old fs_hwpt if it's a replacement */
>> @@ -145,7 +146,7 @@ static bool
>vtd_device_detach_iommufd(VTDHostIOMMUDevice *vtd_hiod,
>> bool ret;
>>
>> if (s->dmar_enabled && s->root_scalable) {
>> - ret = host_iommu_device_iommufd_detach_hwpt(idev, errp);
>> + ret = host_iommu_device_iommufd_detach_hwpt(idev,
>IOMMU_NO_PASID, errp);
>> trace_vtd_device_detach_hwpt(idev->devid, pasid, ret);
>> } else {
>> /*
>> @@ -153,7 +154,8 @@ static bool
>vtd_device_detach_iommufd(VTDHostIOMMUDevice *vtd_hiod,
>> * we fallback to the default HWPT which contains shadow page table.
>> * So guest DMA could still work.
>> */
>> - ret = host_iommu_device_iommufd_attach_hwpt(idev, idev->hwpt_id,
>errp);
>> + ret = host_iommu_device_iommufd_attach_hwpt(idev,
>IOMMU_NO_PASID,
>> + idev->hwpt_id, errp);
>> trace_vtd_device_reattach_def_hwpt(idev->devid, pasid, idev->hwpt_id,
>> ret);
>> }
>> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
>> index b4c5e81b1d..005f97fe25 100644
>> --- a/hw/vfio/iommufd.c
>> +++ b/hw/vfio/iommufd.c
>> @@ -924,21 +924,21 @@ static void
>vfio_iommu_iommufd_class_init(ObjectClass *klass, const void *data)
>>
>> static bool
>>
>host_iommu_device_iommufd_vfio_attach_hwpt(HostIOMMUDeviceIOMMUFD
>*idev,
>> - uint32_t hwpt_id, Error **errp)
>> + uint32_t pasid, uint32_t hwpt_id,
>> + Error **errp)
>> {
>> VFIODevice *vbasedev = HOST_IOMMU_DEVICE(idev)->agent;
>>
>> - return !iommufd_cdev_pasid_attach_ioas_hwpt(vbasedev,
>IOMMU_NO_PASID,
>> - hwpt_id, errp);
>> + return !iommufd_cdev_pasid_attach_ioas_hwpt(vbasedev, pasid, hwpt_id,
>errp);
>> }
>>
>> static bool
>>
>host_iommu_device_iommufd_vfio_detach_hwpt(HostIOMMUDeviceIOMMUFD
>*idev,
>> - Error **errp)
>> + uint32_t pasid, Error **errp)
>> {
>> VFIODevice *vbasedev = HOST_IOMMU_DEVICE(idev)->agent;
>>
>> - return iommufd_cdev_pasid_detach_ioas_hwpt(vbasedev,
>IOMMU_NO_PASID, errp);
>> + return iommufd_cdev_pasid_detach_ioas_hwpt(vbasedev, pasid, errp);
>> }
>>
>> static bool hiod_iommufd_vfio_realize(HostIOMMUDevice *hiod, void *opaque,
^ permalink raw reply [flat|nested] 45+ messages in thread
* [PATCH v1 03/13] vfio/iommufd: Create nesting parent hwpt with IOMMU_HWPT_ALLOC_PASID flag
2026-03-06 3:43 [PATCH v1 00/13] intel_iommu: Enable PASID support for passthrough device Zhenzhong Duan
2026-03-06 3:43 ` [PATCH v1 01/13] vfio/iommufd: Extend attach/detach_hwpt callback implementations with pasid Zhenzhong Duan
2026-03-06 3:43 ` [PATCH v1 02/13] iommufd: Extend attach/detach_hwpt callbacks to support pasid Zhenzhong Duan
@ 2026-03-06 3:43 ` Zhenzhong Duan
2026-03-18 12:15 ` Yi Liu
2026-03-06 3:43 ` [PATCH v1 04/13] intel_iommu: Create the nested " Zhenzhong Duan
` (9 subsequent siblings)
12 siblings, 1 reply; 45+ messages in thread
From: Zhenzhong Duan @ 2026-03-06 3:43 UTC (permalink / raw)
To: qemu-devel
Cc: alex, clg, eric.auger, mst, jasowang, jgg, nicolinc, skolothumtho,
joao.m.martins, clement.mathieu--drif, kevin.tian, yi.l.liu,
xudong.hao, Zhenzhong Duan
When both host device and vIOMMU have PASID enabled, the guest may
set up PASID-attached translation.
We need to create the nesting parent hwpt with the IOMMU_HWPT_ALLOC_PASID
flag because, according to the uAPI, any domain attached to the non-PASID
part of the device must also be flagged; otherwise attaching a PASID
will be blocked.
Introduce a vfio_device_get_viommu_flags_pasid_supported() helper to
facilitate this implementation.
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
---
include/hw/vfio/vfio-device.h | 1 +
hw/vfio/device.c | 11 +++++++++++
hw/vfio/iommufd.c | 8 +++++++-
3 files changed, 19 insertions(+), 1 deletion(-)
diff --git a/include/hw/vfio/vfio-device.h b/include/hw/vfio/vfio-device.h
index 828a31c006..dd0355eb3d 100644
--- a/include/hw/vfio/vfio-device.h
+++ b/include/hw/vfio/vfio-device.h
@@ -268,6 +268,7 @@ void vfio_device_prepare(VFIODevice *vbasedev, VFIOContainer *bcontainer,
void vfio_device_unprepare(VFIODevice *vbasedev);
bool vfio_device_get_viommu_flags_want_nesting(VFIODevice *vbasedev);
+bool vfio_device_get_viommu_flags_pasid_supported(VFIODevice *vbasedev);
bool vfio_device_get_host_iommu_quirk_bypass_ro(VFIODevice *vbasedev,
uint32_t type, void *caps,
uint32_t size);
diff --git a/hw/vfio/device.c b/hw/vfio/device.c
index 973fc35b59..b15ca6ef0a 100644
--- a/hw/vfio/device.c
+++ b/hw/vfio/device.c
@@ -533,6 +533,17 @@ bool vfio_device_get_viommu_flags_want_nesting(VFIODevice *vbasedev)
return false;
}
+bool vfio_device_get_viommu_flags_pasid_supported(VFIODevice *vbasedev)
+{
+ VFIOPCIDevice *vdev = vfio_pci_from_vfio_device(vbasedev);
+
+ if (vdev) {
+ return !!(pci_device_get_viommu_flags(PCI_DEVICE(vdev)) &
+ VIOMMU_FLAG_PASID_SUPPORTED);
+ }
+ return false;
+}
+
bool vfio_device_get_host_iommu_quirk_bypass_ro(VFIODevice *vbasedev,
uint32_t type, void *caps,
uint32_t size)
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
index 005f97fe25..c408f9151b 100644
--- a/hw/vfio/iommufd.c
+++ b/hw/vfio/iommufd.c
@@ -360,6 +360,7 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
VendorCaps caps;
VFIOIOASHwpt *hwpt;
uint32_t hwpt_id;
+ uint8_t max_pasid_log2 = 0;
int ret;
/* Try to find a domain */
@@ -405,7 +406,7 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
*/
if (!iommufd_backend_get_device_info(vbasedev->iommufd, vbasedev->devid,
&type, &caps, sizeof(caps), &hw_caps,
- NULL, errp)) {
+ &max_pasid_log2, errp)) {
return false;
}
@@ -427,6 +428,11 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
}
}
+ if (max_pasid_log2 &&
+ vfio_device_get_viommu_flags_pasid_supported(vbasedev)) {
+ flags |= IOMMU_HWPT_ALLOC_PASID;
+ }
+
if (cpr_is_incoming()) {
hwpt_id = vbasedev->cpr.hwpt_id;
goto skip_alloc;
--
2.47.3
^ permalink raw reply related [flat|nested] 45+ messages in thread
* Re: [PATCH v1 03/13] vfio/iommufd: Create nesting parent hwpt with IOMMU_HWPT_ALLOC_PASID flag
2026-03-06 3:43 ` [PATCH v1 03/13] vfio/iommufd: Create nesting parent hwpt with IOMMU_HWPT_ALLOC_PASID flag Zhenzhong Duan
@ 2026-03-18 12:15 ` Yi Liu
2026-03-19 7:54 ` Duan, Zhenzhong
0 siblings, 1 reply; 45+ messages in thread
From: Yi Liu @ 2026-03-18 12:15 UTC (permalink / raw)
To: Zhenzhong Duan, qemu-devel
Cc: alex, clg, eric.auger, mst, jasowang, jgg, nicolinc, skolothumtho,
joao.m.martins, clement.mathieu--drif, kevin.tian, xudong.hao
On 3/6/26 11:43, Zhenzhong Duan wrote:
> When both host device and vIOMMU have PASID enabled, then guest may
> setup pasid attached translation.
Better to say "When both device and vIOMMU". A host device having PASID enabled
does not mean QEMU will necessarily report the PASID PCI ecap to the guest. :)
> We need to create the nesting parent hwpt with IOMMU_HWPT_ALLOC_PASID
> flag because according to uAPI, any domain attached to the non-PASID
> part of the device must also be flagged, otherwise attaching a PASID
> will blocked.
This makes me wonder why the nesting parent hwpt is special here.
I think you can just call out that vfio needs to be aware of potential pasid
usage and should attach the non-pasid part of a pasid-capable device to
an hwpt flagged with IOMMU_HWPT_ALLOC_PASID.
Code change LGTM.
Reviewed-by: Yi Liu <yi.l.liu@intel.com>
> Introduce a vfio_device_get_viommu_flags_pasid_supported() helper to
> facilitate this implementation.
>
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> ---
> include/hw/vfio/vfio-device.h | 1 +
> hw/vfio/device.c | 11 +++++++++++
> hw/vfio/iommufd.c | 8 +++++++-
> 3 files changed, 19 insertions(+), 1 deletion(-)
>
> diff --git a/include/hw/vfio/vfio-device.h b/include/hw/vfio/vfio-device.h
> index 828a31c006..dd0355eb3d 100644
> --- a/include/hw/vfio/vfio-device.h
> +++ b/include/hw/vfio/vfio-device.h
> @@ -268,6 +268,7 @@ void vfio_device_prepare(VFIODevice *vbasedev, VFIOContainer *bcontainer,
> void vfio_device_unprepare(VFIODevice *vbasedev);
>
> bool vfio_device_get_viommu_flags_want_nesting(VFIODevice *vbasedev);
> +bool vfio_device_get_viommu_flags_pasid_supported(VFIODevice *vbasedev);
> bool vfio_device_get_host_iommu_quirk_bypass_ro(VFIODevice *vbasedev,
> uint32_t type, void *caps,
> uint32_t size);
> diff --git a/hw/vfio/device.c b/hw/vfio/device.c
> index 973fc35b59..b15ca6ef0a 100644
> --- a/hw/vfio/device.c
> +++ b/hw/vfio/device.c
> @@ -533,6 +533,17 @@ bool vfio_device_get_viommu_flags_want_nesting(VFIODevice *vbasedev)
> return false;
> }
>
> +bool vfio_device_get_viommu_flags_pasid_supported(VFIODevice *vbasedev)
> +{
> + VFIOPCIDevice *vdev = vfio_pci_from_vfio_device(vbasedev);
> +
> + if (vdev) {
> + return !!(pci_device_get_viommu_flags(PCI_DEVICE(vdev)) &
> + VIOMMU_FLAG_PASID_SUPPORTED);
> + }
> + return false;
> +}
> +
> bool vfio_device_get_host_iommu_quirk_bypass_ro(VFIODevice *vbasedev,
> uint32_t type, void *caps,
> uint32_t size)
> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
> index 005f97fe25..c408f9151b 100644
> --- a/hw/vfio/iommufd.c
> +++ b/hw/vfio/iommufd.c
> @@ -360,6 +360,7 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
> VendorCaps caps;
> VFIOIOASHwpt *hwpt;
> uint32_t hwpt_id;
> + uint8_t max_pasid_log2 = 0;
> int ret;
>
> /* Try to find a domain */
> @@ -405,7 +406,7 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
> */
> if (!iommufd_backend_get_device_info(vbasedev->iommufd, vbasedev->devid,
> &type, &caps, sizeof(caps), &hw_caps,
> - NULL, errp)) {
> + &max_pasid_log2, errp)) {
> return false;
> }
>
> @@ -427,6 +428,11 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
> }
> }
>
> + if (max_pasid_log2 &&
> + vfio_device_get_viommu_flags_pasid_supported(vbasedev)) {
> + flags |= IOMMU_HWPT_ALLOC_PASID;
> + }
> +
> if (cpr_is_incoming()) {
> hwpt_id = vbasedev->cpr.hwpt_id;
> goto skip_alloc;
^ permalink raw reply [flat|nested] 45+ messages in thread
* RE: [PATCH v1 03/13] vfio/iommufd: Create nesting parent hwpt with IOMMU_HWPT_ALLOC_PASID flag
2026-03-18 12:15 ` Yi Liu
@ 2026-03-19 7:54 ` Duan, Zhenzhong
0 siblings, 0 replies; 45+ messages in thread
From: Duan, Zhenzhong @ 2026-03-19 7:54 UTC (permalink / raw)
To: Liu, Yi L, qemu-devel@nongnu.org
Cc: alex@shazbot.org, clg@redhat.com, eric.auger@redhat.com,
mst@redhat.com, jasowang@redhat.com, jgg@nvidia.com,
nicolinc@nvidia.com, skolothumtho@nvidia.com,
joao.m.martins@oracle.com, clement.mathieu--drif@eviden.com,
Tian, Kevin, Hao, Xudong
>-----Original Message-----
>From: Liu, Yi L <yi.l.liu@intel.com>
>Subject: Re: [PATCH v1 03/13] vfio/iommufd: Create nesting parent hwpt with
>IOMMU_HWPT_ALLOC_PASID flag
>
>On 3/6/26 11:43, Zhenzhong Duan wrote:
>> When both host device and vIOMMU have PASID enabled, then guest may
>> setup pasid attached translation.
>
>Better be "When both both device and vIOMMU". host device enabled pasid
>does not mean QEMU will for sure report pasid PCI ecap to guest. :)
Sure.
>
>> We need to create the nesting parent hwpt with IOMMU_HWPT_ALLOC_PASID
>> flag because according to uAPI, any domain attached to the non-PASID
>> part of the device must also be flagged, otherwise attaching a PASID
>> will blocked.
>
>This may make me thinking about why nested parent hwpt is special here.
>I think you just call out vfio needs to be aware of potential pasid
>usage and should attach the non-pasid part of pasid-capable device to
>hwpt flagged with IOMMU_HWPT_ALLOC_PASID.
Exactly.
Thanks
Zhenzhong
>
>Code change LGTM.
>
>Reviewed-by: Yi Liu <yi.l.liu@intel.com>
>
>> Introduce a vfio_device_get_viommu_flags_pasid_supported() helper to
>> facilitate this implementation.
>>
>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>> ---
>> include/hw/vfio/vfio-device.h | 1 +
>> hw/vfio/device.c | 11 +++++++++++
>> hw/vfio/iommufd.c | 8 +++++++-
>> 3 files changed, 19 insertions(+), 1 deletion(-)
>>
>> diff --git a/include/hw/vfio/vfio-device.h b/include/hw/vfio/vfio-device.h
>> index 828a31c006..dd0355eb3d 100644
>> --- a/include/hw/vfio/vfio-device.h
>> +++ b/include/hw/vfio/vfio-device.h
>> @@ -268,6 +268,7 @@ void vfio_device_prepare(VFIODevice *vbasedev,
>VFIOContainer *bcontainer,
>> void vfio_device_unprepare(VFIODevice *vbasedev);
>>
>> bool vfio_device_get_viommu_flags_want_nesting(VFIODevice *vbasedev);
>> +bool vfio_device_get_viommu_flags_pasid_supported(VFIODevice *vbasedev);
>> bool vfio_device_get_host_iommu_quirk_bypass_ro(VFIODevice *vbasedev,
>> uint32_t type, void *caps,
>> uint32_t size);
>> diff --git a/hw/vfio/device.c b/hw/vfio/device.c
>> index 973fc35b59..b15ca6ef0a 100644
>> --- a/hw/vfio/device.c
>> +++ b/hw/vfio/device.c
>> @@ -533,6 +533,17 @@ bool
>vfio_device_get_viommu_flags_want_nesting(VFIODevice *vbasedev)
>> return false;
>> }
>>
>> +bool vfio_device_get_viommu_flags_pasid_supported(VFIODevice *vbasedev)
>> +{
>> + VFIOPCIDevice *vdev = vfio_pci_from_vfio_device(vbasedev);
>> +
>> + if (vdev) {
>> + return !!(pci_device_get_viommu_flags(PCI_DEVICE(vdev)) &
>> + VIOMMU_FLAG_PASID_SUPPORTED);
>> + }
>> + return false;
>> +}
>> +
>> bool vfio_device_get_host_iommu_quirk_bypass_ro(VFIODevice *vbasedev,
>> uint32_t type, void *caps,
>> uint32_t size)
>> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
>> index 005f97fe25..c408f9151b 100644
>> --- a/hw/vfio/iommufd.c
>> +++ b/hw/vfio/iommufd.c
>> @@ -360,6 +360,7 @@ static bool
>iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
>> VendorCaps caps;
>> VFIOIOASHwpt *hwpt;
>> uint32_t hwpt_id;
>> + uint8_t max_pasid_log2 = 0;
>> int ret;
>>
>> /* Try to find a domain */
>> @@ -405,7 +406,7 @@ static bool
>iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
>> */
>> if (!iommufd_backend_get_device_info(vbasedev->iommufd, vbasedev-
>>devid,
>> &type, &caps, sizeof(caps), &hw_caps,
>> - NULL, errp)) {
>> + &max_pasid_log2, errp)) {
>> return false;
>> }
>>
>> @@ -427,6 +428,11 @@ static bool
>iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
>> }
>> }
>>
>> + if (max_pasid_log2 &&
>> + vfio_device_get_viommu_flags_pasid_supported(vbasedev)) {
>> + flags |= IOMMU_HWPT_ALLOC_PASID;
>> + }
>> +
>> if (cpr_is_incoming()) {
>> hwpt_id = vbasedev->cpr.hwpt_id;
>> goto skip_alloc;
^ permalink raw reply [flat|nested] 45+ messages in thread
* [PATCH v1 04/13] intel_iommu: Create the nested hwpt with IOMMU_HWPT_ALLOC_PASID flag
2026-03-06 3:43 [PATCH v1 00/13] intel_iommu: Enable PASID support for passthrough device Zhenzhong Duan
` (2 preceding siblings ...)
2026-03-06 3:43 ` [PATCH v1 03/13] vfio/iommufd: Create nesting parent hwpt with IOMMU_HWPT_ALLOC_PASID flag Zhenzhong Duan
@ 2026-03-06 3:43 ` Zhenzhong Duan
2026-03-06 7:27 ` CLEMENT MATHIEU--DRIF
2026-03-18 12:18 ` Yi Liu
2026-03-06 3:43 ` [PATCH v1 05/13] intel_iommu: Change pasid property from bool to uint8 Zhenzhong Duan
` (8 subsequent siblings)
12 siblings, 2 replies; 45+ messages in thread
From: Zhenzhong Duan @ 2026-03-06 3:43 UTC (permalink / raw)
To: qemu-devel
Cc: alex, clg, eric.auger, mst, jasowang, jgg, nicolinc, skolothumtho,
joao.m.martins, clement.mathieu--drif, kevin.tian, yi.l.liu,
xudong.hao, Zhenzhong Duan
When pasid is enabled, any hwpt attached to non-PASID or PASID should be
IOMMU_HWPT_ALLOC_PASID flagged, or else attachment fails.
Change vtd_destroy_old_fs_hwpt() to pass in 'VTDHostIOMMUDevice *' for
naming consistency.
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
---
hw/i386/intel_iommu_accel.c | 18 +++++++++++-------
1 file changed, 11 insertions(+), 7 deletions(-)
diff --git a/hw/i386/intel_iommu_accel.c b/hw/i386/intel_iommu_accel.c
index 45c08c8f6f..c2757f3bcd 100644
--- a/hw/i386/intel_iommu_accel.c
+++ b/hw/i386/intel_iommu_accel.c
@@ -69,11 +69,13 @@ VTDHostIOMMUDevice *vtd_find_hiod_iommufd(VTDAddressSpace *as)
return NULL;
}
-static bool vtd_create_fs_hwpt(HostIOMMUDeviceIOMMUFD *idev,
+static bool vtd_create_fs_hwpt(VTDHostIOMMUDevice *vtd_hiod,
VTDPASIDEntry *pe, uint32_t *fs_hwpt_id,
Error **errp)
{
+ HostIOMMUDeviceIOMMUFD *idev = HOST_IOMMU_DEVICE_IOMMUFD(vtd_hiod->hiod);
struct iommu_hwpt_vtd_s1 vtd = {};
+ uint32_t flags = vtd_hiod->iommu_state->pasid ? IOMMU_HWPT_ALLOC_PASID : 0;
vtd.flags = (VTD_SM_PASID_ENTRY_SRE(pe) ? IOMMU_VTD_S1_SRE : 0) |
(VTD_SM_PASID_ENTRY_WPE(pe) ? IOMMU_VTD_S1_WPE : 0) |
@@ -82,13 +84,15 @@ static bool vtd_create_fs_hwpt(HostIOMMUDeviceIOMMUFD *idev,
vtd.pgtbl_addr = (uint64_t)vtd_pe_get_fspt_base(pe);
return iommufd_backend_alloc_hwpt(idev->iommufd, idev->devid, idev->hwpt_id,
- 0, IOMMU_HWPT_DATA_VTD_S1, sizeof(vtd),
- &vtd, fs_hwpt_id, errp);
+ flags, IOMMU_HWPT_DATA_VTD_S1,
+ sizeof(vtd), &vtd, fs_hwpt_id, errp);
}
-static void vtd_destroy_old_fs_hwpt(HostIOMMUDeviceIOMMUFD *idev,
+static void vtd_destroy_old_fs_hwpt(VTDHostIOMMUDevice *vtd_hiod,
VTDAddressSpace *vtd_as)
{
+ HostIOMMUDeviceIOMMUFD *idev = HOST_IOMMU_DEVICE_IOMMUFD(vtd_hiod->hiod);
+
if (!vtd_as->fs_hwpt_id) {
return;
}
@@ -116,7 +120,7 @@ static bool vtd_device_attach_iommufd(VTDHostIOMMUDevice *vtd_hiod,
}
if (vtd_pe_pgtt_is_fst(pe)) {
- if (!vtd_create_fs_hwpt(idev, pe, &hwpt_id, errp)) {
+ if (!vtd_create_fs_hwpt(vtd_hiod, pe, &hwpt_id, errp)) {
return false;
}
}
@@ -126,7 +130,7 @@ static bool vtd_device_attach_iommufd(VTDHostIOMMUDevice *vtd_hiod,
trace_vtd_device_attach_hwpt(idev->devid, vtd_as->pasid, hwpt_id, ret);
if (ret) {
/* Destroy old fs_hwpt if it's a replacement */
- vtd_destroy_old_fs_hwpt(idev, vtd_as);
+ vtd_destroy_old_fs_hwpt(vtd_hiod, vtd_as);
if (vtd_pe_pgtt_is_fst(pe)) {
vtd_as->fs_hwpt_id = hwpt_id;
}
@@ -161,7 +165,7 @@ static bool vtd_device_detach_iommufd(VTDHostIOMMUDevice *vtd_hiod,
}
if (ret) {
- vtd_destroy_old_fs_hwpt(idev, vtd_as);
+ vtd_destroy_old_fs_hwpt(vtd_hiod, vtd_as);
}
return ret;
--
2.47.3
^ permalink raw reply related [flat|nested] 45+ messages in thread
* Re: [PATCH v1 04/13] intel_iommu: Create the nested hwpt with IOMMU_HWPT_ALLOC_PASID flag
2026-03-06 3:43 ` [PATCH v1 04/13] intel_iommu: Create the nested " Zhenzhong Duan
@ 2026-03-06 7:27 ` CLEMENT MATHIEU--DRIF
2026-03-18 12:18 ` Yi Liu
1 sibling, 0 replies; 45+ messages in thread
From: CLEMENT MATHIEU--DRIF @ 2026-03-06 7:27 UTC (permalink / raw)
To: Zhenzhong Duan, qemu-devel@nongnu.org
Cc: alex@shazbot.org, clg@redhat.com, eric.auger@redhat.com,
mst@redhat.com, jasowang@redhat.com, jgg@nvidia.com,
nicolinc@nvidia.com, skolothumtho@nvidia.com,
joao.m.martins@oracle.com, kevin.tian@intel.com,
yi.l.liu@intel.com, xudong.hao@intel.com
Hi Zhenzhong
Reviewed-by: Clement Mathieu--Drif <clement.mathieu--drif@bull.com>
On Thu, 2026-03-05 at 22:43 -0500, Zhenzhong Duan wrote:
> When pasid is enabled, any hwpt attached to non-PASID or PASID should be
> IOMMU_HWPT_ALLOC_PASID flagged, or else attachment fails.
>
> Change vtd_destroy_old_fs_hwpt() to pass in 'VTDHostIOMMUDevice *' for
> naming consistency.
>
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> ---
> hw/i386/intel_iommu_accel.c | 18 +++++++++++-------
> 1 file changed, 11 insertions(+), 7 deletions(-)
>
> diff --git a/hw/i386/intel_iommu_accel.c b/hw/i386/intel_iommu_accel.c
> index 45c08c8f6f..c2757f3bcd 100644
> --- a/hw/i386/intel_iommu_accel.c
> +++ b/hw/i386/intel_iommu_accel.c
> @@ -69,11 +69,13 @@ VTDHostIOMMUDevice *vtd_find_hiod_iommufd(VTDAddressSpace *as)
> return NULL;
> }
>
> -static bool vtd_create_fs_hwpt(HostIOMMUDeviceIOMMUFD *idev,
> +static bool vtd_create_fs_hwpt(VTDHostIOMMUDevice *vtd_hiod,
> VTDPASIDEntry *pe, uint32_t *fs_hwpt_id,
> Error **errp)
> {
> + HostIOMMUDeviceIOMMUFD *idev = HOST_IOMMU_DEVICE_IOMMUFD(vtd_hiod->hiod);
> struct iommu_hwpt_vtd_s1 vtd = {};
> + uint32_t flags = vtd_hiod->iommu_state->pasid ? IOMMU_HWPT_ALLOC_PASID : 0;
>
> vtd.flags = (VTD_SM_PASID_ENTRY_SRE(pe) ? IOMMU_VTD_S1_SRE : 0) |
> (VTD_SM_PASID_ENTRY_WPE(pe) ? IOMMU_VTD_S1_WPE : 0) |
> @@ -82,13 +84,15 @@ static bool vtd_create_fs_hwpt(HostIOMMUDeviceIOMMUFD *idev,
> vtd.pgtbl_addr = (uint64_t)vtd_pe_get_fspt_base(pe);
>
> return iommufd_backend_alloc_hwpt(idev->iommufd, idev->devid, idev->hwpt_id,
> - 0, IOMMU_HWPT_DATA_VTD_S1, sizeof(vtd),
> - &vtd, fs_hwpt_id, errp);
> + flags, IOMMU_HWPT_DATA_VTD_S1,
> + sizeof(vtd), &vtd, fs_hwpt_id, errp);
> }
>
> -static void vtd_destroy_old_fs_hwpt(HostIOMMUDeviceIOMMUFD *idev,
> +static void vtd_destroy_old_fs_hwpt(VTDHostIOMMUDevice *vtd_hiod,
> VTDAddressSpace *vtd_as)
> {
> + HostIOMMUDeviceIOMMUFD *idev = HOST_IOMMU_DEVICE_IOMMUFD(vtd_hiod->hiod);
> +
> if (!vtd_as->fs_hwpt_id) {
> return;
> }
> @@ -116,7 +120,7 @@ static bool vtd_device_attach_iommufd(VTDHostIOMMUDevice *vtd_hiod,
> }
>
> if (vtd_pe_pgtt_is_fst(pe)) {
> - if (!vtd_create_fs_hwpt(idev, pe, &hwpt_id, errp)) {
> + if (!vtd_create_fs_hwpt(vtd_hiod, pe, &hwpt_id, errp)) {
> return false;
> }
> }
> @@ -126,7 +130,7 @@ static bool vtd_device_attach_iommufd(VTDHostIOMMUDevice *vtd_hiod,
> trace_vtd_device_attach_hwpt(idev->devid, vtd_as->pasid, hwpt_id, ret);
> if (ret) {
> /* Destroy old fs_hwpt if it's a replacement */
> - vtd_destroy_old_fs_hwpt(idev, vtd_as);
> + vtd_destroy_old_fs_hwpt(vtd_hiod, vtd_as);
> if (vtd_pe_pgtt_is_fst(pe)) {
> vtd_as->fs_hwpt_id = hwpt_id;
> }
> @@ -161,7 +165,7 @@ static bool vtd_device_detach_iommufd(VTDHostIOMMUDevice *vtd_hiod,
> }
>
> if (ret) {
> - vtd_destroy_old_fs_hwpt(idev, vtd_as);
> + vtd_destroy_old_fs_hwpt(vtd_hiod, vtd_as);
> }
>
> return ret;
^ permalink raw reply [flat|nested] 45+ messages in thread
* Re: [PATCH v1 04/13] intel_iommu: Create the nested hwpt with IOMMU_HWPT_ALLOC_PASID flag
2026-03-06 3:43 ` [PATCH v1 04/13] intel_iommu: Create the nested " Zhenzhong Duan
2026-03-06 7:27 ` CLEMENT MATHIEU--DRIF
@ 2026-03-18 12:18 ` Yi Liu
1 sibling, 0 replies; 45+ messages in thread
From: Yi Liu @ 2026-03-18 12:18 UTC (permalink / raw)
To: Zhenzhong Duan, qemu-devel
Cc: alex, clg, eric.auger, mst, jasowang, jgg, nicolinc, skolothumtho,
joao.m.martins, clement.mathieu--drif, kevin.tian, xudong.hao
On 3/6/26 11:43, Zhenzhong Duan wrote:
> When pasid is enabled, any hwpt attached to non-PASID or PASID should be
> IOMMU_HWPT_ALLOC_PASID flagged, or else attachment fails.
>
> Change vtd_destroy_old_fs_hwpt() to pass in 'VTDHostIOMMUDevice *' for
> naming consistency.
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> ---
> hw/i386/intel_iommu_accel.c | 18 +++++++++++-------
> 1 file changed, 11 insertions(+), 7 deletions(-)
Reviewed-by: Yi Liu <yi.l.liu@intel.com>
> diff --git a/hw/i386/intel_iommu_accel.c b/hw/i386/intel_iommu_accel.c
> index 45c08c8f6f..c2757f3bcd 100644
> --- a/hw/i386/intel_iommu_accel.c
> +++ b/hw/i386/intel_iommu_accel.c
> @@ -69,11 +69,13 @@ VTDHostIOMMUDevice *vtd_find_hiod_iommufd(VTDAddressSpace *as)
> return NULL;
> }
>
> -static bool vtd_create_fs_hwpt(HostIOMMUDeviceIOMMUFD *idev,
> +static bool vtd_create_fs_hwpt(VTDHostIOMMUDevice *vtd_hiod,
> VTDPASIDEntry *pe, uint32_t *fs_hwpt_id,
> Error **errp)
> {
> + HostIOMMUDeviceIOMMUFD *idev = HOST_IOMMU_DEVICE_IOMMUFD(vtd_hiod->hiod);
> struct iommu_hwpt_vtd_s1 vtd = {};
> + uint32_t flags = vtd_hiod->iommu_state->pasid ? IOMMU_HWPT_ALLOC_PASID : 0;
>
> vtd.flags = (VTD_SM_PASID_ENTRY_SRE(pe) ? IOMMU_VTD_S1_SRE : 0) |
> (VTD_SM_PASID_ENTRY_WPE(pe) ? IOMMU_VTD_S1_WPE : 0) |
> @@ -82,13 +84,15 @@ static bool vtd_create_fs_hwpt(HostIOMMUDeviceIOMMUFD *idev,
> vtd.pgtbl_addr = (uint64_t)vtd_pe_get_fspt_base(pe);
>
> return iommufd_backend_alloc_hwpt(idev->iommufd, idev->devid, idev->hwpt_id,
> - 0, IOMMU_HWPT_DATA_VTD_S1, sizeof(vtd),
> - &vtd, fs_hwpt_id, errp);
> + flags, IOMMU_HWPT_DATA_VTD_S1,
> + sizeof(vtd), &vtd, fs_hwpt_id, errp);
> }
>
> -static void vtd_destroy_old_fs_hwpt(HostIOMMUDeviceIOMMUFD *idev,
> +static void vtd_destroy_old_fs_hwpt(VTDHostIOMMUDevice *vtd_hiod,
> VTDAddressSpace *vtd_as)
> {
> + HostIOMMUDeviceIOMMUFD *idev = HOST_IOMMU_DEVICE_IOMMUFD(vtd_hiod->hiod);
> +
> if (!vtd_as->fs_hwpt_id) {
> return;
> }
> @@ -116,7 +120,7 @@ static bool vtd_device_attach_iommufd(VTDHostIOMMUDevice *vtd_hiod,
> }
>
> if (vtd_pe_pgtt_is_fst(pe)) {
> - if (!vtd_create_fs_hwpt(idev, pe, &hwpt_id, errp)) {
> + if (!vtd_create_fs_hwpt(vtd_hiod, pe, &hwpt_id, errp)) {
> return false;
> }
> }
> @@ -126,7 +130,7 @@ static bool vtd_device_attach_iommufd(VTDHostIOMMUDevice *vtd_hiod,
> trace_vtd_device_attach_hwpt(idev->devid, vtd_as->pasid, hwpt_id, ret);
> if (ret) {
> /* Destroy old fs_hwpt if it's a replacement */
> - vtd_destroy_old_fs_hwpt(idev, vtd_as);
> + vtd_destroy_old_fs_hwpt(vtd_hiod, vtd_as);
> if (vtd_pe_pgtt_is_fst(pe)) {
> vtd_as->fs_hwpt_id = hwpt_id;
> }
> @@ -161,7 +165,7 @@ static bool vtd_device_detach_iommufd(VTDHostIOMMUDevice *vtd_hiod,
> }
>
> if (ret) {
> - vtd_destroy_old_fs_hwpt(idev, vtd_as);
> + vtd_destroy_old_fs_hwpt(vtd_hiod, vtd_as);
> }
>
> return ret;
^ permalink raw reply [flat|nested] 45+ messages in thread
* [PATCH v1 05/13] intel_iommu: Change pasid property from bool to uint8
2026-03-06 3:43 [PATCH v1 00/13] intel_iommu: Enable PASID support for passthrough device Zhenzhong Duan
` (3 preceding siblings ...)
2026-03-06 3:43 ` [PATCH v1 04/13] intel_iommu: Create the nested " Zhenzhong Duan
@ 2026-03-06 3:43 ` Zhenzhong Duan
2026-03-18 12:20 ` Yi Liu
2026-03-06 3:44 ` [PATCH v1 06/13] intel_iommu: Export some functions Zhenzhong Duan
` (7 subsequent siblings)
12 siblings, 1 reply; 45+ messages in thread
From: Zhenzhong Duan @ 2026-03-06 3:43 UTC (permalink / raw)
To: qemu-devel
Cc: alex, clg, eric.auger, mst, jasowang, jgg, nicolinc, skolothumtho,
joao.m.martins, clement.mathieu--drif, kevin.tian, yi.l.liu,
xudong.hao, Zhenzhong Duan
'x-pasid-mode' is a bool property, so an extra 'pss' property would be
needed to represent the PASID size supported. Because no device in QEMU
supports the PASID capability yet, no guest could use the pasid feature
until now, so 'x-pasid-mode' has had no effect.
So instead of adding an extra 'pss' property, we can use a single 'pasid'
property of uint8 type to represent both whether pasid is supported and
the PASID size in bits. A value of N > 0 means pasid is supported and
N - 1 is the value of the PSS field in the ECAP register.
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
---
hw/i386/intel_iommu_internal.h | 2 +-
include/hw/i386/intel_iommu.h | 2 +-
hw/i386/intel_iommu.c | 5 +++--
3 files changed, 5 insertions(+), 4 deletions(-)
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index 11a53aa369..db4f186a3e 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -195,7 +195,7 @@
#define VTD_ECAP_MHMV (15ULL << 20)
#define VTD_ECAP_SRS (1ULL << 31)
#define VTD_ECAP_NWFS (1ULL << 33)
-#define VTD_ECAP_PSS (7ULL << 35) /* limit: MemTxAttrs::pid */
+#define VTD_ECAP_SET_PSS(x, v) ((x)->ecap = deposit64((x)->ecap, 35, 5, v))
#define VTD_ECAP_PASID (1ULL << 40)
#define VTD_ECAP_PDS (1ULL << 42)
#define VTD_ECAP_SMTS (1ULL << 43)
diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
index 54c2b6b77a..bb957b93e0 100644
--- a/include/hw/i386/intel_iommu.h
+++ b/include/hw/i386/intel_iommu.h
@@ -315,7 +315,7 @@ struct IntelIOMMUState {
OnOffAuto intr_eim; /* Toggle for EIM cabability */
uint8_t aw_bits; /* Host/IOVA address width (in bits) */
bool dma_drain; /* Whether DMA r/w draining enabled */
- bool pasid; /* Whether to support PASID */
+ uint8_t pasid; /* PASID supported in bits, 0 if not */
bool fs1gp; /* First Stage 1-GByte Page Support */
/* Transient Mapping, Reserved(0) since VTD spec revision 3.2 */
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index d24ba989bf..e5b9689fae 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -4203,7 +4203,7 @@ static const Property vtd_properties[] = {
DEFINE_PROP_BOOL("x-scalable-mode", IntelIOMMUState, scalable_mode, FALSE),
DEFINE_PROP_BOOL("x-flts", IntelIOMMUState, fsts, FALSE),
DEFINE_PROP_BOOL("snoop-control", IntelIOMMUState, snoop_control, false),
- DEFINE_PROP_BOOL("x-pasid-mode", IntelIOMMUState, pasid, false),
+ DEFINE_PROP_UINT8("pasid", IntelIOMMUState, pasid, 0),
DEFINE_PROP_BOOL("svm", IntelIOMMUState, svm, false),
DEFINE_PROP_BOOL("dma-drain", IntelIOMMUState, dma_drain, true),
DEFINE_PROP_BOOL("stale-tm", IntelIOMMUState, stale_tm, false),
@@ -5046,7 +5046,8 @@ static void vtd_cap_init(IntelIOMMUState *s)
}
if (s->pasid) {
- s->ecap |= VTD_ECAP_PASID | VTD_ECAP_PSS;
+ VTD_ECAP_SET_PSS(s, s->pasid - 1);
+ s->ecap |= VTD_ECAP_PASID;
}
}
--
2.47.3
^ permalink raw reply related [flat|nested] 45+ messages in thread
* Re: [PATCH v1 05/13] intel_iommu: Change pasid property from bool to uint8
2026-03-06 3:43 ` [PATCH v1 05/13] intel_iommu: Change pasid property from bool to uint8 Zhenzhong Duan
@ 2026-03-18 12:20 ` Yi Liu
2026-03-19 8:08 ` Duan, Zhenzhong
0 siblings, 1 reply; 45+ messages in thread
From: Yi Liu @ 2026-03-18 12:20 UTC (permalink / raw)
To: Zhenzhong Duan, qemu-devel
Cc: alex, clg, eric.auger, mst, jasowang, jgg, nicolinc, skolothumtho,
joao.m.martins, clement.mathieu--drif, kevin.tian, xudong.hao
On 3/6/26 11:43, Zhenzhong Duan wrote:
> 'x-pasid-mode' is a bool property, we need an extra 'pss' property to
> represent PASID size supported. Because there is no any device in QEMU
> supporting pasid capability yet, no guest could use the pasid feature
> until now, 'x-pasid-mode' takes no effect.
>
> So instead of an extra 'pss' property we can use a single 'pasid'
> property of uint8 type to represent if pasid is supported and the PASID
> bits size. A value of N > 0 means pasid is supported and N - 1 is the
> value in PSS field in ECAP register.
>
should we keep the "x-" prefix since this is new pasid support?
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> ---
> hw/i386/intel_iommu_internal.h | 2 +-
> include/hw/i386/intel_iommu.h | 2 +-
> hw/i386/intel_iommu.c | 5 +++--
> 3 files changed, 5 insertions(+), 4 deletions(-)
>
> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
> index 11a53aa369..db4f186a3e 100644
> --- a/hw/i386/intel_iommu_internal.h
> +++ b/hw/i386/intel_iommu_internal.h
> @@ -195,7 +195,7 @@
> #define VTD_ECAP_MHMV (15ULL << 20)
> #define VTD_ECAP_SRS (1ULL << 31)
> #define VTD_ECAP_NWFS (1ULL << 33)
> -#define VTD_ECAP_PSS (7ULL << 35) /* limit: MemTxAttrs::pid */
> +#define VTD_ECAP_SET_PSS(x, v) ((x)->ecap = deposit64((x)->ecap, 35, 5, v))
> #define VTD_ECAP_PASID (1ULL << 40)
> #define VTD_ECAP_PDS (1ULL << 42)
> #define VTD_ECAP_SMTS (1ULL << 43)
> diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
> index 54c2b6b77a..bb957b93e0 100644
> --- a/include/hw/i386/intel_iommu.h
> +++ b/include/hw/i386/intel_iommu.h
> @@ -315,7 +315,7 @@ struct IntelIOMMUState {
> OnOffAuto intr_eim; /* Toggle for EIM cabability */
> uint8_t aw_bits; /* Host/IOVA address width (in bits) */
> bool dma_drain; /* Whether DMA r/w draining enabled */
> - bool pasid; /* Whether to support PASID */
> + uint8_t pasid; /* PASID supported in bits, 0 if not */
> bool fs1gp; /* First Stage 1-GByte Page Support */
>
> /* Transient Mapping, Reserved(0) since VTD spec revision 3.2 */
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index d24ba989bf..e5b9689fae 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -4203,7 +4203,7 @@ static const Property vtd_properties[] = {
> DEFINE_PROP_BOOL("x-scalable-mode", IntelIOMMUState, scalable_mode, FALSE),
> DEFINE_PROP_BOOL("x-flts", IntelIOMMUState, fsts, FALSE),
> DEFINE_PROP_BOOL("snoop-control", IntelIOMMUState, snoop_control, false),
> - DEFINE_PROP_BOOL("x-pasid-mode", IntelIOMMUState, pasid, false),
> + DEFINE_PROP_UINT8("pasid", IntelIOMMUState, pasid, 0),
> DEFINE_PROP_BOOL("svm", IntelIOMMUState, svm, false),
> DEFINE_PROP_BOOL("dma-drain", IntelIOMMUState, dma_drain, true),
> DEFINE_PROP_BOOL("stale-tm", IntelIOMMUState, stale_tm, false),
> @@ -5046,7 +5046,8 @@ static void vtd_cap_init(IntelIOMMUState *s)
> }
>
> if (s->pasid) {
> - s->ecap |= VTD_ECAP_PASID | VTD_ECAP_PSS;
> + VTD_ECAP_SET_PSS(s, s->pasid - 1);
> + s->ecap |= VTD_ECAP_PASID;
> }
> }
>
^ permalink raw reply [flat|nested] 45+ messages in thread
* RE: [PATCH v1 05/13] intel_iommu: Change pasid property from bool to uint8
2026-03-18 12:20 ` Yi Liu
@ 2026-03-19 8:08 ` Duan, Zhenzhong
0 siblings, 0 replies; 45+ messages in thread
From: Duan, Zhenzhong @ 2026-03-19 8:08 UTC (permalink / raw)
To: Liu, Yi L, qemu-devel@nongnu.org
Cc: alex@shazbot.org, clg@redhat.com, eric.auger@redhat.com,
mst@redhat.com, jasowang@redhat.com, jgg@nvidia.com,
nicolinc@nvidia.com, skolothumtho@nvidia.com,
joao.m.martins@oracle.com, clement.mathieu--drif@eviden.com,
Tian, Kevin, Hao, Xudong
>-----Original Message-----
>From: Liu, Yi L <yi.l.liu@intel.com>
>Subject: Re: [PATCH v1 05/13] intel_iommu: Change pasid property from bool to
>uint8
>
>On 3/6/26 11:43, Zhenzhong Duan wrote:
>> 'x-pasid-mode' is a bool property, we need an extra 'pss' property to
>> represent PASID size supported. Because there is no any device in QEMU
>> supporting pasid capability yet, no guest could use the pasid feature
>> until now, 'x-pasid-mode' takes no effect.
>>
>> So instead of an extra 'pss' property we can use a single 'pasid'
>> property of uint8 type to represent if pasid is supported and the PASID
>> bits size. A value of N > 0 means pasid is supported and N - 1 is the
>> value in PSS field in ECAP register.
>>
>
>should we keep the "x-" prefix since this is new pasid support?
Quoting Daniel's comment about the "x-" prefix:
"The purpose of using an 'x-' prefix for properties in QEMU is to declare
that they are subject to change with no warning, so we are free to change
them without any deprecation."
I think the uint8 type is large enough for the PASID size in bits for the foreseeable future.
So we will not need to change or extend the pasid property, and a plain 'pasid' property looks fine to me.
Thanks
Zhenzhong
>
>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>> ---
>> hw/i386/intel_iommu_internal.h | 2 +-
>> include/hw/i386/intel_iommu.h | 2 +-
>> hw/i386/intel_iommu.c | 5 +++--
>> 3 files changed, 5 insertions(+), 4 deletions(-)
>>
>> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
>> index 11a53aa369..db4f186a3e 100644
>> --- a/hw/i386/intel_iommu_internal.h
>> +++ b/hw/i386/intel_iommu_internal.h
>> @@ -195,7 +195,7 @@
>> #define VTD_ECAP_MHMV (15ULL << 20)
>> #define VTD_ECAP_SRS (1ULL << 31)
>> #define VTD_ECAP_NWFS (1ULL << 33)
>> -#define VTD_ECAP_PSS (7ULL << 35) /* limit: MemTxAttrs::pid */
>> +#define VTD_ECAP_SET_PSS(x, v) ((x)->ecap = deposit64((x)->ecap, 35, 5, v))
>> #define VTD_ECAP_PASID (1ULL << 40)
>> #define VTD_ECAP_PDS (1ULL << 42)
>> #define VTD_ECAP_SMTS (1ULL << 43)
>> diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
>> index 54c2b6b77a..bb957b93e0 100644
>> --- a/include/hw/i386/intel_iommu.h
>> +++ b/include/hw/i386/intel_iommu.h
>> @@ -315,7 +315,7 @@ struct IntelIOMMUState {
>> OnOffAuto intr_eim; /* Toggle for EIM cabability */
>> uint8_t aw_bits; /* Host/IOVA address width (in bits) */
>> bool dma_drain; /* Whether DMA r/w draining enabled */
>> - bool pasid; /* Whether to support PASID */
>> + uint8_t pasid; /* PASID supported in bits, 0 if not */
>> bool fs1gp; /* First Stage 1-GByte Page Support */
>>
>> /* Transient Mapping, Reserved(0) since VTD spec revision 3.2 */
>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>> index d24ba989bf..e5b9689fae 100644
>> --- a/hw/i386/intel_iommu.c
>> +++ b/hw/i386/intel_iommu.c
>> @@ -4203,7 +4203,7 @@ static const Property vtd_properties[] = {
>> DEFINE_PROP_BOOL("x-scalable-mode", IntelIOMMUState, scalable_mode,
>FALSE),
>> DEFINE_PROP_BOOL("x-flts", IntelIOMMUState, fsts, FALSE),
>> DEFINE_PROP_BOOL("snoop-control", IntelIOMMUState, snoop_control,
>false),
>> - DEFINE_PROP_BOOL("x-pasid-mode", IntelIOMMUState, pasid, false),
>> + DEFINE_PROP_UINT8("pasid", IntelIOMMUState, pasid, 0),
>> DEFINE_PROP_BOOL("svm", IntelIOMMUState, svm, false),
>> DEFINE_PROP_BOOL("dma-drain", IntelIOMMUState, dma_drain, true),
>> DEFINE_PROP_BOOL("stale-tm", IntelIOMMUState, stale_tm, false),
>> @@ -5046,7 +5046,8 @@ static void vtd_cap_init(IntelIOMMUState *s)
>> }
>>
>> if (s->pasid) {
>> - s->ecap |= VTD_ECAP_PASID | VTD_ECAP_PSS;
>> + VTD_ECAP_SET_PSS(s, s->pasid - 1);
>> + s->ecap |= VTD_ECAP_PASID;
>> }
>> }
>>
^ permalink raw reply [flat|nested] 45+ messages in thread
* [PATCH v1 06/13] intel_iommu: Export some functions
2026-03-06 3:43 [PATCH v1 00/13] intel_iommu: Enable PASID support for passthrough device Zhenzhong Duan
` (4 preceding siblings ...)
2026-03-06 3:43 ` [PATCH v1 05/13] intel_iommu: Change pasid property from bool to uint8 Zhenzhong Duan
@ 2026-03-06 3:44 ` Zhenzhong Duan
2026-03-18 12:21 ` Yi Liu
2026-03-06 3:44 ` [PATCH v1 07/13] intel_iommu: Handle PASID entry addition for pc_inv_dsc request Zhenzhong Duan
` (6 subsequent siblings)
12 siblings, 1 reply; 45+ messages in thread
From: Zhenzhong Duan @ 2026-03-06 3:44 UTC (permalink / raw)
To: qemu-devel
Cc: alex, clg, eric.auger, mst, jasowang, jgg, nicolinc, skolothumtho,
joao.m.martins, clement.mathieu--drif, kevin.tian, yi.l.liu,
xudong.hao, Zhenzhong Duan
Export some functions for use by the accel code. Inline functions and
macros are moved to the internal header file so that the accel code in
the following patches can access them.
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Reviewed-by: Clement Mathieu--Drif <clement.mathieu--drif@eviden.com>
---
hw/i386/intel_iommu_internal.h | 31 +++++++++++++++++++++++++
hw/i386/intel_iommu.c | 42 ++++++++--------------------------
2 files changed, 40 insertions(+), 33 deletions(-)
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index db4f186a3e..c7e107fe87 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -620,6 +620,12 @@ typedef struct VTDRootEntry VTDRootEntry;
#define VTD_SM_CONTEXT_ENTRY_RSVD_VAL1 0xffffffffffe00000ULL
#define VTD_SM_CONTEXT_ENTRY_PRE 0x10ULL
+/* context entry operations */
+#define VTD_CE_GET_PASID_DIR_TABLE(ce) \
+ ((ce)->val[0] & VTD_PASID_DIR_BASE_ADDR_MASK)
+#define VTD_CE_GET_PRE(ce) \
+ ((ce)->val[0] & VTD_SM_CONTEXT_ENTRY_PRE)
+
typedef struct VTDPASIDCacheInfo {
uint8_t type;
uint16_t did;
@@ -746,4 +752,29 @@ static inline bool vtd_pe_pgtt_is_fst(VTDPASIDEntry *pe)
{
return (VTD_SM_PASID_ENTRY_PGTT(pe) == VTD_SM_PASID_ENTRY_FST);
}
+
+static inline bool vtd_pdire_present(VTDPASIDDirEntry *pdire)
+{
+ return pdire->val & 1;
+}
+
+static inline bool vtd_pe_present(VTDPASIDEntry *pe)
+{
+ return pe->val[0] & VTD_PASID_ENTRY_P;
+}
+
+static inline int vtd_pasid_entry_compare(VTDPASIDEntry *p1, VTDPASIDEntry *p2)
+{
+ return memcmp(p1, p2, sizeof(*p1));
+}
+
+int vtd_get_pdire_from_pdir_table(dma_addr_t pasid_dir_base, uint32_t pasid,
+ VTDPASIDDirEntry *pdire);
+int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s, uint32_t pasid,
+ dma_addr_t addr, VTDPASIDEntry *pe);
+int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
+ uint8_t devfn, VTDContextEntry *ce);
+int vtd_ce_get_pasid_entry(IntelIOMMUState *s, VTDContextEntry *ce,
+ VTDPASIDEntry *pe, uint32_t pasid);
+VTDAddressSpace *vtd_get_as_by_sid(IntelIOMMUState *s, uint16_t sid);
#endif
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index e5b9689fae..744b5967b2 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -42,12 +42,6 @@
#include "migration/vmstate.h"
#include "trace.h"
-/* context entry operations */
-#define VTD_CE_GET_PASID_DIR_TABLE(ce) \
- ((ce)->val[0] & VTD_PASID_DIR_BASE_ADDR_MASK)
-#define VTD_CE_GET_PRE(ce) \
- ((ce)->val[0] & VTD_SM_CONTEXT_ENTRY_PRE)
-
/*
* Paging mode for first-stage translation (VTD spec Figure 9-6)
* 00: 4-level paging, 01: 5-level paging
@@ -831,18 +825,12 @@ static inline bool vtd_pe_type_check(IntelIOMMUState *s, VTDPASIDEntry *pe)
}
}
-static inline bool vtd_pdire_present(VTDPASIDDirEntry *pdire)
-{
- return pdire->val & 1;
-}
-
/**
* Caller of this function should check present bit if wants
* to use pdir entry for further usage except for fpd bit check.
*/
-static int vtd_get_pdire_from_pdir_table(dma_addr_t pasid_dir_base,
- uint32_t pasid,
- VTDPASIDDirEntry *pdire)
+int vtd_get_pdire_from_pdir_table(dma_addr_t pasid_dir_base, uint32_t pasid,
+ VTDPASIDDirEntry *pdire)
{
uint32_t index;
dma_addr_t addr, entry_size;
@@ -860,15 +848,8 @@ static int vtd_get_pdire_from_pdir_table(dma_addr_t pasid_dir_base,
return 0;
}
-static inline bool vtd_pe_present(VTDPASIDEntry *pe)
-{
- return pe->val[0] & VTD_PASID_ENTRY_P;
-}
-
-static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s,
- uint32_t pasid,
- dma_addr_t addr,
- VTDPASIDEntry *pe)
+int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s, uint32_t pasid,
+ dma_addr_t addr, VTDPASIDEntry *pe)
{
uint8_t pgtt;
uint32_t index;
@@ -954,8 +935,8 @@ static int vtd_get_pe_from_pasid_table(IntelIOMMUState *s,
return 0;
}
-static int vtd_ce_get_pasid_entry(IntelIOMMUState *s, VTDContextEntry *ce,
- VTDPASIDEntry *pe, uint32_t pasid)
+int vtd_ce_get_pasid_entry(IntelIOMMUState *s, VTDContextEntry *ce,
+ VTDPASIDEntry *pe, uint32_t pasid)
{
dma_addr_t pasid_dir_base;
@@ -1526,8 +1507,8 @@ static int vtd_ce_pasid_0_check(IntelIOMMUState *s, VTDContextEntry *ce)
}
/* Map a device to its corresponding domain (context-entry) */
-static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
- uint8_t devfn, VTDContextEntry *ce)
+int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
+ uint8_t devfn, VTDContextEntry *ce)
{
VTDRootEntry re;
int ret_fr;
@@ -1909,7 +1890,7 @@ static VTDAddressSpace *vtd_get_as_by_sid_and_pasid(IntelIOMMUState *s,
vtd_find_as_by_sid_and_pasid, &key);
}
-static VTDAddressSpace *vtd_get_as_by_sid(IntelIOMMUState *s, uint16_t sid)
+VTDAddressSpace *vtd_get_as_by_sid(IntelIOMMUState *s, uint16_t sid)
{
return vtd_get_as_by_sid_and_pasid(s, sid, PCI_NO_PASID);
}
@@ -3133,11 +3114,6 @@ static inline int vtd_dev_get_pe_from_pasid(VTDAddressSpace *vtd_as,
return vtd_ce_get_pasid_entry(s, &ce, pe, vtd_as->pasid);
}
-static int vtd_pasid_entry_compare(VTDPASIDEntry *p1, VTDPASIDEntry *p2)
-{
- return memcmp(p1, p2, sizeof(*p1));
-}
-
/* Update or invalidate pasid cache based on the pasid entry in guest memory. */
static void vtd_pasid_cache_sync_locked(gpointer key, gpointer value,
gpointer user_data)
--
2.47.3
^ permalink raw reply related [flat|nested] 45+ messages in thread
* Re: [PATCH v1 06/13] intel_iommu: Export some functions
2026-03-06 3:44 ` [PATCH v1 06/13] intel_iommu: Export some functions Zhenzhong Duan
@ 2026-03-18 12:21 ` Yi Liu
0 siblings, 0 replies; 45+ messages in thread
From: Yi Liu @ 2026-03-18 12:21 UTC (permalink / raw)
To: Zhenzhong Duan, qemu-devel
Cc: alex, clg, eric.auger, mst, jasowang, jgg, nicolinc, skolothumtho,
joao.m.martins, clement.mathieu--drif, kevin.tian, xudong.hao
On 3/6/26 11:44, Zhenzhong Duan wrote:
> Export some functions for accel code usages. Inline functions and MACROs
> are moved to internal header files. Then accel code in following patches
> could access them.
>
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> Reviewed-by: Clement Mathieu--Drif <clement.mathieu--drif@eviden.com>
> ---
> hw/i386/intel_iommu_internal.h | 31 +++++++++++++++++++++++++
> hw/i386/intel_iommu.c | 42 ++++++++--------------------------
> 2 files changed, 40 insertions(+), 33 deletions(-)
Reviewed-by: Yi Liu <yi.l.liu@intel.com>
> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
> index db4f186a3e..c7e107fe87 100644
> --- a/hw/i386/intel_iommu_internal.h
> +++ b/hw/i386/intel_iommu_internal.h
> @@ -620,6 +620,12 @@ typedef struct VTDRootEntry VTDRootEntry;
> #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL1 0xffffffffffe00000ULL
> #define VTD_SM_CONTEXT_ENTRY_PRE 0x10ULL
>
> +/* context entry operations */
> +#define VTD_CE_GET_PASID_DIR_TABLE(ce) \
> + ((ce)->val[0] & VTD_PASID_DIR_BASE_ADDR_MASK)
> +#define VTD_CE_GET_PRE(ce) \
> + ((ce)->val[0] & VTD_SM_CONTEXT_ENTRY_PRE)
> +
> typedef struct VTDPASIDCacheInfo {
> uint8_t type;
> uint16_t did;
> @@ -746,4 +752,29 @@ static inline bool vtd_pe_pgtt_is_fst(VTDPASIDEntry *pe)
> {
> return (VTD_SM_PASID_ENTRY_PGTT(pe) == VTD_SM_PASID_ENTRY_FST);
> }
> +
> +static inline bool vtd_pdire_present(VTDPASIDDirEntry *pdire)
> +{
> + return pdire->val & 1;
> +}
> +
> +static inline bool vtd_pe_present(VTDPASIDEntry *pe)
> +{
> + return pe->val[0] & VTD_PASID_ENTRY_P;
> +}
> +
> +static inline int vtd_pasid_entry_compare(VTDPASIDEntry *p1, VTDPASIDEntry *p2)
> +{
> + return memcmp(p1, p2, sizeof(*p1));
> +}
> +
> +int vtd_get_pdire_from_pdir_table(dma_addr_t pasid_dir_base, uint32_t pasid,
> + VTDPASIDDirEntry *pdire);
> +int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s, uint32_t pasid,
> + dma_addr_t addr, VTDPASIDEntry *pe);
> +int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
> + uint8_t devfn, VTDContextEntry *ce);
> +int vtd_ce_get_pasid_entry(IntelIOMMUState *s, VTDContextEntry *ce,
> + VTDPASIDEntry *pe, uint32_t pasid);
> +VTDAddressSpace *vtd_get_as_by_sid(IntelIOMMUState *s, uint16_t sid);
> #endif
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index e5b9689fae..744b5967b2 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -42,12 +42,6 @@
> #include "migration/vmstate.h"
> #include "trace.h"
>
> -/* context entry operations */
> -#define VTD_CE_GET_PASID_DIR_TABLE(ce) \
> - ((ce)->val[0] & VTD_PASID_DIR_BASE_ADDR_MASK)
> -#define VTD_CE_GET_PRE(ce) \
> - ((ce)->val[0] & VTD_SM_CONTEXT_ENTRY_PRE)
> -
> /*
> * Paging mode for first-stage translation (VTD spec Figure 9-6)
> * 00: 4-level paging, 01: 5-level paging
> @@ -831,18 +825,12 @@ static inline bool vtd_pe_type_check(IntelIOMMUState *s, VTDPASIDEntry *pe)
> }
> }
>
> -static inline bool vtd_pdire_present(VTDPASIDDirEntry *pdire)
> -{
> - return pdire->val & 1;
> -}
> -
> /**
> * Caller of this function should check present bit if wants
> * to use pdir entry for further usage except for fpd bit check.
> */
> -static int vtd_get_pdire_from_pdir_table(dma_addr_t pasid_dir_base,
> - uint32_t pasid,
> - VTDPASIDDirEntry *pdire)
> +int vtd_get_pdire_from_pdir_table(dma_addr_t pasid_dir_base, uint32_t pasid,
> + VTDPASIDDirEntry *pdire)
> {
> uint32_t index;
> dma_addr_t addr, entry_size;
> @@ -860,15 +848,8 @@ static int vtd_get_pdire_from_pdir_table(dma_addr_t pasid_dir_base,
> return 0;
> }
>
> -static inline bool vtd_pe_present(VTDPASIDEntry *pe)
> -{
> - return pe->val[0] & VTD_PASID_ENTRY_P;
> -}
> -
> -static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s,
> - uint32_t pasid,
> - dma_addr_t addr,
> - VTDPASIDEntry *pe)
> +int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s, uint32_t pasid,
> + dma_addr_t addr, VTDPASIDEntry *pe)
> {
> uint8_t pgtt;
> uint32_t index;
> @@ -954,8 +935,8 @@ static int vtd_get_pe_from_pasid_table(IntelIOMMUState *s,
> return 0;
> }
>
> -static int vtd_ce_get_pasid_entry(IntelIOMMUState *s, VTDContextEntry *ce,
> - VTDPASIDEntry *pe, uint32_t pasid)
> +int vtd_ce_get_pasid_entry(IntelIOMMUState *s, VTDContextEntry *ce,
> + VTDPASIDEntry *pe, uint32_t pasid)
> {
> dma_addr_t pasid_dir_base;
>
> @@ -1526,8 +1507,8 @@ static int vtd_ce_pasid_0_check(IntelIOMMUState *s, VTDContextEntry *ce)
> }
>
> /* Map a device to its corresponding domain (context-entry) */
> -static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
> - uint8_t devfn, VTDContextEntry *ce)
> +int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
> + uint8_t devfn, VTDContextEntry *ce)
> {
> VTDRootEntry re;
> int ret_fr;
> @@ -1909,7 +1890,7 @@ static VTDAddressSpace *vtd_get_as_by_sid_and_pasid(IntelIOMMUState *s,
> vtd_find_as_by_sid_and_pasid, &key);
> }
>
> -static VTDAddressSpace *vtd_get_as_by_sid(IntelIOMMUState *s, uint16_t sid)
> +VTDAddressSpace *vtd_get_as_by_sid(IntelIOMMUState *s, uint16_t sid)
> {
> return vtd_get_as_by_sid_and_pasid(s, sid, PCI_NO_PASID);
> }
> @@ -3133,11 +3114,6 @@ static inline int vtd_dev_get_pe_from_pasid(VTDAddressSpace *vtd_as,
> return vtd_ce_get_pasid_entry(s, &ce, pe, vtd_as->pasid);
> }
>
> -static int vtd_pasid_entry_compare(VTDPASIDEntry *p1, VTDPASIDEntry *p2)
> -{
> - return memcmp(p1, p2, sizeof(*p1));
> -}
> -
> /* Update or invalidate pasid cache based on the pasid entry in guest memory. */
> static void vtd_pasid_cache_sync_locked(gpointer key, gpointer value,
> gpointer user_data)
^ permalink raw reply [flat|nested] 45+ messages in thread
* [PATCH v1 07/13] intel_iommu: Handle PASID entry addition for pc_inv_dsc request
2026-03-06 3:43 [PATCH v1 00/13] intel_iommu: Enable PASID support for passthrough device Zhenzhong Duan
` (5 preceding siblings ...)
2026-03-06 3:44 ` [PATCH v1 06/13] intel_iommu: Export some functions Zhenzhong Duan
@ 2026-03-06 3:44 ` Zhenzhong Duan
2026-03-18 12:42 ` Yi Liu
2026-03-20 10:08 ` Yi Liu
2026-03-06 3:44 ` [PATCH v1 08/13] intel_iommu: Handle PASID entry removal " Zhenzhong Duan
` (5 subsequent siblings)
12 siblings, 2 replies; 45+ messages in thread
From: Zhenzhong Duan @ 2026-03-06 3:44 UTC (permalink / raw)
To: qemu-devel
Cc: alex, clg, eric.auger, mst, jasowang, jgg, nicolinc, skolothumtho,
joao.m.martins, clement.mathieu--drif, kevin.tian, yi.l.liu,
xudong.hao, Zhenzhong Duan
Structure VTDAddressSpace includes some elements suitable for emulated
devices and passthrough devices without PASID, e.g., the address space and
various memory regions, and it is also protected by the vtd iommu lock.
All of these are useless and become a burden for a passthrough device with
PASID.
When there are lots of PASIDs used in one device, the AS and MRs are
all registered to memory core and impact the whole system performance.
So instead of using VTDAddressSpace to cache the pasid entry for each pasid
of a passthrough device, we define a lightweight structure,
VTDACCELPASIDCacheEntry, with only the necessary elements for each pasid. We
will use this struct as a parameter to conduct binding/unbinding to the
nested hwpt and to record the currently bound nested hwpt. It's also
designed to support PASID_0.
When the guest creates new PASID entries, QEMU will capture the pc_inv_dsc
(pasid cache invalidation) request, walk through each pasid in each
passthrough device looking for valid pasid entries, and create a new
VTDACCELPASIDCacheEntry for each one that does not exist yet.
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
---
hw/i386/intel_iommu_accel.h | 13 +++
hw/i386/intel_iommu_internal.h | 8 ++
hw/i386/intel_iommu.c | 3 +
hw/i386/intel_iommu_accel.c | 170 +++++++++++++++++++++++++++++++++
4 files changed, 194 insertions(+)
diff --git a/hw/i386/intel_iommu_accel.h b/hw/i386/intel_iommu_accel.h
index e5f0b077b4..a77fd06fe0 100644
--- a/hw/i386/intel_iommu_accel.h
+++ b/hw/i386/intel_iommu_accel.h
@@ -12,6 +12,13 @@
#define HW_I386_INTEL_IOMMU_ACCEL_H
#include CONFIG_DEVICES
+typedef struct VTDACCELPASIDCacheEntry {
+ VTDHostIOMMUDevice *vtd_hiod;
+ VTDPASIDEntry pe;
+ uint32_t pasid;
+ QLIST_ENTRY(VTDACCELPASIDCacheEntry) next;
+} VTDACCELPASIDCacheEntry;
+
#ifdef CONFIG_VTD_ACCEL
bool vtd_check_hiod_accel(IntelIOMMUState *s, VTDHostIOMMUDevice *vtd_hiod,
Error **errp);
@@ -20,6 +27,7 @@ bool vtd_propagate_guest_pasid(VTDAddressSpace *vtd_as, Error **errp);
void vtd_flush_host_piotlb_all_locked(IntelIOMMUState *s, uint16_t domain_id,
uint32_t pasid, hwaddr addr,
uint64_t npages, bool ih);
+void vtd_pasid_cache_sync_accel(IntelIOMMUState *s, VTDPASIDCacheInfo *pc_info);
void vtd_iommu_ops_update_accel(PCIIOMMUOps *ops);
#else
static inline bool vtd_check_hiod_accel(IntelIOMMUState *s,
@@ -49,6 +57,11 @@ static inline void vtd_flush_host_piotlb_all_locked(IntelIOMMUState *s,
{
}
+static inline void vtd_pasid_cache_sync_accel(IntelIOMMUState *s,
+ VTDPASIDCacheInfo *pc_info)
+{
+}
+
static inline void vtd_iommu_ops_update_accel(PCIIOMMUOps *ops)
{
}
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index c7e107fe87..ede4db6d2d 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -616,6 +616,7 @@ typedef struct VTDRootEntry VTDRootEntry;
#define VTD_CTX_ENTRY_SCALABLE_SIZE 32
#define PASID_0 0
+#define VTD_SM_CONTEXT_ENTRY_PDTS(x) extract64((x)->val[0], 9, 3)
#define VTD_SM_CONTEXT_ENTRY_RSVD_VAL0(aw) (0x1e0ULL | ~VTD_HAW_MASK(aw))
#define VTD_SM_CONTEXT_ENTRY_RSVD_VAL1 0xffffffffffe00000ULL
#define VTD_SM_CONTEXT_ENTRY_PRE 0x10ULL
@@ -646,6 +647,7 @@ typedef struct VTDPIOTLBInvInfo {
#define VTD_PASID_DIR_BITS_MASK (0x3fffULL)
#define VTD_PASID_DIR_INDEX(pasid) (((pasid) >> 6) & VTD_PASID_DIR_BITS_MASK)
#define VTD_PASID_DIR_FPD (1ULL << 1) /* Fault Processing Disable */
+#define VTD_PASID_TABLE_ENTRY_NUM (1ULL << 6)
#define VTD_PASID_TABLE_BITS_MASK (0x3fULL)
#define VTD_PASID_TABLE_INDEX(pasid) ((pasid) & VTD_PASID_TABLE_BITS_MASK)
#define VTD_PASID_ENTRY_FPD (1ULL << 1) /* Fault Processing Disable */
@@ -711,6 +713,7 @@ typedef struct VTDHostIOMMUDevice {
PCIBus *bus;
uint8_t devfn;
HostIOMMUDevice *hiod;
+ QLIST_HEAD(, VTDACCELPASIDCacheEntry) pasid_cache_list;
} VTDHostIOMMUDevice;
/*
@@ -768,6 +771,11 @@ static inline int vtd_pasid_entry_compare(VTDPASIDEntry *p1, VTDPASIDEntry *p2)
return memcmp(p1, p2, sizeof(*p1));
}
+static inline uint32_t vtd_sm_ce_get_pdt_entry_num(VTDContextEntry *ce)
+{
+ return 1U << (VTD_SM_CONTEXT_ENTRY_PDTS(ce) + 7);
+}
+
int vtd_get_pdire_from_pdir_table(dma_addr_t pasid_dir_base, uint32_t pasid,
VTDPASIDDirEntry *pdire);
int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s, uint32_t pasid,
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 744b5967b2..984adc639a 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -3202,6 +3202,8 @@ static void vtd_pasid_cache_sync(IntelIOMMUState *s, VTDPASIDCacheInfo *pc_info)
g_hash_table_foreach(s->vtd_address_spaces, vtd_pasid_cache_sync_locked,
pc_info);
vtd_iommu_unlock(s);
+
+ vtd_pasid_cache_sync_accel(s, pc_info);
}
static void vtd_replay_pasid_bindings_all(IntelIOMMUState *s)
@@ -4760,6 +4762,7 @@ static bool vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int devfn,
vtd_hiod->devfn = (uint8_t)devfn;
vtd_hiod->iommu_state = s;
vtd_hiod->hiod = hiod;
+ QLIST_INIT(&vtd_hiod->pasid_cache_list);
if (!vtd_check_hiod(s, vtd_hiod, errp)) {
g_free(vtd_hiod);
diff --git a/hw/i386/intel_iommu_accel.c b/hw/i386/intel_iommu_accel.c
index c2757f3bcd..0acf3ae77f 100644
--- a/hw/i386/intel_iommu_accel.c
+++ b/hw/i386/intel_iommu_accel.c
@@ -257,6 +257,176 @@ void vtd_flush_host_piotlb_all_locked(IntelIOMMUState *s, uint16_t domain_id,
vtd_flush_host_piotlb_locked, &piotlb_info);
}
+static void vtd_find_add_pc(VTDHostIOMMUDevice *vtd_hiod, uint32_t pasid,
+ VTDPASIDEntry *pe)
+{
+ VTDACCELPASIDCacheEntry *vtd_pce;
+
+ QLIST_FOREACH(vtd_pce, &vtd_hiod->pasid_cache_list, next) {
+ if (vtd_pce->pasid == pasid) {
+ if (vtd_pasid_entry_compare(pe, &vtd_pce->pe)) {
+ vtd_pce->pe = *pe;
+ }
+ return;
+ }
+ }
+
+ vtd_pce = g_malloc0(sizeof(VTDACCELPASIDCacheEntry));
+ vtd_pce->vtd_hiod = vtd_hiod;
+ vtd_pce->pasid = pasid;
+ vtd_pce->pe = *pe;
+ QLIST_INSERT_HEAD(&vtd_hiod->pasid_cache_list, vtd_pce, next);
+}
+
+/*
+ * This function walks over PASID range within [start, end) in a single
+ * PASID table for entries matching @info type/did, then create
+ * VTDACCELPASIDCacheEntry if not exist yet.
+ */
+static void vtd_sm_pasid_table_walk_one(VTDHostIOMMUDevice *vtd_hiod,
+ dma_addr_t pt_base,
+ int start,
+ int end,
+ VTDPASIDCacheInfo *info)
+{
+ IntelIOMMUState *s = vtd_hiod->iommu_state;
+ VTDPASIDEntry pe;
+ int pasid;
+
+ for (pasid = start; pasid < end; pasid++) {
+ if (vtd_get_pe_in_pasid_leaf_table(s, pasid, pt_base, &pe) ||
+ !vtd_pe_present(&pe)) {
+ continue;
+ }
+
+ if ((info->type == VTD_INV_DESC_PASIDC_G_DSI ||
+ info->type == VTD_INV_DESC_PASIDC_G_PASID_SI) &&
+ (info->did != VTD_SM_PASID_ENTRY_DID(&pe))) {
+ /*
+ * VTD_PASID_CACHE_DOMSI and VTD_PASID_CACHE_PASIDSI
+ * requires domain id check. If domain id check fail,
+ * go to next pasid.
+ */
+ continue;
+ }
+
+ vtd_find_add_pc(vtd_hiod, pasid, &pe);
+ }
+}
+
+/*
+ * In VT-d scalable mode translation, PASID dir + PASID table is used.
+ * This function aims at looping over a range of PASIDs in the given
+ * two level table to identify the pasid config in guest.
+ */
+static void vtd_sm_pasid_table_walk(VTDHostIOMMUDevice *vtd_hiod,
+ dma_addr_t pdt_base,
+ int start, int end,
+ VTDPASIDCacheInfo *info)
+{
+ VTDPASIDDirEntry pdire;
+ int pasid = start;
+ int pasid_next;
+ dma_addr_t pt_base;
+
+ while (pasid < end) {
+ pasid_next = (pasid + VTD_PASID_TABLE_ENTRY_NUM) &
+ ~(VTD_PASID_TABLE_ENTRY_NUM - 1);
+ pasid_next = pasid_next < end ? pasid_next : end;
+
+ if (!vtd_get_pdire_from_pdir_table(pdt_base, pasid, &pdire)
+ && vtd_pdire_present(&pdire)) {
+ pt_base = pdire.val & VTD_PASID_TABLE_BASE_ADDR_MASK;
+ vtd_sm_pasid_table_walk_one(vtd_hiod, pt_base, pasid, pasid_next,
+ info);
+ }
+ pasid = pasid_next;
+ }
+}
+
+static void vtd_replay_pasid_bind_for_dev(VTDHostIOMMUDevice *vtd_hiod,
+ int start, int end,
+ VTDPASIDCacheInfo *pc_info)
+{
+ IntelIOMMUState *s = vtd_hiod->iommu_state;
+ VTDContextEntry ce;
+ int dev_max_pasid = 1 << vtd_hiod->hiod->caps.max_pasid_log2;
+
+ if (!vtd_dev_to_context_entry(s, pci_bus_num(vtd_hiod->bus),
+ vtd_hiod->devfn, &ce)) {
+ VTDPASIDCacheInfo walk_info = *pc_info;
+ uint32_t ce_max_pasid = vtd_sm_ce_get_pdt_entry_num(&ce) *
+ VTD_PASID_TABLE_ENTRY_NUM;
+
+ end = MIN(end, MIN(dev_max_pasid, ce_max_pasid));
+
+ vtd_sm_pasid_table_walk(vtd_hiod, VTD_CE_GET_PASID_DIR_TABLE(&ce),
+ start, end, &walk_info);
+ }
+}
+
+/*
+ * This function replays the guest pasid bindings by walking the two level
+ * guest PASID table. For each valid pasid entry, it creates an entry
+ * VTDACCELPASIDCacheEntry dynamically if not exist yet. This entry holds
+ * info specific to a pasid
+ */
+void vtd_pasid_cache_sync_accel(IntelIOMMUState *s, VTDPASIDCacheInfo *pc_info)
+{
+ int start = PASID_0, end = 1 << s->pasid;
+ VTDHostIOMMUDevice *vtd_hiod;
+ GHashTableIter as_it;
+
+ if (!s->fsts) {
+ return;
+ }
+
+ /*
+ * VTDPASIDCacheInfo honors PCI pasid but VTDACCELPASIDCacheEntry honors
+ * iommu pasid
+ */
+ if (pc_info->pasid == PCI_NO_PASID) {
+ pc_info->pasid = PASID_0;
+ }
+
+ switch (pc_info->type) {
+ case VTD_INV_DESC_PASIDC_G_PASID_SI:
+ start = pc_info->pasid;
+ end = pc_info->pasid + 1;
+ /* fall through */
+ case VTD_INV_DESC_PASIDC_G_DSI:
+ /*
+ * loop all assigned devices, do domain id check in
+ * vtd_sm_pasid_table_walk_one() after get pasid entry.
+ */
+ break;
+ case VTD_INV_DESC_PASIDC_G_GLOBAL:
+ /* loop all assigned devices */
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ /*
+ * In this replay, one only needs to care about the devices which are
+ * backed by host IOMMU. Those devices have a corresponding vtd_hiod
+ * in s->vtd_host_iommu_dev. For devices not backed by host IOMMU, it
+ * is not necessary to replay the bindings since their cache should be
+ * created in the future DMA address translation.
+ *
+ * VTD translation callback never accesses vtd_hiod and its corresponding
+ * cached pasid entry, so no iommu lock needed here.
+ */
+ g_hash_table_iter_init(&as_it, s->vtd_host_iommu_dev);
+ while (g_hash_table_iter_next(&as_it, NULL, (void **)&vtd_hiod)) {
+ if (!object_dynamic_cast(OBJECT(vtd_hiod->hiod),
+ TYPE_HOST_IOMMU_DEVICE_IOMMUFD)) {
+ continue;
+ }
+ vtd_replay_pasid_bind_for_dev(vtd_hiod, start, end, pc_info);
+ }
+}
+
static uint64_t vtd_get_host_iommu_quirks(uint32_t type,
void *caps, uint32_t size)
{
--
2.47.3
^ permalink raw reply related [flat|nested] 45+ messages in thread
* Re: [PATCH v1 07/13] intel_iommu: Handle PASID entry addition for pc_inv_dsc request
2026-03-06 3:44 ` [PATCH v1 07/13] intel_iommu: Handle PASID entry addition for pc_inv_dsc request Zhenzhong Duan
@ 2026-03-18 12:42 ` Yi Liu
2026-03-19 8:26 ` Duan, Zhenzhong
2026-03-20 10:08 ` Yi Liu
1 sibling, 1 reply; 45+ messages in thread
From: Yi Liu @ 2026-03-18 12:42 UTC (permalink / raw)
To: Zhenzhong Duan, qemu-devel
Cc: alex, clg, eric.auger, mst, jasowang, jgg, nicolinc, skolothumtho,
joao.m.martins, clement.mathieu--drif, kevin.tian, xudong.hao
On 3/6/26 11:44, Zhenzhong Duan wrote:
> Structure VTDAddressSpace includes some elements suitable for emulated
> device and passthrough device without PASID, e.g., address space,
> different memory regions, etc, it is also protected by vtd iommu lock,
> all these are useless and become a burden for passthrough device with
> PASID.
>
> When there are lots of PASIDs used in one device, the AS and MRs are
> all registered to memory core and impact the whole system performance.
>
> So instead of using VTDAddressSpace to cache pasid entry for each pasid
> of a passthrough device, we define a light weight structure
> VTDACCELPASIDCacheEntry with only necessary elements for each pasid. We
> will use this struct as a parameter to conduct binding/unbinding to
> nested hwpt and to record the current binded nested hwpt. It's also
> designed to support PASID_0.
PASID_0 of a passthrough device still needs to register MRs in case the guest
does not operate in scalable mode. So for PASID_0, you will have both
VTDPASIDCacheEntry and VTDACCELPASIDCacheEntry. Is that right?
> When guest creates new PASID entries, QEMU will capture the pc_inv_dsc
> (pasid cache invalidation) request, walk through each pasid in each
> passthrough device for valid pasid entries, create a new
> VTDACCELPASIDCacheEntry if not existing yet.
I think this idea is ok, but we need to be clear about the
boundary between the usages of the two pasid cache structures.
> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> ---
> hw/i386/intel_iommu_accel.h | 13 +++
> hw/i386/intel_iommu_internal.h | 8 ++
> hw/i386/intel_iommu.c | 3 +
> hw/i386/intel_iommu_accel.c | 170 +++++++++++++++++++++++++++++++++
> 4 files changed, 194 insertions(+)
>
> diff --git a/hw/i386/intel_iommu_accel.h b/hw/i386/intel_iommu_accel.h
> index e5f0b077b4..a77fd06fe0 100644
> --- a/hw/i386/intel_iommu_accel.h
> +++ b/hw/i386/intel_iommu_accel.h
> @@ -12,6 +12,13 @@
> #define HW_I386_INTEL_IOMMU_ACCEL_H
> #include CONFIG_DEVICES
>
> +typedef struct VTDACCELPASIDCacheEntry {
> + VTDHostIOMMUDevice *vtd_hiod;
> + VTDPASIDEntry pe;
Let's use pasid_entry here, as we already use that name in
VTDPASIDCacheEntry. Sometimes it helps when searching for related
code.
> + uint32_t pasid;
> + QLIST_ENTRY(VTDACCELPASIDCacheEntry) next;
> +} VTDACCELPASIDCacheEntry;
> +
> #ifdef CONFIG_VTD_ACCEL
> bool vtd_check_hiod_accel(IntelIOMMUState *s, VTDHostIOMMUDevice *vtd_hiod,
> Error **errp);
> @@ -20,6 +27,7 @@ bool vtd_propagate_guest_pasid(VTDAddressSpace *vtd_as, Error **errp);
> void vtd_flush_host_piotlb_all_locked(IntelIOMMUState *s, uint16_t domain_id,
> uint32_t pasid, hwaddr addr,
> uint64_t npages, bool ih);
> +void vtd_pasid_cache_sync_accel(IntelIOMMUState *s, VTDPASIDCacheInfo *pc_info);
> void vtd_iommu_ops_update_accel(PCIIOMMUOps *ops);
> #else
> static inline bool vtd_check_hiod_accel(IntelIOMMUState *s,
> @@ -49,6 +57,11 @@ static inline void vtd_flush_host_piotlb_all_locked(IntelIOMMUState *s,
> {
> }
>
> +static inline void vtd_pasid_cache_sync_accel(IntelIOMMUState *s,
> + VTDPASIDCacheInfo *pc_info)
> +{
> +}
> +
> static inline void vtd_iommu_ops_update_accel(PCIIOMMUOps *ops)
> {
> }
> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
> index c7e107fe87..ede4db6d2d 100644
> --- a/hw/i386/intel_iommu_internal.h
> +++ b/hw/i386/intel_iommu_internal.h
> @@ -616,6 +616,7 @@ typedef struct VTDRootEntry VTDRootEntry;
> #define VTD_CTX_ENTRY_SCALABLE_SIZE 32
>
> #define PASID_0 0
> +#define VTD_SM_CONTEXT_ENTRY_PDTS(x) extract64((x)->val[0], 9, 3)
> #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL0(aw) (0x1e0ULL | ~VTD_HAW_MASK(aw))
> #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL1 0xffffffffffe00000ULL
> #define VTD_SM_CONTEXT_ENTRY_PRE 0x10ULL
> @@ -646,6 +647,7 @@ typedef struct VTDPIOTLBInvInfo {
> #define VTD_PASID_DIR_BITS_MASK (0x3fffULL)
> #define VTD_PASID_DIR_INDEX(pasid) (((pasid) >> 6) & VTD_PASID_DIR_BITS_MASK)
> #define VTD_PASID_DIR_FPD (1ULL << 1) /* Fault Processing Disable */
> +#define VTD_PASID_TABLE_ENTRY_NUM (1ULL << 6)
> #define VTD_PASID_TABLE_BITS_MASK (0x3fULL)
> #define VTD_PASID_TABLE_INDEX(pasid) ((pasid) & VTD_PASID_TABLE_BITS_MASK)
> #define VTD_PASID_ENTRY_FPD (1ULL << 1) /* Fault Processing Disable */
> @@ -711,6 +713,7 @@ typedef struct VTDHostIOMMUDevice {
> PCIBus *bus;
> uint8_t devfn;
> HostIOMMUDevice *hiod;
> + QLIST_HEAD(, VTDACCELPASIDCacheEntry) pasid_cache_list;
> } VTDHostIOMMUDevice;
>
> /*
> @@ -768,6 +771,11 @@ static inline int vtd_pasid_entry_compare(VTDPASIDEntry *p1, VTDPASIDEntry *p2)
> return memcmp(p1, p2, sizeof(*p1));
> }
>
> +static inline uint32_t vtd_sm_ce_get_pdt_entry_num(VTDContextEntry *ce)
> +{
> + return 1U << (VTD_SM_CONTEXT_ENTRY_PDTS(ce) + 7);
> +}
> +
> int vtd_get_pdire_from_pdir_table(dma_addr_t pasid_dir_base, uint32_t pasid,
> VTDPASIDDirEntry *pdire);
> int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s, uint32_t pasid,
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index 744b5967b2..984adc639a 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -3202,6 +3202,8 @@ static void vtd_pasid_cache_sync(IntelIOMMUState *s, VTDPASIDCacheInfo *pc_info)
> g_hash_table_foreach(s->vtd_address_spaces, vtd_pasid_cache_sync_locked,
> pc_info);
> vtd_iommu_unlock(s);
> +
> + vtd_pasid_cache_sync_accel(s, pc_info);
> }
>
> static void vtd_replay_pasid_bindings_all(IntelIOMMUState *s)
> @@ -4760,6 +4762,7 @@ static bool vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int devfn,
> vtd_hiod->devfn = (uint8_t)devfn;
> vtd_hiod->iommu_state = s;
> vtd_hiod->hiod = hiod;
> + QLIST_INIT(&vtd_hiod->pasid_cache_list);
>
> if (!vtd_check_hiod(s, vtd_hiod, errp)) {
> g_free(vtd_hiod);
> diff --git a/hw/i386/intel_iommu_accel.c b/hw/i386/intel_iommu_accel.c
> index c2757f3bcd..0acf3ae77f 100644
> --- a/hw/i386/intel_iommu_accel.c
> +++ b/hw/i386/intel_iommu_accel.c
> @@ -257,6 +257,176 @@ void vtd_flush_host_piotlb_all_locked(IntelIOMMUState *s, uint16_t domain_id,
> vtd_flush_host_piotlb_locked, &piotlb_info);
> }
>
> +static void vtd_find_add_pc(VTDHostIOMMUDevice *vtd_hiod, uint32_t pasid,
> + VTDPASIDEntry *pe)
> +{
> + VTDACCELPASIDCacheEntry *vtd_pce;
> +
> + QLIST_FOREACH(vtd_pce, &vtd_hiod->pasid_cache_list, next) {
> + if (vtd_pce->pasid == pasid) {
> + if (vtd_pasid_entry_compare(pe, &vtd_pce->pe)) {
> + vtd_pce->pe = *pe;
> + }
> + return;
> + }
> + }
> +
> + vtd_pce = g_malloc0(sizeof(VTDACCELPASIDCacheEntry));
> + vtd_pce->vtd_hiod = vtd_hiod;
> + vtd_pce->pasid = pasid;
> + vtd_pce->pe = *pe;
> + QLIST_INSERT_HEAD(&vtd_hiod->pasid_cache_list, vtd_pce, next);
> +}
> +
> +/*
> + * This function walks over PASID range within [start, end) in a single
> + * PASID table for entries matching @info type/did, then create
> + * VTDACCELPASIDCacheEntry if not exist yet.
> + */
> +static void vtd_sm_pasid_table_walk_one(VTDHostIOMMUDevice *vtd_hiod,
> + dma_addr_t pt_base,
> + int start,
> + int end,
> + VTDPASIDCacheInfo *info)
> +{
> + IntelIOMMUState *s = vtd_hiod->iommu_state;
> + VTDPASIDEntry pe;
> + int pasid;
> +
> + for (pasid = start; pasid < end; pasid++) {
> + if (vtd_get_pe_in_pasid_leaf_table(s, pasid, pt_base, &pe) ||
> + !vtd_pe_present(&pe)) {
> + continue;
> + }
> +
> + if ((info->type == VTD_INV_DESC_PASIDC_G_DSI ||
> + info->type == VTD_INV_DESC_PASIDC_G_PASID_SI) &&
> + (info->did != VTD_SM_PASID_ENTRY_DID(&pe))) {
> + /*
> + * VTD_PASID_CACHE_DOMSI and VTD_PASID_CACHE_PASIDSI
> + * requires domain id check. If domain id check fail,
> + * go to next pasid.
> + */
> + continue;
> + }
> +
> + vtd_find_add_pc(vtd_hiod, pasid, &pe);
> + }
> +}
> +
> +/*
> + * In VT-d scalable mode translation, PASID dir + PASID table is used.
> + * This function aims at looping over a range of PASIDs in the given
> + * two level table to identify the pasid config in guest.
> + */
> +static void vtd_sm_pasid_table_walk(VTDHostIOMMUDevice *vtd_hiod,
> + dma_addr_t pdt_base,
> + int start, int end,
> + VTDPASIDCacheInfo *info)
> +{
> + VTDPASIDDirEntry pdire;
> + int pasid = start;
> + int pasid_next;
> + dma_addr_t pt_base;
> +
> + while (pasid < end) {
> + pasid_next = (pasid + VTD_PASID_TABLE_ENTRY_NUM) &
> + ~(VTD_PASID_TABLE_ENTRY_NUM - 1);
> + pasid_next = pasid_next < end ? pasid_next : end;
> +
> + if (!vtd_get_pdire_from_pdir_table(pdt_base, pasid, &pdire)
> + && vtd_pdire_present(&pdire)) {
> + pt_base = pdire.val & VTD_PASID_TABLE_BASE_ADDR_MASK;
> + vtd_sm_pasid_table_walk_one(vtd_hiod, pt_base, pasid, pasid_next,
> + info);
> + }
> + pasid = pasid_next;
> + }
> +}
> +
> +static void vtd_replay_pasid_bind_for_dev(VTDHostIOMMUDevice *vtd_hiod,
> + int start, int end,
> + VTDPASIDCacheInfo *pc_info)
> +{
> + IntelIOMMUState *s = vtd_hiod->iommu_state;
> + VTDContextEntry ce;
> + int dev_max_pasid = 1 << vtd_hiod->hiod->caps.max_pasid_log2;
> +
> + if (!vtd_dev_to_context_entry(s, pci_bus_num(vtd_hiod->bus),
> + vtd_hiod->devfn, &ce)) {
> + VTDPASIDCacheInfo walk_info = *pc_info;
> + uint32_t ce_max_pasid = vtd_sm_ce_get_pdt_entry_num(&ce) *
> + VTD_PASID_TABLE_ENTRY_NUM;
> +
> + end = MIN(end, MIN(dev_max_pasid, ce_max_pasid));
> +
> + vtd_sm_pasid_table_walk(vtd_hiod, VTD_CE_GET_PASID_DIR_TABLE(&ce),
> + start, end, &walk_info);
> + }
> +}
> +
> +/*
> + * This function replays the guest pasid bindings by walking the two level
> + * guest PASID table. For each valid pasid entry, it creates an entry
> + * VTDACCELPASIDCacheEntry dynamically if not exist yet. This entry holds
> + * info specific to a pasid
> + */
> +void vtd_pasid_cache_sync_accel(IntelIOMMUState *s, VTDPASIDCacheInfo *pc_info)
> +{
> + int start = PASID_0, end = 1 << s->pasid;
> + VTDHostIOMMUDevice *vtd_hiod;
> + GHashTableIter as_it;
> +
> + if (!s->fsts) {
> + return;
> + }
> +
> + /*
> + * VTDPASIDCacheInfo honors PCI pasid but VTDACCELPASIDCacheEntry honors
> + * iommu pasid
> + */
> + if (pc_info->pasid == PCI_NO_PASID) {
> + pc_info->pasid = PASID_0;
> + }
> +
> + switch (pc_info->type) {
> + case VTD_INV_DESC_PASIDC_G_PASID_SI:
> + start = pc_info->pasid;
> + end = pc_info->pasid + 1;
> + /* fall through */
> + case VTD_INV_DESC_PASIDC_G_DSI:
> + /*
> + * loop all assigned devices, do domain id check in
> + * vtd_sm_pasid_table_walk_one() after get pasid entry.
> + */
> + break;
> + case VTD_INV_DESC_PASIDC_G_GLOBAL:
> + /* loop all assigned devices */
> + break;
> + default:
> + g_assert_not_reached();
> + }
> +
> + /*
> + * In this replay, one only needs to care about the devices which are
> + * backed by host IOMMU. Those devices have a corresponding vtd_hiod
> + * in s->vtd_host_iommu_dev. For devices not backed by host IOMMU, it
> + * is not necessary to replay the bindings since their cache should be
> + * created in the future DMA address translation.
> + *
> + * VTD translation callback never accesses vtd_hiod and its corresponding
> + * cached pasid entry, so no iommu lock needed here.
> + */
> + g_hash_table_iter_init(&as_it, s->vtd_host_iommu_dev);
> + while (g_hash_table_iter_next(&as_it, NULL, (void **)&vtd_hiod)) {
> + if (!object_dynamic_cast(OBJECT(vtd_hiod->hiod),
> + TYPE_HOST_IOMMU_DEVICE_IOMMUFD)) {
> + continue;
> + }
> + vtd_replay_pasid_bind_for_dev(vtd_hiod, start, end, pc_info);
> + }
> +}
> +
> static uint64_t vtd_get_host_iommu_quirks(uint32_t type,
> void *caps, uint32_t size)
> {
^ permalink raw reply [flat|nested] 45+ messages in thread
* RE: [PATCH v1 07/13] intel_iommu: Handle PASID entry addition for pc_inv_dsc request
2026-03-18 12:42 ` Yi Liu
@ 2026-03-19 8:26 ` Duan, Zhenzhong
2026-03-20 10:13 ` Yi Liu
0 siblings, 1 reply; 45+ messages in thread
From: Duan, Zhenzhong @ 2026-03-19 8:26 UTC (permalink / raw)
To: Liu, Yi L, qemu-devel@nongnu.org
Cc: alex@shazbot.org, clg@redhat.com, eric.auger@redhat.com,
mst@redhat.com, jasowang@redhat.com, jgg@nvidia.com,
nicolinc@nvidia.com, skolothumtho@nvidia.com,
joao.m.martins@oracle.com, clement.mathieu--drif@eviden.com,
Tian, Kevin, Hao, Xudong
>-----Original Message-----
>From: Liu, Yi L <yi.l.liu@intel.com>
>Subject: Re: [PATCH v1 07/13] intel_iommu: Handle PASID entry addition for
>pc_inv_dsc request
>
>On 3/6/26 11:44, Zhenzhong Duan wrote:
>> Structure VTDAddressSpace includes some elements suitable for emulated
>> device and passthrough device without PASID, e.g., address space,
>> different memory regions, etc, it is also protected by vtd iommu lock,
>> all these are useless and become a burden for passthrough device with
>> PASID.
>>
>> When there are lots of PASIDs used in one device, the AS and MRs are
>> all registered to memory core and impact the whole system performance.
>>
>> So instead of using VTDAddressSpace to cache pasid entry for each pasid
>> of a passthrough device, we define a light weight structure
>> VTDACCELPASIDCacheEntry with only necessary elements for each pasid. We
>> will use this struct as a parameter to conduct binding/unbinding to
>> nested hwpt and to record the current binded nested hwpt. It's also
>> designed to support PASID_0.
>
>PASID_0 of passthrough device still need to register MRs in case guest
>does not operate in scalable mode. So for PASID_0, you will have both
>VTDAPASIDCacheEntry and VTDACCELPASIDCacheEntry. Is it?
Exactly!
>
>> When guest creates new PASID entries, QEMU will capture the pc_inv_dsc
>> (pasid cache invalidation) request, walk through each pasid in each
>> passthrough device for valid pasid entries, create a new
>> VTDACCELPASIDCacheEntry if not existing yet.
>
>I think this idea is ok, but we need to be clear about the
>boundary between the usages of the two pasid cache structures.
Yes, the principle here is that VTDACCELPASIDCacheEntry is only used in hw/i386/intel_iommu_accel.c and VTDPASIDCacheEntry is only used in hw/i386/intel_iommu.c.
>
>> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>> ---
>> hw/i386/intel_iommu_accel.h | 13 +++
>> hw/i386/intel_iommu_internal.h | 8 ++
>> hw/i386/intel_iommu.c | 3 +
>> hw/i386/intel_iommu_accel.c | 170 +++++++++++++++++++++++++++++++++
>> 4 files changed, 194 insertions(+)
>>
>> diff --git a/hw/i386/intel_iommu_accel.h b/hw/i386/intel_iommu_accel.h
>> index e5f0b077b4..a77fd06fe0 100644
>> --- a/hw/i386/intel_iommu_accel.h
>> +++ b/hw/i386/intel_iommu_accel.h
>> @@ -12,6 +12,13 @@
>> #define HW_I386_INTEL_IOMMU_ACCEL_H
>> #include CONFIG_DEVICES
>>
>> +typedef struct VTDACCELPASIDCacheEntry {
>> + VTDHostIOMMUDevice *vtd_hiod;
>> + VTDPASIDEntry pe;
>
>let's use pasid_entry here as we already used it in the
>VTDPASIDCacheEntry. Sometimees, it may help when for searching related
>code.
Good suggestion, will do.
Thanks
Zhenzhong
^ permalink raw reply [flat|nested] 45+ messages in thread
* Re: [PATCH v1 07/13] intel_iommu: Handle PASID entry addition for pc_inv_dsc request
2026-03-19 8:26 ` Duan, Zhenzhong
@ 2026-03-20 10:13 ` Yi Liu
2026-03-23 5:59 ` Duan, Zhenzhong
0 siblings, 1 reply; 45+ messages in thread
From: Yi Liu @ 2026-03-20 10:13 UTC (permalink / raw)
To: Duan, Zhenzhong, qemu-devel@nongnu.org
Cc: alex@shazbot.org, clg@redhat.com, eric.auger@redhat.com,
mst@redhat.com, jasowang@redhat.com, jgg@nvidia.com,
nicolinc@nvidia.com, skolothumtho@nvidia.com,
joao.m.martins@oracle.com, clement.mathieu--drif@eviden.com,
Tian, Kevin, Hao, Xudong
On 3/19/26 16:26, Duan, Zhenzhong wrote:
>
>
>> -----Original Message-----
>> From: Liu, Yi L <yi.l.liu@intel.com>
>> Subject: Re: [PATCH v1 07/13] intel_iommu: Handle PASID entry addition for
>> pc_inv_dsc request
>>
>> On 3/6/26 11:44, Zhenzhong Duan wrote:
>>> Structure VTDAddressSpace includes some elements suitable for emulated
>>> device and passthrough device without PASID, e.g., address space,
>>> different memory regions, etc, it is also protected by vtd iommu lock,
>>> all these are useless and become a burden for passthrough device with
>>> PASID.
>>>
>>> When there are lots of PASIDs used in one device, the AS and MRs are
>>> all registered to memory core and impact the whole system performance.
>>>
>>> So instead of using VTDAddressSpace to cache pasid entry for each pasid
>>> of a passthrough device, we define a light weight structure
>>> VTDACCELPASIDCacheEntry with only necessary elements for each pasid. We
>>> will use this struct as a parameter to conduct binding/unbinding to
>>> nested hwpt and to record the current binded nested hwpt. It's also
>>> designed to support PASID_0.
>>
>> PASID_0 of passthrough device still need to register MRs in case guest
>> does not operate in scalable mode. So for PASID_0, you will have both
>> VTDAPASIDCacheEntry and VTDACCELPASIDCacheEntry. Is it?
>
> Exactly!
Better to note it somewhere (commit message or code comment). It would be
a good hint for future maintenance, since this fact is easy to forget. :)
>>
>>> When guest creates new PASID entries, QEMU will capture the pc_inv_dsc
>>> (pasid cache invalidation) request, walk through each pasid in each
>>> passthrough device for valid pasid entries, create a new
>>> VTDACCELPASIDCacheEntry if not existing yet.
>>
>> I think this idea is ok, but we need to be clear about the
>> boundary between the usages of the two pasid cache structures.
>
> Yes, the principle here is VTDACCELPASIDCacheEntry is only used in intel_iommu_accel.c, VTDPASIDCacheEntry is only used in hw/i386/intel_iommu.c
just mark it in commit message as well.
>>
>>> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
>>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>>> ---
>>> hw/i386/intel_iommu_accel.h | 13 +++
>>> hw/i386/intel_iommu_internal.h | 8 ++
>>> hw/i386/intel_iommu.c | 3 +
>>> hw/i386/intel_iommu_accel.c | 170 +++++++++++++++++++++++++++++++++
>>> 4 files changed, 194 insertions(+)
>>>
>>> diff --git a/hw/i386/intel_iommu_accel.h b/hw/i386/intel_iommu_accel.h
>>> index e5f0b077b4..a77fd06fe0 100644
>>> --- a/hw/i386/intel_iommu_accel.h
>>> +++ b/hw/i386/intel_iommu_accel.h
>>> @@ -12,6 +12,13 @@
>>> #define HW_I386_INTEL_IOMMU_ACCEL_H
>>> #include CONFIG_DEVICES
>>>
>>> +typedef struct VTDACCELPASIDCacheEntry {
>>> + VTDHostIOMMUDevice *vtd_hiod;
>>> + VTDPASIDEntry pe;
>>
>> let's use pasid_entry here as we already used it in the
>> VTDPASIDCacheEntry. Sometimees, it may help when for searching related
>> code.
>
> Good suggestion, will do.
>
> Thanks
> Zhenzhong
>
^ permalink raw reply [flat|nested] 45+ messages in thread
* RE: [PATCH v1 07/13] intel_iommu: Handle PASID entry addition for pc_inv_dsc request
2026-03-20 10:13 ` Yi Liu
@ 2026-03-23 5:59 ` Duan, Zhenzhong
0 siblings, 0 replies; 45+ messages in thread
From: Duan, Zhenzhong @ 2026-03-23 5:59 UTC (permalink / raw)
To: Liu, Yi L, qemu-devel@nongnu.org
Cc: alex@shazbot.org, clg@redhat.com, eric.auger@redhat.com,
mst@redhat.com, jasowang@redhat.com, jgg@nvidia.com,
nicolinc@nvidia.com, skolothumtho@nvidia.com,
joao.m.martins@oracle.com, clement.mathieu--drif@eviden.com,
Tian, Kevin, Hao, Xudong
>-----Original Message-----
>From: Liu, Yi L <yi.l.liu@intel.com>
>Subject: Re: [PATCH v1 07/13] intel_iommu: Handle PASID entry addition for
>pc_inv_dsc request
>
>
>
>On 3/19/26 16:26, Duan, Zhenzhong wrote:
>>
>>
>>> -----Original Message-----
>>> From: Liu, Yi L <yi.l.liu@intel.com>
>>> Subject: Re: [PATCH v1 07/13] intel_iommu: Handle PASID entry addition for
>>> pc_inv_dsc request
>>>
>>> On 3/6/26 11:44, Zhenzhong Duan wrote:
>>>> Structure VTDAddressSpace includes some elements suitable for emulated
>>>> device and passthrough device without PASID, e.g., address space,
>>>> different memory regions, etc, it is also protected by vtd iommu lock,
>>>> all these are useless and become a burden for passthrough device with
>>>> PASID.
>>>>
>>>> When there are lots of PASIDs used in one device, the AS and MRs are
>>>> all registered to memory core and impact the whole system performance.
>>>>
>>>> So instead of using VTDAddressSpace to cache pasid entry for each pasid
>>>> of a passthrough device, we define a light weight structure
>>>> VTDACCELPASIDCacheEntry with only necessary elements for each pasid. We
>>>> will use this struct as a parameter to conduct binding/unbinding to
>>>> nested hwpt and to record the current binded nested hwpt. It's also
>>>> designed to support PASID_0.
>>>
>>> PASID_0 of passthrough device still need to register MRs in case guest
>>> does not operate in scalable mode. So for PASID_0, you will have both
>>> VTDAPASIDCacheEntry and VTDACCELPASIDCacheEntry. Is it?
>>
>> Exactly!
>
>better to note it somewhere (commit message, code comment). It would be
>a good hint for future maintenance. e.g. it is easy to forget this fact. :)
Will do.
>
>>>
>>>> When guest creates new PASID entries, QEMU will capture the pc_inv_dsc
>>>> (pasid cache invalidation) request, walk through each pasid in each
>>>> passthrough device for valid pasid entries, create a new
>>>> VTDACCELPASIDCacheEntry if not existing yet.
>>>
>>> I think this idea is ok, but we need to be clear about the
>>> boundary between the usages of the two pasid cache structures.
>>
>> Yes, the principle here is VTDACCELPASIDCacheEntry is only used in
>intel_iommu_accel.c, VTDPASIDCacheEntry is only used in hw/i386/intel_iommu.c
>
>just mark it in commit message as well.
Sure.
Thanks
Zhenzhong
^ permalink raw reply [flat|nested] 45+ messages in thread
* Re: [PATCH v1 07/13] intel_iommu: Handle PASID entry addition for pc_inv_dsc request
2026-03-06 3:44 ` [PATCH v1 07/13] intel_iommu: Handle PASID entry addition for pc_inv_dsc request Zhenzhong Duan
2026-03-18 12:42 ` Yi Liu
@ 2026-03-20 10:08 ` Yi Liu
2026-03-23 5:50 ` Duan, Zhenzhong
1 sibling, 1 reply; 45+ messages in thread
From: Yi Liu @ 2026-03-20 10:08 UTC (permalink / raw)
To: Zhenzhong Duan, qemu-devel
Cc: alex, clg, eric.auger, mst, jasowang, jgg, nicolinc, skolothumtho,
joao.m.martins, clement.mathieu--drif, kevin.tian, xudong.hao
On 3/6/26 11:44, Zhenzhong Duan wrote:
> Structure VTDAddressSpace includes some elements suitable for emulated
> device and passthrough device without PASID, e.g., address space,
> different memory regions, etc, it is also protected by vtd iommu lock,
> all these are useless and become a burden for passthrough device with
> PASID.
>
> When there are lots of PASIDs used in one device, the AS and MRs are
> all registered to memory core and impact the whole system performance.
>
> So instead of using VTDAddressSpace to cache pasid entry for each pasid
> of a passthrough device, we define a light weight structure
> VTDACCELPASIDCacheEntry with only necessary elements for each pasid. We
> will use this struct as a parameter to conduct binding/unbinding to
> nested hwpt and to record the current binded nested hwpt. It's also
s/binded/bound/
> designed to support PASID_0.
>
> When guest creates new PASID entries, QEMU will capture the pc_inv_dsc
> (pasid cache invalidation) request, walk through each pasid in each
> passthrough device for valid pasid entries, create a new
> VTDACCELPASIDCacheEntry if not existing yet.
I think some tweaking is preferable w.r.t. this patch and the next one.
In this patch you only need to handle PASID entry addition. Hence you
can assume that no VTDACCELPASIDCacheEntry exists yet.
> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> ---
> hw/i386/intel_iommu_accel.h | 13 +++
> hw/i386/intel_iommu_internal.h | 8 ++
> hw/i386/intel_iommu.c | 3 +
> hw/i386/intel_iommu_accel.c | 170 +++++++++++++++++++++++++++++++++
> 4 files changed, 194 insertions(+)
>
> diff --git a/hw/i386/intel_iommu_accel.h b/hw/i386/intel_iommu_accel.h
> index e5f0b077b4..a77fd06fe0 100644
> --- a/hw/i386/intel_iommu_accel.h
> +++ b/hw/i386/intel_iommu_accel.h
> @@ -12,6 +12,13 @@
> #define HW_I386_INTEL_IOMMU_ACCEL_H
> #include CONFIG_DEVICES
>
> +typedef struct VTDACCELPASIDCacheEntry {
> + VTDHostIOMMUDevice *vtd_hiod;
> + VTDPASIDEntry pe;
> + uint32_t pasid;
> + QLIST_ENTRY(VTDACCELPASIDCacheEntry) next;
> +} VTDACCELPASIDCacheEntry;
btw. s/VTDACCELPASIDCacheEntry/VTDAccelPASIDCacheEntry/ looks better. :)
> +
> #ifdef CONFIG_VTD_ACCEL
> bool vtd_check_hiod_accel(IntelIOMMUState *s, VTDHostIOMMUDevice *vtd_hiod,
> Error **errp);
> @@ -20,6 +27,7 @@ bool vtd_propagate_guest_pasid(VTDAddressSpace *vtd_as, Error **errp);
> void vtd_flush_host_piotlb_all_locked(IntelIOMMUState *s, uint16_t domain_id,
> uint32_t pasid, hwaddr addr,
> uint64_t npages, bool ih);
> +void vtd_pasid_cache_sync_accel(IntelIOMMUState *s, VTDPASIDCacheInfo *pc_info);
> void vtd_iommu_ops_update_accel(PCIIOMMUOps *ops);
> #else
> static inline bool vtd_check_hiod_accel(IntelIOMMUState *s,
> @@ -49,6 +57,11 @@ static inline void vtd_flush_host_piotlb_all_locked(IntelIOMMUState *s,
> {
> }
>
> +static inline void vtd_pasid_cache_sync_accel(IntelIOMMUState *s,
> + VTDPASIDCacheInfo *pc_info)
> +{
> +}
> +
> static inline void vtd_iommu_ops_update_accel(PCIIOMMUOps *ops)
> {
> }
> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
> index c7e107fe87..ede4db6d2d 100644
> --- a/hw/i386/intel_iommu_internal.h
> +++ b/hw/i386/intel_iommu_internal.h
> @@ -616,6 +616,7 @@ typedef struct VTDRootEntry VTDRootEntry;
> #define VTD_CTX_ENTRY_SCALABLE_SIZE 32
>
> #define PASID_0 0
> +#define VTD_SM_CONTEXT_ENTRY_PDTS(x) extract64((x)->val[0], 9, 3)
> #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL0(aw) (0x1e0ULL | ~VTD_HAW_MASK(aw))
> #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL1 0xffffffffffe00000ULL
> #define VTD_SM_CONTEXT_ENTRY_PRE 0x10ULL
> @@ -646,6 +647,7 @@ typedef struct VTDPIOTLBInvInfo {
> #define VTD_PASID_DIR_BITS_MASK (0x3fffULL)
> #define VTD_PASID_DIR_INDEX(pasid) (((pasid) >> 6) & VTD_PASID_DIR_BITS_MASK)
> #define VTD_PASID_DIR_FPD (1ULL << 1) /* Fault Processing Disable */
> +#define VTD_PASID_TABLE_ENTRY_NUM (1ULL << 6)
> #define VTD_PASID_TABLE_BITS_MASK (0x3fULL)
> #define VTD_PASID_TABLE_INDEX(pasid) ((pasid) & VTD_PASID_TABLE_BITS_MASK)
> #define VTD_PASID_ENTRY_FPD (1ULL << 1) /* Fault Processing Disable */
> @@ -711,6 +713,7 @@ typedef struct VTDHostIOMMUDevice {
> PCIBus *bus;
> uint8_t devfn;
> HostIOMMUDevice *hiod;
> + QLIST_HEAD(, VTDACCELPASIDCacheEntry) pasid_cache_list;
> } VTDHostIOMMUDevice;
>
> /*
> @@ -768,6 +771,11 @@ static inline int vtd_pasid_entry_compare(VTDPASIDEntry *p1, VTDPASIDEntry *p2)
> return memcmp(p1, p2, sizeof(*p1));
> }
>
> +static inline uint32_t vtd_sm_ce_get_pdt_entry_num(VTDContextEntry *ce)
> +{
> + return 1U << (VTD_SM_CONTEXT_ENTRY_PDTS(ce) + 7);
> +}
> +
> int vtd_get_pdire_from_pdir_table(dma_addr_t pasid_dir_base, uint32_t pasid,
> VTDPASIDDirEntry *pdire);
> int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s, uint32_t pasid,
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index 744b5967b2..984adc639a 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -3202,6 +3202,8 @@ static void vtd_pasid_cache_sync(IntelIOMMUState *s, VTDPASIDCacheInfo *pc_info)
> g_hash_table_foreach(s->vtd_address_spaces, vtd_pasid_cache_sync_locked,
> pc_info);
> vtd_iommu_unlock(s);
> +
> + vtd_pasid_cache_sync_accel(s, pc_info);
> }
>
> static void vtd_replay_pasid_bindings_all(IntelIOMMUState *s)
> @@ -4760,6 +4762,7 @@ static bool vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int devfn,
> vtd_hiod->devfn = (uint8_t)devfn;
> vtd_hiod->iommu_state = s;
> vtd_hiod->hiod = hiod;
> + QLIST_INIT(&vtd_hiod->pasid_cache_list);
>
> if (!vtd_check_hiod(s, vtd_hiod, errp)) {
> g_free(vtd_hiod);
> diff --git a/hw/i386/intel_iommu_accel.c b/hw/i386/intel_iommu_accel.c
> index c2757f3bcd..0acf3ae77f 100644
> --- a/hw/i386/intel_iommu_accel.c
> +++ b/hw/i386/intel_iommu_accel.c
> @@ -257,6 +257,176 @@ void vtd_flush_host_piotlb_all_locked(IntelIOMMUState *s, uint16_t domain_id,
> vtd_flush_host_piotlb_locked, &piotlb_info);
> }
>
> +static void vtd_find_add_pc(VTDHostIOMMUDevice *vtd_hiod, uint32_t pasid,
> + VTDPASIDEntry *pe)
> +{
then you can name this as vtd_accel_fill_pc(). And I think you would
have vtd_accel_update_pc() and vtd_accel_delete_pc() in the next patch.
> + VTDACCELPASIDCacheEntry *vtd_pce;
> +
> + QLIST_FOREACH(vtd_pce, &vtd_hiod->pasid_cache_list, next) {
> + if (vtd_pce->pasid == pasid) {
> + if (vtd_pasid_entry_compare(pe, &vtd_pce->pe)) {
> + vtd_pce->pe = *pe;
> + }
> + return;
> + }
> + }
hence this loop can be avoided.
> + vtd_pce = g_malloc0(sizeof(VTDACCELPASIDCacheEntry));
> + vtd_pce->vtd_hiod = vtd_hiod;
> + vtd_pce->pasid = pasid;
> + vtd_pce->pe = *pe;
> + QLIST_INSERT_HEAD(&vtd_hiod->pasid_cache_list, vtd_pce, next);
> +}
> +
> +/*
> + * This function walks over PASID range within [start, end) in a single
> + * PASID table for entries matching @info type/did, then create
> + * VTDACCELPASIDCacheEntry if not exist yet.
> + */
> +static void vtd_sm_pasid_table_walk_one(VTDHostIOMMUDevice *vtd_hiod,
> + dma_addr_t pt_base,
> + int start,
> + int end,
> + VTDPASIDCacheInfo *info)
> +{
> + IntelIOMMUState *s = vtd_hiod->iommu_state;
> + VTDPASIDEntry pe;
> + int pasid;
> +
> + for (pasid = start; pasid < end; pasid++) {
> + if (vtd_get_pe_in_pasid_leaf_table(s, pasid, pt_base, &pe) ||
> + !vtd_pe_present(&pe)) {
> + continue;
> + }
> +
> + if ((info->type == VTD_INV_DESC_PASIDC_G_DSI ||
> + info->type == VTD_INV_DESC_PASIDC_G_PASID_SI) &&
> + (info->did != VTD_SM_PASID_ENTRY_DID(&pe))) {
> + /*
> + * VTD_PASID_CACHE_DOMSI and VTD_PASID_CACHE_PASIDSI
> + * requires domain id check. If domain id check fail,
> + * go to next pasid.
> + */
> + continue;
> + }
> +
> + vtd_find_add_pc(vtd_hiod, pasid, &pe);
> + }
> +}
> +
> +/*
> + * In VT-d scalable mode translation, PASID dir + PASID table is used.
> + * This function aims at looping over a range of PASIDs in the given
> + * two level table to identify the pasid config in guest.
> + */
> +static void vtd_sm_pasid_table_walk(VTDHostIOMMUDevice *vtd_hiod,
> + dma_addr_t pdt_base,
> + int start, int end,
> + VTDPASIDCacheInfo *info)
> +{
> + VTDPASIDDirEntry pdire;
> + int pasid = start;
> + int pasid_next;
> + dma_addr_t pt_base;
> +
> + while (pasid < end) {
> + pasid_next = (pasid + VTD_PASID_TABLE_ENTRY_NUM) &
> + ~(VTD_PASID_TABLE_ENTRY_NUM - 1);
> + pasid_next = pasid_next < end ? pasid_next : end;
> +
> + if (!vtd_get_pdire_from_pdir_table(pdt_base, pasid, &pdire)
> + && vtd_pdire_present(&pdire)) {
> + pt_base = pdire.val & VTD_PASID_TABLE_BASE_ADDR_MASK;
> + vtd_sm_pasid_table_walk_one(vtd_hiod, pt_base, pasid, pasid_next,
> + info);
> + }
> + pasid = pasid_next;
> + }
> +}
> +
> +static void vtd_replay_pasid_bind_for_dev(VTDHostIOMMUDevice *vtd_hiod,
> + int start, int end,
> + VTDPASIDCacheInfo *pc_info)
> +{
> + IntelIOMMUState *s = vtd_hiod->iommu_state;
> + VTDContextEntry ce;
> + int dev_max_pasid = 1 << vtd_hiod->hiod->caps.max_pasid_log2;
> +
> + if (!vtd_dev_to_context_entry(s, pci_bus_num(vtd_hiod->bus),
> + vtd_hiod->devfn, &ce)) {
> + VTDPASIDCacheInfo walk_info = *pc_info;
> + uint32_t ce_max_pasid = vtd_sm_ce_get_pdt_entry_num(&ce) *
> + VTD_PASID_TABLE_ENTRY_NUM;
> +
> + end = MIN(end, MIN(dev_max_pasid, ce_max_pasid));
> +
> + vtd_sm_pasid_table_walk(vtd_hiod, VTD_CE_GET_PASID_DIR_TABLE(&ce),
> + start, end, &walk_info);
> + }
> +}
> +
> +/*
> + * This function replays the guest pasid bindings by walking the two level
> + * guest PASID table. For each valid pasid entry, it creates an entry
> + * VTDACCELPASIDCacheEntry dynamically if not exist yet. This entry holds
> + * info specific to a pasid
> + */
> +void vtd_pasid_cache_sync_accel(IntelIOMMUState *s, VTDPASIDCacheInfo *pc_info)
> +{
> + int start = PASID_0, end = 1 << s->pasid;
> + VTDHostIOMMUDevice *vtd_hiod;
> + GHashTableIter as_it;
s/as_it/hiod_it/
> +
> + if (!s->fsts) {
> + return;
> + }
> +
> + /*
> + * VTDPASIDCacheInfo honors PCI pasid but VTDACCELPASIDCacheEntry honors
> + * iommu pasid
> + */
> + if (pc_info->pasid == PCI_NO_PASID) {
> + pc_info->pasid = PASID_0;
> + }
> +
> + switch (pc_info->type) {
> + case VTD_INV_DESC_PASIDC_G_PASID_SI:
> + start = pc_info->pasid;
> + end = pc_info->pasid + 1;
> + /* fall through */
> + case VTD_INV_DESC_PASIDC_G_DSI:
> + /*
> + * loop all assigned devices, do domain id check in
> + * vtd_sm_pasid_table_walk_one() after get pasid entry.
> + */
> + break;
> + case VTD_INV_DESC_PASIDC_G_GLOBAL:
> + /* loop all assigned devices */
> + break;
> + default:
> + g_assert_not_reached();
> + }
> +
> + /*
> + * In this replay, one only needs to care about the devices which are
> + * backed by host IOMMU. Those devices have a corresponding vtd_hiod
> + * in s->vtd_host_iommu_dev. For devices not backed by host IOMMU, it
> + * is not necessary to replay the bindings since their cache should be
> + * created in the future DMA address translation.
> + *
> + * VTD translation callback never accesses vtd_hiod and its corresponding
> + * cached pasid entry, so no iommu lock needed here.
> + */
> + g_hash_table_iter_init(&as_it, s->vtd_host_iommu_dev);
> + while (g_hash_table_iter_next(&as_it, NULL, (void **)&vtd_hiod)) {
> + if (!object_dynamic_cast(OBJECT(vtd_hiod->hiod),
> + TYPE_HOST_IOMMU_DEVICE_IOMMUFD)) {
> + continue;
> + }
> + vtd_replay_pasid_bind_for_dev(vtd_hiod, start, end, pc_info);
> + }
> +}
> +
> static uint64_t vtd_get_host_iommu_quirks(uint32_t type,
> void *caps, uint32_t size)
> {
^ permalink raw reply [flat|nested] 45+ messages in thread
* RE: [PATCH v1 07/13] intel_iommu: Handle PASID entry addition for pc_inv_dsc request
2026-03-20 10:08 ` Yi Liu
@ 2026-03-23 5:50 ` Duan, Zhenzhong
2026-03-23 7:38 ` Yi Liu
0 siblings, 1 reply; 45+ messages in thread
From: Duan, Zhenzhong @ 2026-03-23 5:50 UTC (permalink / raw)
To: Liu, Yi L, qemu-devel@nongnu.org
Cc: alex@shazbot.org, clg@redhat.com, eric.auger@redhat.com,
mst@redhat.com, jasowang@redhat.com, jgg@nvidia.com,
nicolinc@nvidia.com, skolothumtho@nvidia.com,
joao.m.martins@oracle.com, clement.mathieu--drif@eviden.com,
Tian, Kevin, Hao, Xudong
>-----Original Message-----
>From: Liu, Yi L <yi.l.liu@intel.com>
>Subject: Re: [PATCH v1 07/13] intel_iommu: Handle PASID entry addition for
>pc_inv_dsc request
>
>On 3/6/26 11:44, Zhenzhong Duan wrote:
>> Structure VTDAddressSpace includes some elements suitable for emulated
>> device and passthrough device without PASID, e.g., address space,
>> different memory regions, etc, it is also protected by vtd iommu lock,
>> all these are useless and become a burden for passthrough device with
>> PASID.
>>
>> When there are lots of PASIDs used in one device, the AS and MRs are
>> all registered to memory core and impact the whole system performance.
>>
>> So instead of using VTDAddressSpace to cache pasid entry for each pasid
>> of a passthrough device, we define a light weight structure
>> VTDACCELPASIDCacheEntry with only necessary elements for each pasid. We
>> will use this struct as a parameter to conduct binding/unbinding to
>> nested hwpt and to record the current binded nested hwpt. It's also
>
>s/binded/bound/
OK.
>
>> designed to support PASID_0.
>>
>> When guest creates new PASID entries, QEMU will capture the pc_inv_dsc
>> (pasid cache invalidation) request, walk through each pasid in each
>> passthrough device for valid pasid entries, create a new
>> VTDACCELPASIDCacheEntry if not existing yet.
>
>I think some tweak is preferred w.r.t. this and the next patch.
>
>In this patch you only need to handle the PASID entry addition. Hence
>you assume no existing VTDACCELPASIDCacheEntry yet.
[...]
>
>> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>> ---
>> hw/i386/intel_iommu_accel.h | 13 +++
>> hw/i386/intel_iommu_internal.h | 8 ++
>> hw/i386/intel_iommu.c | 3 +
>> hw/i386/intel_iommu_accel.c | 170 +++++++++++++++++++++++++++++++++
>> 4 files changed, 194 insertions(+)
>>
>> diff --git a/hw/i386/intel_iommu_accel.h b/hw/i386/intel_iommu_accel.h
>> index e5f0b077b4..a77fd06fe0 100644
>> --- a/hw/i386/intel_iommu_accel.h
>> +++ b/hw/i386/intel_iommu_accel.h
>> @@ -12,6 +12,13 @@
>> #define HW_I386_INTEL_IOMMU_ACCEL_H
>> #include CONFIG_DEVICES
>>
>> +typedef struct VTDACCELPASIDCacheEntry {
>> + VTDHostIOMMUDevice *vtd_hiod;
>> + VTDPASIDEntry pe;
>> + uint32_t pasid;
>> + QLIST_ENTRY(VTDACCELPASIDCacheEntry) next;
>> +} VTDACCELPASIDCacheEntry;
>
>btw. s/VTDACCELPASIDCacheEntry/VTDAccelPASIDCacheEntry/ looks better. :)
Sure.
>> +
>> #ifdef CONFIG_VTD_ACCEL
>> bool vtd_check_hiod_accel(IntelIOMMUState *s, VTDHostIOMMUDevice
>*vtd_hiod,
>> Error **errp);
>> @@ -20,6 +27,7 @@ bool vtd_propagate_guest_pasid(VTDAddressSpace
>*vtd_as, Error **errp);
>> void vtd_flush_host_piotlb_all_locked(IntelIOMMUState *s, uint16_t
>domain_id,
>> uint32_t pasid, hwaddr addr,
>> uint64_t npages, bool ih);
>> +void vtd_pasid_cache_sync_accel(IntelIOMMUState *s, VTDPASIDCacheInfo
>*pc_info);
>> void vtd_iommu_ops_update_accel(PCIIOMMUOps *ops);
>> #else
>> static inline bool vtd_check_hiod_accel(IntelIOMMUState *s,
>> @@ -49,6 +57,11 @@ static inline void
>vtd_flush_host_piotlb_all_locked(IntelIOMMUState *s,
>> {
>> }
>>
>> +static inline void vtd_pasid_cache_sync_accel(IntelIOMMUState *s,
>> + VTDPASIDCacheInfo *pc_info)
>> +{
>> +}
>> +
>> static inline void vtd_iommu_ops_update_accel(PCIIOMMUOps *ops)
>> {
>> }
>> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
>> index c7e107fe87..ede4db6d2d 100644
>> --- a/hw/i386/intel_iommu_internal.h
>> +++ b/hw/i386/intel_iommu_internal.h
>> @@ -616,6 +616,7 @@ typedef struct VTDRootEntry VTDRootEntry;
>> #define VTD_CTX_ENTRY_SCALABLE_SIZE 32
>>
>> #define PASID_0 0
>> +#define VTD_SM_CONTEXT_ENTRY_PDTS(x) extract64((x)->val[0], 9, 3)
>> #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL0(aw) (0x1e0ULL |
>~VTD_HAW_MASK(aw))
>> #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL1 0xffffffffffe00000ULL
>> #define VTD_SM_CONTEXT_ENTRY_PRE 0x10ULL
>> @@ -646,6 +647,7 @@ typedef struct VTDPIOTLBInvInfo {
>> #define VTD_PASID_DIR_BITS_MASK (0x3fffULL)
>> #define VTD_PASID_DIR_INDEX(pasid) (((pasid) >> 6) &
>VTD_PASID_DIR_BITS_MASK)
>> #define VTD_PASID_DIR_FPD (1ULL << 1) /* Fault Processing Disable */
>> +#define VTD_PASID_TABLE_ENTRY_NUM (1ULL << 6)
>> #define VTD_PASID_TABLE_BITS_MASK (0x3fULL)
>> #define VTD_PASID_TABLE_INDEX(pasid) ((pasid) &
>VTD_PASID_TABLE_BITS_MASK)
>> #define VTD_PASID_ENTRY_FPD (1ULL << 1) /* Fault Processing Disable
>*/
>> @@ -711,6 +713,7 @@ typedef struct VTDHostIOMMUDevice {
>> PCIBus *bus;
>> uint8_t devfn;
>> HostIOMMUDevice *hiod;
>> + QLIST_HEAD(, VTDACCELPASIDCacheEntry) pasid_cache_list;
>> } VTDHostIOMMUDevice;
>>
>> /*
>> @@ -768,6 +771,11 @@ static inline int
>vtd_pasid_entry_compare(VTDPASIDEntry *p1, VTDPASIDEntry *p2)
>> return memcmp(p1, p2, sizeof(*p1));
>> }
>>
>> +static inline uint32_t vtd_sm_ce_get_pdt_entry_num(VTDContextEntry *ce)
>> +{
>> + return 1U << (VTD_SM_CONTEXT_ENTRY_PDTS(ce) + 7);
>> +}
>> +
>> int vtd_get_pdire_from_pdir_table(dma_addr_t pasid_dir_base, uint32_t pasid,
>> VTDPASIDDirEntry *pdire);
>> int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s, uint32_t pasid,
>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>> index 744b5967b2..984adc639a 100644
>> --- a/hw/i386/intel_iommu.c
>> +++ b/hw/i386/intel_iommu.c
>> @@ -3202,6 +3202,8 @@ static void vtd_pasid_cache_sync(IntelIOMMUState
>*s, VTDPASIDCacheInfo *pc_info)
>> g_hash_table_foreach(s->vtd_address_spaces,
>vtd_pasid_cache_sync_locked,
>> pc_info);
>> vtd_iommu_unlock(s);
>> +
>> + vtd_pasid_cache_sync_accel(s, pc_info);
>> }
>>
>> static void vtd_replay_pasid_bindings_all(IntelIOMMUState *s)
>> @@ -4760,6 +4762,7 @@ static bool vtd_dev_set_iommu_device(PCIBus *bus,
>void *opaque, int devfn,
>> vtd_hiod->devfn = (uint8_t)devfn;
>> vtd_hiod->iommu_state = s;
>> vtd_hiod->hiod = hiod;
>> + QLIST_INIT(&vtd_hiod->pasid_cache_list);
>>
>> if (!vtd_check_hiod(s, vtd_hiod, errp)) {
>> g_free(vtd_hiod);
>> diff --git a/hw/i386/intel_iommu_accel.c b/hw/i386/intel_iommu_accel.c
>> index c2757f3bcd..0acf3ae77f 100644
>> --- a/hw/i386/intel_iommu_accel.c
>> +++ b/hw/i386/intel_iommu_accel.c
>> @@ -257,6 +257,176 @@ void
>vtd_flush_host_piotlb_all_locked(IntelIOMMUState *s, uint16_t domain_id,
>> vtd_flush_host_piotlb_locked, &piotlb_info);
>> }
>>
>> +static void vtd_find_add_pc(VTDHostIOMMUDevice *vtd_hiod, uint32_t pasid,
>> + VTDPASIDEntry *pe)
>> +{
>
>then you can name this as vtd_accel_fill_pc(). And I think you would
>have vtd_accel_update_pc() and vtd_accel_delete_pc() in the next patch.
Yes, in the current implementation this patch handles pasid entry addition
and update, and the next patch handles removal.
What's the benefit of moving the pasid entry update into the next patch?
>
>> + VTDACCELPASIDCacheEntry *vtd_pce;
>> +
>> + QLIST_FOREACH(vtd_pce, &vtd_hiod->pasid_cache_list, next) {
>> + if (vtd_pce->pasid == pasid) {
>> + if (vtd_pasid_entry_compare(pe, &vtd_pce->pe)) {
>> + vtd_pce->pe = *pe;
>> + }
>> + return;
>> + }
>> + }
>
>hence this loop can be avoided.
Hm, I think the guest may send a redundant pc_inv_dsc, so we need this loop
to skip an existing pasid entry and avoid adding a duplicate pasid entry.
Let me know if I misunderstand what you mean.
>
>> + vtd_pce = g_malloc0(sizeof(VTDACCELPASIDCacheEntry));
>> + vtd_pce->vtd_hiod = vtd_hiod;
>> + vtd_pce->pasid = pasid;
>> + vtd_pce->pe = *pe;
>> + QLIST_INSERT_HEAD(&vtd_hiod->pasid_cache_list, vtd_pce, next);
>> +}
>> +
>> +/*
>> + * This function walks over PASID range within [start, end) in a single
>> + * PASID table for entries matching @info type/did, then create
>> + * VTDACCELPASIDCacheEntry if not exist yet.
>> + */
>> +static void vtd_sm_pasid_table_walk_one(VTDHostIOMMUDevice *vtd_hiod,
>> + dma_addr_t pt_base,
>> + int start,
>> + int end,
>> + VTDPASIDCacheInfo *info)
>> +{
>> + IntelIOMMUState *s = vtd_hiod->iommu_state;
>> + VTDPASIDEntry pe;
>> + int pasid;
>> +
>> + for (pasid = start; pasid < end; pasid++) {
>> + if (vtd_get_pe_in_pasid_leaf_table(s, pasid, pt_base, &pe) ||
>> + !vtd_pe_present(&pe)) {
>> + continue;
>> + }
>> +
>> + if ((info->type == VTD_INV_DESC_PASIDC_G_DSI ||
>> + info->type == VTD_INV_DESC_PASIDC_G_PASID_SI) &&
>> + (info->did != VTD_SM_PASID_ENTRY_DID(&pe))) {
>> + /*
>> + * VTD_PASID_CACHE_DOMSI and VTD_PASID_CACHE_PASIDSI
>> + * requires domain id check. If domain id check fail,
>> + * go to next pasid.
>> + */
>> + continue;
>> + }
>> +
>> + vtd_find_add_pc(vtd_hiod, pasid, &pe);
>> + }
>> +}
>> +
>> +/*
>> + * In VT-d scalable mode translation, PASID dir + PASID table is used.
>> + * This function aims at looping over a range of PASIDs in the given
>> + * two level table to identify the pasid config in guest.
>> + */
>> +static void vtd_sm_pasid_table_walk(VTDHostIOMMUDevice *vtd_hiod,
>> + dma_addr_t pdt_base,
>> + int start, int end,
>> + VTDPASIDCacheInfo *info)
>> +{
>> + VTDPASIDDirEntry pdire;
>> + int pasid = start;
>> + int pasid_next;
>> + dma_addr_t pt_base;
>> +
>> + while (pasid < end) {
>> + pasid_next = (pasid + VTD_PASID_TABLE_ENTRY_NUM) &
>> + ~(VTD_PASID_TABLE_ENTRY_NUM - 1);
>> + pasid_next = pasid_next < end ? pasid_next : end;
>> +
>> + if (!vtd_get_pdire_from_pdir_table(pdt_base, pasid, &pdire)
>> + && vtd_pdire_present(&pdire)) {
>> + pt_base = pdire.val & VTD_PASID_TABLE_BASE_ADDR_MASK;
>> + vtd_sm_pasid_table_walk_one(vtd_hiod, pt_base, pasid, pasid_next,
>> + info);
>> + }
>> + pasid = pasid_next;
>> + }
>> +}
>> +
>> +static void vtd_replay_pasid_bind_for_dev(VTDHostIOMMUDevice *vtd_hiod,
>> + int start, int end,
>> + VTDPASIDCacheInfo *pc_info)
>> +{
>> + IntelIOMMUState *s = vtd_hiod->iommu_state;
>> + VTDContextEntry ce;
>> + int dev_max_pasid = 1 << vtd_hiod->hiod->caps.max_pasid_log2;
>> +
>> + if (!vtd_dev_to_context_entry(s, pci_bus_num(vtd_hiod->bus),
>> + vtd_hiod->devfn, &ce)) {
>> + VTDPASIDCacheInfo walk_info = *pc_info;
>> + uint32_t ce_max_pasid = vtd_sm_ce_get_pdt_entry_num(&ce) *
>> + VTD_PASID_TABLE_ENTRY_NUM;
>> +
>> + end = MIN(end, MIN(dev_max_pasid, ce_max_pasid));
>> +
>> + vtd_sm_pasid_table_walk(vtd_hiod,
>VTD_CE_GET_PASID_DIR_TABLE(&ce),
>> + start, end, &walk_info);
>> + }
>> +}
>> +
>> +/*
>> + * This function replays the guest pasid bindings by walking the two level
>> + * guest PASID table. For each valid pasid entry, it creates an entry
>> + * VTDACCELPASIDCacheEntry dynamically if not exist yet. This entry holds
>> + * info specific to a pasid
>> + */
>> +void vtd_pasid_cache_sync_accel(IntelIOMMUState *s, VTDPASIDCacheInfo
>*pc_info)
>> +{
>> + int start = PASID_0, end = 1 << s->pasid;
>> + VTDHostIOMMUDevice *vtd_hiod;
>> + GHashTableIter as_it;
>
>s/as_it/hiod_it/
Sure.
Thanks
Zhenzhong
>
>> +
>> + if (!s->fsts) {
>> + return;
>> + }
>> +
>> + /*
>> + * VTDPASIDCacheInfo honors PCI pasid but VTDACCELPASIDCacheEntry
>honors
>> + * iommu pasid
>> + */
>> + if (pc_info->pasid == PCI_NO_PASID) {
>> + pc_info->pasid = PASID_0;
>> + }
>> +
>> + switch (pc_info->type) {
>> + case VTD_INV_DESC_PASIDC_G_PASID_SI:
>> + start = pc_info->pasid;
>> + end = pc_info->pasid + 1;
>> + /* fall through */
>> + case VTD_INV_DESC_PASIDC_G_DSI:
>> + /*
>> + * loop all assigned devices, do domain id check in
>> + * vtd_sm_pasid_table_walk_one() after get pasid entry.
>> + */
>> + break;
>> + case VTD_INV_DESC_PASIDC_G_GLOBAL:
>> + /* loop all assigned devices */
>> + break;
>> + default:
>> + g_assert_not_reached();
>> + }
>> +
>> + /*
>> + * In this replay, one only needs to care about the devices which are
>> + * backed by host IOMMU. Those devices have a corresponding vtd_hiod
>> + * in s->vtd_host_iommu_dev. For devices not backed by host IOMMU, it
>> + * is not necessary to replay the bindings since their cache should be
>> + * created in the future DMA address translation.
>> + *
>> + * VTD translation callback never accesses vtd_hiod and its corresponding
>> + * cached pasid entry, so no iommu lock needed here.
>> + */
>> + g_hash_table_iter_init(&as_it, s->vtd_host_iommu_dev);
>> + while (g_hash_table_iter_next(&as_it, NULL, (void **)&vtd_hiod)) {
>> + if (!object_dynamic_cast(OBJECT(vtd_hiod->hiod),
>> + TYPE_HOST_IOMMU_DEVICE_IOMMUFD)) {
>> + continue;
>> + }
>> + vtd_replay_pasid_bind_for_dev(vtd_hiod, start, end, pc_info);
>> + }
>> +}
>> +
>> static uint64_t vtd_get_host_iommu_quirks(uint32_t type,
>> void *caps, uint32_t size)
>> {
^ permalink raw reply [flat|nested] 45+ messages in thread
* Re: [PATCH v1 07/13] intel_iommu: Handle PASID entry addition for pc_inv_dsc request
2026-03-23 5:50 ` Duan, Zhenzhong
@ 2026-03-23 7:38 ` Yi Liu
2026-03-23 8:11 ` Duan, Zhenzhong
0 siblings, 1 reply; 45+ messages in thread
From: Yi Liu @ 2026-03-23 7:38 UTC (permalink / raw)
To: Duan, Zhenzhong, qemu-devel@nongnu.org
Cc: alex@shazbot.org, clg@redhat.com, eric.auger@redhat.com,
mst@redhat.com, jasowang@redhat.com, jgg@nvidia.com,
nicolinc@nvidia.com, skolothumtho@nvidia.com,
joao.m.martins@oracle.com, clement.mathieu--drif@eviden.com,
Tian, Kevin, Hao, Xudong
On 3/23/26 13:50, Duan, Zhenzhong wrote:
>
>
>> -----Original Message-----
>> From: Liu, Yi L <yi.l.liu@intel.com>
>> Subject: Re: [PATCH v1 07/13] intel_iommu: Handle PASID entry addition for
>> pc_inv_dsc request
>>
>> On 3/6/26 11:44, Zhenzhong Duan wrote:
>>> Structure VTDAddressSpace includes some elements suitable for emulated
>>> device and passthrough device without PASID, e.g., address space,
>>> different memory regions, etc, it is also protected by vtd iommu lock,
>>> all these are useless and become a burden for passthrough device with
>>> PASID.
>>>
>>> When there are lots of PASIDs used in one device, the AS and MRs are
>>> all registered to memory core and impact the whole system performance.
>>>
>>> So instead of using VTDAddressSpace to cache pasid entry for each pasid
>>> of a passthrough device, we define a light weight structure
>>> VTDACCELPASIDCacheEntry with only necessary elements for each pasid. We
>>> will use this struct as a parameter to conduct binding/unbinding to
>>> nested hwpt and to record the current binded nested hwpt. It's also
>>
>> s/binded/bound/
>
> OK.
>
>>
>>> designed to support PASID_0.
>>>
>>> When guest creates new PASID entries, QEMU will capture the pc_inv_dsc
>>> (pasid cache invalidation) request, walk through each pasid in each
>>> passthrough device for valid pasid entries, create a new
>>> VTDACCELPASIDCacheEntry if not existing yet.
>>
>> I think some tweak is preferred w.r.t. this and the next patch.
>>
>> In this patch you only need to handle the PASID entry addition. Hence
>> you assume no existing VTDACCELPASIDCacheEntry yet.
>
> [...]
>
>>
>>> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
>>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>>> ---
>>> hw/i386/intel_iommu_accel.h | 13 +++
>>> hw/i386/intel_iommu_internal.h | 8 ++
>>> hw/i386/intel_iommu.c | 3 +
>>> hw/i386/intel_iommu_accel.c | 170 +++++++++++++++++++++++++++++++++
>>> 4 files changed, 194 insertions(+)
>>>
>>> diff --git a/hw/i386/intel_iommu_accel.h b/hw/i386/intel_iommu_accel.h
>>> index e5f0b077b4..a77fd06fe0 100644
>>> --- a/hw/i386/intel_iommu_accel.h
>>> +++ b/hw/i386/intel_iommu_accel.h
>>> @@ -12,6 +12,13 @@
>>> #define HW_I386_INTEL_IOMMU_ACCEL_H
>>> #include CONFIG_DEVICES
>>>
>>> +typedef struct VTDACCELPASIDCacheEntry {
>>> + VTDHostIOMMUDevice *vtd_hiod;
>>> + VTDPASIDEntry pe;
>>> + uint32_t pasid;
>>> + QLIST_ENTRY(VTDACCELPASIDCacheEntry) next;
>>> +} VTDACCELPASIDCacheEntry;
>>
>> btw. s/VTDACCELPASIDCacheEntry/VTDAccelPASIDCacheEntry/ looks better. :)
>
> Sure.
>
>>> +
>>> #ifdef CONFIG_VTD_ACCEL
>>> bool vtd_check_hiod_accel(IntelIOMMUState *s, VTDHostIOMMUDevice
>> *vtd_hiod,
>>> Error **errp);
>>> @@ -20,6 +27,7 @@ bool vtd_propagate_guest_pasid(VTDAddressSpace
>> *vtd_as, Error **errp);
>>> void vtd_flush_host_piotlb_all_locked(IntelIOMMUState *s, uint16_t
>> domain_id,
>>> uint32_t pasid, hwaddr addr,
>>> uint64_t npages, bool ih);
>>> +void vtd_pasid_cache_sync_accel(IntelIOMMUState *s, VTDPASIDCacheInfo
>> *pc_info);
>>> void vtd_iommu_ops_update_accel(PCIIOMMUOps *ops);
>>> #else
>>> static inline bool vtd_check_hiod_accel(IntelIOMMUState *s,
>>> @@ -49,6 +57,11 @@ static inline void
>> vtd_flush_host_piotlb_all_locked(IntelIOMMUState *s,
>>> {
>>> }
>>>
>>> +static inline void vtd_pasid_cache_sync_accel(IntelIOMMUState *s,
>>> + VTDPASIDCacheInfo *pc_info)
>>> +{
>>> +}
>>> +
>>> static inline void vtd_iommu_ops_update_accel(PCIIOMMUOps *ops)
>>> {
>>> }
>>> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
>>> index c7e107fe87..ede4db6d2d 100644
>>> --- a/hw/i386/intel_iommu_internal.h
>>> +++ b/hw/i386/intel_iommu_internal.h
>>> @@ -616,6 +616,7 @@ typedef struct VTDRootEntry VTDRootEntry;
>>> #define VTD_CTX_ENTRY_SCALABLE_SIZE 32
>>>
>>> #define PASID_0 0
>>> +#define VTD_SM_CONTEXT_ENTRY_PDTS(x) extract64((x)->val[0], 9, 3)
>>> #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL0(aw) (0x1e0ULL |
>> ~VTD_HAW_MASK(aw))
>>> #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL1 0xffffffffffe00000ULL
>>> #define VTD_SM_CONTEXT_ENTRY_PRE 0x10ULL
>>> @@ -646,6 +647,7 @@ typedef struct VTDPIOTLBInvInfo {
>>> #define VTD_PASID_DIR_BITS_MASK (0x3fffULL)
>>> #define VTD_PASID_DIR_INDEX(pasid) (((pasid) >> 6) &
>> VTD_PASID_DIR_BITS_MASK)
>>> #define VTD_PASID_DIR_FPD (1ULL << 1) /* Fault Processing Disable */
>>> +#define VTD_PASID_TABLE_ENTRY_NUM (1ULL << 6)
>>> #define VTD_PASID_TABLE_BITS_MASK (0x3fULL)
>>> #define VTD_PASID_TABLE_INDEX(pasid) ((pasid) &
>> VTD_PASID_TABLE_BITS_MASK)
>>> #define VTD_PASID_ENTRY_FPD (1ULL << 1) /* Fault Processing Disable
>> */
>>> @@ -711,6 +713,7 @@ typedef struct VTDHostIOMMUDevice {
>>> PCIBus *bus;
>>> uint8_t devfn;
>>> HostIOMMUDevice *hiod;
>>> + QLIST_HEAD(, VTDACCELPASIDCacheEntry) pasid_cache_list;
>>> } VTDHostIOMMUDevice;
>>>
>>> /*
>>> @@ -768,6 +771,11 @@ static inline int
>> vtd_pasid_entry_compare(VTDPASIDEntry *p1, VTDPASIDEntry *p2)
>>> return memcmp(p1, p2, sizeof(*p1));
>>> }
>>>
>>> +static inline uint32_t vtd_sm_ce_get_pdt_entry_num(VTDContextEntry *ce)
>>> +{
>>> + return 1U << (VTD_SM_CONTEXT_ENTRY_PDTS(ce) + 7);
>>> +}
>>> +
>>> int vtd_get_pdire_from_pdir_table(dma_addr_t pasid_dir_base, uint32_t pasid,
>>> VTDPASIDDirEntry *pdire);
>>> int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s, uint32_t pasid,
>>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>>> index 744b5967b2..984adc639a 100644
>>> --- a/hw/i386/intel_iommu.c
>>> +++ b/hw/i386/intel_iommu.c
>>> @@ -3202,6 +3202,8 @@ static void vtd_pasid_cache_sync(IntelIOMMUState
>> *s, VTDPASIDCacheInfo *pc_info)
>>> g_hash_table_foreach(s->vtd_address_spaces,
>> vtd_pasid_cache_sync_locked,
>>> pc_info);
>>> vtd_iommu_unlock(s);
>>> +
>>> + vtd_pasid_cache_sync_accel(s, pc_info);
>>> }
>>>
>>> static void vtd_replay_pasid_bindings_all(IntelIOMMUState *s)
>>> @@ -4760,6 +4762,7 @@ static bool vtd_dev_set_iommu_device(PCIBus *bus,
>> void *opaque, int devfn,
>>> vtd_hiod->devfn = (uint8_t)devfn;
>>> vtd_hiod->iommu_state = s;
>>> vtd_hiod->hiod = hiod;
>>> + QLIST_INIT(&vtd_hiod->pasid_cache_list);
>>>
>>> if (!vtd_check_hiod(s, vtd_hiod, errp)) {
>>> g_free(vtd_hiod);
>>> diff --git a/hw/i386/intel_iommu_accel.c b/hw/i386/intel_iommu_accel.c
>>> index c2757f3bcd..0acf3ae77f 100644
>>> --- a/hw/i386/intel_iommu_accel.c
>>> +++ b/hw/i386/intel_iommu_accel.c
>>> @@ -257,6 +257,176 @@ void
>> vtd_flush_host_piotlb_all_locked(IntelIOMMUState *s, uint16_t domain_id,
>>> vtd_flush_host_piotlb_locked, &piotlb_info);
>>> }
>>>
>>> +static void vtd_find_add_pc(VTDHostIOMMUDevice *vtd_hiod, uint32_t pasid,
>>> + VTDPASIDEntry *pe)
>>> +{
>>
>> then you can name this as vtd_accel_fill_pc(). And I think you would
>> have vtd_accel_update_pc() and vtd_accel_delete_pc() in the next patch.
>
> Yes, in current implementation this patch handles pasid entry addition and update,
> next patch handles removal.
> What's the benefit if we move pasid entry update into next patch?
If there is no redundant flush, then for a newly created pasid entry you
do not need to loop over the pasid cache entry list. But it's possible to
have such a guest. :)
>>
>>> + VTDACCELPASIDCacheEntry *vtd_pce;
>>> +
>>> + QLIST_FOREACH(vtd_pce, &vtd_hiod->pasid_cache_list, next) {
>>> + if (vtd_pce->pasid == pasid) {
>>> + if (vtd_pasid_entry_compare(pe, &vtd_pce->pe)) {
>>> + vtd_pce->pe = *pe;
>>> + }
>>> + return;
>>> + }
>>> + }
>>
>> hence this loop can be avoided.
>
> Hm, I think guest may send redundant pv_inv_dsc, so we need this loop
> to bypass existing pasid entry to avoid adding duplicate pasid entry.
> Let me know if I misunderstand what you mean.
I do have an idea for handling it, but it relies on some kind of flag
returned by the removal/update processing path to indicate that there is
no need to re-create the pasid cache entry in the
vtd_replay_pasid_bind_for_dev() path. I don't think we need to make
things so complicated, so let's follow the current splitting. But I think
you can still name this helper vtd_accel_fill_pc(); "find" sometimes
implies returning something.
^ permalink raw reply [flat|nested] 45+ messages in thread
* RE: [PATCH v1 07/13] intel_iommu: Handle PASID entry addition for pc_inv_dsc request
2026-03-23 7:38 ` Yi Liu
@ 2026-03-23 8:11 ` Duan, Zhenzhong
0 siblings, 0 replies; 45+ messages in thread
From: Duan, Zhenzhong @ 2026-03-23 8:11 UTC (permalink / raw)
To: Liu, Yi L, qemu-devel@nongnu.org
Cc: alex@shazbot.org, clg@redhat.com, eric.auger@redhat.com,
mst@redhat.com, jasowang@redhat.com, jgg@nvidia.com,
nicolinc@nvidia.com, skolothumtho@nvidia.com,
joao.m.martins@oracle.com, clement.mathieu--drif@eviden.com,
Tian, Kevin, Hao, Xudong
>-----Original Message-----
>From: Liu, Yi L <yi.l.liu@intel.com>
>Subject: Re: [PATCH v1 07/13] intel_iommu: Handle PASID entry addition for
>pc_inv_dsc request
>
>On 3/23/26 13:50, Duan, Zhenzhong wrote:
>>
>>
>>> -----Original Message-----
>>> From: Liu, Yi L <yi.l.liu@intel.com>
>>> Subject: Re: [PATCH v1 07/13] intel_iommu: Handle PASID entry addition for
>>> pc_inv_dsc request
>>>
>>> On 3/6/26 11:44, Zhenzhong Duan wrote:
>>>> Structure VTDAddressSpace includes some elements suitable for emulated
>>>> device and passthrough device without PASID, e.g., address space,
>>>> different memory regions, etc, it is also protected by vtd iommu lock,
>>>> all these are useless and become a burden for passthrough device with
>>>> PASID.
>>>>
>>>> When there are lots of PASIDs used in one device, the AS and MRs are
>>>> all registered to memory core and impact the whole system performance.
>>>>
>>>> So instead of using VTDAddressSpace to cache pasid entry for each pasid
>>>> of a passthrough device, we define a light weight structure
>>>> VTDACCELPASIDCacheEntry with only necessary elements for each pasid. We
>>>> will use this struct as a parameter to conduct binding/unbinding to
>>>> nested hwpt and to record the current binded nested hwpt. It's also
>>>
>>> s/binded/bound/
>>
>> OK.
>>
>>>
>>>> designed to support PASID_0.
>>>>
>>>> When guest creates new PASID entries, QEMU will capture the pc_inv_dsc
>>>> (pasid cache invalidation) request, walk through each pasid in each
>>>> passthrough device for valid pasid entries, create a new
>>>> VTDACCELPASIDCacheEntry if not existing yet.
>>>
>>> I think some tweak is preferred w.r.t. this and the next patch.
>>>
>>> In this patch you only need to handle the PASID entry addition. Hence
>>> you assume no existing VTDACCELPASIDCacheEntry yet.
>>
>> [...]
>>
>>>
>>>> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
>>>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>>>> ---
>>>> hw/i386/intel_iommu_accel.h | 13 +++
>>>> hw/i386/intel_iommu_internal.h | 8 ++
>>>> hw/i386/intel_iommu.c | 3 +
>>>> hw/i386/intel_iommu_accel.c | 170
>+++++++++++++++++++++++++++++++++
>>>> 4 files changed, 194 insertions(+)
>>>>
>>>> diff --git a/hw/i386/intel_iommu_accel.h b/hw/i386/intel_iommu_accel.h
>>>> index e5f0b077b4..a77fd06fe0 100644
>>>> --- a/hw/i386/intel_iommu_accel.h
>>>> +++ b/hw/i386/intel_iommu_accel.h
>>>> @@ -12,6 +12,13 @@
>>>> #define HW_I386_INTEL_IOMMU_ACCEL_H
>>>> #include CONFIG_DEVICES
>>>>
>>>> +typedef struct VTDACCELPASIDCacheEntry {
>>>> + VTDHostIOMMUDevice *vtd_hiod;
>>>> + VTDPASIDEntry pe;
>>>> + uint32_t pasid;
>>>> + QLIST_ENTRY(VTDACCELPASIDCacheEntry) next;
>>>> +} VTDACCELPASIDCacheEntry;
>>>
>>> btw. s/VTDACCELPASIDCacheEntry/VTDAccelPASIDCacheEntry/ looks better. :)
>>
>> Sure.
>>
>>>> +
>>>> #ifdef CONFIG_VTD_ACCEL
>>>> bool vtd_check_hiod_accel(IntelIOMMUState *s, VTDHostIOMMUDevice
>>> *vtd_hiod,
>>>> Error **errp);
>>>> @@ -20,6 +27,7 @@ bool vtd_propagate_guest_pasid(VTDAddressSpace
>>> *vtd_as, Error **errp);
>>>> void vtd_flush_host_piotlb_all_locked(IntelIOMMUState *s, uint16_t
>>> domain_id,
>>>> uint32_t pasid, hwaddr addr,
>>>> uint64_t npages, bool ih);
>>>> +void vtd_pasid_cache_sync_accel(IntelIOMMUState *s, VTDPASIDCacheInfo
>>> *pc_info);
>>>> void vtd_iommu_ops_update_accel(PCIIOMMUOps *ops);
>>>> #else
>>>> static inline bool vtd_check_hiod_accel(IntelIOMMUState *s,
>>>> @@ -49,6 +57,11 @@ static inline void
>>> vtd_flush_host_piotlb_all_locked(IntelIOMMUState *s,
>>>> {
>>>> }
>>>>
>>>> +static inline void vtd_pasid_cache_sync_accel(IntelIOMMUState *s,
>>>> + VTDPASIDCacheInfo *pc_info)
>>>> +{
>>>> +}
>>>> +
>>>> static inline void vtd_iommu_ops_update_accel(PCIIOMMUOps *ops)
>>>> {
>>>> }
>>>> diff --git a/hw/i386/intel_iommu_internal.h
>b/hw/i386/intel_iommu_internal.h
>>>> index c7e107fe87..ede4db6d2d 100644
>>>> --- a/hw/i386/intel_iommu_internal.h
>>>> +++ b/hw/i386/intel_iommu_internal.h
>>>> @@ -616,6 +616,7 @@ typedef struct VTDRootEntry VTDRootEntry;
>>>> #define VTD_CTX_ENTRY_SCALABLE_SIZE 32
>>>>
>>>> #define PASID_0 0
>>>> +#define VTD_SM_CONTEXT_ENTRY_PDTS(x) extract64((x)->val[0], 9, 3)
>>>> #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL0(aw) (0x1e0ULL |
>>> ~VTD_HAW_MASK(aw))
>>>> #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL1 0xffffffffffe00000ULL
>>>> #define VTD_SM_CONTEXT_ENTRY_PRE 0x10ULL
>>>> @@ -646,6 +647,7 @@ typedef struct VTDPIOTLBInvInfo {
>>>> #define VTD_PASID_DIR_BITS_MASK (0x3fffULL)
>>>> #define VTD_PASID_DIR_INDEX(pasid) (((pasid) >> 6) &
>>> VTD_PASID_DIR_BITS_MASK)
>>>> #define VTD_PASID_DIR_FPD (1ULL << 1) /* Fault Processing Disable
>*/
>>>> +#define VTD_PASID_TABLE_ENTRY_NUM (1ULL << 6)
>>>> #define VTD_PASID_TABLE_BITS_MASK (0x3fULL)
>>>> #define VTD_PASID_TABLE_INDEX(pasid) ((pasid) &
>>> VTD_PASID_TABLE_BITS_MASK)
>>>> #define VTD_PASID_ENTRY_FPD (1ULL << 1) /* Fault Processing
>Disable
>>> */
>>>> @@ -711,6 +713,7 @@ typedef struct VTDHostIOMMUDevice {
>>>> PCIBus *bus;
>>>> uint8_t devfn;
>>>> HostIOMMUDevice *hiod;
>>>> + QLIST_HEAD(, VTDACCELPASIDCacheEntry) pasid_cache_list;
>>>> } VTDHostIOMMUDevice;
>>>>
>>>> /*
>>>> @@ -768,6 +771,11 @@ static inline int
>>> vtd_pasid_entry_compare(VTDPASIDEntry *p1, VTDPASIDEntry *p2)
>>>> return memcmp(p1, p2, sizeof(*p1));
>>>> }
>>>>
>>>> +static inline uint32_t vtd_sm_ce_get_pdt_entry_num(VTDContextEntry *ce)
>>>> +{
>>>> + return 1U << (VTD_SM_CONTEXT_ENTRY_PDTS(ce) + 7);
>>>> +}
>>>> +
>>>> int vtd_get_pdire_from_pdir_table(dma_addr_t pasid_dir_base, uint32_t
>pasid,
>>>> VTDPASIDDirEntry *pdire);
>>>> int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s, uint32_t pasid,
>>>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>>>> index 744b5967b2..984adc639a 100644
>>>> --- a/hw/i386/intel_iommu.c
>>>> +++ b/hw/i386/intel_iommu.c
>>>> @@ -3202,6 +3202,8 @@ static void
>vtd_pasid_cache_sync(IntelIOMMUState
>>> *s, VTDPASIDCacheInfo *pc_info)
>>>> g_hash_table_foreach(s->vtd_address_spaces,
>>> vtd_pasid_cache_sync_locked,
>>>> pc_info);
>>>> vtd_iommu_unlock(s);
>>>> +
>>>> + vtd_pasid_cache_sync_accel(s, pc_info);
>>>> }
>>>>
>>>> static void vtd_replay_pasid_bindings_all(IntelIOMMUState *s)
>>>> @@ -4760,6 +4762,7 @@ static bool vtd_dev_set_iommu_device(PCIBus
>*bus,
>>> void *opaque, int devfn,
>>>> vtd_hiod->devfn = (uint8_t)devfn;
>>>> vtd_hiod->iommu_state = s;
>>>> vtd_hiod->hiod = hiod;
>>>> + QLIST_INIT(&vtd_hiod->pasid_cache_list);
>>>>
>>>> if (!vtd_check_hiod(s, vtd_hiod, errp)) {
>>>> g_free(vtd_hiod);
>>>> diff --git a/hw/i386/intel_iommu_accel.c b/hw/i386/intel_iommu_accel.c
>>>> index c2757f3bcd..0acf3ae77f 100644
>>>> --- a/hw/i386/intel_iommu_accel.c
>>>> +++ b/hw/i386/intel_iommu_accel.c
>>>> @@ -257,6 +257,176 @@ void
>>> vtd_flush_host_piotlb_all_locked(IntelIOMMUState *s, uint16_t domain_id,
>>>> vtd_flush_host_piotlb_locked, &piotlb_info);
>>>> }
>>>>
>>>> +static void vtd_find_add_pc(VTDHostIOMMUDevice *vtd_hiod, uint32_t
>pasid,
>>>> + VTDPASIDEntry *pe)
>>>> +{
>>>
>>> then you can name this as vtd_accel_fill_pc(). And I think you would
>>> have vtd_accel_update_pc() and vtd_accel_delete_pc() in the next patch.
>>
>> Yes, in current implementation this patch handles pasid entry addition and
>update,
>> next patch handles removal.
>> What's the benefit if we move pasid entry update into next patch?
>
>If no redundant flush, for a newly created pasid entry, you need not to
>loop the pasid cache entry list. But, it's possible to have such guest. :)
>
>>>
>>>> + VTDACCELPASIDCacheEntry *vtd_pce;
>>>> +
>>>> + QLIST_FOREACH(vtd_pce, &vtd_hiod->pasid_cache_list, next) {
>>>> + if (vtd_pce->pasid == pasid) {
>>>> + if (vtd_pasid_entry_compare(pe, &vtd_pce->pe)) {
>>>> + vtd_pce->pe = *pe;
>>>> + }
>>>> + return;
>>>> + }
>>>> + }
>>>
>>> hence this loop can be avoided.
>>
>> Hm, I think guest may send redundant pv_inv_dsc, so we need this loop
>> to bypass existing pasid entry to avoid adding duplicate pasid entry.
>> Let me know if I misunderstand what you mean.
>
>Although I have an idea to handle it. But it relies on some kind of flag
>returned by the removal/update processing path to tell no need to re-
>create pasid cache entry in the vtd_replay_pasid_bind_for_dev() path. I
>don't think we need to make things so complicated. So let's follow the
>current splitting. But I think you still can name this helper as
>vtd_accel_fill_pc(). "find" sometimes means return something back.
Got it, will do.
Thanks
Zhenzhong
^ permalink raw reply [flat|nested] 45+ messages in thread
* [PATCH v1 08/13] intel_iommu: Handle PASID entry removal for pc_inv_dsc request
2026-03-06 3:43 [PATCH v1 00/13] intel_iommu: Enable PASID support for passthrough device Zhenzhong Duan
` (6 preceding siblings ...)
2026-03-06 3:44 ` [PATCH v1 07/13] intel_iommu: Handle PASID entry addition for pc_inv_dsc request Zhenzhong Duan
@ 2026-03-06 3:44 ` Zhenzhong Duan
2026-03-20 10:08 ` Yi Liu
2026-03-06 3:44 ` [PATCH v1 09/13] intel_iommu: Handle PASID entry removal for system reset Zhenzhong Duan
` (4 subsequent siblings)
12 siblings, 1 reply; 45+ messages in thread
From: Zhenzhong Duan @ 2026-03-06 3:44 UTC (permalink / raw)
To: qemu-devel
Cc: alex, clg, eric.auger, mst, jasowang, jgg, nicolinc, skolothumtho,
joao.m.martins, clement.mathieu--drif, kevin.tian, yi.l.liu,
xudong.hao, Zhenzhong Duan
When guest deletes PASID entries, QEMU will capture the pasid cache
invalidation request, walk through pasid_cache_list in each passthrough
device to find stale VTDACCELPASIDCacheEntry and delete them.
This happens before the PASID entry addition, because a newly added entry
should never be removed.
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
---
hw/i386/intel_iommu_accel.c | 70 +++++++++++++++++++++++++++++++++++++
1 file changed, 70 insertions(+)
diff --git a/hw/i386/intel_iommu_accel.c b/hw/i386/intel_iommu_accel.c
index 0acf3ae77f..5a956af916 100644
--- a/hw/i386/intel_iommu_accel.c
+++ b/hw/i386/intel_iommu_accel.c
@@ -16,6 +16,28 @@
#include "hw/pci/pci_bus.h"
#include "trace.h"
+static inline int vtd_hiod_get_pe_from_pasid(VTDACCELPASIDCacheEntry *vtd_pce,
+ VTDPASIDEntry *pe)
+{
+ VTDHostIOMMUDevice *vtd_hiod = vtd_pce->vtd_hiod;
+ IntelIOMMUState *s = vtd_hiod->iommu_state;
+ uint32_t pasid = vtd_pce->pasid;
+ VTDContextEntry ce;
+ int ret;
+
+ if (!s->dmar_enabled || !s->root_scalable) {
+ return -VTD_FR_RTADDR_INV_TTM;
+ }
+
+ ret = vtd_dev_to_context_entry(s, pci_bus_num(vtd_hiod->bus),
+ vtd_hiod->devfn, &ce);
+ if (ret) {
+ return ret;
+ }
+
+ return vtd_ce_get_pasid_entry(s, &ce, pe, pasid);
+}
+
bool vtd_check_hiod_accel(IntelIOMMUState *s, VTDHostIOMMUDevice *vtd_hiod,
Error **errp)
{
@@ -257,6 +279,52 @@ void vtd_flush_host_piotlb_all_locked(IntelIOMMUState *s, uint16_t domain_id,
vtd_flush_host_piotlb_locked, &piotlb_info);
}
+static void vtd_pasid_cache_invalidate_one(VTDACCELPASIDCacheEntry *vtd_pce,
+ VTDPASIDCacheInfo *pc_info)
+{
+ VTDPASIDEntry pe;
+ uint16_t did;
+
+ /*
+ * VTD_INV_DESC_PASIDC_G_DSI and VTD_INV_DESC_PASIDC_G_PASID_SI require
+ * DID check. If DID doesn't match the value in cache or memory, then
+ * it's not a pasid entry we want to invalidate.
+ */
+ switch (pc_info->type) {
+ case VTD_INV_DESC_PASIDC_G_PASID_SI:
+ if (pc_info->pasid != vtd_pce->pasid) {
+ return;
+ }
+ /* Fall through */
+ case VTD_INV_DESC_PASIDC_G_DSI:
+ did = VTD_SM_PASID_ENTRY_DID(&vtd_pce->pe);
+ if (pc_info->did != did) {
+ return;
+ }
+ }
+
+ if (vtd_hiod_get_pe_from_pasid(vtd_pce, &pe)) {
+ /*
+ * No valid pasid entry in guest memory. e.g. pasid entry was modified
+ * to be either all-zero or non-present. Either case means existing
+ * pasid cache should be invalidated.
+ */
+ QLIST_REMOVE(vtd_pce, next);
+ g_free(vtd_pce);
+ }
+}
+
+/* Delete invalid pasid cache entry from pasid_cache_list */
+static void vtd_pasid_cache_invalidate(VTDHostIOMMUDevice *vtd_hiod,
+ VTDPASIDCacheInfo *pc_info)
+{
+ VTDACCELPASIDCacheEntry *vtd_pce, *next;
+
+ QLIST_FOREACH_SAFE(vtd_pce, &vtd_hiod->pasid_cache_list, next, next) {
+ vtd_pasid_cache_invalidate_one(vtd_pce, pc_info);
+ }
+}
+
static void vtd_find_add_pc(VTDHostIOMMUDevice *vtd_hiod, uint32_t pasid,
VTDPASIDEntry *pe)
{
@@ -423,6 +491,8 @@ void vtd_pasid_cache_sync_accel(IntelIOMMUState *s, VTDPASIDCacheInfo *pc_info)
TYPE_HOST_IOMMU_DEVICE_IOMMUFD)) {
continue;
}
+
+ vtd_pasid_cache_invalidate(vtd_hiod, pc_info);
vtd_replay_pasid_bind_for_dev(vtd_hiod, start, end, pc_info);
}
}
--
2.47.3
^ permalink raw reply related [flat|nested] 45+ messages in thread* Re: [PATCH v1 08/13] intel_iommu: Handle PASID entry removal for pc_inv_dsc request
2026-03-06 3:44 ` [PATCH v1 08/13] intel_iommu: Handle PASID entry removal " Zhenzhong Duan
@ 2026-03-20 10:08 ` Yi Liu
2026-03-23 6:08 ` Duan, Zhenzhong
0 siblings, 1 reply; 45+ messages in thread
From: Yi Liu @ 2026-03-20 10:08 UTC (permalink / raw)
To: Zhenzhong Duan, qemu-devel
Cc: alex, clg, eric.auger, mst, jasowang, jgg, nicolinc, skolothumtho,
joao.m.martins, clement.mathieu--drif, kevin.tian, xudong.hao
On 3/6/26 11:44, Zhenzhong Duan wrote:
> When guest deletes PASID entries, QEMU will capture the pasid cache
> invalidation request, walk through pasid_cache_list in each passthrough
> device to find stale VTDACCELPASIDCacheEntry and delete them.
As I commented on the previous patch, I think this patch should handle both
the update and the removal of a PASID entry.
> This happen before the PASID entry addition, because a new added entry
> should never be removed.
This is confusing. TBH, the order is not really important in the current
implementation: we don't have multiple pasid entries sharing the same cache
entry, so for a given pasid entry we either add a new cache entry or
update/delete the existing cache entry. So I would just drop this line.
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> ---
> hw/i386/intel_iommu_accel.c | 70 +++++++++++++++++++++++++++++++++++++
> 1 file changed, 70 insertions(+)
>
> diff --git a/hw/i386/intel_iommu_accel.c b/hw/i386/intel_iommu_accel.c
> index 0acf3ae77f..5a956af916 100644
> --- a/hw/i386/intel_iommu_accel.c
> +++ b/hw/i386/intel_iommu_accel.c
> @@ -16,6 +16,28 @@
> #include "hw/pci/pci_bus.h"
> #include "trace.h"
>
> +static inline int vtd_hiod_get_pe_from_pasid(VTDACCELPASIDCacheEntry *vtd_pce,
> + VTDPASIDEntry *pe)
> +{
> + VTDHostIOMMUDevice *vtd_hiod = vtd_pce->vtd_hiod;
> + IntelIOMMUState *s = vtd_hiod->iommu_state;
> + uint32_t pasid = vtd_pce->pasid;
> + VTDContextEntry ce;
> + int ret;
> +
> + if (!s->dmar_enabled || !s->root_scalable) {
> + return -VTD_FR_RTADDR_INV_TTM;
> + }
> +
> + ret = vtd_dev_to_context_entry(s, pci_bus_num(vtd_hiod->bus),
> + vtd_hiod->devfn, &ce);
> + if (ret) {
> + return ret;
> + }
> +
> + return vtd_ce_get_pasid_entry(s, &ce, pe, pasid);
> +}
> +
> bool vtd_check_hiod_accel(IntelIOMMUState *s, VTDHostIOMMUDevice *vtd_hiod,
> Error **errp)
> {
> @@ -257,6 +279,52 @@ void vtd_flush_host_piotlb_all_locked(IntelIOMMUState *s, uint16_t domain_id,
> vtd_flush_host_piotlb_locked, &piotlb_info);
> }
>
> +static void vtd_pasid_cache_invalidate_one(VTDACCELPASIDCacheEntry *vtd_pce,
> + VTDPASIDCacheInfo *pc_info)
> +{
> + VTDPASIDEntry pe;
> + uint16_t did;
> +
> + /*
> + * VTD_INV_DESC_PASIDC_G_DSI and VTD_INV_DESC_PASIDC_G_PASID_SI require
> + * DID check. If DID doesn't match the value in cache or memory, then
> + * it's not a pasid entry we want to invalidate.
> + */
> + switch (pc_info->type) {
> + case VTD_INV_DESC_PASIDC_G_PASID_SI:
> + if (pc_info->pasid != vtd_pce->pasid) {
> + return;
> + }
> + /* Fall through */
> + case VTD_INV_DESC_PASIDC_G_DSI:
> + did = VTD_SM_PASID_ENTRY_DID(&vtd_pce->pe);
> + if (pc_info->did != did) {
> + return;
> + }
> + }
> +
> + if (vtd_hiod_get_pe_from_pasid(vtd_pce, &pe)) {
> + /*
> + * No valid pasid entry in guest memory. e.g. pasid entry was modified
> + * to be either all-zero or non-present. Either case means existing
> + * pasid cache should be invalidated.
> + */
> + QLIST_REMOVE(vtd_pce, next);
> + g_free(vtd_pce);
call vtd_accel_update_pc() and vtd_accel_delete_pc() accordingly.
> + }
> +}
> +
> +/* Delete invalid pasid cache entry from pasid_cache_list */
> +static void vtd_pasid_cache_invalidate(VTDHostIOMMUDevice *vtd_hiod,
> + VTDPASIDCacheInfo *pc_info)
> +{
> + VTDACCELPASIDCacheEntry *vtd_pce, *next;
> +
> + QLIST_FOREACH_SAFE(vtd_pce, &vtd_hiod->pasid_cache_list, next, next) {
> + vtd_pasid_cache_invalidate_one(vtd_pce, pc_info);
> + }
> +}
> +
> static void vtd_find_add_pc(VTDHostIOMMUDevice *vtd_hiod, uint32_t pasid,
> VTDPASIDEntry *pe)
> {
> @@ -423,6 +491,8 @@ void vtd_pasid_cache_sync_accel(IntelIOMMUState *s, VTDPASIDCacheInfo *pc_info)
> TYPE_HOST_IOMMU_DEVICE_IOMMUFD)) {
> continue;
> }
> +
> + vtd_pasid_cache_invalidate(vtd_hiod, pc_info);
> vtd_replay_pasid_bind_for_dev(vtd_hiod, start, end, pc_info);
> }
> }
^ permalink raw reply [flat|nested] 45+ messages in thread* RE: [PATCH v1 08/13] intel_iommu: Handle PASID entry removal for pc_inv_dsc request
2026-03-20 10:08 ` Yi Liu
@ 2026-03-23 6:08 ` Duan, Zhenzhong
2026-03-23 7:40 ` Yi Liu
2026-03-23 7:43 ` Yi Liu
0 siblings, 2 replies; 45+ messages in thread
From: Duan, Zhenzhong @ 2026-03-23 6:08 UTC (permalink / raw)
To: Liu, Yi L, qemu-devel@nongnu.org
Cc: alex@shazbot.org, clg@redhat.com, eric.auger@redhat.com,
mst@redhat.com, jasowang@redhat.com, jgg@nvidia.com,
nicolinc@nvidia.com, skolothumtho@nvidia.com,
joao.m.martins@oracle.com, clement.mathieu--drif@eviden.com,
Tian, Kevin, Hao, Xudong
>-----Original Message-----
>From: Liu, Yi L <yi.l.liu@intel.com>
>Subject: Re: [PATCH v1 08/13] intel_iommu: Handle PASID entry removal for
>pc_inv_dsc request
>
>On 3/6/26 11:44, Zhenzhong Duan wrote:
>> When guest deletes PASID entries, QEMU will capture the pasid cache
>> invalidation request, walk through pasid_cache_list in each passthrough
>> device to find stale VTDACCELPASIDCacheEntry and delete them.
>
>As the comment to the last patch, I think this patch should handle both
>update and removal of PASID entry.
>
>> This happen before the PASID entry addition, because a new added entry
>> should never be removed.
>
>This is confusing. TBH. this order is not quite important as the current
>implementation, we don't have multiple pasid entires share same cache
>entry, so a given pasid entry, we either add new cache entry or
>update/delete cache entry. So I would just drop this line.
OK, will remove it.
In fact, the order is for performance reasons. Imagine there are lots of pasid entries
in pasid_cache_list: we remove stale entries first, so that later, when the addition code
loops over pasid_cache_list to see if a new pasid entry already exists, it iterates
over fewer entries.
Thanks
Zhenzhong
>
>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>> ---
>> hw/i386/intel_iommu_accel.c | 70
>+++++++++++++++++++++++++++++++++++++
>> 1 file changed, 70 insertions(+)
>>
>> diff --git a/hw/i386/intel_iommu_accel.c b/hw/i386/intel_iommu_accel.c
>> index 0acf3ae77f..5a956af916 100644
>> --- a/hw/i386/intel_iommu_accel.c
>> +++ b/hw/i386/intel_iommu_accel.c
>> @@ -16,6 +16,28 @@
>> #include "hw/pci/pci_bus.h"
>> #include "trace.h"
>>
>> +static inline int vtd_hiod_get_pe_from_pasid(VTDACCELPASIDCacheEntry
>*vtd_pce,
>> + VTDPASIDEntry *pe)
>> +{
>> + VTDHostIOMMUDevice *vtd_hiod = vtd_pce->vtd_hiod;
>> + IntelIOMMUState *s = vtd_hiod->iommu_state;
>> + uint32_t pasid = vtd_pce->pasid;
>> + VTDContextEntry ce;
>> + int ret;
>> +
>> + if (!s->dmar_enabled || !s->root_scalable) {
>> + return -VTD_FR_RTADDR_INV_TTM;
>> + }
>> +
>> + ret = vtd_dev_to_context_entry(s, pci_bus_num(vtd_hiod->bus),
>> + vtd_hiod->devfn, &ce);
>> + if (ret) {
>> + return ret;
>> + }
>> +
>> + return vtd_ce_get_pasid_entry(s, &ce, pe, pasid);
>> +}
>> +
>> bool vtd_check_hiod_accel(IntelIOMMUState *s, VTDHostIOMMUDevice
>*vtd_hiod,
>> Error **errp)
>> {
>> @@ -257,6 +279,52 @@ void
>vtd_flush_host_piotlb_all_locked(IntelIOMMUState *s, uint16_t domain_id,
>> vtd_flush_host_piotlb_locked, &piotlb_info);
>> }
>>
>> +static void vtd_pasid_cache_invalidate_one(VTDACCELPASIDCacheEntry
>*vtd_pce,
>> + VTDPASIDCacheInfo *pc_info)
>> +{
>> + VTDPASIDEntry pe;
>> + uint16_t did;
>> +
>> + /*
>> + * VTD_INV_DESC_PASIDC_G_DSI and VTD_INV_DESC_PASIDC_G_PASID_SI
>require
>> + * DID check. If DID doesn't match the value in cache or memory, then
>> + * it's not a pasid entry we want to invalidate.
>> + */
>> + switch (pc_info->type) {
>> + case VTD_INV_DESC_PASIDC_G_PASID_SI:
>> + if (pc_info->pasid != vtd_pce->pasid) {
>> + return;
>> + }
>> + /* Fall through */
>> + case VTD_INV_DESC_PASIDC_G_DSI:
>> + did = VTD_SM_PASID_ENTRY_DID(&vtd_pce->pe);
>> + if (pc_info->did != did) {
>> + return;
>> + }
>> + }
>> +
>> + if (vtd_hiod_get_pe_from_pasid(vtd_pce, &pe)) {
>> + /*
>> + * No valid pasid entry in guest memory. e.g. pasid entry was modified
>> + * to be either all-zero or non-present. Either case means existing
>> + * pasid cache should be invalidated.
>> + */
>> + QLIST_REMOVE(vtd_pce, next);
>> + g_free(vtd_pce);
>
>call vtd_accel_update_pc() and vtd_accel_delete_pc() accordingly.
>
>> + }
>> +}
>> +
>> +/* Delete invalid pasid cache entry from pasid_cache_list */
>> +static void vtd_pasid_cache_invalidate(VTDHostIOMMUDevice *vtd_hiod,
>> + VTDPASIDCacheInfo *pc_info)
>> +{
>> + VTDACCELPASIDCacheEntry *vtd_pce, *next;
>> +
>> + QLIST_FOREACH_SAFE(vtd_pce, &vtd_hiod->pasid_cache_list, next, next) {
>> + vtd_pasid_cache_invalidate_one(vtd_pce, pc_info);
>> + }
>> +}
>> +
>> static void vtd_find_add_pc(VTDHostIOMMUDevice *vtd_hiod, uint32_t pasid,
>> VTDPASIDEntry *pe)
>> {
>> @@ -423,6 +491,8 @@ void vtd_pasid_cache_sync_accel(IntelIOMMUState *s,
>VTDPASIDCacheInfo *pc_info)
>> TYPE_HOST_IOMMU_DEVICE_IOMMUFD)) {
>> continue;
>> }
>> +
>> + vtd_pasid_cache_invalidate(vtd_hiod, pc_info);
>> vtd_replay_pasid_bind_for_dev(vtd_hiod, start, end, pc_info);
>> }
>> }
^ permalink raw reply [flat|nested] 45+ messages in thread* Re: [PATCH v1 08/13] intel_iommu: Handle PASID entry removal for pc_inv_dsc request
2026-03-23 6:08 ` Duan, Zhenzhong
@ 2026-03-23 7:40 ` Yi Liu
2026-03-23 8:12 ` Duan, Zhenzhong
2026-03-23 7:43 ` Yi Liu
1 sibling, 1 reply; 45+ messages in thread
From: Yi Liu @ 2026-03-23 7:40 UTC (permalink / raw)
To: Duan, Zhenzhong, qemu-devel@nongnu.org
Cc: alex@shazbot.org, clg@redhat.com, eric.auger@redhat.com,
mst@redhat.com, jasowang@redhat.com, jgg@nvidia.com,
nicolinc@nvidia.com, skolothumtho@nvidia.com,
joao.m.martins@oracle.com, clement.mathieu--drif@eviden.com,
Tian, Kevin, Hao, Xudong
On 3/23/26 14:08, Duan, Zhenzhong wrote:
>
>
>> -----Original Message-----
>> From: Liu, Yi L <yi.l.liu@intel.com>
>> Subject: Re: [PATCH v1 08/13] intel_iommu: Handle PASID entry removal for
>> pc_inv_dsc request
>>
>> On 3/6/26 11:44, Zhenzhong Duan wrote:
>>> When guest deletes PASID entries, QEMU will capture the pasid cache
>>> invalidation request, walk through pasid_cache_list in each passthrough
>>> device to find stale VTDACCELPASIDCacheEntry and delete them.
>>
>> As the comment to the last patch, I think this patch should handle both
>> update and removal of PASID entry.
>>
>>> This happen before the PASID entry addition, because a new added entry
>>> should never be removed.
>>
>> This is confusing. TBH. this order is not quite important as the current
>> implementation, we don't have multiple pasid entires share same cache
>> entry, so a given pasid entry, we either add new cache entry or
>> update/delete cache entry. So I would just drop this line.
>
> OK, will remove it.
>
> In fact, the order is for performance reason. Imaging there are lots of pasid entires
> in pasid_cache_list, we remove stale entries first, then in addition code when we
> loop on pasid_cache_list to see if a new pasid entry already exists, we can iterate
> less entries.
got it. you can mark it in the code.
> Thanks
> Zhenzhong
>
>>
>>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>>> ---
>>> hw/i386/intel_iommu_accel.c | 70
>> +++++++++++++++++++++++++++++++++++++
>>> 1 file changed, 70 insertions(+)
>>>
>>> diff --git a/hw/i386/intel_iommu_accel.c b/hw/i386/intel_iommu_accel.c
>>> index 0acf3ae77f..5a956af916 100644
>>> --- a/hw/i386/intel_iommu_accel.c
>>> +++ b/hw/i386/intel_iommu_accel.c
>>> @@ -16,6 +16,28 @@
>>> #include "hw/pci/pci_bus.h"
>>> #include "trace.h"
>>>
>>> +static inline int vtd_hiod_get_pe_from_pasid(VTDACCELPASIDCacheEntry
>> *vtd_pce,
>>> + VTDPASIDEntry *pe)
>>> +{
>>> + VTDHostIOMMUDevice *vtd_hiod = vtd_pce->vtd_hiod;
>>> + IntelIOMMUState *s = vtd_hiod->iommu_state;
>>> + uint32_t pasid = vtd_pce->pasid;
>>> + VTDContextEntry ce;
>>> + int ret;
>>> +
>>> + if (!s->dmar_enabled || !s->root_scalable) {
>>> + return -VTD_FR_RTADDR_INV_TTM;
>>> + }
>>> +
>>> + ret = vtd_dev_to_context_entry(s, pci_bus_num(vtd_hiod->bus),
>>> + vtd_hiod->devfn, &ce);
>>> + if (ret) {
>>> + return ret;
>>> + }
>>> +
>>> + return vtd_ce_get_pasid_entry(s, &ce, pe, pasid);
>>> +}
>>> +
>>> bool vtd_check_hiod_accel(IntelIOMMUState *s, VTDHostIOMMUDevice
>> *vtd_hiod,
>>> Error **errp)
>>> {
>>> @@ -257,6 +279,52 @@ void
>> vtd_flush_host_piotlb_all_locked(IntelIOMMUState *s, uint16_t domain_id,
>>> vtd_flush_host_piotlb_locked, &piotlb_info);
>>> }
>>>
>>> +static void vtd_pasid_cache_invalidate_one(VTDACCELPASIDCacheEntry
>> *vtd_pce,
>>> + VTDPASIDCacheInfo *pc_info)
>>> +{
>>> + VTDPASIDEntry pe;
>>> + uint16_t did;
>>> +
>>> + /*
>>> + * VTD_INV_DESC_PASIDC_G_DSI and VTD_INV_DESC_PASIDC_G_PASID_SI
>> require
>>> + * DID check. If DID doesn't match the value in cache or memory, then
>>> + * it's not a pasid entry we want to invalidate.
>>> + */
>>> + switch (pc_info->type) {
>>> + case VTD_INV_DESC_PASIDC_G_PASID_SI:
>>> + if (pc_info->pasid != vtd_pce->pasid) {
>>> + return;
>>> + }
>>> + /* Fall through */
>>> + case VTD_INV_DESC_PASIDC_G_DSI:
>>> + did = VTD_SM_PASID_ENTRY_DID(&vtd_pce->pe);
>>> + if (pc_info->did != did) {
>>> + return;
>>> + }
>>> + }
>>> +
>>> + if (vtd_hiod_get_pe_from_pasid(vtd_pce, &pe)) {
>>> + /*
>>> + * No valid pasid entry in guest memory. e.g. pasid entry was modified
>>> + * to be either all-zero or non-present. Either case means existing
>>> + * pasid cache should be invalidated.
>>> + */
>>> + QLIST_REMOVE(vtd_pce, next);
>>> + g_free(vtd_pce);
>>
>> call vtd_accel_update_pc() and vtd_accel_delete_pc() accordingly.
>>
>>> + }
>>> +}
>>> +
>>> +/* Delete invalid pasid cache entry from pasid_cache_list */
>>> +static void vtd_pasid_cache_invalidate(VTDHostIOMMUDevice *vtd_hiod,
>>> + VTDPASIDCacheInfo *pc_info)
>>> +{
>>> + VTDACCELPASIDCacheEntry *vtd_pce, *next;
>>> +
>>> + QLIST_FOREACH_SAFE(vtd_pce, &vtd_hiod->pasid_cache_list, next, next) {
>>> + vtd_pasid_cache_invalidate_one(vtd_pce, pc_info);
>>> + }
>>> +}
>>> +
>>> static void vtd_find_add_pc(VTDHostIOMMUDevice *vtd_hiod, uint32_t pasid,
>>> VTDPASIDEntry *pe)
>>> {
>>> @@ -423,6 +491,8 @@ void vtd_pasid_cache_sync_accel(IntelIOMMUState *s,
>> VTDPASIDCacheInfo *pc_info)
>>> TYPE_HOST_IOMMU_DEVICE_IOMMUFD)) {
>>> continue;
>>> }
>>> +
>>> + vtd_pasid_cache_invalidate(vtd_hiod, pc_info);
>>> vtd_replay_pasid_bind_for_dev(vtd_hiod, start, end, pc_info);
>>> }
>>> }
>
^ permalink raw reply [flat|nested] 45+ messages in thread* RE: [PATCH v1 08/13] intel_iommu: Handle PASID entry removal for pc_inv_dsc request
2026-03-23 7:40 ` Yi Liu
@ 2026-03-23 8:12 ` Duan, Zhenzhong
0 siblings, 0 replies; 45+ messages in thread
From: Duan, Zhenzhong @ 2026-03-23 8:12 UTC (permalink / raw)
To: Liu, Yi L, qemu-devel@nongnu.org
Cc: alex@shazbot.org, clg@redhat.com, eric.auger@redhat.com,
mst@redhat.com, jasowang@redhat.com, jgg@nvidia.com,
nicolinc@nvidia.com, skolothumtho@nvidia.com,
joao.m.martins@oracle.com, clement.mathieu--drif@eviden.com,
Tian, Kevin, Hao, Xudong
>-----Original Message-----
>From: Liu, Yi L <yi.l.liu@intel.com>
>Subject: Re: [PATCH v1 08/13] intel_iommu: Handle PASID entry removal for
>pc_inv_dsc request
>
>On 3/23/26 14:08, Duan, Zhenzhong wrote:
>>
>>
>>> -----Original Message-----
>>> From: Liu, Yi L <yi.l.liu@intel.com>
>>> Subject: Re: [PATCH v1 08/13] intel_iommu: Handle PASID entry removal for
>>> pc_inv_dsc request
>>>
>>> On 3/6/26 11:44, Zhenzhong Duan wrote:
>>>> When guest deletes PASID entries, QEMU will capture the pasid cache
>>>> invalidation request, walk through pasid_cache_list in each passthrough
>>>> device to find stale VTDACCELPASIDCacheEntry and delete them.
>>>
>>> As the comment to the last patch, I think this patch should handle both
>>> update and removal of PASID entry.
>>>
>>>> This happen before the PASID entry addition, because a new added entry
>>>> should never be removed.
>>>
>>> This is confusing. TBH. this order is not quite important as the current
>>> implementation, we don't have multiple pasid entires share same cache
>>> entry, so a given pasid entry, we either add new cache entry or
>>> update/delete cache entry. So I would just drop this line.
>>
>> OK, will remove it.
>>
>> In fact, the order is for performance reason. Imaging there are lots of pasid
>entires
>> in pasid_cache_list, we remove stale entries first, then in addition code when we
>> loop on pasid_cache_list to see if a new pasid entry already exists, we can iterate
>> less entries.
>
>got it. you can mark it in the code.
Will do.
Thanks
Zhenzhong
^ permalink raw reply [flat|nested] 45+ messages in thread
* Re: [PATCH v1 08/13] intel_iommu: Handle PASID entry removal for pc_inv_dsc request
2026-03-23 6:08 ` Duan, Zhenzhong
2026-03-23 7:40 ` Yi Liu
@ 2026-03-23 7:43 ` Yi Liu
2026-03-23 8:41 ` Duan, Zhenzhong
1 sibling, 1 reply; 45+ messages in thread
From: Yi Liu @ 2026-03-23 7:43 UTC (permalink / raw)
To: Duan, Zhenzhong, qemu-devel@nongnu.org
Cc: alex@shazbot.org, clg@redhat.com, eric.auger@redhat.com,
mst@redhat.com, jasowang@redhat.com, jgg@nvidia.com,
nicolinc@nvidia.com, skolothumtho@nvidia.com,
joao.m.martins@oracle.com, clement.mathieu--drif@eviden.com,
Tian, Kevin, Hao, Xudong
On 3/23/26 14:08, Duan, Zhenzhong wrote:
>>> static void vtd_find_add_pc(VTDHostIOMMUDevice *vtd_hiod, uint32_t pasid,
>>> VTDPASIDEntry *pe)
>>> {
>>> @@ -423,6 +491,8 @@ void vtd_pasid_cache_sync_accel(IntelIOMMUState *s,
>> VTDPASIDCacheInfo *pc_info)
>>> TYPE_HOST_IOMMU_DEVICE_IOMMUFD)) {
>>> continue;
>>> }
>>> +
>>> + vtd_pasid_cache_invalidate(vtd_hiod, pc_info);
>>> vtd_replay_pasid_bind_for_dev(vtd_hiod, start, end, pc_info);
A minor optimization: for PASID-selective invalidation, if
vtd_pasid_cache_invalidate() already handled the invalidation, then
vtd_replay_pasid_bind_for_dev() can be skipped.
^ permalink raw reply [flat|nested] 45+ messages in thread* RE: [PATCH v1 08/13] intel_iommu: Handle PASID entry removal for pc_inv_dsc request
2026-03-23 7:43 ` Yi Liu
@ 2026-03-23 8:41 ` Duan, Zhenzhong
0 siblings, 0 replies; 45+ messages in thread
From: Duan, Zhenzhong @ 2026-03-23 8:41 UTC (permalink / raw)
To: Liu, Yi L, qemu-devel@nongnu.org
Cc: alex@shazbot.org, clg@redhat.com, eric.auger@redhat.com,
mst@redhat.com, jasowang@redhat.com, jgg@nvidia.com,
nicolinc@nvidia.com, skolothumtho@nvidia.com,
joao.m.martins@oracle.com, clement.mathieu--drif@eviden.com,
Tian, Kevin, Hao, Xudong
>-----Original Message-----
>From: Liu, Yi L <yi.l.liu@intel.com>
>Subject: Re: [PATCH v1 08/13] intel_iommu: Handle PASID entry removal for
>pc_inv_dsc request
>
>On 3/23/26 14:08, Duan, Zhenzhong wrote:
>
>>>> static void vtd_find_add_pc(VTDHostIOMMUDevice *vtd_hiod, uint32_t
>pasid,
>>>> VTDPASIDEntry *pe)
>>>> {
>>>> @@ -423,6 +491,8 @@ void vtd_pasid_cache_sync_accel(IntelIOMMUState
>*s,
>>> VTDPASIDCacheInfo *pc_info)
>>>> TYPE_HOST_IOMMU_DEVICE_IOMMUFD)) {
>>>> continue;
>>>> }
>>>> +
>>>> + vtd_pasid_cache_invalidate(vtd_hiod, pc_info);
>>>> vtd_replay_pasid_bind_for_dev(vtd_hiod, start, end, pc_info);
>
>a minor optimization, for PASDI selective, if
>vtd_pasid_cache_invalidate() already handled the invalidation, then the
>vtd_replay_pasid_bind_for_dev() can be skipped.
Good idea, will do.
Thanks
Zhenzhong
^ permalink raw reply [flat|nested] 45+ messages in thread
* [PATCH v1 09/13] intel_iommu: Handle PASID entry removal for system reset
2026-03-06 3:43 [PATCH v1 00/13] intel_iommu: Enable PASID support for passthrough device Zhenzhong Duan
` (7 preceding siblings ...)
2026-03-06 3:44 ` [PATCH v1 08/13] intel_iommu: Handle PASID entry removal " Zhenzhong Duan
@ 2026-03-06 3:44 ` Zhenzhong Duan
2026-03-06 3:44 ` [PATCH v1 10/13] intel_iommu_accel: Support pasid binding/unbinding and PIOTLB flushing Zhenzhong Duan
` (3 subsequent siblings)
12 siblings, 0 replies; 45+ messages in thread
From: Zhenzhong Duan @ 2026-03-06 3:44 UTC (permalink / raw)
To: qemu-devel
Cc: alex, clg, eric.auger, mst, jasowang, jgg, nicolinc, skolothumtho,
joao.m.martins, clement.mathieu--drif, kevin.tian, yi.l.liu,
xudong.hao, Zhenzhong Duan
On system level reset, DMA translation is turned off, so all PASID
entries become stale and should be deleted.
vtd_hiod list is never accessed without BQL, so no need to guard with
iommu lock.
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
---
hw/i386/intel_iommu_accel.h | 5 +++++
hw/i386/intel_iommu.c | 2 ++
hw/i386/intel_iommu_accel.c | 13 +++++++++++++
3 files changed, 20 insertions(+)
diff --git a/hw/i386/intel_iommu_accel.h b/hw/i386/intel_iommu_accel.h
index a77fd06fe0..914c690c26 100644
--- a/hw/i386/intel_iommu_accel.h
+++ b/hw/i386/intel_iommu_accel.h
@@ -28,6 +28,7 @@ void vtd_flush_host_piotlb_all_locked(IntelIOMMUState *s, uint16_t domain_id,
uint32_t pasid, hwaddr addr,
uint64_t npages, bool ih);
void vtd_pasid_cache_sync_accel(IntelIOMMUState *s, VTDPASIDCacheInfo *pc_info);
+void vtd_pasid_cache_reset_accel(IntelIOMMUState *s);
void vtd_iommu_ops_update_accel(PCIIOMMUOps *ops);
#else
static inline bool vtd_check_hiod_accel(IntelIOMMUState *s,
@@ -62,6 +63,10 @@ static inline void vtd_pasid_cache_sync_accel(IntelIOMMUState *s,
{
}
+static inline void vtd_pasid_cache_reset_accel(IntelIOMMUState *s)
+{
+}
+
static inline void vtd_iommu_ops_update_accel(PCIIOMMUOps *ops)
{
}
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 984adc639a..b7d487bf5a 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -391,6 +391,8 @@ static void vtd_reset_caches(IntelIOMMUState *s)
vtd_reset_context_cache_locked(s);
vtd_pasid_cache_reset_locked(s);
vtd_iommu_unlock(s);
+
+ vtd_pasid_cache_reset_accel(s);
}
static uint64_t vtd_get_iotlb_gfn(hwaddr addr, uint32_t level)
diff --git a/hw/i386/intel_iommu_accel.c b/hw/i386/intel_iommu_accel.c
index 5a956af916..c1890ca0a5 100644
--- a/hw/i386/intel_iommu_accel.c
+++ b/hw/i386/intel_iommu_accel.c
@@ -497,6 +497,19 @@ void vtd_pasid_cache_sync_accel(IntelIOMMUState *s, VTDPASIDCacheInfo *pc_info)
}
}
+/* Fake a global pasid cache invalidation to remove all pasid cache entries */
+void vtd_pasid_cache_reset_accel(IntelIOMMUState *s)
+{
+ VTDPASIDCacheInfo pc_info = { .type = VTD_INV_DESC_PASIDC_G_GLOBAL };
+ VTDHostIOMMUDevice *vtd_hiod;
+ GHashTableIter as_it;
+
+ g_hash_table_iter_init(&as_it, s->vtd_host_iommu_dev);
+ while (g_hash_table_iter_next(&as_it, NULL, (void **)&vtd_hiod)) {
+ vtd_pasid_cache_invalidate(vtd_hiod, &pc_info);
+ }
+}
+
static uint64_t vtd_get_host_iommu_quirks(uint32_t type,
void *caps, uint32_t size)
{
--
2.47.3
^ permalink raw reply related [flat|nested] 45+ messages in thread* [PATCH v1 10/13] intel_iommu_accel: Support pasid binding/unbinding and PIOTLB flushing
2026-03-06 3:43 [PATCH v1 00/13] intel_iommu: Enable PASID support for passthrough device Zhenzhong Duan
` (8 preceding siblings ...)
2026-03-06 3:44 ` [PATCH v1 09/13] intel_iommu: Handle PASID entry removal for system reset Zhenzhong Duan
@ 2026-03-06 3:44 ` Zhenzhong Duan
2026-03-06 3:44 ` [PATCH v1 11/13] intel_iommu_accel: drop _lock suffix in vtd_flush_host_piotlb_all_locked() Zhenzhong Duan
` (2 subsequent siblings)
12 siblings, 0 replies; 45+ messages in thread
From: Zhenzhong Duan @ 2026-03-06 3:44 UTC (permalink / raw)
To: qemu-devel
Cc: alex, clg, eric.auger, mst, jasowang, jgg, nicolinc, skolothumtho,
joao.m.martins, clement.mathieu--drif, kevin.tian, yi.l.liu,
xudong.hao, Zhenzhong Duan
We just switched to using VTDACCELPASIDCacheEntry to cache the pasid entry of
a passthrough device, so we also need to switch the binding/unbinding and
PIOTLB flushing functions to use the same structure.
After the switching, we could remove accel related code from
vtd_pasid_cache_[reset/sync]_locked() to make intel_iommu.c cleaner.
The VTDAddressSpace of PASID_0 is still useful as VTD supports a legacy
mode which needs shadow page table instead of nested page table.
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
---
hw/i386/intel_iommu_accel.h | 2 +-
include/hw/i386/intel_iommu.h | 2 -
hw/i386/intel_iommu.c | 17 +----
hw/i386/intel_iommu_accel.c | 125 +++++++++++++++++-----------------
4 files changed, 64 insertions(+), 82 deletions(-)
diff --git a/hw/i386/intel_iommu_accel.h b/hw/i386/intel_iommu_accel.h
index 914c690c26..1ae46d9250 100644
--- a/hw/i386/intel_iommu_accel.h
+++ b/hw/i386/intel_iommu_accel.h
@@ -16,6 +16,7 @@ typedef struct VTDACCELPASIDCacheEntry {
VTDHostIOMMUDevice *vtd_hiod;
VTDPASIDEntry pe;
uint32_t pasid;
+ uint32_t fs_hwpt_id;
QLIST_ENTRY(VTDACCELPASIDCacheEntry) next;
} VTDACCELPASIDCacheEntry;
@@ -23,7 +24,6 @@ typedef struct VTDACCELPASIDCacheEntry {
bool vtd_check_hiod_accel(IntelIOMMUState *s, VTDHostIOMMUDevice *vtd_hiod,
Error **errp);
VTDHostIOMMUDevice *vtd_find_hiod_iommufd(VTDAddressSpace *as);
-bool vtd_propagate_guest_pasid(VTDAddressSpace *vtd_as, Error **errp);
void vtd_flush_host_piotlb_all_locked(IntelIOMMUState *s, uint16_t domain_id,
uint32_t pasid, hwaddr addr,
uint64_t npages, bool ih);
diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
index bb957b93e0..64f963412a 100644
--- a/include/hw/i386/intel_iommu.h
+++ b/include/hw/i386/intel_iommu.h
@@ -154,8 +154,6 @@ struct VTDAddressSpace {
* with the guest IOMMU pgtables for a device.
*/
IOVATree *iova_tree;
-
- uint32_t fs_hwpt_id;
};
struct VTDIOTLBEntry {
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index b7d487bf5a..edd2b8f0cc 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -86,8 +86,6 @@ static void vtd_pasid_cache_reset_locked(IntelIOMMUState *s)
VTDPASIDCacheEntry *pc_entry = &vtd_as->pasid_cache_entry;
if (pc_entry->valid) {
pc_entry->valid = false;
- /* It's fatal to get failure during reset */
- vtd_propagate_guest_pasid(vtd_as, &error_fatal);
}
}
}
@@ -3126,8 +3124,6 @@ static void vtd_pasid_cache_sync_locked(gpointer key, gpointer value,
VTDPASIDEntry pe;
IOMMUNotifier *n;
uint16_t did;
- const char *err_prefix = "Attaching to HWPT failed: ";
- Error *local_err = NULL;
if (vtd_dev_get_pe_from_pasid(vtd_as, &pe)) {
if (!pc_entry->valid) {
@@ -3148,9 +3144,6 @@ static void vtd_pasid_cache_sync_locked(gpointer key, gpointer value,
vtd_address_space_unmap(vtd_as, n);
}
vtd_switch_address_space(vtd_as);
-
- err_prefix = "Detaching from HWPT failed: ";
- goto do_bind_unbind;
}
/*
@@ -3178,20 +3171,12 @@ static void vtd_pasid_cache_sync_locked(gpointer key, gpointer value,
if (!pc_entry->valid) {
pc_entry->pasid_entry = pe;
pc_entry->valid = true;
- } else if (vtd_pasid_entry_compare(&pe, &pc_entry->pasid_entry)) {
- err_prefix = "Replacing HWPT attachment failed: ";
- } else {
+ } else if (!vtd_pasid_entry_compare(&pe, &pc_entry->pasid_entry)) {
return;
}
vtd_switch_address_space(vtd_as);
vtd_address_space_sync(vtd_as);
-
-do_bind_unbind:
- /* TODO: Fault event injection into guest, report error to QEMU for now */
- if (!vtd_propagate_guest_pasid(vtd_as, &local_err)) {
- error_reportf_err(local_err, "%s", err_prefix);
- }
}
static void vtd_pasid_cache_sync(IntelIOMMUState *s, VTDPASIDCacheInfo *pc_info)
diff --git a/hw/i386/intel_iommu_accel.c b/hw/i386/intel_iommu_accel.c
index c1890ca0a5..d7c1ff6b74 100644
--- a/hw/i386/intel_iommu_accel.c
+++ b/hw/i386/intel_iommu_accel.c
@@ -111,23 +111,24 @@ static bool vtd_create_fs_hwpt(VTDHostIOMMUDevice *vtd_hiod,
}
static void vtd_destroy_old_fs_hwpt(VTDHostIOMMUDevice *vtd_hiod,
- VTDAddressSpace *vtd_as)
+ VTDACCELPASIDCacheEntry *vtd_pce)
{
HostIOMMUDeviceIOMMUFD *idev = HOST_IOMMU_DEVICE_IOMMUFD(vtd_hiod->hiod);
- if (!vtd_as->fs_hwpt_id) {
+ if (!vtd_pce->fs_hwpt_id) {
return;
}
- iommufd_backend_free_id(idev->iommufd, vtd_as->fs_hwpt_id);
- vtd_as->fs_hwpt_id = 0;
+ iommufd_backend_free_id(idev->iommufd, vtd_pce->fs_hwpt_id);
+ vtd_pce->fs_hwpt_id = 0;
}
-static bool vtd_device_attach_iommufd(VTDHostIOMMUDevice *vtd_hiod,
- VTDAddressSpace *vtd_as, Error **errp)
+static bool vtd_device_attach_iommufd(VTDACCELPASIDCacheEntry *vtd_pce,
+ Error **errp)
{
+ VTDHostIOMMUDevice *vtd_hiod = vtd_pce->vtd_hiod;
HostIOMMUDeviceIOMMUFD *idev = HOST_IOMMU_DEVICE_IOMMUFD(vtd_hiod->hiod);
- VTDPASIDEntry *pe = &vtd_as->pasid_cache_entry.pasid_entry;
- uint32_t hwpt_id = idev->hwpt_id;
+ VTDPASIDEntry *pe = &vtd_pce->pe;
+ uint32_t hwpt_id = idev->hwpt_id, pasid = vtd_pce->pasid;
bool ret;
/*
@@ -147,14 +148,13 @@ static bool vtd_device_attach_iommufd(VTDHostIOMMUDevice *vtd_hiod,
}
}
- ret = host_iommu_device_iommufd_attach_hwpt(idev, IOMMU_NO_PASID, hwpt_id,
- errp);
- trace_vtd_device_attach_hwpt(idev->devid, vtd_as->pasid, hwpt_id, ret);
+ ret = host_iommu_device_iommufd_attach_hwpt(idev, pasid, hwpt_id, errp);
+ trace_vtd_device_attach_hwpt(idev->devid, pasid, hwpt_id, ret);
if (ret) {
/* Destroy old fs_hwpt if it's a replacement */
- vtd_destroy_old_fs_hwpt(vtd_hiod, vtd_as);
+ vtd_destroy_old_fs_hwpt(vtd_hiod, vtd_pce);
if (vtd_pe_pgtt_is_fst(pe)) {
- vtd_as->fs_hwpt_id = hwpt_id;
+ vtd_pce->fs_hwpt_id = hwpt_id;
}
} else if (vtd_pe_pgtt_is_fst(pe)) {
iommufd_backend_free_id(idev->iommufd, hwpt_id);
@@ -163,16 +163,17 @@ static bool vtd_device_attach_iommufd(VTDHostIOMMUDevice *vtd_hiod,
return ret;
}
-static bool vtd_device_detach_iommufd(VTDHostIOMMUDevice *vtd_hiod,
- VTDAddressSpace *vtd_as, Error **errp)
+static bool vtd_device_detach_iommufd(VTDACCELPASIDCacheEntry *vtd_pce,
+ Error **errp)
{
+ VTDHostIOMMUDevice *vtd_hiod = vtd_pce->vtd_hiod;
HostIOMMUDeviceIOMMUFD *idev = HOST_IOMMU_DEVICE_IOMMUFD(vtd_hiod->hiod);
- IntelIOMMUState *s = vtd_as->iommu_state;
- uint32_t pasid = vtd_as->pasid;
+ IntelIOMMUState *s = vtd_hiod->iommu_state;
+ uint32_t pasid = vtd_pce->pasid;
bool ret;
- if (s->dmar_enabled && s->root_scalable) {
- ret = host_iommu_device_iommufd_detach_hwpt(idev, IOMMU_NO_PASID, errp);
+ if (pasid != IOMMU_NO_PASID || (s->dmar_enabled && s->root_scalable)) {
+ ret = host_iommu_device_iommufd_detach_hwpt(idev, pasid, errp);
trace_vtd_device_detach_hwpt(idev->devid, pasid, ret);
} else {
/*
@@ -180,72 +181,47 @@ static bool vtd_device_detach_iommufd(VTDHostIOMMUDevice *vtd_hiod,
* we fallback to the default HWPT which contains shadow page table.
* So guest DMA could still work.
*/
- ret = host_iommu_device_iommufd_attach_hwpt(idev, IOMMU_NO_PASID,
+ ret = host_iommu_device_iommufd_attach_hwpt(idev, pasid,
idev->hwpt_id, errp);
trace_vtd_device_reattach_def_hwpt(idev->devid, pasid, idev->hwpt_id,
ret);
}
if (ret) {
- vtd_destroy_old_fs_hwpt(vtd_hiod, vtd_as);
+ vtd_destroy_old_fs_hwpt(vtd_hiod, vtd_pce);
}
return ret;
}
-bool vtd_propagate_guest_pasid(VTDAddressSpace *vtd_as, Error **errp)
-{
- VTDPASIDCacheEntry *pc_entry = &vtd_as->pasid_cache_entry;
- VTDHostIOMMUDevice *vtd_hiod = vtd_find_hiod_iommufd(vtd_as);
-
- /* Ignore emulated device or legacy VFIO backed device */
- if (!vtd_as->iommu_state->fsts || !vtd_hiod) {
- return true;
- }
-
- if (pc_entry->valid) {
- return vtd_device_attach_iommufd(vtd_hiod, vtd_as, errp);
- }
-
- return vtd_device_detach_iommufd(vtd_hiod, vtd_as, errp);
-}
-
/*
- * This function is a loop function for the s->vtd_address_spaces
- * list with VTDPIOTLBInvInfo as execution filter. It propagates
- * the piotlb invalidation to host.
+ * This function is a loop function for the s->vtd_host_iommu_dev
+ * and vtd_hiod->pasid_cache_list lists with VTDPIOTLBInvInfo as
+ * execution filter. It propagates the piotlb invalidation to host.
*/
-static void vtd_flush_host_piotlb_locked(gpointer key, gpointer value,
- gpointer user_data)
+static void vtd_flush_host_piotlb(VTDACCELPASIDCacheEntry *vtd_pce,
+ VTDPIOTLBInvInfo *piotlb_info)
{
- VTDPIOTLBInvInfo *piotlb_info = user_data;
- VTDAddressSpace *vtd_as = value;
- VTDHostIOMMUDevice *vtd_hiod = vtd_find_hiod_iommufd(vtd_as);
- VTDPASIDCacheEntry *pc_entry = &vtd_as->pasid_cache_entry;
+ VTDHostIOMMUDevice *vtd_hiod = vtd_pce->vtd_hiod;
+ VTDPASIDEntry *pe = &vtd_pce->pe;
uint16_t did;
- if (!vtd_hiod) {
- return;
- }
-
- assert(vtd_as->pasid == PCI_NO_PASID);
-
/* Nothing to do if there is no first stage HWPT attached */
- if (!pc_entry->valid ||
- !vtd_pe_pgtt_is_fst(&pc_entry->pasid_entry)) {
+ if (!vtd_pe_pgtt_is_fst(pe)) {
return;
}
- did = VTD_SM_PASID_ENTRY_DID(&pc_entry->pasid_entry);
+ did = VTD_SM_PASID_ENTRY_DID(pe);
- if (piotlb_info->domain_id == did && piotlb_info->pasid == PASID_0) {
+ if (piotlb_info->domain_id == did && piotlb_info->pasid == vtd_pce->pasid) {
HostIOMMUDeviceIOMMUFD *idev =
HOST_IOMMU_DEVICE_IOMMUFD(vtd_hiod->hiod);
uint32_t entry_num = 1; /* Only implement one request for simplicity */
Error *local_err = NULL;
struct iommu_hwpt_vtd_s1_invalidate *cache = piotlb_info->inv_data;
- if (!iommufd_backend_invalidate_cache(idev->iommufd, vtd_as->fs_hwpt_id,
+ if (!iommufd_backend_invalidate_cache(idev->iommufd,
+ vtd_pce->fs_hwpt_id,
IOMMU_HWPT_INVALIDATE_DATA_VTD_S1,
sizeof(*cache), &entry_num, cache,
&local_err)) {
@@ -261,6 +237,8 @@ void vtd_flush_host_piotlb_all_locked(IntelIOMMUState *s, uint16_t domain_id,
{
struct iommu_hwpt_vtd_s1_invalidate cache_info = { 0 };
VTDPIOTLBInvInfo piotlb_info;
+ VTDHostIOMMUDevice *vtd_hiod;
+ GHashTableIter as_it;
cache_info.addr = addr;
cache_info.npages = npages;
@@ -271,12 +249,19 @@ void vtd_flush_host_piotlb_all_locked(IntelIOMMUState *s, uint16_t domain_id,
piotlb_info.inv_data = &cache_info;
/*
- * Go through each vtd_as instance in s->vtd_address_spaces, find out
- * affected host devices which need host piotlb invalidation. Piotlb
- * invalidation should check pasid cache per architecture point of view.
+ * Go through each vtd_pce in vtd_hiod->pasid_cache_list for each host
+ * device, find out affected host device pasid which need host piotlb
+ * invalidation. Piotlb invalidation should check pasid cache per
+ * architecture point of view.
*/
- g_hash_table_foreach(s->vtd_address_spaces,
- vtd_flush_host_piotlb_locked, &piotlb_info);
+ g_hash_table_iter_init(&as_it, s->vtd_host_iommu_dev);
+ while (g_hash_table_iter_next(&as_it, NULL, (void **)&vtd_hiod)) {
+ VTDACCELPASIDCacheEntry *vtd_pce;
+
+ QLIST_FOREACH(vtd_pce, &vtd_hiod->pasid_cache_list, next) {
+ vtd_flush_host_piotlb(vtd_pce, &piotlb_info);
+ }
+ }
}
static void vtd_pasid_cache_invalidate_one(VTDACCELPASIDCacheEntry *vtd_pce,
@@ -284,6 +269,7 @@ static void vtd_pasid_cache_invalidate_one(VTDACCELPASIDCacheEntry *vtd_pce,
{
VTDPASIDEntry pe;
uint16_t did;
+ Error *local_err = NULL;
/*
* VTD_INV_DESC_PASIDC_G_DSI and VTD_INV_DESC_PASIDC_G_PASID_SI require
@@ -309,6 +295,9 @@ static void vtd_pasid_cache_invalidate_one(VTDACCELPASIDCacheEntry *vtd_pce,
* to be either all-zero or non-present. Either case means existing
* pasid cache should be invalidated.
*/
+ if (!vtd_device_detach_iommufd(vtd_pce, &local_err)) {
+ error_reportf_err(local_err, "%s", "Detaching from HWPT failed: ");
+ }
QLIST_REMOVE(vtd_pce, next);
g_free(vtd_pce);
}
@@ -329,11 +318,17 @@ static void vtd_find_add_pc(VTDHostIOMMUDevice *vtd_hiod, uint32_t pasid,
VTDPASIDEntry *pe)
{
VTDACCELPASIDCacheEntry *vtd_pce;
+ Error *local_err = NULL;
QLIST_FOREACH(vtd_pce, &vtd_hiod->pasid_cache_list, next) {
if (vtd_pce->pasid == pasid) {
if (vtd_pasid_entry_compare(pe, &vtd_pce->pe)) {
vtd_pce->pe = *pe;
+
+ if (!vtd_device_attach_iommufd(vtd_pce, &local_err)) {
+ error_reportf_err(local_err, "%s",
+ "Replacing HWPT attachment failed: ");
+ }
}
return;
}
@@ -344,6 +339,10 @@ static void vtd_find_add_pc(VTDHostIOMMUDevice *vtd_hiod, uint32_t pasid,
vtd_pce->pasid = pasid;
vtd_pce->pe = *pe;
QLIST_INSERT_HEAD(&vtd_hiod->pasid_cache_list, vtd_pce, next);
+
+ if (!vtd_device_attach_iommufd(vtd_pce, &local_err)) {
+ error_reportf_err(local_err, "%s", "Attaching to HWPT failed: ");
+ }
}
/*
--
2.47.3
^ permalink raw reply related [flat|nested] 45+ messages in thread* [PATCH v1 11/13] intel_iommu_accel: drop _lock suffix in vtd_flush_host_piotlb_all_locked()
2026-03-06 3:43 [PATCH v1 00/13] intel_iommu: Enable PASID support for passthrough device Zhenzhong Duan
` (9 preceding siblings ...)
2026-03-06 3:44 ` [PATCH v1 10/13] intel_iommu_accel: Support pasid binding/unbinding and PIOTLB flushing Zhenzhong Duan
@ 2026-03-06 3:44 ` Zhenzhong Duan
2026-03-19 8:02 ` CLEMENT MATHIEU--DRIF
2026-03-06 3:44 ` [PATCH v1 12/13] intel_iommu_accel: Add pasid bits size check Zhenzhong Duan
2026-03-06 3:44 ` [PATCH v1 13/13] intel_iommu: Expose flag VIOMMU_FLAG_PASID_SUPPORTED when configured Zhenzhong Duan
12 siblings, 1 reply; 45+ messages in thread
From: Zhenzhong Duan @ 2026-03-06 3:44 UTC (permalink / raw)
To: qemu-devel
Cc: alex, clg, eric.auger, mst, jasowang, jgg, nicolinc, skolothumtho,
joao.m.martins, clement.mathieu--drif, kevin.tian, yi.l.liu,
xudong.hao, Zhenzhong Duan
In order to support PASID, we have switched from looping vtd_as to vtd_hiod,
vtd_hiod represents host passthrough device and never dereferenced without BQL.
So we don't need extra iommu lock to protect it.
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
---
hw/i386/intel_iommu_accel.h | 14 +++++++-------
hw/i386/intel_iommu.c | 7 ++++---
hw/i386/intel_iommu_accel.c | 6 +++---
3 files changed, 14 insertions(+), 13 deletions(-)
diff --git a/hw/i386/intel_iommu_accel.h b/hw/i386/intel_iommu_accel.h
index 1ae46d9250..3f1b1002b8 100644
--- a/hw/i386/intel_iommu_accel.h
+++ b/hw/i386/intel_iommu_accel.h
@@ -24,9 +24,9 @@ typedef struct VTDACCELPASIDCacheEntry {
bool vtd_check_hiod_accel(IntelIOMMUState *s, VTDHostIOMMUDevice *vtd_hiod,
Error **errp);
VTDHostIOMMUDevice *vtd_find_hiod_iommufd(VTDAddressSpace *as);
-void vtd_flush_host_piotlb_all_locked(IntelIOMMUState *s, uint16_t domain_id,
- uint32_t pasid, hwaddr addr,
- uint64_t npages, bool ih);
+void vtd_flush_host_piotlb_all_accel(IntelIOMMUState *s, uint16_t domain_id,
+ uint32_t pasid, hwaddr addr,
+ uint64_t npages, bool ih);
void vtd_pasid_cache_sync_accel(IntelIOMMUState *s, VTDPASIDCacheInfo *pc_info);
void vtd_pasid_cache_reset_accel(IntelIOMMUState *s);
void vtd_iommu_ops_update_accel(PCIIOMMUOps *ops);
@@ -51,10 +51,10 @@ static inline bool vtd_propagate_guest_pasid(VTDAddressSpace *vtd_as,
return true;
}
-static inline void vtd_flush_host_piotlb_all_locked(IntelIOMMUState *s,
- uint16_t domain_id,
- uint32_t pasid, hwaddr addr,
- uint64_t npages, bool ih)
+static inline void vtd_flush_host_piotlb_all_accel(IntelIOMMUState *s,
+ uint16_t domain_id,
+ uint32_t pasid, hwaddr addr,
+ uint64_t npages, bool ih)
{
}
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index edd2b8f0cc..3ea5b92b34 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -3011,11 +3011,11 @@ static void vtd_piotlb_pasid_invalidate(IntelIOMMUState *s,
info.domain_id = domain_id;
info.pasid = pasid;
+ vtd_flush_host_piotlb_all_accel(s, domain_id, pasid, 0, (uint64_t)-1,
+ false);
vtd_iommu_lock(s);
g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_pasid,
&info);
- vtd_flush_host_piotlb_all_locked(s, domain_id, pasid, 0, (uint64_t)-1,
- false);
vtd_iommu_unlock(s);
QLIST_FOREACH(vtd_as, &s->vtd_as_with_notifiers, next) {
@@ -3045,10 +3045,11 @@ static void vtd_piotlb_page_invalidate(IntelIOMMUState *s, uint16_t domain_id,
info.addr = addr;
info.mask = ~((1 << am) - 1);
+ vtd_flush_host_piotlb_all_accel(s, domain_id, pasid, addr, 1 << am, ih);
+
vtd_iommu_lock(s);
g_hash_table_foreach_remove(s->iotlb,
vtd_hash_remove_by_page_piotlb, &info);
- vtd_flush_host_piotlb_all_locked(s, domain_id, pasid, addr, 1 << am, ih);
vtd_iommu_unlock(s);
vtd_iotlb_page_invalidate_notify(s, domain_id, addr, am, pasid);
diff --git a/hw/i386/intel_iommu_accel.c b/hw/i386/intel_iommu_accel.c
index d7c1ff6b74..acb1b1e238 100644
--- a/hw/i386/intel_iommu_accel.c
+++ b/hw/i386/intel_iommu_accel.c
@@ -231,9 +231,9 @@ static void vtd_flush_host_piotlb(VTDACCELPASIDCacheEntry *vtd_pce,
}
}
-void vtd_flush_host_piotlb_all_locked(IntelIOMMUState *s, uint16_t domain_id,
- uint32_t pasid, hwaddr addr,
- uint64_t npages, bool ih)
+void vtd_flush_host_piotlb_all_accel(IntelIOMMUState *s, uint16_t domain_id,
+ uint32_t pasid, hwaddr addr,
+ uint64_t npages, bool ih)
{
struct iommu_hwpt_vtd_s1_invalidate cache_info = { 0 };
VTDPIOTLBInvInfo piotlb_info;
--
2.47.3
^ permalink raw reply related [flat|nested] 45+ messages in thread* Re: [PATCH v1 11/13] intel_iommu_accel: drop _lock suffix in vtd_flush_host_piotlb_all_locked()
2026-03-06 3:44 ` [PATCH v1 11/13] intel_iommu_accel: drop _lock suffix in vtd_flush_host_piotlb_all_locked() Zhenzhong Duan
@ 2026-03-19 8:02 ` CLEMENT MATHIEU--DRIF
2026-03-19 9:07 ` Duan, Zhenzhong
0 siblings, 1 reply; 45+ messages in thread
From: CLEMENT MATHIEU--DRIF @ 2026-03-19 8:02 UTC (permalink / raw)
To: Zhenzhong Duan, qemu-devel@nongnu.org
Cc: alex@shazbot.org, clg@redhat.com, eric.auger@redhat.com,
mst@redhat.com, jasowang@redhat.com, jgg@nvidia.com,
nicolinc@nvidia.com, skolothumtho@nvidia.com,
joao.m.martins@oracle.com, kevin.tian@intel.com,
yi.l.liu@intel.com, xudong.hao@intel.com
vtd_piotlb_page_invalidate can be called from vtd_pri_perform_implicit_invalidation without any lock. Thus we can call vtd_flush_host_piotlb_all_accel unlocked. Is there a risk in case of concurrent hot plugging of another device?
cmd
On Thu, 2026-03-05 at 22:44 -0500, Zhenzhong Duan wrote:
> In order to support PASID, we have switched from looping vtd_as to vtd_hiod,
> vtd_hiod represents host passthrough device and never dereferenced without BQL.
> So we don't need extra iommu lock to protect it.
>
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> ---
> hw/i386/intel_iommu_accel.h | 14 +++++++-------
> hw/i386/intel_iommu.c | 7 ++++---
> hw/i386/intel_iommu_accel.c | 6 +++---
> 3 files changed, 14 insertions(+), 13 deletions(-)
>
> diff --git a/hw/i386/intel_iommu_accel.h b/hw/i386/intel_iommu_accel.h
> index 1ae46d9250..3f1b1002b8 100644
> --- a/hw/i386/intel_iommu_accel.h
> +++ b/hw/i386/intel_iommu_accel.h
> @@ -24,9 +24,9 @@ typedef struct VTDACCELPASIDCacheEntry {
> bool vtd_check_hiod_accel(IntelIOMMUState *s, VTDHostIOMMUDevice *vtd_hiod,
> Error **errp);
> VTDHostIOMMUDevice *vtd_find_hiod_iommufd(VTDAddressSpace *as);
> -void vtd_flush_host_piotlb_all_locked(IntelIOMMUState *s, uint16_t domain_id,
> - uint32_t pasid, hwaddr addr,
> - uint64_t npages, bool ih);
> +void vtd_flush_host_piotlb_all_accel(IntelIOMMUState *s, uint16_t domain_id,
> + uint32_t pasid, hwaddr addr,
> + uint64_t npages, bool ih);
> void vtd_pasid_cache_sync_accel(IntelIOMMUState *s, VTDPASIDCacheInfo *pc_info);
> void vtd_pasid_cache_reset_accel(IntelIOMMUState *s);
> void vtd_iommu_ops_update_accel(PCIIOMMUOps *ops);
> @@ -51,10 +51,10 @@ static inline bool vtd_propagate_guest_pasid(VTDAddressSpace *vtd_as,
> return true;
> }
>
> -static inline void vtd_flush_host_piotlb_all_locked(IntelIOMMUState *s,
> - uint16_t domain_id,
> - uint32_t pasid, hwaddr addr,
> - uint64_t npages, bool ih)
> +static inline void vtd_flush_host_piotlb_all_accel(IntelIOMMUState *s,
> + uint16_t domain_id,
> + uint32_t pasid, hwaddr addr,
> + uint64_t npages, bool ih)
> {
> }
>
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index edd2b8f0cc..3ea5b92b34 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -3011,11 +3011,11 @@ static void vtd_piotlb_pasid_invalidate(IntelIOMMUState *s,
> info.domain_id = domain_id;
> info.pasid = pasid;
>
> + vtd_flush_host_piotlb_all_accel(s, domain_id, pasid, 0, (uint64_t)-1,
> + false);
> vtd_iommu_lock(s);
> g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_pasid,
> &info);
> - vtd_flush_host_piotlb_all_locked(s, domain_id, pasid, 0, (uint64_t)-1,
> - false);
> vtd_iommu_unlock(s);
>
> QLIST_FOREACH(vtd_as, &s->vtd_as_with_notifiers, next) {
> @@ -3045,10 +3045,11 @@ static void vtd_piotlb_page_invalidate(IntelIOMMUState *s, uint16_t domain_id,
> info.addr = addr;
> info.mask = ~((1 << am) - 1);
>
> + vtd_flush_host_piotlb_all_accel(s, domain_id, pasid, addr, 1 << am, ih);
> +
> vtd_iommu_lock(s);
> g_hash_table_foreach_remove(s->iotlb,
> vtd_hash_remove_by_page_piotlb, &info);
> - vtd_flush_host_piotlb_all_locked(s, domain_id, pasid, addr, 1 << am, ih);
> vtd_iommu_unlock(s);
>
> vtd_iotlb_page_invalidate_notify(s, domain_id, addr, am, pasid);
> diff --git a/hw/i386/intel_iommu_accel.c b/hw/i386/intel_iommu_accel.c
> index d7c1ff6b74..acb1b1e238 100644
> --- a/hw/i386/intel_iommu_accel.c
> +++ b/hw/i386/intel_iommu_accel.c
> @@ -231,9 +231,9 @@ static void vtd_flush_host_piotlb(VTDACCELPASIDCacheEntry *vtd_pce,
> }
> }
>
> -void vtd_flush_host_piotlb_all_locked(IntelIOMMUState *s, uint16_t domain_id,
> - uint32_t pasid, hwaddr addr,
> - uint64_t npages, bool ih)
> +void vtd_flush_host_piotlb_all_accel(IntelIOMMUState *s, uint16_t domain_id,
> + uint32_t pasid, hwaddr addr,
> + uint64_t npages, bool ih)
> {
> struct iommu_hwpt_vtd_s1_invalidate cache_info = { 0 };
> VTDPIOTLBInvInfo piotlb_info;
^ permalink raw reply [flat|nested] 45+ messages in thread* RE: [PATCH v1 11/13] intel_iommu_accel: drop _lock suffix in vtd_flush_host_piotlb_all_locked()
2026-03-19 8:02 ` CLEMENT MATHIEU--DRIF
@ 2026-03-19 9:07 ` Duan, Zhenzhong
2026-03-20 4:04 ` Duan, Zhenzhong
0 siblings, 1 reply; 45+ messages in thread
From: Duan, Zhenzhong @ 2026-03-19 9:07 UTC (permalink / raw)
To: CLEMENT MATHIEU--DRIF, qemu-devel@nongnu.org
Cc: alex@shazbot.org, clg@redhat.com, eric.auger@redhat.com,
mst@redhat.com, jasowang@redhat.com, jgg@nvidia.com,
nicolinc@nvidia.com, skolothumtho@nvidia.com,
joao.m.martins@oracle.com, Tian, Kevin, Liu, Yi L, Hao, Xudong
Hi Clement,
>-----Original Message-----
>From: CLEMENT MATHIEU--DRIF <clement.mathieu--drif@bull.com>
>Subject: Re: [PATCH v1 11/13] intel_iommu_accel: drop _lock suffix in
>vtd_flush_host_piotlb_all_locked()
>
>vtd_piotlb_page_invalidate can be called from
>vtd_pri_perform_implicit_invalidation without any lock. Thus we can call
>vtd_flush_host_piotlb_all_accel unlocked. Is there a risk in case of concurrent hot
>plugging of another device?
Good catch, hotplug is protected by BQL, but if an iothread could send PRI request,
we suffer from the race.
I think we can bypass vtd_pri_perform_implicit_invalidation() for passthrough device
which doesn't cache iotlb entry in QEMU but in host, like:
@@ -5401,7 +5401,8 @@ static int vtd_pri_request_page(PCIBus *bus, void *opaque, int devfn,
return -ENOSPC;
}
- if (vtd_pri_perform_implicit_invalidation(vtd_as, addr)) {
+ if (!vtd_find_hiod_iommufd(vtd_as) &&
+ vtd_pri_perform_implicit_invalidation(vtd_as, addr)) {
return -EINVAL;
}
Thanks
Zhenzhong
>
>cmd
>
>On Thu, 2026-03-05 at 22:44 -0500, Zhenzhong Duan wrote:
>> In order to support PASID, we have switched from looping vtd_as to vtd_hiod,
>> vtd_hiod represents host passthrough device and never dereferenced without BQL.
>> So we don't need extra iommu lock to protect it.
>>
>> Signed-off-by: Zhenzhong Duan
><zhenzhong.duan@intel.com>
>> ---
>> hw/i386/intel_iommu_accel.h | 14 +++++++-------
>> hw/i386/intel_iommu.c | 7 ++++---
>> hw/i386/intel_iommu_accel.c | 6 +++---
>> 3 files changed, 14 insertions(+), 13 deletions(-)
>>
>> diff --git a/hw/i386/intel_iommu_accel.h b/hw/i386/intel_iommu_accel.h
>> index 1ae46d9250..3f1b1002b8 100644
>> --- a/hw/i386/intel_iommu_accel.h
>> +++ b/hw/i386/intel_iommu_accel.h
>> @@ -24,9 +24,9 @@ typedef struct VTDACCELPASIDCacheEntry {
>> bool vtd_check_hiod_accel(IntelIOMMUState *s, VTDHostIOMMUDevice
>*vtd_hiod,
>> Error **errp);
>> VTDHostIOMMUDevice *vtd_find_hiod_iommufd(VTDAddressSpace *as);
>> -void vtd_flush_host_piotlb_all_locked(IntelIOMMUState *s, uint16_t
>domain_id,
>> - uint32_t pasid, hwaddr addr,
>> - uint64_t npages, bool ih);
>> +void vtd_flush_host_piotlb_all_accel(IntelIOMMUState *s, uint16_t domain_id,
>> + uint32_t pasid, hwaddr addr,
>> + uint64_t npages, bool ih);
>> void vtd_pasid_cache_sync_accel(IntelIOMMUState *s, VTDPASIDCacheInfo
>*pc_info);
>> void vtd_pasid_cache_reset_accel(IntelIOMMUState *s);
>> void vtd_iommu_ops_update_accel(PCIIOMMUOps *ops);
>> @@ -51,10 +51,10 @@ static inline bool
>vtd_propagate_guest_pasid(VTDAddressSpace *vtd_as,
>> return true;
>> }
>>
>> -static inline void vtd_flush_host_piotlb_all_locked(IntelIOMMUState *s,
>> - uint16_t domain_id,
>> - uint32_t pasid, hwaddr addr,
>> - uint64_t npages, bool ih)
>> +static inline void vtd_flush_host_piotlb_all_accel(IntelIOMMUState *s,
>> + uint16_t domain_id,
>> + uint32_t pasid, hwaddr addr,
>> + uint64_t npages, bool ih)
>> {
>> }
>>
>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>> index edd2b8f0cc..3ea5b92b34 100644
>> --- a/hw/i386/intel_iommu.c
>> +++ b/hw/i386/intel_iommu.c
>> @@ -3011,11 +3011,11 @@ static void
>vtd_piotlb_pasid_invalidate(IntelIOMMUState *s,
>> info.domain_id = domain_id;
>> info.pasid = pasid;
>>
>> + vtd_flush_host_piotlb_all_accel(s, domain_id, pasid, 0, (uint64_t)-1,
>> + false);
>> vtd_iommu_lock(s);
>> g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_pasid,
>> &info);
>> - vtd_flush_host_piotlb_all_locked(s, domain_id, pasid, 0, (uint64_t)-1,
>> - false);
>> vtd_iommu_unlock(s);
>>
>> QLIST_FOREACH(vtd_as, &s->vtd_as_with_notifiers, next) {
>> @@ -3045,10 +3045,11 @@ static void
>vtd_piotlb_page_invalidate(IntelIOMMUState *s, uint16_t domain_id,
>> info.addr = addr;
>> info.mask = ~((1 << am) - 1);
>>
>> + vtd_flush_host_piotlb_all_accel(s, domain_id, pasid, addr, 1 << am, ih);
>> +
>> vtd_iommu_lock(s);
>> g_hash_table_foreach_remove(s->iotlb,
>> vtd_hash_remove_by_page_piotlb, &info);
>> - vtd_flush_host_piotlb_all_locked(s, domain_id, pasid, addr, 1 << am, ih);
>> vtd_iommu_unlock(s);
>>
>> vtd_iotlb_page_invalidate_notify(s, domain_id, addr, am, pasid);
>> diff --git a/hw/i386/intel_iommu_accel.c b/hw/i386/intel_iommu_accel.c
>> index d7c1ff6b74..acb1b1e238 100644
>> --- a/hw/i386/intel_iommu_accel.c
>> +++ b/hw/i386/intel_iommu_accel.c
>> @@ -231,9 +231,9 @@ static void
>vtd_flush_host_piotlb(VTDACCELPASIDCacheEntry *vtd_pce,
>> }
>> }
>>
>> -void vtd_flush_host_piotlb_all_locked(IntelIOMMUState *s, uint16_t
>domain_id,
>> - uint32_t pasid, hwaddr addr,
>> - uint64_t npages, bool ih)
>> +void vtd_flush_host_piotlb_all_accel(IntelIOMMUState *s, uint16_t domain_id,
>> + uint32_t pasid, hwaddr addr,
>> + uint64_t npages, bool ih)
>> {
>> struct iommu_hwpt_vtd_s1_invalidate cache_info = { 0 };
>> VTDPIOTLBInvInfo piotlb_info;
^ permalink raw reply [flat|nested] 45+ messages in thread* RE: [PATCH v1 11/13] intel_iommu_accel: drop _lock suffix in vtd_flush_host_piotlb_all_locked()
2026-03-19 9:07 ` Duan, Zhenzhong
@ 2026-03-20 4:04 ` Duan, Zhenzhong
0 siblings, 0 replies; 45+ messages in thread
From: Duan, Zhenzhong @ 2026-03-20 4:04 UTC (permalink / raw)
To: CLEMENT MATHIEU--DRIF, qemu-devel@nongnu.org
Cc: alex@shazbot.org, clg@redhat.com, eric.auger@redhat.com,
mst@redhat.com, jasowang@redhat.com, jgg@nvidia.com,
nicolinc@nvidia.com, skolothumtho@nvidia.com,
joao.m.martins@oracle.com, Tian, Kevin, Liu, Yi L, Hao, Xudong
Hi Clement,
>-----Original Message-----
>From: Duan, Zhenzhong
>Subject: RE: [PATCH v1 11/13] intel_iommu_accel: drop _lock suffix in
>vtd_flush_host_piotlb_all_locked()
>
>Hi Clement,
>
>>-----Original Message-----
>>From: CLEMENT MATHIEU--DRIF <clement.mathieu--drif@bull.com>
>>Subject: Re: [PATCH v1 11/13] intel_iommu_accel: drop _lock suffix in
>>vtd_flush_host_piotlb_all_locked()
>>
>>vtd_piotlb_page_invalidate can be called from
>>vtd_pri_perform_implicit_invalidation without any lock. Thus we can call
>>vtd_flush_host_piotlb_all_accel unlocked. Is there a risk in case of concurrent hot
>>plugging of another device?
>
>Good catch, hotplug is protected by BQL, but if an iothread could send PRI request,
>we suffer from the race.
>
>I think we can bypass vtd_pri_perform_implicit_invalidation() for passthrough
>device
>which doesn't cache iotlb entry in QEMU but in host, like:
>
>@@ -5401,7 +5401,8 @@ static int vtd_pri_request_page(PCIBus *bus, void
>*opaque, int devfn,
> return -ENOSPC;
> }
>
>- if (vtd_pri_perform_implicit_invalidation(vtd_as, addr)) {
>+ if (!vtd_find_hiod_iommufd(vtd_as) &&
>+ vtd_pri_perform_implicit_invalidation(vtd_as, addr)) {
> return -EINVAL;
> }
After further thinking, I found above change doesn’t resolve the race you mentioned.
But I think the iothread should take BQL before calling ::pri_request_page(),
because it is a function changing vtd device context, e.g., PQT and PRS register,
page invalidation queue and iotlb entries. We should always take BQL before changing
context of any device. Let me know if you have different opinions.
Thanks
Zhenzhong
^ permalink raw reply [flat|nested] 45+ messages in thread
* [PATCH v1 12/13] intel_iommu_accel: Add pasid bits size check
2026-03-06 3:43 [PATCH v1 00/13] intel_iommu: Enable PASID support for passthrough device Zhenzhong Duan
` (10 preceding siblings ...)
2026-03-06 3:44 ` [PATCH v1 11/13] intel_iommu_accel: drop _lock suffix in vtd_flush_host_piotlb_all_locked() Zhenzhong Duan
@ 2026-03-06 3:44 ` Zhenzhong Duan
2026-03-06 7:27 ` CLEMENT MATHIEU--DRIF
2026-03-06 3:44 ` [PATCH v1 13/13] intel_iommu: Expose flag VIOMMU_FLAG_PASID_SUPPORTED when configured Zhenzhong Duan
12 siblings, 1 reply; 45+ messages in thread
From: Zhenzhong Duan @ 2026-03-06 3:44 UTC (permalink / raw)
To: qemu-devel
Cc: alex, clg, eric.auger, mst, jasowang, jgg, nicolinc, skolothumtho,
joao.m.martins, clement.mathieu--drif, kevin.tian, yi.l.liu,
xudong.hao, Zhenzhong Duan
If the vIOMMU's PASID bits size is bigger than the host's, the host could fail
to emulate all bindings in the guest. Add a check to fail device plug early.
The PASID bits size should also be no more than 20 bits according to the PCI spec.
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
---
hw/i386/intel_iommu_internal.h | 1 +
hw/i386/intel_iommu.c | 5 +++++
hw/i386/intel_iommu_accel.c | 8 ++++++++
3 files changed, 14 insertions(+)
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index ede4db6d2d..d6674861fd 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -196,6 +196,7 @@
#define VTD_ECAP_SRS (1ULL << 31)
#define VTD_ECAP_NWFS (1ULL << 33)
#define VTD_ECAP_SET_PSS(x, v) ((x)->ecap = deposit64((x)->ecap, 35, 5, v))
+#define VTD_ECAP_PSS(ecap) extract64(ecap, 35, 5)
#define VTD_ECAP_PASID (1ULL << 40)
#define VTD_ECAP_PDS (1ULL << 42)
#define VTD_ECAP_SMTS (1ULL << 43)
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 3ea5b92b34..e99a9cf9c6 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -5559,6 +5559,11 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error **errp)
error_setg(errp, "Need to set scalable mode for PASID");
return false;
}
+ if (s->pasid > PCI_EXT_CAP_PASID_MAX_WIDTH) {
+ error_setg(errp, "PASID width %d, exceed Max PASID Width %d allowed "
+ "in PCI spec", s->pasid, PCI_EXT_CAP_PASID_MAX_WIDTH);
+ return false;
+ }
if (s->svm) {
if (!x86_iommu->dt_supported) {
diff --git a/hw/i386/intel_iommu_accel.c b/hw/i386/intel_iommu_accel.c
index acb1b1e238..15412123d5 100644
--- a/hw/i386/intel_iommu_accel.c
+++ b/hw/i386/intel_iommu_accel.c
@@ -44,6 +44,7 @@ bool vtd_check_hiod_accel(IntelIOMMUState *s, VTDHostIOMMUDevice *vtd_hiod,
HostIOMMUDevice *hiod = vtd_hiod->hiod;
struct HostIOMMUDeviceCaps *caps = &hiod->caps;
struct iommu_hw_info_vtd *vtd = &caps->vendor_caps.vtd;
+ uint8_t hpasid = VTD_ECAP_PSS(vtd->ecap_reg) + 1;
PCIBus *bus = vtd_hiod->bus;
PCIDevice *pdev = bus->devices[vtd_hiod->devfn];
@@ -64,6 +65,13 @@ bool vtd_check_hiod_accel(IntelIOMMUState *s, VTDHostIOMMUDevice *vtd_hiod,
return false;
}
+ /* Only do the check when host device support PASIDs */
+ if (caps->max_pasid_log2 && s->pasid > hpasid) {
+ error_setg(errp, "PASID bits size %d > host IOMMU PASID bits size %d",
+ s->pasid, hpasid);
+ return false;
+ }
+
if (pci_device_get_iommu_bus_devfn(pdev, &bus, NULL, NULL)) {
error_setg(errp, "Host device downstream to a PCI bridge is "
"unsupported when x-flts=on");
--
2.47.3
^ permalink raw reply related [flat|nested] 45+ messages in thread
* Re: [PATCH v1 12/13] intel_iommu_accel: Add pasid bits size check
2026-03-06 3:44 ` [PATCH v1 12/13] intel_iommu_accel: Add pasid bits size check Zhenzhong Duan
@ 2026-03-06 7:27 ` CLEMENT MATHIEU--DRIF
2026-03-09 2:16 ` Duan, Zhenzhong
0 siblings, 1 reply; 45+ messages in thread
From: CLEMENT MATHIEU--DRIF @ 2026-03-06 7:27 UTC (permalink / raw)
To: Zhenzhong Duan, qemu-devel@nongnu.org
Cc: alex@shazbot.org, clg@redhat.com, eric.auger@redhat.com,
mst@redhat.com, jasowang@redhat.com, jgg@nvidia.com,
nicolinc@nvidia.com, skolothumtho@nvidia.com,
joao.m.martins@oracle.com, kevin.tian@intel.com,
yi.l.liu@intel.com, xudong.hao@intel.com
Hi Zhenzhong,
On Thu, 2026-03-05 at 22:44 -0500, Zhenzhong Duan wrote:
> If pasid bits size is bigger than host side, host could fail to emulate
> all bindings in guest. Add a check to fail device plug early.
>
> Pasid bits size should also be no more than 20 bits according to PCI spec.
>
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> ---
> hw/i386/intel_iommu_internal.h | 1 +
> hw/i386/intel_iommu.c | 5 +++++
> hw/i386/intel_iommu_accel.c | 8 ++++++++
> 3 files changed, 14 insertions(+)
>
> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
> index ede4db6d2d..d6674861fd 100644
> --- a/hw/i386/intel_iommu_internal.h
> +++ b/hw/i386/intel_iommu_internal.h
> @@ -196,6 +196,7 @@
> #define VTD_ECAP_SRS (1ULL << 31)
> #define VTD_ECAP_NWFS (1ULL << 33)
> #define VTD_ECAP_SET_PSS(x, v) ((x)->ecap = deposit64((x)->ecap, 35, 5, v))
> +#define VTD_ECAP_PSS(ecap) extract64(ecap, 35, 5)
> #define VTD_ECAP_PASID (1ULL << 40)
> #define VTD_ECAP_PDS (1ULL << 42)
> #define VTD_ECAP_SMTS (1ULL << 43)
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index 3ea5b92b34..e99a9cf9c6 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -5559,6 +5559,11 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error **errp)
> error_setg(errp, "Need to set scalable mode for PASID");
> return false;
> }
> + if (s->pasid > PCI_EXT_CAP_PASID_MAX_WIDTH) {
> + error_setg(errp, "PASID width %d, exceed Max PASID Width %d allowed "
> + "in PCI spec", s->pasid, PCI_EXT_CAP_PASID_MAX_WIDTH);
> + return false;
> + }
I think this one should go in patch 5/13 as it is not related to accel
>
> if (s->svm) {
> if (!x86_iommu->dt_supported) {
> diff --git a/hw/i386/intel_iommu_accel.c b/hw/i386/intel_iommu_accel.c
> index acb1b1e238..15412123d5 100644
> --- a/hw/i386/intel_iommu_accel.c
> +++ b/hw/i386/intel_iommu_accel.c
> @@ -44,6 +44,7 @@ bool vtd_check_hiod_accel(IntelIOMMUState *s, VTDHostIOMMUDevice *vtd_hiod,
> HostIOMMUDevice *hiod = vtd_hiod->hiod;
> struct HostIOMMUDeviceCaps *caps = &hiod->caps;
> struct iommu_hw_info_vtd *vtd = &caps->vendor_caps.vtd;
> + uint8_t hpasid = VTD_ECAP_PSS(vtd->ecap_reg) + 1;
> PCIBus *bus = vtd_hiod->bus;
> PCIDevice *pdev = bus->devices[vtd_hiod->devfn];
>
> @@ -64,6 +65,13 @@ bool vtd_check_hiod_accel(IntelIOMMUState *s, VTDHostIOMMUDevice *vtd_hiod,
> return false;
> }
>
> + /* Only do the check when host device support PASIDs */
> + if (caps->max_pasid_log2 && s->pasid > hpasid) {
> + error_setg(errp, "PASID bits size %d > host IOMMU PASID bits size %d",
> + s->pasid, hpasid);
> + return false;
> + }
> +
> if (pci_device_get_iommu_bus_devfn(pdev, &bus, NULL, NULL)) {
> error_setg(errp, "Host device downstream to a PCI bridge is "
> "unsupported when x-flts=on");
^ permalink raw reply [flat|nested] 45+ messages in thread
* RE: [PATCH v1 12/13] intel_iommu_accel: Add pasid bits size check
2026-03-06 7:27 ` CLEMENT MATHIEU--DRIF
@ 2026-03-09 2:16 ` Duan, Zhenzhong
0 siblings, 0 replies; 45+ messages in thread
From: Duan, Zhenzhong @ 2026-03-09 2:16 UTC (permalink / raw)
To: CLEMENT MATHIEU--DRIF, qemu-devel@nongnu.org
Cc: alex@shazbot.org, clg@redhat.com, eric.auger@redhat.com,
mst@redhat.com, jasowang@redhat.com, jgg@nvidia.com,
nicolinc@nvidia.com, skolothumtho@nvidia.com,
joao.m.martins@oracle.com, Tian, Kevin, Liu, Yi L, Hao, Xudong
Hi Clement,
>-----Original Message-----
>From: CLEMENT MATHIEU--DRIF <clement.mathieu--drif@bull.com>
>Subject: Re: [PATCH v1 12/13] intel_iommu_accel: Add pasid bits size check
>
>Hi Zhenzhong,
>
>On Thu, 2026-03-05 at 22:44 -0500, Zhenzhong Duan wrote:
>> If pasid bits size is bigger than host side, host could fail to emulate
>> all bindings in guest. Add a check to fail device plug early.
>>
>> Pasid bits size should also be no more than 20 bits according to PCI spec.
>>
>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>> ---
>> hw/i386/intel_iommu_internal.h | 1 +
>> hw/i386/intel_iommu.c | 5 +++++
>> hw/i386/intel_iommu_accel.c | 8 ++++++++
>> 3 files changed, 14 insertions(+)
>>
>> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
>> index ede4db6d2d..d6674861fd 100644
>> --- a/hw/i386/intel_iommu_internal.h
>> +++ b/hw/i386/intel_iommu_internal.h
>> @@ -196,6 +196,7 @@
>> #define VTD_ECAP_SRS (1ULL << 31)
>> #define VTD_ECAP_NWFS (1ULL << 33)
>> #define VTD_ECAP_SET_PSS(x, v) ((x)->ecap = deposit64((x)->ecap, 35, 5, v))
>> +#define VTD_ECAP_PSS(ecap) extract64(ecap, 35, 5)
>> #define VTD_ECAP_PASID (1ULL << 40)
>> #define VTD_ECAP_PDS (1ULL << 42)
>> #define VTD_ECAP_SMTS (1ULL << 43)
>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>> index 3ea5b92b34..e99a9cf9c6 100644
>> --- a/hw/i386/intel_iommu.c
>> +++ b/hw/i386/intel_iommu.c
>> @@ -5559,6 +5559,11 @@ static bool vtd_decide_config(IntelIOMMUState *s,
>Error **errp)
>> error_setg(errp, "Need to set scalable mode for PASID");
>> return false;
>> }
>> + if (s->pasid > PCI_EXT_CAP_PASID_MAX_WIDTH) {
>> + error_setg(errp, "PASID width %d, exceed Max PASID Width %d allowed "
>> + "in PCI spec", s->pasid, PCI_EXT_CAP_PASID_MAX_WIDTH);
>> + return false;
>> + }
>
>I think this one should go in patch 5/13 as it is not related to accel
Yes, makes sense, will do.
Thanks
Zhenzhong
^ permalink raw reply [flat|nested] 45+ messages in thread
* [PATCH v1 13/13] intel_iommu: Expose flag VIOMMU_FLAG_PASID_SUPPORTED when configured
2026-03-06 3:43 [PATCH v1 00/13] intel_iommu: Enable PASID support for passthrough device Zhenzhong Duan
` (11 preceding siblings ...)
2026-03-06 3:44 ` [PATCH v1 12/13] intel_iommu_accel: Add pasid bits size check Zhenzhong Duan
@ 2026-03-06 3:44 ` Zhenzhong Duan
12 siblings, 0 replies; 45+ messages in thread
From: Zhenzhong Duan @ 2026-03-06 3:44 UTC (permalink / raw)
To: qemu-devel
Cc: alex, clg, eric.auger, mst, jasowang, jgg, nicolinc, skolothumtho,
joao.m.martins, clement.mathieu--drif, kevin.tian, yi.l.liu,
xudong.hao, Zhenzhong Duan
A VFIO device checks the flag VIOMMU_FLAG_PASID_SUPPORTED and, if it is set,
exposes the PASID capability; otherwise the guest cannot enable PASID on this
device even if the vIOMMU's pasid is configured.
This is the final knob to enable PASID.
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
---
hw/i386/intel_iommu.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index e99a9cf9c6..af4904e341 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -4796,6 +4796,7 @@ static uint64_t vtd_get_viommu_flags(void *opaque)
uint64_t flags;
flags = s->fsts ? VIOMMU_FLAG_WANT_NESTING_PARENT : 0;
+ flags |= s->pasid ? VIOMMU_FLAG_PASID_SUPPORTED : 0;
return flags;
}
--
2.47.3
^ permalink raw reply related [flat|nested] 45+ messages in thread