* [PATCH v2 1/4] backends/iommufd: Introduce iommufd_backend_alloc_veventq
2025-12-04 9:22 [PATCH v2 0/4] vEVENTQ support for accelerated SMMUv3 devices Shameer Kolothum
@ 2025-12-04 9:22 ` Shameer Kolothum
2025-12-09 10:31 ` Eric Auger
2025-12-04 9:22 ` [PATCH v2 2/4] hw/arm/smmuv3-accel: Allocate vEVENTQ for accelerated SMMUv3 devices Shameer Kolothum
` (3 subsequent siblings)
4 siblings, 1 reply; 14+ messages in thread
From: Shameer Kolothum @ 2025-12-04 9:22 UTC (permalink / raw)
To: qemu-arm, qemu-devel
Cc: eric.auger, peter.maydell, nicolinc, nathanc, mochs, jgg,
jonathan.cameron, zhangfei.gao, zhenzhong.duan, kjaju
From: Nicolin Chen <nicolinc@nvidia.com>
Add a new helper for IOMMU_VEVENTQ_ALLOC ioctl to allocate a virtual event
queue (vEVENTQ) for a vIOMMU object.
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Shameer Kolothum <skolothumtho@nvidia.com>
---
backends/iommufd.c | 31 +++++++++++++++++++++++++++++++
backends/trace-events | 1 +
include/system/iommufd.h | 12 ++++++++++++
3 files changed, 44 insertions(+)
diff --git a/backends/iommufd.c b/backends/iommufd.c
index 718d63f5cf..633aecd525 100644
--- a/backends/iommufd.c
+++ b/backends/iommufd.c
@@ -503,6 +503,37 @@ bool iommufd_backend_alloc_vdev(IOMMUFDBackend *be, uint32_t dev_id,
return true;
}
+bool iommufd_backend_alloc_veventq(IOMMUFDBackend *be, uint32_t viommu_id,
+ uint32_t type, uint32_t depth,
+ uint32_t *out_veventq_id,
+ uint32_t *out_veventq_fd, Error **errp)
+{
+ int ret;
+ struct iommu_veventq_alloc alloc_veventq = {
+ .size = sizeof(alloc_veventq),
+ .flags = 0,
+ .type = type,
+ .veventq_depth = depth,
+ .viommu_id = viommu_id,
+ };
+
+ ret = ioctl(be->fd, IOMMU_VEVENTQ_ALLOC, &alloc_veventq);
+
+ trace_iommufd_viommu_alloc_eventq(be->fd, viommu_id, type,
+ alloc_veventq.out_veventq_id,
+ alloc_veventq.out_veventq_fd, ret);
+ if (ret) {
+ error_setg_errno(errp, errno, "IOMMU_VEVENTQ_ALLOC failed");
+ return false;
+ }
+
+ g_assert(out_veventq_id);
+ g_assert(out_veventq_fd);
+ *out_veventq_id = alloc_veventq.out_veventq_id;
+ *out_veventq_fd = alloc_veventq.out_veventq_fd;
+ return true;
+}
+
bool host_iommu_device_iommufd_attach_hwpt(HostIOMMUDeviceIOMMUFD *idev,
uint32_t hwpt_id, Error **errp)
{
diff --git a/backends/trace-events b/backends/trace-events
index 8408dc8701..5afa7a40be 100644
--- a/backends/trace-events
+++ b/backends/trace-events
@@ -23,3 +23,4 @@ iommufd_backend_get_dirty_bitmap(int iommufd, uint32_t hwpt_id, uint64_t iova, u
iommufd_backend_invalidate_cache(int iommufd, uint32_t id, uint32_t data_type, uint32_t entry_len, uint32_t entry_num, uint32_t done_num, uint64_t data_ptr, int ret) " iommufd=%d id=%u data_type=%u entry_len=%u entry_num=%u done_num=%u data_ptr=0x%"PRIx64" (%d)"
iommufd_backend_alloc_viommu(int iommufd, uint32_t dev_id, uint32_t type, uint32_t hwpt_id, uint32_t viommu_id, int ret) " iommufd=%d type=%u dev_id=%u hwpt_id=%u viommu_id=%u (%d)"
iommufd_backend_alloc_vdev(int iommufd, uint32_t dev_id, uint32_t viommu_id, uint64_t virt_id, uint32_t vdev_id, int ret) " iommufd=%d dev_id=%u viommu_id=%u virt_id=0x%"PRIx64" vdev_id=%u (%d)"
+iommufd_viommu_alloc_eventq(int iommufd, uint32_t viommu_id, uint32_t type, uint32_t veventq_id, uint32_t veventq_fd, int ret) " iommufd=%d viommu_id=%u type=%u veventq_id=%u veventq_fd=%u (%d)"
diff --git a/include/system/iommufd.h b/include/system/iommufd.h
index aa78bf1e1d..9770ff1484 100644
--- a/include/system/iommufd.h
+++ b/include/system/iommufd.h
@@ -56,6 +56,13 @@ typedef struct IOMMUFDVdev {
uint32_t virt_id; /* virtual device ID */
} IOMMUFDVdev;
+/* Virtual event queue interface for a vIOMMU */
+typedef struct IOMMUFDVeventq {
+ IOMMUFDViommu *viommu;
+ uint32_t veventq_id;
+ uint32_t veventq_fd;
+} IOMMUFDVeventq;
+
bool iommufd_backend_connect(IOMMUFDBackend *be, Error **errp);
void iommufd_backend_disconnect(IOMMUFDBackend *be);
@@ -86,6 +93,11 @@ bool iommufd_backend_alloc_vdev(IOMMUFDBackend *be, uint32_t dev_id,
uint32_t viommu_id, uint64_t virt_id,
uint32_t *out_vdev_id, Error **errp);
+bool iommufd_backend_alloc_veventq(IOMMUFDBackend *be, uint32_t viommu_id,
+ uint32_t type, uint32_t depth,
+ uint32_t *out_veventq_id,
+ uint32_t *out_veventq_fd, Error **errp);
+
bool iommufd_backend_set_dirty_tracking(IOMMUFDBackend *be, uint32_t hwpt_id,
bool start, Error **errp);
bool iommufd_backend_get_dirty_bitmap(IOMMUFDBackend *be, uint32_t hwpt_id,
--
2.43.0
* Re: [PATCH v2 1/4] backends/iommufd: Introduce iommufd_backend_alloc_veventq
2025-12-04 9:22 ` [PATCH v2 1/4] backends/iommufd: Introduce iommufd_backend_alloc_veventq Shameer Kolothum
@ 2025-12-09 10:31 ` Eric Auger
2025-12-10 15:13 ` Shameer Kolothum
0 siblings, 1 reply; 14+ messages in thread
From: Eric Auger @ 2025-12-09 10:31 UTC (permalink / raw)
To: Shameer Kolothum, qemu-arm, qemu-devel
Cc: peter.maydell, nicolinc, nathanc, mochs, jgg, jonathan.cameron,
zhangfei.gao, zhenzhong.duan, kjaju
On 12/4/25 10:22 AM, Shameer Kolothum wrote:
> From: Nicolin Chen <nicolinc@nvidia.com>
>
> Add a new helper for IOMMU_VEVENTQ_ALLOC ioctl to allocate a virtual event
> queue (vEVENTQ) for a vIOMMU object.
>
> Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
> Signed-off-by: Shameer Kolothum <skolothumtho@nvidia.com>
> ---
> backends/iommufd.c | 31 +++++++++++++++++++++++++++++++
> backends/trace-events | 1 +
> include/system/iommufd.h | 12 ++++++++++++
> 3 files changed, 44 insertions(+)
>
> diff --git a/backends/iommufd.c b/backends/iommufd.c
> index 718d63f5cf..633aecd525 100644
> --- a/backends/iommufd.c
> +++ b/backends/iommufd.c
> @@ -503,6 +503,37 @@ bool iommufd_backend_alloc_vdev(IOMMUFDBackend *be, uint32_t dev_id,
> return true;
> }
>
> +bool iommufd_backend_alloc_veventq(IOMMUFDBackend *be, uint32_t viommu_id,
> + uint32_t type, uint32_t depth,
> + uint32_t *out_veventq_id,
> + uint32_t *out_veventq_fd, Error **errp)
> +{
> + int ret;
> + struct iommu_veventq_alloc alloc_veventq = {
> + .size = sizeof(alloc_veventq),
> + .flags = 0,
> + .type = type,
> + .veventq_depth = depth,
> + .viommu_id = viommu_id,
> + };
> +
> + ret = ioctl(be->fd, IOMMU_VEVENTQ_ALLOC, &alloc_veventq);
> +
> + trace_iommufd_viommu_alloc_eventq(be->fd, viommu_id, type,
> + alloc_veventq.out_veventq_id,
> + alloc_veventq.out_veventq_fd, ret);
> + if (ret) {
> + error_setg_errno(errp, errno, "IOMMU_VEVENTQ_ALLOC failed");
> + return false;
> + }
> +
> + g_assert(out_veventq_id);
> + g_assert(out_veventq_fd);
can it happen if the ioctl succeeds?
> + *out_veventq_id = alloc_veventq.out_veventq_id;
> + *out_veventq_fd = alloc_veventq.out_veventq_fd;
> + return true;
> +}
> +
> bool host_iommu_device_iommufd_attach_hwpt(HostIOMMUDeviceIOMMUFD *idev,
> uint32_t hwpt_id, Error **errp)
> {
> diff --git a/backends/trace-events b/backends/trace-events
> index 8408dc8701..5afa7a40be 100644
> --- a/backends/trace-events
> +++ b/backends/trace-events
> @@ -23,3 +23,4 @@ iommufd_backend_get_dirty_bitmap(int iommufd, uint32_t hwpt_id, uint64_t iova, u
> iommufd_backend_invalidate_cache(int iommufd, uint32_t id, uint32_t data_type, uint32_t entry_len, uint32_t entry_num, uint32_t done_num, uint64_t data_ptr, int ret) " iommufd=%d id=%u data_type=%u entry_len=%u entry_num=%u done_num=%u data_ptr=0x%"PRIx64" (%d)"
> iommufd_backend_alloc_viommu(int iommufd, uint32_t dev_id, uint32_t type, uint32_t hwpt_id, uint32_t viommu_id, int ret) " iommufd=%d type=%u dev_id=%u hwpt_id=%u viommu_id=%u (%d)"
> iommufd_backend_alloc_vdev(int iommufd, uint32_t dev_id, uint32_t viommu_id, uint64_t virt_id, uint32_t vdev_id, int ret) " iommufd=%d dev_id=%u viommu_id=%u virt_id=0x%"PRIx64" vdev_id=%u (%d)"
> +iommufd_viommu_alloc_eventq(int iommufd, uint32_t viommu_id, uint32_t type, uint32_t veventq_id, uint32_t veventq_fd, int ret) " iommufd=%d viommu_id=%u type=%u veventq_id=%u veventq_fd=%u (%d)"
> diff --git a/include/system/iommufd.h b/include/system/iommufd.h
> index aa78bf1e1d..9770ff1484 100644
> --- a/include/system/iommufd.h
> +++ b/include/system/iommufd.h
> @@ -56,6 +56,13 @@ typedef struct IOMMUFDVdev {
> uint32_t virt_id; /* virtual device ID */
> } IOMMUFDVdev;
>
> +/* Virtual event queue interface for a vIOMMU */
> +typedef struct IOMMUFDVeventq {
> + IOMMUFDViommu *viommu;
> + uint32_t veventq_id;
> + uint32_t veventq_fd;
> +} IOMMUFDVeventq;
> +
> bool iommufd_backend_connect(IOMMUFDBackend *be, Error **errp);
> void iommufd_backend_disconnect(IOMMUFDBackend *be);
>
> @@ -86,6 +93,11 @@ bool iommufd_backend_alloc_vdev(IOMMUFDBackend *be, uint32_t dev_id,
> uint32_t viommu_id, uint64_t virt_id,
> uint32_t *out_vdev_id, Error **errp);
>
> +bool iommufd_backend_alloc_veventq(IOMMUFDBackend *be, uint32_t viommu_id,
> + uint32_t type, uint32_t depth,
> + uint32_t *out_veventq_id,
> + uint32_t *out_veventq_fd, Error **errp);
> +
> bool iommufd_backend_set_dirty_tracking(IOMMUFDBackend *be, uint32_t hwpt_id,
> bool start, Error **errp);
> bool iommufd_backend_get_dirty_bitmap(IOMMUFDBackend *be, uint32_t hwpt_id,
Eric
* RE: [PATCH v2 1/4] backends/iommufd: Introduce iommufd_backend_alloc_veventq
2025-12-09 10:31 ` Eric Auger
@ 2025-12-10 15:13 ` Shameer Kolothum
0 siblings, 0 replies; 14+ messages in thread
From: Shameer Kolothum @ 2025-12-10 15:13 UTC (permalink / raw)
To: eric.auger@redhat.com, qemu-arm@nongnu.org, qemu-devel@nongnu.org
Cc: peter.maydell@linaro.org, Nicolin Chen, Nathan Chen, Matt Ochs,
Jason Gunthorpe, jonathan.cameron@huawei.com,
zhangfei.gao@linaro.org, zhenzhong.duan@intel.com,
Krishnakant Jaju
> -----Original Message-----
> From: Eric Auger <eric.auger@redhat.com>
> Sent: 09 December 2025 10:32
> To: Shameer Kolothum <skolothumtho@nvidia.com>; qemu-
> arm@nongnu.org; qemu-devel@nongnu.org
> Cc: peter.maydell@linaro.org; Nicolin Chen <nicolinc@nvidia.com>; Nathan
> Chen <nathanc@nvidia.com>; Matt Ochs <mochs@nvidia.com>; Jason
> Gunthorpe <jgg@nvidia.com>; jonathan.cameron@huawei.com;
> zhangfei.gao@linaro.org; zhenzhong.duan@intel.com; Krishnakant Jaju
> <kjaju@nvidia.com>
> Subject: Re: [PATCH v2 1/4] backends/iommufd: Introduce
> iommufd_backend_alloc_veventq
>
> On 12/4/25 10:22 AM, Shameer Kolothum wrote:
> > From: Nicolin Chen <nicolinc@nvidia.com>
> >
> > Add a new helper for IOMMU_VEVENTQ_ALLOC ioctl to allocate a virtual
> > event queue (vEVENTQ) for a vIOMMU object.
> >
> > Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
> > Signed-off-by: Shameer Kolothum <skolothumtho@nvidia.com>
> > ---
> > backends/iommufd.c | 31 +++++++++++++++++++++++++++++++
> > backends/trace-events | 1 +
> > include/system/iommufd.h | 12 ++++++++++++
> > 3 files changed, 44 insertions(+)
> >
> > diff --git a/backends/iommufd.c b/backends/iommufd.c index
> > 718d63f5cf..633aecd525 100644
> > --- a/backends/iommufd.c
> > +++ b/backends/iommufd.c
> > @@ -503,6 +503,37 @@ bool
> iommufd_backend_alloc_vdev(IOMMUFDBackend *be, uint32_t dev_id,
> > return true;
> > }
> >
> > +bool iommufd_backend_alloc_veventq(IOMMUFDBackend *be, uint32_t
> viommu_id,
> > + uint32_t type, uint32_t depth,
> > + uint32_t *out_veventq_id,
> > + uint32_t *out_veventq_fd, Error
> > +**errp) {
> > + int ret;
> > + struct iommu_veventq_alloc alloc_veventq = {
> > + .size = sizeof(alloc_veventq),
> > + .flags = 0,
> > + .type = type,
> > + .veventq_depth = depth,
> > + .viommu_id = viommu_id,
> > + };
> > +
> > + ret = ioctl(be->fd, IOMMU_VEVENTQ_ALLOC, &alloc_veventq);
> > +
> > + trace_iommufd_viommu_alloc_eventq(be->fd, viommu_id, type,
> > + alloc_veventq.out_veventq_id,
> > + alloc_veventq.out_veventq_fd, ret);
> > + if (ret) {
> > + error_setg_errno(errp, errno, "IOMMU_VEVENTQ_ALLOC failed");
> > + return false;
> > + }
> > +
> > + g_assert(out_veventq_id);
> > + g_assert(out_veventq_fd);
> can it happen if the ioctl succeeds?
It can, right? If the caller has not provided valid pointers to return the
values obtained from the ioctl.
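For illustration, hoisting the asserts ahead of the ioctl would catch a bad
caller regardless of the ioctl outcome. A sketch based on the patch body:

bool iommufd_backend_alloc_veventq(IOMMUFDBackend *be, uint32_t viommu_id,
                                   uint32_t type, uint32_t depth,
                                   uint32_t *out_veventq_id,
                                   uint32_t *out_veventq_fd, Error **errp)
{
    struct iommu_veventq_alloc alloc_veventq = {
        .size = sizeof(alloc_veventq),
        .flags = 0,
        .type = type,
        .veventq_depth = depth,
        .viommu_id = viommu_id,
    };
    int ret;

    /* Catch caller bugs up front, independent of the ioctl result */
    g_assert(out_veventq_id);
    g_assert(out_veventq_fd);

    ret = ioctl(be->fd, IOMMU_VEVENTQ_ALLOC, &alloc_veventq);
    trace_iommufd_viommu_alloc_eventq(be->fd, viommu_id, type,
                                      alloc_veventq.out_veventq_id,
                                      alloc_veventq.out_veventq_fd, ret);
    if (ret) {
        error_setg_errno(errp, errno, "IOMMU_VEVENTQ_ALLOC failed");
        return false;
    }

    *out_veventq_id = alloc_veventq.out_veventq_id;
    *out_veventq_fd = alloc_veventq.out_veventq_fd;
    return true;
}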
Thanks,
Shameer
> > + *out_veventq_id = alloc_veventq.out_veventq_id;
> > + *out_veventq_fd = alloc_veventq.out_veventq_fd;
> > + return true;
> > +}
> > +
> > bool
> host_iommu_device_iommufd_attach_hwpt(HostIOMMUDeviceIOMMUFD
> *idev,
> > uint32_t hwpt_id, Error
> > **errp) { diff --git a/backends/trace-events b/backends/trace-events
> > index 8408dc8701..5afa7a40be 100644
> > --- a/backends/trace-events
> > +++ b/backends/trace-events
> > @@ -23,3 +23,4 @@ iommufd_backend_get_dirty_bitmap(int iommufd,
> > uint32_t hwpt_id, uint64_t iova, u iommufd_backend_invalidate_cache(int
> iommufd, uint32_t id, uint32_t data_type, uint32_t entry_len, uint32_t
> entry_num, uint32_t done_num, uint64_t data_ptr, int ret) " iommufd=%d
> id=%u data_type=%u entry_len=%u entry_num=%u done_num=%u
> data_ptr=0x%"PRIx64" (%d)"
> > iommufd_backend_alloc_viommu(int iommufd, uint32_t dev_id, uint32_t
> type, uint32_t hwpt_id, uint32_t viommu_id, int ret) " iommufd=%d type=%u
> dev_id=%u hwpt_id=%u viommu_id=%u (%d)"
> > iommufd_backend_alloc_vdev(int iommufd, uint32_t dev_id, uint32_t
> viommu_id, uint64_t virt_id, uint32_t vdev_id, int ret) " iommufd=%d
> dev_id=%u viommu_id=%u virt_id=0x%"PRIx64" vdev_id=%u (%d)"
> > +iommufd_viommu_alloc_eventq(int iommufd, uint32_t viommu_id,
> uint32_t type, uint32_t veventq_id, uint32_t veventq_fd, int ret) "
> iommufd=%d viommu_id=%u type=%u veventq_id=%u veventq_fd=%u (%d)"
> > diff --git a/include/system/iommufd.h b/include/system/iommufd.h index
> > aa78bf1e1d..9770ff1484 100644
> > --- a/include/system/iommufd.h
> > +++ b/include/system/iommufd.h
> > @@ -56,6 +56,13 @@ typedef struct IOMMUFDVdev {
> > uint32_t virt_id; /* virtual device ID */ } IOMMUFDVdev;
> >
> > +/* Virtual event queue interface for a vIOMMU */ typedef struct
> > +IOMMUFDVeventq {
> > + IOMMUFDViommu *viommu;
> > + uint32_t veventq_id;
> > + uint32_t veventq_fd;
> > +} IOMMUFDVeventq;
> > +
> > bool iommufd_backend_connect(IOMMUFDBackend *be, Error **errp);
> void
> > iommufd_backend_disconnect(IOMMUFDBackend *be);
> >
> > @@ -86,6 +93,11 @@ bool
> iommufd_backend_alloc_vdev(IOMMUFDBackend *be, uint32_t dev_id,
> > uint32_t viommu_id, uint64_t virt_id,
> > uint32_t *out_vdev_id, Error **errp);
> >
> > +bool iommufd_backend_alloc_veventq(IOMMUFDBackend *be, uint32_t
> viommu_id,
> > + uint32_t type, uint32_t depth,
> > + uint32_t *out_veventq_id,
> > + uint32_t *out_veventq_fd, Error
> > +**errp);
> > +
> > bool iommufd_backend_set_dirty_tracking(IOMMUFDBackend *be,
> uint32_t hwpt_id,
> > bool start, Error **errp);
> > bool iommufd_backend_get_dirty_bitmap(IOMMUFDBackend *be, uint32_t
> > hwpt_id,
> Eric
* [PATCH v2 2/4] hw/arm/smmuv3-accel: Allocate vEVENTQ for accelerated SMMUv3 devices
2025-12-04 9:22 [PATCH v2 0/4] vEVENTQ support for accelerated SMMUv3 devices Shameer Kolothum
2025-12-04 9:22 ` [PATCH v2 1/4] backends/iommufd: Introduce iommufd_backend_alloc_veventq Shameer Kolothum
@ 2025-12-04 9:22 ` Shameer Kolothum
2025-12-09 16:08 ` Eric Auger
2025-12-04 9:22 ` [PATCH v2 3/4] hw/arm/smmuv3: Introduce a helper function for event propagation Shameer Kolothum
` (2 subsequent siblings)
4 siblings, 1 reply; 14+ messages in thread
From: Shameer Kolothum @ 2025-12-04 9:22 UTC (permalink / raw)
To: qemu-arm, qemu-devel
Cc: eric.auger, peter.maydell, nicolinc, nathanc, mochs, jgg,
jonathan.cameron, zhangfei.gao, zhenzhong.duan, kjaju
From: Nicolin Chen <nicolinc@nvidia.com>
When the guest enables the Event Queue and a vIOMMU is present, allocate a
vEVENTQ object so that host-side events related to the vIOMMU can be
received and propagated back to the guest.
For cold-plugged devices using SMMUv3 acceleration, the vIOMMU is created
before the guest boots. In this case, the vEVENTQ is allocated when the
guest writes to SMMU_CR0 and sets EVENTQEN = 1.
If no cold-plugged device exists at boot (i.e. no vIOMMU initially), the
vEVENTQ is allocated when a vIOMMU is created, i.e. during the first
device hot-plug.
Event read and propagation will be added in a later patch.
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Shameer Kolothum <skolothumtho@nvidia.com>
---
hw/arm/smmuv3-accel.c | 62 ++++++++++++++++++++++++++++++++++++++++++-
hw/arm/smmuv3-accel.h | 6 +++++
hw/arm/smmuv3.c | 4 +++
3 files changed, 71 insertions(+), 1 deletion(-)
diff --git a/hw/arm/smmuv3-accel.c b/hw/arm/smmuv3-accel.c
index dc0f61e841..74f0be3731 100644
--- a/hw/arm/smmuv3-accel.c
+++ b/hw/arm/smmuv3-accel.c
@@ -378,6 +378,58 @@ bool smmuv3_accel_issue_inv_cmd(SMMUv3State *bs, void *cmd, SMMUDevice *sdev,
sizeof(Cmd), &entry_num, cmd, errp);
}
+static void smmuv3_accel_free_veventq(SMMUv3AccelState *accel)
+{
+ IOMMUFDVeventq *veventq = accel->veventq;
+
+ if (!veventq) {
+ return;
+ }
+ iommufd_backend_free_id(accel->viommu.iommufd, veventq->veventq_id);
+ g_free(veventq);
+ accel->veventq = NULL;
+}
+
+bool smmuv3_accel_alloc_veventq(SMMUv3State *s, Error **errp)
+{
+ SMMUv3AccelState *accel = s->s_accel;
+ IOMMUFDVeventq *veventq;
+ uint32_t veventq_id;
+ uint32_t veventq_fd;
+
+ if (!accel) {
+ return true;
+ }
+
+ if (accel->veventq) {
+ return true;
+ }
+
+ /*
+ * Check whether the Guest has enabled the Event Queue. The queue enabled
+ * means EVENTQ_BASE has been programmed with a valid base address and size.
+ * If it’s not yet configured, return and retry later.
+ */
+ if (!smmuv3_eventq_enabled(s)) {
+ return true;
+ }
+
+ if (!iommufd_backend_alloc_veventq(accel->viommu.iommufd,
+ accel->viommu.viommu_id,
+ IOMMU_VEVENTQ_TYPE_ARM_SMMUV3,
+ 1 << s->eventq.log2size, &veventq_id,
+ &veventq_fd, errp)) {
+ return false;
+ }
+
+ veventq = g_new(IOMMUFDVeventq, 1);
+ veventq->veventq_id = veventq_id;
+ veventq->veventq_fd = veventq_fd;
+ veventq->viommu = &accel->viommu;
+ accel->veventq = veventq;
+ return true;
+}
+
static bool
smmuv3_accel_alloc_viommu(SMMUv3State *s, HostIOMMUDeviceIOMMUFD *idev,
Error **errp)
@@ -421,14 +473,21 @@ smmuv3_accel_alloc_viommu(SMMUv3State *s, HostIOMMUDeviceIOMMUFD *idev,
goto free_abort_hwpt;
}
+ /* Allocate a vEVENTQ if guest has enabled event queue */
+ if (!smmuv3_accel_alloc_veventq(s, errp)) {
+ goto free_bypass_hwpt;
+ }
+
/* Attach a HWPT based on SMMUv3 GBPA.ABORT value */
hwpt_id = smmuv3_accel_gbpa_hwpt(s, accel);
if (!host_iommu_device_iommufd_attach_hwpt(idev, hwpt_id, errp)) {
- goto free_bypass_hwpt;
+ goto free_veventq;
}
s->s_accel = accel;
return true;
+free_veventq:
+ smmuv3_accel_free_veventq(accel);
free_bypass_hwpt:
iommufd_backend_free_id(idev->iommufd, accel->bypass_hwpt_id);
free_abort_hwpt:
@@ -537,6 +596,7 @@ static void smmuv3_accel_unset_iommu_device(PCIBus *bus, void *opaque,
trace_smmuv3_accel_unset_iommu_device(devfn, idev->devid);
if (QLIST_EMPTY(&accel->device_list)) {
+ smmuv3_accel_free_veventq(accel);
iommufd_backend_free_id(accel->viommu.iommufd, accel->bypass_hwpt_id);
iommufd_backend_free_id(accel->viommu.iommufd, accel->abort_hwpt_id);
iommufd_backend_free_id(accel->viommu.iommufd, accel->viommu.viommu_id);
diff --git a/hw/arm/smmuv3-accel.h b/hw/arm/smmuv3-accel.h
index 2f2904d86b..7b0f585769 100644
--- a/hw/arm/smmuv3-accel.h
+++ b/hw/arm/smmuv3-accel.h
@@ -20,6 +20,7 @@
*/
typedef struct SMMUv3AccelState {
IOMMUFDViommu viommu;
+ IOMMUFDVeventq *veventq;
uint32_t bypass_hwpt_id;
uint32_t abort_hwpt_id;
QLIST_HEAD(, SMMUv3AccelDevice) device_list;
@@ -48,6 +49,7 @@ bool smmuv3_accel_attach_gbpa_hwpt(SMMUv3State *s, Error **errp);
bool smmuv3_accel_issue_inv_cmd(SMMUv3State *s, void *cmd, SMMUDevice *sdev,
Error **errp);
void smmuv3_accel_idr_override(SMMUv3State *s);
+bool smmuv3_accel_alloc_veventq(SMMUv3State *s, Error **errp);
void smmuv3_accel_reset(SMMUv3State *s);
#else
static inline void smmuv3_accel_init(SMMUv3State *s)
@@ -78,6 +80,10 @@ smmuv3_accel_issue_inv_cmd(SMMUv3State *s, void *cmd, SMMUDevice *sdev,
static inline void smmuv3_accel_idr_override(SMMUv3State *s)
{
}
+static inline bool smmuv3_accel_alloc_veventq(SMMUv3State *s, Error **errp)
+{
+ return true;
+}
static inline void smmuv3_accel_reset(SMMUv3State *s)
{
}
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index 763f069a35..ac60ca0ce7 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -1602,6 +1602,10 @@ static MemTxResult smmu_writel(SMMUv3State *s, hwaddr offset,
s->cr0ack = data & ~SMMU_CR0_RESERVED;
/* in case the command queue has been enabled */
smmuv3_cmdq_consume(s, &local_err);
+ /* Allocate vEVENTQ if guest enables EventQ and vIOMMU is ready */
+ if (local_err == NULL) {
+ smmuv3_accel_alloc_veventq(s, &local_err);
+ }
break;
case A_CR1:
s->cr[1] = data;
--
2.43.0
* Re: [PATCH v2 2/4] hw/arm/smmuv3-accel: Allocate vEVENTQ for accelerated SMMUv3 devices
2025-12-04 9:22 ` [PATCH v2 2/4] hw/arm/smmuv3-accel: Allocate vEVENTQ for accelerated SMMUv3 devices Shameer Kolothum
@ 2025-12-09 16:08 ` Eric Auger
2025-12-10 15:46 ` Shameer Kolothum
0 siblings, 1 reply; 14+ messages in thread
From: Eric Auger @ 2025-12-09 16:08 UTC (permalink / raw)
To: Shameer Kolothum, qemu-arm, qemu-devel
Cc: peter.maydell, nicolinc, nathanc, mochs, jgg, jonathan.cameron,
zhangfei.gao, zhenzhong.duan, kjaju
On 12/4/25 10:22 AM, Shameer Kolothum wrote:
> From: Nicolin Chen <nicolinc@nvidia.com>
>
> When the guest enables the Event Queue and a vIOMMU is present, allocate a
> vEVENTQ object so that host-side events related to the vIOMMU can be
> received and propagated back to the guest.
>
> For cold-plugged devices using SMMUv3 acceleration, the vIOMMU is created
> before the guest boots. In this case, the vEVENTQ is allocated when the
> guest writes to SMMU_CR0 and sets EVENTQEN = 1.
>
> If no cold-plugged device exists at boot (i.e. no vIOMMU initially), the
> vEVENTQ is allocated when a vIOMMU is created, i.e. during the first
> device hot-plug.
>
> Event read and propagation will be added in a later patch.
>
> Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
> Signed-off-by: Shameer Kolothum <skolothumtho@nvidia.com>
> ---
> hw/arm/smmuv3-accel.c | 62 ++++++++++++++++++++++++++++++++++++++++++-
> hw/arm/smmuv3-accel.h | 6 +++++
> hw/arm/smmuv3.c | 4 +++
> 3 files changed, 71 insertions(+), 1 deletion(-)
>
> diff --git a/hw/arm/smmuv3-accel.c b/hw/arm/smmuv3-accel.c
> index dc0f61e841..74f0be3731 100644
> --- a/hw/arm/smmuv3-accel.c
> +++ b/hw/arm/smmuv3-accel.c
> @@ -378,6 +378,58 @@ bool smmuv3_accel_issue_inv_cmd(SMMUv3State *bs, void *cmd, SMMUDevice *sdev,
> sizeof(Cmd), &entry_num, cmd, errp);
> }
>
> +static void smmuv3_accel_free_veventq(SMMUv3AccelState *accel)
> +{
> + IOMMUFDVeventq *veventq = accel->veventq;
> +
> + if (!veventq) {
> + return;
> + }
> + iommufd_backend_free_id(accel->viommu.iommufd, veventq->veventq_id);
> + g_free(veventq);
> + accel->veventq = NULL;
> +}
> +
> +bool smmuv3_accel_alloc_veventq(SMMUv3State *s, Error **errp)
> +{
> + SMMUv3AccelState *accel = s->s_accel;
> + IOMMUFDVeventq *veventq;
> + uint32_t veventq_id;
> + uint32_t veventq_fd;
> +
> + if (!accel) {
> + return true;
> + }
> +
> + if (accel->veventq) {
> + return true;
> + }
> +
> + /*
> + * Check whether the Guest has enabled the Event Queue. The queue enabled
> + * means EVENTQ_BASE has been programmed with a valid base address and size.
> + * If it’s not yet configured, return and retry later.
The comment does not match the code nor the spec:
static inline bool smmuv3_eventq_enabled(SMMUv3State *s)
{
return FIELD_EX32(s->cr[0], CR0, EVENTQEN);
}
and in 7.2.1
Events are delivered into an Event queue if the queue is “writable”. The
Event queue is writable when all of the following are true:
• The queue is enabled, through SMMU_(*_)CR0.EVENTQEN for the Security
  state of the queue.
• The queue is not full (see section 7.4 Event queue overflow regarding
  overflow).
• No unacknowledged GERROR.EVENTQ_ABT_ERR condition exists for the queue.
> + */
> + if (!smmuv3_eventq_enabled(s)) {
> + return true;
> + }
> +
> + if (!iommufd_backend_alloc_veventq(accel->viommu.iommufd,
> + accel->viommu.viommu_id,
> + IOMMU_VEVENTQ_TYPE_ARM_SMMUV3,
> + 1 << s->eventq.log2size, &veventq_id,
> + &veventq_fd, errp)) {
> + return false;
> + }
> +
> + veventq = g_new(IOMMUFDVeventq, 1);
> + veventq->veventq_id = veventq_id;
> + veventq->veventq_fd = veventq_fd;
> + veventq->viommu = &accel->viommu;
> + accel->veventq = veventq;
> + return true;
> +}
> +
> static bool
> smmuv3_accel_alloc_viommu(SMMUv3State *s, HostIOMMUDeviceIOMMUFD *idev,
> Error **errp)
> @@ -421,14 +473,21 @@ smmuv3_accel_alloc_viommu(SMMUv3State *s, HostIOMMUDeviceIOMMUFD *idev,
> goto free_abort_hwpt;
> }
>
> + /* Allocate a vEVENTQ if guest has enabled event queue */
> + if (!smmuv3_accel_alloc_veventq(s, errp)) {
> + goto free_bypass_hwpt;
> + }
Then why don't we do it always upon the SMMU_(*_)CR0.EVENTQEN write?
same question for the deallocation?
> +
> /* Attach a HWPT based on SMMUv3 GBPA.ABORT value */
> hwpt_id = smmuv3_accel_gbpa_hwpt(s, accel);
> if (!host_iommu_device_iommufd_attach_hwpt(idev, hwpt_id, errp)) {
> - goto free_bypass_hwpt;
> + goto free_veventq;
> }
> s->s_accel = accel;
> return true;
>
> +free_veventq:
> + smmuv3_accel_free_veventq(accel);
> free_bypass_hwpt:
> iommufd_backend_free_id(idev->iommufd, accel->bypass_hwpt_id);
> free_abort_hwpt:
> @@ -537,6 +596,7 @@ static void smmuv3_accel_unset_iommu_device(PCIBus *bus, void *opaque,
> trace_smmuv3_accel_unset_iommu_device(devfn, idev->devid);
>
> if (QLIST_EMPTY(&accel->device_list)) {
> + smmuv3_accel_free_veventq(accel);
> iommufd_backend_free_id(accel->viommu.iommufd, accel->bypass_hwpt_id);
> iommufd_backend_free_id(accel->viommu.iommufd, accel->abort_hwpt_id);
> iommufd_backend_free_id(accel->viommu.iommufd, accel->viommu.viommu_id);
> diff --git a/hw/arm/smmuv3-accel.h b/hw/arm/smmuv3-accel.h
> index 2f2904d86b..7b0f585769 100644
> --- a/hw/arm/smmuv3-accel.h
> +++ b/hw/arm/smmuv3-accel.h
> @@ -20,6 +20,7 @@
> */
> typedef struct SMMUv3AccelState {
> IOMMUFDViommu viommu;
> + IOMMUFDVeventq *veventq;
> uint32_t bypass_hwpt_id;
> uint32_t abort_hwpt_id;
> QLIST_HEAD(, SMMUv3AccelDevice) device_list;
> @@ -48,6 +49,7 @@ bool smmuv3_accel_attach_gbpa_hwpt(SMMUv3State *s, Error **errp);
> bool smmuv3_accel_issue_inv_cmd(SMMUv3State *s, void *cmd, SMMUDevice *sdev,
> Error **errp);
> void smmuv3_accel_idr_override(SMMUv3State *s);
> +bool smmuv3_accel_alloc_veventq(SMMUv3State *s, Error **errp);
> void smmuv3_accel_reset(SMMUv3State *s);
> #else
> static inline void smmuv3_accel_init(SMMUv3State *s)
> @@ -78,6 +80,10 @@ smmuv3_accel_issue_inv_cmd(SMMUv3State *s, void *cmd, SMMUDevice *sdev,
> static inline void smmuv3_accel_idr_override(SMMUv3State *s)
> {
> }
> > +static inline bool smmuv3_accel_alloc_veventq(SMMUv3State *s, Error **errp)
> +{
> + return true;
> +}
> static inline void smmuv3_accel_reset(SMMUv3State *s)
> {
> }
> diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
> index 763f069a35..ac60ca0ce7 100644
> --- a/hw/arm/smmuv3.c
> +++ b/hw/arm/smmuv3.c
> @@ -1602,6 +1602,10 @@ static MemTxResult smmu_writel(SMMUv3State *s, hwaddr offset,
> s->cr0ack = data & ~SMMU_CR0_RESERVED;
> /* in case the command queue has been enabled */
> smmuv3_cmdq_consume(s, &local_err);
> + /* Allocate vEVENTQ if guest enables EventQ and vIOMMU is ready */
> + if (local_err == NULL) {
> + smmuv3_accel_alloc_veventq(s, &local_err);
> + }
> break;
> case A_CR1:
> s->cr[1] = data;
Thanks
Eric
* RE: [PATCH v2 2/4] hw/arm/smmuv3-accel: Allocate vEVENTQ for accelerated SMMUv3 devices
2025-12-09 16:08 ` Eric Auger
@ 2025-12-10 15:46 ` Shameer Kolothum
0 siblings, 0 replies; 14+ messages in thread
From: Shameer Kolothum @ 2025-12-10 15:46 UTC (permalink / raw)
To: eric.auger@redhat.com, qemu-arm@nongnu.org, qemu-devel@nongnu.org
Cc: peter.maydell@linaro.org, Nicolin Chen, Nathan Chen, Matt Ochs,
Jason Gunthorpe, jonathan.cameron@huawei.com,
zhangfei.gao@linaro.org, zhenzhong.duan@intel.com,
Krishnakant Jaju
> -----Original Message-----
> From: Eric Auger <eric.auger@redhat.com>
> Sent: 09 December 2025 16:09
> To: Shameer Kolothum <skolothumtho@nvidia.com>; qemu-
> arm@nongnu.org; qemu-devel@nongnu.org
> Cc: peter.maydell@linaro.org; Nicolin Chen <nicolinc@nvidia.com>; Nathan
> Chen <nathanc@nvidia.com>; Matt Ochs <mochs@nvidia.com>; Jason
> Gunthorpe <jgg@nvidia.com>; jonathan.cameron@huawei.com;
> zhangfei.gao@linaro.org; zhenzhong.duan@intel.com; Krishnakant Jaju
> <kjaju@nvidia.com>
> Subject: Re: [PATCH v2 2/4] hw/arm/smmuv3-accel: Allocate vEVENTQ for
> accelerated SMMUv3 devices
>
> On 12/4/25 10:22 AM, Shameer Kolothum wrote:
> > From: Nicolin Chen <nicolinc@nvidia.com>
> >
> > When the guest enables the Event Queue and a vIOMMU is present,
> allocate a
> > vEVENTQ object so that host-side events related to the vIOMMU can be
> > received and propagated back to the guest.
> >
> > For cold-plugged devices using SMMUv3 acceleration, the vIOMMU is
> created
> > before the guest boots. In this case, the vEVENTQ is allocated when the
> > guest writes to SMMU_CR0 and sets EVENTQEN = 1.
> >
> > If no cold-plugged device exists at boot (i.e. no vIOMMU initially), the
> > vEVENTQ is allocated when a vIOMMU is created, i.e. during the first
> > device hot-plug.
> >
> > Event read and propagation will be added in a later patch.
> >
> > Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
> > Signed-off-by: Shameer Kolothum <skolothumtho@nvidia.com>
> > ---
> > hw/arm/smmuv3-accel.c | 62
> ++++++++++++++++++++++++++++++++++++++++++-
> > hw/arm/smmuv3-accel.h | 6 +++++
> > hw/arm/smmuv3.c | 4 +++
> > 3 files changed, 71 insertions(+), 1 deletion(-)
> >
> > diff --git a/hw/arm/smmuv3-accel.c b/hw/arm/smmuv3-accel.c
> > index dc0f61e841..74f0be3731 100644
> > --- a/hw/arm/smmuv3-accel.c
> > +++ b/hw/arm/smmuv3-accel.c
> > @@ -378,6 +378,58 @@ bool smmuv3_accel_issue_inv_cmd(SMMUv3State
> *bs, void *cmd, SMMUDevice *sdev,
> > sizeof(Cmd), &entry_num, cmd, errp);
> > }
> >
> > +static void smmuv3_accel_free_veventq(SMMUv3AccelState *accel)
> > +{
> > + IOMMUFDVeventq *veventq = accel->veventq;
> > +
> > + if (!veventq) {
> > + return;
> > + }
> > + iommufd_backend_free_id(accel->viommu.iommufd, veventq-
> >veventq_id);
> > + g_free(veventq);
> > + accel->veventq = NULL;
> > +}
> > +
> > +bool smmuv3_accel_alloc_veventq(SMMUv3State *s, Error **errp)
> > +{
> > + SMMUv3AccelState *accel = s->s_accel;
> > + IOMMUFDVeventq *veventq;
> > + uint32_t veventq_id;
> > + uint32_t veventq_fd;
> > +
> > + if (!accel) {
> > + return true;
> > + }
> > +
> > + if (accel->veventq) {
> > + return true;
> > + }
> > +
> > + /*
> > + * Check whether the Guest has enabled the Event Queue. The queue
> enabled
> > + * means EVENTQ_BASE has been programmed with a valid base
> address and size.
> > + * If it’s not yet configured, return and retry later.
> The comment does not match the code nor the spec:
That comment was based on this section from the spec:
"
6.3.26 SMMU_CMDQ_BASE
The registers must be initialized in this
order:
1. Write SMMU_CMDQ_BASE to set the queue base and size.
2. Write initial values to SMMU_CMDQ_CONS and SMMU_CMDQ_PROD.
3. Enable the queue with an Update of the respective SMMU_CR0.CMDQEN to 1.
This also applies to the initialization of Event queue and PRI queue registers.
"
So I interpreted it as: once the event queue is enabled, we can safely
assume that the base address and size are valid. We only require a valid
size to go ahead and allocate a vEVENTQ.
>
> static inline bool smmuv3_eventq_enabled(SMMUv3State *s)
> {
> return FIELD_EX32(s->cr[0], CR0, EVENTQEN);
> }
>
> and in 7.2.1
>
> Events are delivered into an Event queue if the queue is “writable”. The
> Event queue is writable when all of the following are true:
> • The queue is enabled, through SMMU_(*_)CR0.EVENTQEN for the Security
>   state of the queue.
> • The queue is not full (see section 7.4 Event queue overflow regarding
>   overflow).
> • No unacknowledged GERROR.EVENTQ_ABT_ERR condition exists for the queue.
I think this is about delivering events to the Event queue, and
smmuv3_write_eventq() handles that when we try to propagate the event
received from the host.
Am I missing something here? Please let me know.
>
>
> > + */
> > + if (!smmuv3_eventq_enabled(s)) {
> > + return true;
> > + }
> > +
> > + if (!iommufd_backend_alloc_veventq(accel->viommu.iommufd,
> > + accel->viommu.viommu_id,
> > + IOMMU_VEVENTQ_TYPE_ARM_SMMUV3,
> > + 1 << s->eventq.log2size, &veventq_id,
> > + &veventq_fd, errp)) {
> > + return false;
> > + }
> > +
> > + veventq = g_new(IOMMUFDVeventq, 1);
> > + veventq->veventq_id = veventq_id;
> > + veventq->veventq_fd = veventq_fd;
> > + veventq->viommu = &accel->viommu;
> > + accel->veventq = veventq;
> > + return true;
> > +}
> > +
> > static bool
> > smmuv3_accel_alloc_viommu(SMMUv3State *s,
> HostIOMMUDeviceIOMMUFD *idev,
> > Error **errp)
> > @@ -421,14 +473,21 @@ smmuv3_accel_alloc_viommu(SMMUv3State *s,
> HostIOMMUDeviceIOMMUFD *idev,
> > goto free_abort_hwpt;
> > }
> >
> > + /* Allocate a vEVENTQ if guest has enabled event queue */
> > + if (!smmuv3_accel_alloc_veventq(s, errp)) {
> > + goto free_bypass_hwpt;
> > + }
> Then why don't we do it always upon the SMMU_(*_)CR0.EVENTQEN write?
> same question for the deallocation?
As mentioned in the commit log, if no cold-plugged device exists at boot time,
there won't be any vIOMMU to allocate the vEVENTQ against. Hence, we try at
both vIOMMU alloc time and at EVENTQEN write.
Thanks,
Shameer
> > +
> > /* Attach a HWPT based on SMMUv3 GBPA.ABORT value */
> > hwpt_id = smmuv3_accel_gbpa_hwpt(s, accel);
> > if (!host_iommu_device_iommufd_attach_hwpt(idev, hwpt_id, errp)) {
> > - goto free_bypass_hwpt;
> > + goto free_veventq;
> > }
> > s->s_accel = accel;
> > return true;
> >
> > +free_veventq:
> > + smmuv3_accel_free_veventq(accel);
> > free_bypass_hwpt:
> > iommufd_backend_free_id(idev->iommufd, accel->bypass_hwpt_id);
> > free_abort_hwpt:
> > @@ -537,6 +596,7 @@ static void
> smmuv3_accel_unset_iommu_device(PCIBus *bus, void *opaque,
> > trace_smmuv3_accel_unset_iommu_device(devfn, idev->devid);
> >
> > if (QLIST_EMPTY(&accel->device_list)) {
> > + smmuv3_accel_free_veventq(accel);
> > iommufd_backend_free_id(accel->viommu.iommufd, accel-
> >bypass_hwpt_id);
> > iommufd_backend_free_id(accel->viommu.iommufd, accel-
> >abort_hwpt_id);
> > iommufd_backend_free_id(accel->viommu.iommufd, accel-
> >viommu.viommu_id);
> > diff --git a/hw/arm/smmuv3-accel.h b/hw/arm/smmuv3-accel.h
> > index 2f2904d86b..7b0f585769 100644
> > --- a/hw/arm/smmuv3-accel.h
> > +++ b/hw/arm/smmuv3-accel.h
> > @@ -20,6 +20,7 @@
> > */
> > typedef struct SMMUv3AccelState {
> > IOMMUFDViommu viommu;
> > + IOMMUFDVeventq *veventq;
> > uint32_t bypass_hwpt_id;
> > uint32_t abort_hwpt_id;
> > QLIST_HEAD(, SMMUv3AccelDevice) device_list;
> > @@ -48,6 +49,7 @@ bool smmuv3_accel_attach_gbpa_hwpt(SMMUv3State
> *s, Error **errp);
> > bool smmuv3_accel_issue_inv_cmd(SMMUv3State *s, void *cmd,
> SMMUDevice *sdev,
> > Error **errp);
> > void smmuv3_accel_idr_override(SMMUv3State *s);
> > +bool smmuv3_accel_alloc_veventq(SMMUv3State *s, Error **errp);
> > void smmuv3_accel_reset(SMMUv3State *s);
> > #else
> > static inline void smmuv3_accel_init(SMMUv3State *s)
> > @@ -78,6 +80,10 @@ smmuv3_accel_issue_inv_cmd(SMMUv3State *s,
> void *cmd, SMMUDevice *sdev,
> > static inline void smmuv3_accel_idr_override(SMMUv3State *s)
> > {
> > }
> > +static inline bool smmuv3_accel_alloc_veventq(SMMUv3State *s, Error **errp)
> > +{
> > + return true;
> > +}
> > static inline void smmuv3_accel_reset(SMMUv3State *s)
> > {
> > }
> > diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
> > index 763f069a35..ac60ca0ce7 100644
> > --- a/hw/arm/smmuv3.c
> > +++ b/hw/arm/smmuv3.c
> > @@ -1602,6 +1602,10 @@ static MemTxResult smmu_writel(SMMUv3State
> *s, hwaddr offset,
> > s->cr0ack = data & ~SMMU_CR0_RESERVED;
> > /* in case the command queue has been enabled */
> > smmuv3_cmdq_consume(s, &local_err);
> > + /* Allocate vEVENTQ if guest enables EventQ and vIOMMU is ready */
> > + if (local_err == NULL) {
> > + smmuv3_accel_alloc_veventq(s, &local_err);
> > + }
> > break;
> > case A_CR1:
> > s->cr[1] = data;
> Thanks
>
> Eric
* [PATCH v2 3/4] hw/arm/smmuv3: Introduce a helper function for event propagation
2025-12-04 9:22 [PATCH v2 0/4] vEVENTQ support for accelerated SMMUv3 devices Shameer Kolothum
2025-12-04 9:22 ` [PATCH v2 1/4] backends/iommufd: Introduce iommufd_backend_alloc_veventq Shameer Kolothum
2025-12-04 9:22 ` [PATCH v2 2/4] hw/arm/smmuv3-accel: Allocate vEVENTQ for accelerated SMMUv3 devices Shameer Kolothum
@ 2025-12-04 9:22 ` Shameer Kolothum
2025-12-10 7:52 ` Eric Auger
2025-12-11 7:38 ` Nicolin Chen
2025-12-04 9:22 ` [PATCH v2 4/4] hw/arm/smmuv3-accel: Read and propagate host vIOMMU events Shameer Kolothum
2025-12-09 10:18 ` [PATCH v2 0/4] vEVENTQ support for accelerated SMMUv3 devices Eric Auger
4 siblings, 2 replies; 14+ messages in thread
From: Shameer Kolothum @ 2025-12-04 9:22 UTC (permalink / raw)
To: qemu-arm, qemu-devel
Cc: eric.auger, peter.maydell, nicolinc, nathanc, mochs, jgg,
jonathan.cameron, zhangfei.gao, zhenzhong.duan, kjaju
Factor out the code that propagates event records to the guest into a
helper function. The accelerated SMMUv3 path can use this to propagate
host events in a subsequent patch.
Since this helper may be called from outside the SMMUv3 core, take the
mutex before accessing the Event Queue.
No functional change intended.
Signed-off-by: Shameer Kolothum <skolothumtho@nvidia.com>
---
hw/arm/smmuv3-internal.h | 4 ++++
hw/arm/smmuv3.c | 21 +++++++++++++++------
hw/arm/trace-events | 2 +-
3 files changed, 20 insertions(+), 7 deletions(-)
diff --git a/hw/arm/smmuv3-internal.h b/hw/arm/smmuv3-internal.h
index e45aad27f7..ad7f5e6640 100644
--- a/hw/arm/smmuv3-internal.h
+++ b/hw/arm/smmuv3-internal.h
@@ -525,7 +525,11 @@ typedef struct SMMUEventInfo {
(x)->word[6] = (uint32_t)(addr & 0xffffffff); \
} while (0)
+#define EVT_GET_TYPE(x) extract32((x)->word[0], 0, 8)
+#define EVT_GET_SID(x) ((x)->word[1])
+
void smmuv3_record_event(SMMUv3State *s, SMMUEventInfo *event);
+void smmuv3_propagate_event(SMMUv3State *s, Evt *evt);
/* Configuration Data */
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index ac60ca0ce7..9b7b85fb49 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -168,10 +168,23 @@ static MemTxResult smmuv3_write_eventq(SMMUv3State *s, Evt *evt)
return MEMTX_OK;
}
+void smmuv3_propagate_event(SMMUv3State *s, Evt *evt)
+{
+ MemTxResult r;
+
+ trace_smmuv3_propagate_event(smmu_event_string(EVT_GET_TYPE(evt)),
+ EVT_GET_SID(evt));
+ qemu_mutex_lock(&s->mutex);
+ r = smmuv3_write_eventq(s, evt);
+ if (r != MEMTX_OK) {
+ smmuv3_trigger_irq(s, SMMU_IRQ_GERROR, R_GERROR_EVENTQ_ABT_ERR_MASK);
+ }
+ qemu_mutex_unlock(&s->mutex);
+}
+
void smmuv3_record_event(SMMUv3State *s, SMMUEventInfo *info)
{
Evt evt = {};
- MemTxResult r;
if (!smmuv3_eventq_enabled(s)) {
return;
@@ -251,11 +264,7 @@ void smmuv3_record_event(SMMUv3State *s, SMMUEventInfo *info)
g_assert_not_reached();
}
- trace_smmuv3_record_event(smmu_event_string(info->type), info->sid);
- r = smmuv3_write_eventq(s, &evt);
- if (r != MEMTX_OK) {
- smmuv3_trigger_irq(s, SMMU_IRQ_GERROR, R_GERROR_EVENTQ_ABT_ERR_MASK);
- }
+ smmuv3_propagate_event(s, &evt);
info->recorded = true;
}
diff --git a/hw/arm/trace-events b/hw/arm/trace-events
index 8135c0c734..3457536fb0 100644
--- a/hw/arm/trace-events
+++ b/hw/arm/trace-events
@@ -40,7 +40,7 @@ smmuv3_cmdq_opcode(const char *opcode) "<--- %s"
smmuv3_cmdq_consume_out(uint32_t prod, uint32_t cons, uint8_t prod_wrap, uint8_t cons_wrap) "prod:%d, cons:%d, prod_wrap:%d, cons_wrap:%d "
smmuv3_cmdq_consume_error(const char *cmd_name, uint8_t cmd_error) "Error on %s command execution: %d"
smmuv3_write_mmio(uint64_t addr, uint64_t val, unsigned size, uint32_t r) "addr: 0x%"PRIx64" val:0x%"PRIx64" size: 0x%x(%d)"
-smmuv3_record_event(const char *type, uint32_t sid) "%s sid=0x%x"
+smmuv3_propagate_event(const char *type, uint32_t sid) "%s sid=0x%x"
smmuv3_find_ste(uint16_t sid, uint32_t features, uint16_t sid_split) "sid=0x%x features:0x%x, sid_split:0x%x"
smmuv3_find_ste_2lvl(uint64_t strtab_base, uint64_t l1ptr, int l1_ste_offset, uint64_t l2ptr, int l2_ste_offset, int max_l2_ste) "strtab_base:0x%"PRIx64" l1ptr:0x%"PRIx64" l1_off:0x%x, l2ptr:0x%"PRIx64" l2_off:0x%x max_l2_ste:%d"
smmuv3_get_ste(uint64_t addr) "STE addr: 0x%"PRIx64
--
2.43.0
* Re: [PATCH v2 3/4] hw/arm/smmuv3: Introduce a helper function for event propagation
2025-12-04 9:22 ` [PATCH v2 3/4] hw/arm/smmuv3: Introduce a helper function for event propagation Shameer Kolothum
@ 2025-12-10 7:52 ` Eric Auger
2025-12-11 7:38 ` Nicolin Chen
1 sibling, 0 replies; 14+ messages in thread
From: Eric Auger @ 2025-12-10 7:52 UTC (permalink / raw)
To: Shameer Kolothum, qemu-arm, qemu-devel
Cc: peter.maydell, nicolinc, nathanc, mochs, jgg, jonathan.cameron,
zhangfei.gao, zhenzhong.duan, kjaju
On 12/4/25 10:22 AM, Shameer Kolothum wrote:
> Factor out the code that propagates event records to the guest into a
> helper function. The accelerated SMMUv3 path can use this to propagate
> host events in a subsequent patch.
>
> Since this helper may be called from outside the SMMUv3 core, take the
> mutex before accessing the Event Queue.
>
> No functional change intended.
>
> Signed-off-by: Shameer Kolothum <skolothumtho@nvidia.com>
> ---
> hw/arm/smmuv3-internal.h | 4 ++++
> hw/arm/smmuv3.c | 21 +++++++++++++++------
> hw/arm/trace-events | 2 +-
> 3 files changed, 20 insertions(+), 7 deletions(-)
>
> diff --git a/hw/arm/smmuv3-internal.h b/hw/arm/smmuv3-internal.h
> index e45aad27f7..ad7f5e6640 100644
> --- a/hw/arm/smmuv3-internal.h
> +++ b/hw/arm/smmuv3-internal.h
> @@ -525,7 +525,11 @@ typedef struct SMMUEventInfo {
> (x)->word[6] = (uint32_t)(addr & 0xffffffff); \
> } while (0)
>
> +#define EVT_GET_TYPE(x) extract32((x)->word[0], 0, 8)
> +#define EVT_GET_SID(x) ((x)->word[1])
> +
> void smmuv3_record_event(SMMUv3State *s, SMMUEventInfo *event);
> +void smmuv3_propagate_event(SMMUv3State *s, Evt *evt);
>
> /* Configuration Data */
>
> diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
> index ac60ca0ce7..9b7b85fb49 100644
> --- a/hw/arm/smmuv3.c
> +++ b/hw/arm/smmuv3.c
> @@ -168,10 +168,23 @@ static MemTxResult smmuv3_write_eventq(SMMUv3State *s, Evt *evt)
> return MEMTX_OK;
> }
>
> +void smmuv3_propagate_event(SMMUv3State *s, Evt *evt)
> +{
> + MemTxResult r;
> +
> + trace_smmuv3_propagate_event(smmu_event_string(EVT_GET_TYPE(evt)),
> + EVT_GET_SID(evt));
> + qemu_mutex_lock(&s->mutex);
> + r = smmuv3_write_eventq(s, evt);
> + if (r != MEMTX_OK) {
> + smmuv3_trigger_irq(s, SMMU_IRQ_GERROR, R_GERROR_EVENTQ_ABT_ERR_MASK);
> + }
> + qemu_mutex_unlock(&s->mutex);
> +}
> +
> void smmuv3_record_event(SMMUv3State *s, SMMUEventInfo *info)
> {
> Evt evt = {};
> - MemTxResult r;
>
> if (!smmuv3_eventq_enabled(s)) {
> return;
> @@ -251,11 +264,7 @@ void smmuv3_record_event(SMMUv3State *s, SMMUEventInfo *info)
> g_assert_not_reached();
> }
>
> - trace_smmuv3_record_event(smmu_event_string(info->type), info->sid);
> - r = smmuv3_write_eventq(s, &evt);
> - if (r != MEMTX_OK) {
> - smmuv3_trigger_irq(s, SMMU_IRQ_GERROR, R_GERROR_EVENTQ_ABT_ERR_MASK);
> - }
> + smmuv3_propagate_event(s, &evt);
> info->recorded = true;
> }
>
> diff --git a/hw/arm/trace-events b/hw/arm/trace-events
> index 8135c0c734..3457536fb0 100644
> --- a/hw/arm/trace-events
> +++ b/hw/arm/trace-events
> @@ -40,7 +40,7 @@ smmuv3_cmdq_opcode(const char *opcode) "<--- %s"
> smmuv3_cmdq_consume_out(uint32_t prod, uint32_t cons, uint8_t prod_wrap, uint8_t cons_wrap) "prod:%d, cons:%d, prod_wrap:%d, cons_wrap:%d "
> smmuv3_cmdq_consume_error(const char *cmd_name, uint8_t cmd_error) "Error on %s command execution: %d"
> smmuv3_write_mmio(uint64_t addr, uint64_t val, unsigned size, uint32_t r) "addr: 0x%"PRIx64" val:0x%"PRIx64" size: 0x%x(%d)"
> -smmuv3_record_event(const char *type, uint32_t sid) "%s sid=0x%x"
> +smmuv3_propagate_event(const char *type, uint32_t sid) "%s sid=0x%x"
> smmuv3_find_ste(uint16_t sid, uint32_t features, uint16_t sid_split) "sid=0x%x features:0x%x, sid_split:0x%x"
> smmuv3_find_ste_2lvl(uint64_t strtab_base, uint64_t l1ptr, int l1_ste_offset, uint64_t l2ptr, int l2_ste_offset, int max_l2_ste) "strtab_base:0x%"PRIx64" l1ptr:0x%"PRIx64" l1_off:0x%x, l2ptr:0x%"PRIx64" l2_off:0x%x max_l2_ste:%d"
> smmuv3_get_ste(uint64_t addr) "STE addr: 0x%"PRIx64
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Eric
* Re: [PATCH v2 3/4] hw/arm/smmuv3: Introduce a helper function for event propagation
2025-12-04 9:22 ` [PATCH v2 3/4] hw/arm/smmuv3: Introduce a helper function for event propagation Shameer Kolothum
2025-12-10 7:52 ` Eric Auger
@ 2025-12-11 7:38 ` Nicolin Chen
1 sibling, 0 replies; 14+ messages in thread
From: Nicolin Chen @ 2025-12-11 7:38 UTC (permalink / raw)
To: Shameer Kolothum
Cc: qemu-arm, qemu-devel, eric.auger, peter.maydell, nathanc, mochs,
jgg, jonathan.cameron, zhangfei.gao, zhenzhong.duan, kjaju
On Thu, Dec 04, 2025 at 09:22:40AM +0000, Shameer Kolothum wrote:
> Factor out the code that propagates event records to the guest into a
> helper function. The accelerated SMMUv3 path can use this to propagate
> host events in a subsequent patch.
>
> Since this helper may be called from outside the SMMUv3 core, take the
> mutex before accessing the Event Queue.
I wonder if it'd be better to use a different mutex so eventq and
cmdq wouldn't be mutually exclusive?
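Something like the below, perhaps. Just a sketch; the separate
eventq_mutex field is hypothetical and not part of this series:

void smmuv3_propagate_event(SMMUv3State *s, Evt *evt)
{
    trace_smmuv3_propagate_event(smmu_event_string(EVT_GET_TYPE(evt)),
                                 EVT_GET_SID(evt));
    /* Hypothetical dedicated lock, so eventq writes don't contend with
     * cmdq processing that runs under s->mutex. */
    WITH_QEMU_LOCK_GUARD(&s->eventq_mutex) {
        if (smmuv3_write_eventq(s, evt) != MEMTX_OK) {
            smmuv3_trigger_irq(s, SMMU_IRQ_GERROR,
                               R_GERROR_EVENTQ_ABT_ERR_MASK);
        }
    }
}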
Otherwise,
Reviewed-by: Nicolin Chen <nicolinc@nvidia.com>
* [PATCH v2 4/4] hw/arm/smmuv3-accel: Read and propagate host vIOMMU events
2025-12-04 9:22 [PATCH v2 0/4] vEVENTQ support for accelerated SMMUv3 devices Shameer Kolothum
` (2 preceding siblings ...)
2025-12-04 9:22 ` [PATCH v2 3/4] hw/arm/smmuv3: Introduce a helper function for event propagation Shameer Kolothum
@ 2025-12-04 9:22 ` Shameer Kolothum
2025-12-10 8:19 ` Eric Auger
2025-12-09 10:18 ` [PATCH v2 0/4] vEVENTQ support for accelerated SMMUv3 devices Eric Auger
4 siblings, 1 reply; 14+ messages in thread
From: Shameer Kolothum @ 2025-12-04 9:22 UTC (permalink / raw)
To: qemu-arm, qemu-devel
Cc: eric.auger, peter.maydell, nicolinc, nathanc, mochs, jgg,
jonathan.cameron, zhangfei.gao, zhenzhong.duan, kjaju
Install an event handler on the vEVENTQ fd to read and propagate host
generated vIOMMU events to the guest.
The handler runs in QEMU’s main loop, using a non-blocking fd registered
via qemu_set_fd_handler().
Signed-off-by: Shameer Kolothum <skolothumtho@nvidia.com>
---
hw/arm/smmuv3-accel.c | 58 +++++++++++++++++++++++++++++++++++++++++++
hw/arm/smmuv3-accel.h | 2 ++
2 files changed, 60 insertions(+)
diff --git a/hw/arm/smmuv3-accel.c b/hw/arm/smmuv3-accel.c
index 74f0be3731..d320c62b04 100644
--- a/hw/arm/smmuv3-accel.c
+++ b/hw/arm/smmuv3-accel.c
@@ -378,6 +378,58 @@ bool smmuv3_accel_issue_inv_cmd(SMMUv3State *bs, void *cmd, SMMUDevice *sdev,
sizeof(Cmd), &entry_num, cmd, errp);
}
+static void smmuv3_accel_event_read(void *opaque)
+{
+ SMMUv3State *s = opaque;
+ SMMUv3AccelState *accel = s->s_accel;
+ struct {
+ struct iommufd_vevent_header hdr;
+ struct iommu_vevent_arm_smmuv3 vevent;
+ } buf;
+ ssize_t readsz = sizeof(buf);
+ uint32_t last_seq = accel->last_event_seq;
+ ssize_t bytes;
+
+ bytes = read(accel->veventq->veventq_fd, &buf, readsz);
+ if (bytes <= 0) {
+ if (errno == EAGAIN || errno == EINTR) {
+ return;
+ }
+ error_report("vEVENTQ: read failed (%s)", strerror(errno));
+ return;
+ }
+
+ if (bytes < readsz) {
+ error_report("vEVENTQ: incomplete read (%zd/%zd bytes)", bytes, readsz);
+ return;
+ }
+
+ if (buf.hdr.flags & IOMMU_VEVENTQ_FLAG_LOST_EVENTS) {
+ error_report("vEVENTQ has lost events");
+ return;
+ }
+
+ /* Check sequence in hdr for lost events if any */
+ if (accel->event_start) {
+ uint32_t expected = (last_seq == INT_MAX) ? 0 : last_seq + 1;
+
+ if (buf.hdr.sequence != expected) {
+ uint32_t delta;
+
+ if (buf.hdr.sequence >= last_seq) {
+ delta = buf.hdr.sequence - last_seq;
+ } else {
+ /* Handle wraparound from INT_MAX */
+ delta = (INT_MAX - last_seq) + buf.hdr.sequence + 1;
+ }
+ error_report("vEVENTQ: detected lost %u event(s)", delta - 1);
+ }
+ }
+ accel->last_event_seq = buf.hdr.sequence;
+ accel->event_start = true;
+ smmuv3_propagate_event(s, (Evt *)&buf.vevent);
+}
+
static void smmuv3_accel_free_veventq(SMMUv3AccelState *accel)
{
IOMMUFDVeventq *veventq = accel->veventq;
@@ -385,6 +437,8 @@ static void smmuv3_accel_free_veventq(SMMUv3AccelState *accel)
if (!veventq) {
return;
}
+ qemu_set_fd_handler(veventq->veventq_fd, NULL, NULL, NULL);
+ close(veventq->veventq_fd);
iommufd_backend_free_id(accel->viommu.iommufd, veventq->veventq_id);
g_free(veventq);
accel->veventq = NULL;
@@ -427,6 +481,10 @@ bool smmuv3_accel_alloc_veventq(SMMUv3State *s, Error **errp)
veventq->veventq_fd = veventq_fd;
veventq->viommu = &accel->viommu;
accel->veventq = veventq;
+
+ /* Set up event handler for veventq fd */
+ fcntl(veventq_fd, F_SETFL, O_NONBLOCK);
+ qemu_set_fd_handler(veventq_fd, smmuv3_accel_event_read, NULL, s);
return true;
}
diff --git a/hw/arm/smmuv3-accel.h b/hw/arm/smmuv3-accel.h
index 7b0f585769..2c7c30d6a0 100644
--- a/hw/arm/smmuv3-accel.h
+++ b/hw/arm/smmuv3-accel.h
@@ -21,6 +21,8 @@
typedef struct SMMUv3AccelState {
IOMMUFDViommu viommu;
IOMMUFDVeventq *veventq;
+ uint32_t last_event_seq;
+ bool event_start;
uint32_t bypass_hwpt_id;
uint32_t abort_hwpt_id;
QLIST_HEAD(, SMMUv3AccelDevice) device_list;
--
2.43.0
* Re: [PATCH v2 4/4] hw/arm/smmuv3-accel: Read and propagate host vIOMMU events
2025-12-04 9:22 ` [PATCH v2 4/4] hw/arm/smmuv3-accel: Read and propagate host vIOMMU events Shameer Kolothum
@ 2025-12-10 8:19 ` Eric Auger
2025-12-10 16:19 ` Shameer Kolothum
0 siblings, 1 reply; 14+ messages in thread
From: Eric Auger @ 2025-12-10 8:19 UTC (permalink / raw)
To: Shameer Kolothum, qemu-arm, qemu-devel
Cc: peter.maydell, nicolinc, nathanc, mochs, jgg, jonathan.cameron,
zhangfei.gao, zhenzhong.duan, kjaju
Hi Shameer,
On 12/4/25 10:22 AM, Shameer Kolothum wrote:
> Install an event handler on the vEVENTQ fd to read and propagate host
> generated vIOMMU events to the guest.
>
> The handler runs in QEMU’s main loop, using a non-blocking fd registered
> via qemu_set_fd_handler().
is it future proof to do that in the main loop?
>
> Signed-off-by: Shameer Kolothum <skolothumtho@nvidia.com>
> ---
> hw/arm/smmuv3-accel.c | 58 +++++++++++++++++++++++++++++++++++++++++++
> hw/arm/smmuv3-accel.h | 2 ++
> 2 files changed, 60 insertions(+)
>
> diff --git a/hw/arm/smmuv3-accel.c b/hw/arm/smmuv3-accel.c
> index 74f0be3731..d320c62b04 100644
> --- a/hw/arm/smmuv3-accel.c
> +++ b/hw/arm/smmuv3-accel.c
> @@ -378,6 +378,58 @@ bool smmuv3_accel_issue_inv_cmd(SMMUv3State *bs, void *cmd, SMMUDevice *sdev,
> sizeof(Cmd), &entry_num, cmd, errp);
> }
>
> +static void smmuv3_accel_event_read(void *opaque)
So if I understand correctly this handler is called for every
header/data. There cannot be several of them to be consumed in the queue?
> +{
> + SMMUv3State *s = opaque;
> + SMMUv3AccelState *accel = s->s_accel;
> + struct {
> + struct iommufd_vevent_header hdr;
> + struct iommu_vevent_arm_smmuv3 vevent;
> + } buf;
> + ssize_t readsz = sizeof(buf);
> + uint32_t last_seq = accel->last_event_seq;
> + ssize_t bytes;
> +
> + bytes = read(accel->veventq->veventq_fd, &buf, readsz);
> + if (bytes <= 0) {
> + if (errno == EAGAIN || errno == EINTR) {
> + return;
> + }
> + error_report("vEVENTQ: read failed (%s)", strerror(errno));
nit you can use %m directly
> + return;
> + }
> +
> + if (bytes < readsz) {
> + error_report("vEVENTQ: incomplete read (%zd/%zd bytes)", bytes, readsz);
> + return;
> + }
> +
> + if (buf.hdr.flags & IOMMU_VEVENTQ_FLAG_LOST_EVENTS) {
> + error_report("vEVENTQ has lost events");
once you get a lost_event, don't you need to reset the last_event_seq,
event_start to be able to consume again?
> + return;
> + }
> +
> + /* Check sequence in hdr for lost events if any */
> + if (accel->event_start) {
> + uint32_t expected = (last_seq == INT_MAX) ? 0 : last_seq + 1;
> +
> + if (buf.hdr.sequence != expected) {
> + uint32_t delta;
> +
> + if (buf.hdr.sequence >= last_seq) {
> + delta = buf.hdr.sequence - last_seq;
> + } else {
> + /* Handle wraparound from INT_MAX */
> + delta = (INT_MAX - last_seq) + buf.hdr.sequence + 1;
> + }
> + error_report("vEVENTQ: detected lost %u event(s)", delta - 1);
do we want to report all losses or just warn once?
> + }
> + }
> + accel->last_event_seq = buf.hdr.sequence;
> + accel->event_start = true;
> + smmuv3_propagate_event(s, (Evt *)&buf.vevent);
> +}
> +
> static void smmuv3_accel_free_veventq(SMMUv3AccelState *accel)
> {
> IOMMUFDVeventq *veventq = accel->veventq;
> @@ -385,6 +437,8 @@ static void smmuv3_accel_free_veventq(SMMUv3AccelState *accel)
> if (!veventq) {
> return;
> }
> + qemu_set_fd_handler(veventq->veventq_fd, NULL, NULL, NULL);
> + close(veventq->veventq_fd);
> iommufd_backend_free_id(accel->viommu.iommufd, veventq->veventq_id);
> g_free(veventq);
> accel->veventq = NULL;
> @@ -427,6 +481,10 @@ bool smmuv3_accel_alloc_veventq(SMMUv3State *s, Error **errp)
> veventq->veventq_fd = veventq_fd;
> veventq->viommu = &accel->viommu;
> accel->veventq = veventq;
> +
> + /* Set up event handler for veventq fd */
> + fcntl(veventq_fd, F_SETFL, O_NONBLOCK);
> + qemu_set_fd_handler(veventq_fd, smmuv3_accel_event_read, NULL, s);
> return true;
> }
>
> diff --git a/hw/arm/smmuv3-accel.h b/hw/arm/smmuv3-accel.h
> index 7b0f585769..2c7c30d6a0 100644
> --- a/hw/arm/smmuv3-accel.h
> +++ b/hw/arm/smmuv3-accel.h
> @@ -21,6 +21,8 @@
> typedef struct SMMUv3AccelState {
> IOMMUFDViommu viommu;
> IOMMUFDVeventq *veventq;
> + uint32_t last_event_seq;
> + bool event_start;
> uint32_t bypass_hwpt_id;
> uint32_t abort_hwpt_id;
> QLIST_HEAD(, SMMUv3AccelDevice) device_list;
Thanks
Eric
* RE: [PATCH v2 4/4] hw/arm/smmuv3-accel: Read and propagate host vIOMMU events
2025-12-10 8:19 ` Eric Auger
@ 2025-12-10 16:19 ` Shameer Kolothum
0 siblings, 0 replies; 14+ messages in thread
From: Shameer Kolothum @ 2025-12-10 16:19 UTC (permalink / raw)
To: eric.auger@redhat.com, qemu-arm@nongnu.org, qemu-devel@nongnu.org
Cc: peter.maydell@linaro.org, Nicolin Chen, Nathan Chen, Matt Ochs,
Jason Gunthorpe, jonathan.cameron@huawei.com,
zhangfei.gao@linaro.org, zhenzhong.duan@intel.com,
Krishnakant Jaju
> -----Original Message-----
> From: Eric Auger <eric.auger@redhat.com>
> Sent: 10 December 2025 08:19
> To: Shameer Kolothum <skolothumtho@nvidia.com>; qemu-
> arm@nongnu.org; qemu-devel@nongnu.org
> Cc: peter.maydell@linaro.org; Nicolin Chen <nicolinc@nvidia.com>; Nathan
> Chen <nathanc@nvidia.com>; Matt Ochs <mochs@nvidia.com>; Jason
> Gunthorpe <jgg@nvidia.com>; jonathan.cameron@huawei.com;
> zhangfei.gao@linaro.org; zhenzhong.duan@intel.com; Krishnakant Jaju
> <kjaju@nvidia.com>
> Subject: Re: [PATCH v2 4/4] hw/arm/smmuv3-accel: Read and propagate host
> vIOMMU events
>
> Hi Shameer,
>
> On 12/4/25 10:22 AM, Shameer Kolothum wrote:
> > Install an event handler on the vEVENTQ fd to read and propagate host
> > generated vIOMMU events to the guest.
> >
> > The handler runs in QEMU’s main loop, using a non-blocking fd registered
> > via qemu_set_fd_handler().
> is it future proof to do that in the main loop?
The reason I opted for the main loop is that the vEVENTQ object is just for
event records other than page faults, and we are not expecting a large number
of them in a normal guest scenario. And if there are a lot of them, then
something is obviously going wrong.
Page fault events are handled in a separate FAULT_QUEUE object. See the
discussion here,
https://lore.kernel.org/qemu-devel/CABQgh9HzB9yCD_rYjGFX5ZC7RX2e2iVu_FZPU2Vm-kuf3jfm+w@mail.gmail.com/
The page fault handling might require a dedicated thread to speed things
up, I guess.
> >
> > Signed-off-by: Shameer Kolothum <skolothumtho@nvidia.com>
> > ---
> > hw/arm/smmuv3-accel.c | 58
> +++++++++++++++++++++++++++++++++++++++++++
> > hw/arm/smmuv3-accel.h | 2 ++
> > 2 files changed, 60 insertions(+)
> >
> > diff --git a/hw/arm/smmuv3-accel.c b/hw/arm/smmuv3-accel.c
> > index 74f0be3731..d320c62b04 100644
> > --- a/hw/arm/smmuv3-accel.c
> > +++ b/hw/arm/smmuv3-accel.c
> > @@ -378,6 +378,58 @@ bool smmuv3_accel_issue_inv_cmd(SMMUv3State
> *bs, void *cmd, SMMUDevice *sdev,
> > sizeof(Cmd), &entry_num, cmd, errp);
> > }
> >
> > +static void smmuv3_accel_event_read(void *opaque)
> So if I understand correctly this handler is called for every
> header/data. There cannot be several of them to be consumed in the queue?
That is my understanding of how aio_dispatch_handler() works. I will double
check.
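If it turns out several events can be pending at once, a drain loop along
these lines should cover it. A sketch, reusing the same buf layout as in
this patch:

static void smmuv3_accel_event_read(void *opaque)
{
    SMMUv3State *s = opaque;
    SMMUv3AccelState *accel = s->s_accel;
    struct {
        struct iommufd_vevent_header hdr;
        struct iommu_vevent_arm_smmuv3 vevent;
    } buf;
    ssize_t bytes;

    /* Drain everything currently queued on the non-blocking fd */
    for (;;) {
        bytes = read(accel->veventq->veventq_fd, &buf, sizeof(buf));
        if (bytes < 0) {
            if (errno == EINTR) {
                continue;               /* interrupted, retry the read */
            }
            if (errno != EAGAIN) {
                error_report("vEVENTQ: read failed: %m");
            }
            return;                     /* queue drained, or hard error */
        }
        if (bytes < (ssize_t)sizeof(buf)) {
            error_report("vEVENTQ: incomplete read (%zd bytes)", bytes);
            return;
        }
        /* sequence checking as in this patch, then propagate */
        smmuv3_propagate_event(s, (Evt *)&buf.vevent);
    }
}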
> > +{
> > + SMMUv3State *s = opaque;
> > + SMMUv3AccelState *accel = s->s_accel;
> > + struct {
> > + struct iommufd_vevent_header hdr;
> > + struct iommu_vevent_arm_smmuv3 vevent;
> > + } buf;
> > + ssize_t readsz = sizeof(buf);
> > + uint32_t last_seq = accel->last_event_seq;
> > + ssize_t bytes;
> > +
> > + bytes = read(accel->veventq->veventq_fd, &buf, readsz);
> > + if (bytes <= 0) {
> > + if (errno == EAGAIN || errno == EINTR) {
> > + return;
> > + }
> > + error_report("vEVENTQ: read failed (%s)", strerror(errno));
> nit you can use %m directly
Ok.
> > + return;
> > + }
> > +
> > + if (bytes < readsz) {
> > + error_report("vEVENTQ: incomplete read (%zd/%zd bytes)", bytes,
> readsz);
> > + return;
> > + }
> > +
> > + if (buf.hdr.flags & IOMMU_VEVENTQ_FLAG_LOST_EVENTS) {
> > + error_report("vEVENTQ has lost events");
> once you get a lost_event, don't you need to reset the last_event_seq,
> event_start to be able to consume again?
Yes. I think we need to do that.
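Something along these lines, perhaps (a sketch; warn_report() used to match
QEMU's warning convention):

    if (buf.hdr.flags & IOMMU_VEVENTQ_FLAG_LOST_EVENTS) {
        warn_report("vEVENTQ: kernel reported lost events");
        /* Restart sequence tracking from the next event we see */
        accel->event_start = false;
        return;
    }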
> > + return;
> > + }
> > +
> > + /* Check sequence in hdr for lost events if any */
> > + if (accel->event_start) {
> > + uint32_t expected = (last_seq == INT_MAX) ? 0 : last_seq + 1;
> > +
> > + if (buf.hdr.sequence != expected) {
> > + uint32_t delta;
> > +
> > + if (buf.hdr.sequence >= last_seq) {
> > + delta = buf.hdr.sequence - last_seq;
> > + } else {
> > + /* Handle wraparound from INT_MAX */
> > + delta = (INT_MAX - last_seq) + buf.hdr.sequence + 1;
> > + }
> > + error_report("vEVENTQ: detected lost %u event(s)", delta - 1);
> do we want to report all losses or just warn once?
Will change to warn once.
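Likely with warn_report_once(), so only the first detected loss is logged.
A sketch:

    if (buf.hdr.sequence != expected) {
        /* Report only the first loss to avoid flooding the log */
        warn_report_once("vEVENTQ: lost event(s) detected");
    }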
Thanks,
Shameer
* Re: [PATCH v2 0/4] vEVENTQ support for accelerated SMMUv3 devices
2025-12-04 9:22 [PATCH v2 0/4] vEVENTQ support for accelerated SMMUv3 devices Shameer Kolothum
` (3 preceding siblings ...)
2025-12-04 9:22 ` [PATCH v2 4/4] hw/arm/smmuv3-accel: Read and propagate host vIOMMU events Shameer Kolothum
@ 2025-12-09 10:18 ` Eric Auger
4 siblings, 0 replies; 14+ messages in thread
From: Eric Auger @ 2025-12-09 10:18 UTC (permalink / raw)
To: Shameer Kolothum, qemu-arm, qemu-devel
Cc: peter.maydell, nicolinc, nathanc, mochs, jgg, jonathan.cameron,
zhangfei.gao, zhenzhong.duan, kjaju
On 12/4/25 10:22 AM, Shameer Kolothum wrote:
> Hi,
>
> Changes from RFC v1:
> https://lore.kernel.org/qemu-devel/20251105154657.37386-1-skolothumtho@nvidia.com/
>
> -Rebased on v6[0] of the "accelerated SMMUv3" series
> -Addressed feedback on the RFC. Thanks!
> -Dropped the RFC tag as the accelerated series is now more mature, and once
> that lands, it makes sense to pick this up as well since it enables the
> delivery of SMMUv3 events to the guest.
>
> When accel=on is enabled for an SMMUv3 instance, the host hardware SMMUv3
> may generate Stage-1 (S1) fault or event notifications that are intended
> for the vIOMMU instance in userspace.
>
> This series adds QEMU support for receiving such host events through the
> vEVENTQ interface and propagating them to the guest. The implementation
> uses the vEVENTQ support provided by the IOMMUFD subsystem in the kernel.
>
> I have lightly this on a Grace platform using some hacks to generate fault
tested
Eric
> events. Further testing and feedback are welcome.
>
> Thanks,
> Shameer
>
> [0]https://lore.kernel.org/qemu-devel/20251120132213.56581-1-skolothumtho@nvidia.com/
>
> Nicolin Chen (2):
> backends/iommufd: Introduce iommufd_backend_alloc_veventq
> hw/arm/smmuv3-accel: Allocate vEVENTQ for accelerated SMMUv3 devices
>
> Shameer Kolothum (2):
> hw/arm/smmuv3: Introduce a helper function for event propagation
> hw/arm/smmuv3-accel: Read and propagate host vIOMMU events
>
> backends/iommufd.c | 31 ++++++++++
> backends/trace-events | 1 +
> hw/arm/smmuv3-accel.c | 120 ++++++++++++++++++++++++++++++++++++++-
> hw/arm/smmuv3-accel.h | 8 +++
> hw/arm/smmuv3-internal.h | 4 ++
> hw/arm/smmuv3.c | 25 ++++++--
> hw/arm/trace-events | 2 +-
> include/system/iommufd.h | 12 ++++
> 8 files changed, 195 insertions(+), 8 deletions(-)
>