[RFC PATCH] iommufd: Destroy vdevice on device unbind

linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

* [RFC PATCH] iommufd: Destroy vdevice on device unbind
@ 2025-06-10  6:51 Aneesh Kumar K.V (Arm)
  2025-06-12  8:05 ` Tian, Kevin
  0 siblings, 1 reply; 24+ messages in thread
From: Aneesh Kumar K.V (Arm) @ 2025-06-10  6:51 UTC (permalink / raw)
  To: iommu, linux-kernel
  Cc: Aneesh Kumar K.V (Arm), Jason Gunthorpe, Kevin Tian, Joerg Roedel,
	Will Deacon, Robin Murphy

The iommufd subsystem uses the VFIO character device descriptor to bind
a device file to an iommufd context via the VFIO_DEVICE_BIND_IOMMUFD
ioctl. This binding returns a bind_id, which is then used in subsequent
iommufd ioctls such as IOMMU_HWPT_ALLOC, IOMMU_VIOMMU_ALLOC, and
IOMMU_VDEVICE_ALLOC.

Among these, IOMMU_VDEVICE_ALLOC is special—the lifetime of a virtual
device (vdevice) should be tied to the bound state of its physical
device.

In the current kernel, there is no enforced dependency between
iommufd_device and iommufd_vdevice. This patch introduces such a
dependency: when the device is unbound, the associated vdevice is now
automatically destroyed.

Although there is already an implicit dependency—vdevices can only be
destroyed after the iommufd_device is unbound due to the VFIO cdev file
descriptor holding a reference to the iommu file descriptor—this patch
formalizes and extends that relationship. Now, the vdevice will be
explicitly destroyed when its corresponding device is unbound.

Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: Kevin Tian <kevin.tian@intel.com>
Cc: Joerg Roedel <joro@8bytes.org>
Cc: Will Deacon <will@kernel.org>
Cc: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Aneesh Kumar K.V (Arm) <aneesh.kumar@kernel.org>
---
 drivers/iommu/iommufd/device.c          |  8 ++++++++
 drivers/iommu/iommufd/iommufd_private.h |  5 +++++
 drivers/iommu/iommufd/main.c            |  6 ++++++
 drivers/iommu/iommufd/viommu.c          | 26 +++++++++++++++++++++++--
 4 files changed, 43 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c
index 86244403b532..a49b293bd516 100644
--- a/drivers/iommu/iommufd/device.c
+++ b/drivers/iommu/iommufd/device.c
@@ -221,6 +221,8 @@ struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx,
 	refcount_inc(&idev->obj.users);
 	/* igroup refcount moves into iommufd_device */
 	idev->igroup = igroup;
+	idev->vdev   = NULL;
+	mutex_init(&idev->lock);
 
 	/*
 	 * If the caller fails after this success it must call
@@ -282,6 +284,12 @@ EXPORT_SYMBOL_NS_GPL(iommufd_ctx_has_group, "IOMMUFD");
  */
 void iommufd_device_unbind(struct iommufd_device *idev)
 {
+	/* this will be unlocked while destroying the idev obj */
+	mutex_lock(&idev->lock);
+
+	if (idev->vdev)
+		/* extra refcount taken during vdevice alloc */
+		iommufd_object_destroy_user(idev->ictx, &idev->vdev->obj);
 	iommufd_object_destroy_user(idev->ictx, &idev->obj);
 }
 EXPORT_SYMBOL_NS_GPL(iommufd_device_unbind, "IOMMUFD");
diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index 9ccc83341f32..d85bd8b38751 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -425,6 +425,10 @@ struct iommufd_device {
 	/* always the physical device */
 	struct device *dev;
 	bool enforce_cache_coherency;
+	/* to protect the following members*/
+	struct mutex lock;
+	/* if there is a vdevice mapping the idev */
+	struct iommufd_vdevice *vdev;
 };
 
 static inline struct iommufd_device *
@@ -606,6 +610,7 @@ struct iommufd_vdevice {
 	struct iommufd_ctx *ictx;
 	struct iommufd_viommu *viommu;
 	struct device *dev;
+	struct iommufd_device *idev;
 	u64 id; /* per-vIOMMU virtual ID */
 };
 
diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c
index 3df468f64e7d..bf653d16138e 100644
--- a/drivers/iommu/iommufd/main.c
+++ b/drivers/iommu/iommufd/main.c
@@ -172,6 +172,12 @@ int iommufd_object_remove(struct iommufd_ctx *ictx,
 		ictx->vfio_ioas = NULL;
 	xa_unlock(&ictx->objects);
 
+	if (obj->type == IOMMUFD_OBJ_DEVICE) {
+		/* idevice should be freed with lock held */
+		struct iommufd_device *idev = container_of(obj, struct iommufd_device, obj);
+
+		mutex_unlock(&idev->lock);
+	}
 	/*
 	 * Since users is zero any positive users_shortterm must be racing
 	 * iommufd_put_object(), or we have a bug.
diff --git a/drivers/iommu/iommufd/viommu.c b/drivers/iommu/iommufd/viommu.c
index 01df2b985f02..17f189bc9e2c 100644
--- a/drivers/iommu/iommufd/viommu.c
+++ b/drivers/iommu/iommufd/viommu.c
@@ -84,15 +84,24 @@ int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd)
 	return rc;
 }
 
+/* This will be called from iommufd_device_unbind  */
 void iommufd_vdevice_destroy(struct iommufd_object *obj)
 {
 	struct iommufd_vdevice *vdev =
 		container_of(obj, struct iommufd_vdevice, obj);
 	struct iommufd_viommu *viommu = vdev->viommu;
+	struct iommufd_device *idev = vdev->idev;
+
+	/*
+	 * since we have an refcount on idev, it can't be freed.
+	 */
+	lockdep_assert_held(&idev->lock);
 
 	/* xa_cmpxchg is okay to fail if alloc failed xa_cmpxchg previously */
 	xa_cmpxchg(&viommu->vdevs, vdev->id, vdev, NULL, GFP_KERNEL);
 	refcount_dec(&viommu->obj.users);
+	idev->vdev = NULL;
+	refcount_dec(&idev->obj.users);
 	put_device(vdev->dev);
 }
 
@@ -124,10 +133,15 @@ int iommufd_vdevice_alloc_ioctl(struct iommufd_ucmd *ucmd)
 		goto out_put_idev;
 	}
 
+	mutex_lock(&idev->lock);
+	if (idev->vdev) {
+		rc = -EINVAL;
+		goto out_put_idev_unlock;
+	}
 	vdev = iommufd_object_alloc(ucmd->ictx, vdev, IOMMUFD_OBJ_VDEVICE);
 	if (IS_ERR(vdev)) {
 		rc = PTR_ERR(vdev);
-		goto out_put_idev;
+		goto out_put_idev_unlock;
 	}
 
 	vdev->id = virt_id;
@@ -147,10 +161,18 @@ int iommufd_vdevice_alloc_ioctl(struct iommufd_ucmd *ucmd)
 	if (rc)
 		goto out_abort;
 	iommufd_object_finalize(ucmd->ictx, &vdev->obj);
-	goto out_put_idev;
+	/* don't allow idev free without vdev free */
+	refcount_inc(&idev->obj.users);
+	vdev->idev = idev;
+	/* vdev lifecycle now managed by idev */
+	idev->vdev = vdev;
+	refcount_inc(&vdev->obj.users);
+	goto out_put_idev_unlock;
 
 out_abort:
 	iommufd_object_abort_and_destroy(ucmd->ictx, &vdev->obj);
+out_put_idev_unlock:
+	mutex_unlock(&idev->lock);
 out_put_idev:
 	iommufd_put_object(ucmd->ictx, &idev->obj);
 out_put_viommu:
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 24+ messages in thread

* RE: [RFC PATCH] iommufd: Destroy vdevice on device unbind
  2025-06-10  6:51 [RFC PATCH] iommufd: Destroy vdevice on device unbind Aneesh Kumar K.V (Arm)
@ 2025-06-12  8:05 ` Tian, Kevin
  2025-06-12 17:26   ` Jason Gunthorpe
  2025-06-16  4:46   ` Aneesh Kumar K.V
  0 siblings, 2 replies; 24+ messages in thread
From: Tian, Kevin @ 2025-06-12  8:05 UTC (permalink / raw)
  To: Aneesh Kumar K.V (Arm), iommu@lists.linux.dev,
	linux-kernel@vger.kernel.org
  Cc: Jason Gunthorpe, Joerg Roedel, Will Deacon, Robin Murphy

> From: Aneesh Kumar K.V (Arm) <aneesh.kumar@kernel.org>
> Sent: Tuesday, June 10, 2025 2:52 PM
> 
> The iommufd subsystem uses the VFIO character device descriptor to bind

it's the 'user' or 'vmm' initiating that operation, not the subsystem itself.

> a device file to an iommufd context via the VFIO_DEVICE_BIND_IOMMUFD
> ioctl. This binding returns a bind_id, which is then used in subsequent

there is no 'bind_id' in any uAPI. let's stick to 'devid'.

> iommufd ioctls such as IOMMU_HWPT_ALLOC, IOMMU_VIOMMU_ALLOC,
> and
> IOMMU_VDEVICE_ALLOC.
> 
> Among these, IOMMU_VDEVICE_ALLOC is special—the lifetime of a virtual
> device (vdevice) should be tied to the bound state of its physical
> device.
> 
> In the current kernel, there is no enforced dependency between
> iommufd_device and iommufd_vdevice. This patch introduces such a
> dependency: when the device is unbound, the associated vdevice is now
> automatically destroyed.

This went backward.

The initial v5 patch [1] from Nicolin was similar to what this
patch does. Jason explained [2] why it's unsafe to destroy "userspace
created" objects behind the scene. And a general rule in iommufd is
to not take long term references on kernel owned objects.

[1] https://lore.kernel.org/all/53025c827c44d68edb6469bfd940a8e8bc6147a5.1729897278.git.nicolinc@nvidia.com/
[2] https://lore.kernel.org/all/20241029184801.GW6956@nvidia.com/

> 
> Although there is already an implicit dependency—vdevices can only be
> destroyed after the iommufd_device is unbound due to the VFIO cdev file
> descriptor holding a reference to the iommu file descriptor—this patch

I didn't get this part. The user owns the life cycle of its created objects.
We don't have such restriction that a vdevice object can be destroyed
only after device is unbound...

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC PATCH] iommufd: Destroy vdevice on device unbind
  2025-06-12  8:05 ` Tian, Kevin
@ 2025-06-12 17:26   ` Jason Gunthorpe
  2025-06-13  7:31     ` Tian, Kevin
  2025-06-16  4:46   ` Aneesh Kumar K.V
  1 sibling, 1 reply; 24+ messages in thread
From: Jason Gunthorpe @ 2025-06-12 17:26 UTC (permalink / raw)
  To: Tian, Kevin
  Cc: Aneesh Kumar K.V (Arm), iommu@lists.linux.dev,
	linux-kernel@vger.kernel.org, Joerg Roedel, Will Deacon,
	Robin Murphy

On Thu, Jun 12, 2025 at 08:05:37AM +0000, Tian, Kevin wrote:
> The initial v5 patch [1] from Nicolin was similar to what this
> patch does. Jason explained [2] why it's unsafe to destroy "userspace
> created" objects behind the scene. And a general rule in iommufd is
> to not take long term references on kernel owned objects.
> 
> [1] https://lore.kernel.org/all/53025c827c44d68edb6469bfd940a8e8bc6147a5.1729897278.git.nicolinc@nvidia.com/
> [2] https://lore.kernel.org/all/20241029184801.GW6956@nvidia.com/

Yes, we have a problem here where we both can't let VFIO go away while
the vdevice is present nor can we let the vdevice be fully deleted.

At that point it wasn't such a big deal, but the new stuff seems to
make vdevice more complicated that it cannot out live the idevice.

Probably the answer is to tombstone the vdevice in the xarray so the
ID is still there and userspace can still destroy it while destroying
everything linked to it?

Jason

^ permalink raw reply	[flat|nested] 24+ messages in thread

* RE: [RFC PATCH] iommufd: Destroy vdevice on device unbind
  2025-06-12 17:26   ` Jason Gunthorpe
@ 2025-06-13  7:31     ` Tian, Kevin
  2025-06-13 12:42       ` Jason Gunthorpe
  2025-06-16  4:15       ` Aneesh Kumar K.V
  0 siblings, 2 replies; 24+ messages in thread
From: Tian, Kevin @ 2025-06-13  7:31 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Aneesh Kumar K.V (Arm), iommu@lists.linux.dev,
	linux-kernel@vger.kernel.org, Joerg Roedel, Will Deacon,
	Robin Murphy

> From: Jason Gunthorpe <jgg@ziepe.ca>
> Sent: Friday, June 13, 2025 1:27 AM
> 
> On Thu, Jun 12, 2025 at 08:05:37AM +0000, Tian, Kevin wrote:
> > The initial v5 patch [1] from Nicolin was similar to what this
> > patch does. Jason explained [2] why it's unsafe to destroy "userspace
> > created" objects behind the scene. And a general rule in iommufd is
> > to not take long term references on kernel owned objects.
> >
> > [1]
> https://lore.kernel.org/all/53025c827c44d68edb6469bfd940a8e8bc6147a5.1
> 729897278.git.nicolinc@nvidia.com/
> > [2] https://lore.kernel.org/all/20241029184801.GW6956@nvidia.com/
> 
> Yes, we have a problem here where we both can't let VFIO go away while
> the vdevice is present nor can we let the vdevice be fully deleted.
> 
> At that point it wasn't such a big deal, but the new stuff seems to
> make vdevice more complicated that it cannot out live the idevice.
> 
> Probably the answer is to tombstone the vdevice in the xarray so the
> ID is still there and userspace can still destroy it while destroying
> everything linked to it?
> 

yeah that seems to be the option if the said life-cycle dependency
cannot be removed...

conceptually it's still a bit unclean as the user needs to know that
the vdevice object is special after idevice is unbound i.e. it can only
be destroyed instead of supporting any other kind of operations.

hmm if the user needs to build certain knowledge anyway can we 
go one step further to state that the vdevice will be destroyed
automatically once its idevice is unbound so the user shouldn't
attempt to explicitly destroy it again after unbind?

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC PATCH] iommufd: Destroy vdevice on device unbind
  2025-06-13  7:31     ` Tian, Kevin
@ 2025-06-13 12:42       ` Jason Gunthorpe
  2025-06-16  3:56         ` Xu Yilun
                           ` (2 more replies)
  2025-06-16  4:15       ` Aneesh Kumar K.V
  1 sibling, 3 replies; 24+ messages in thread
From: Jason Gunthorpe @ 2025-06-13 12:42 UTC (permalink / raw)
  To: Tian, Kevin
  Cc: Aneesh Kumar K.V (Arm), iommu@lists.linux.dev,
	linux-kernel@vger.kernel.org, Joerg Roedel, Will Deacon,
	Robin Murphy

On Fri, Jun 13, 2025 at 07:31:48AM +0000, Tian, Kevin wrote:
> yeah that seems to be the option if the said life-cycle dependency
> cannot be removed...
> 
> conceptually it's still a bit unclean as the user needs to know that
> the vdevice object is special after idevice is unbound i.e. it can only
> be destroyed instead of supporting any other kind of operations.

I would say userspace is somewhat malfunctioning if it destroys vfio
before the vdevice. So the main aim here should be to contain the
resulting mess, but still expect userspace to destroy the vdevice
without a failure.

> hmm if the user needs to build certain knowledge anyway can we 
> go one step further to state that the vdevice will be destroyed
> automatically once its idevice is unbound so the user shouldn't
> attempt to explicitly destroy it again after unbind?

I would assume a malfunctioning userspace is probably going to destroy
the vdevice explicitly. If it had proper knowledge it wouldn't have
done this in the first place :)

Jason

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC PATCH] iommufd: Destroy vdevice on device unbind
  2025-06-13 12:42       ` Jason Gunthorpe
@ 2025-06-16  3:56         ` Xu Yilun
  2025-06-16  4:28         ` Aneesh Kumar K.V
  2025-06-16  5:19         ` Tian, Kevin
  2 siblings, 0 replies; 24+ messages in thread
From: Xu Yilun @ 2025-06-16  3:56 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Tian, Kevin, Aneesh Kumar K.V (Arm), iommu@lists.linux.dev,
	linux-kernel@vger.kernel.org, Joerg Roedel, Will Deacon,
	Robin Murphy

On Fri, Jun 13, 2025 at 09:42:02AM -0300, Jason Gunthorpe wrote:
> On Fri, Jun 13, 2025 at 07:31:48AM +0000, Tian, Kevin wrote:
> > yeah that seems to be the option if the said life-cycle dependency
> > cannot be removed...
> > 
> > conceptually it's still a bit unclean as the user needs to know that
> > the vdevice object is special after idevice is unbound i.e. it can only
> > be destroyed instead of supporting any other kind of operations.
> 
> I would say userspace is somewhat malfunctioning if it destroys vfio
> before the vdevice. So the main aim here should be to contain the
> resulting mess, but still expect userspace to destroy the vdevice
> without a failure.
> 
> > hmm if the user needs to build certain knowledge anyway can we 
> > go one step further to state that the vdevice will be destroyed
> > automatically once its idevice is unbound so the user shouldn't
> > attempt to explicitly destroy it again after unbind?

I think this statement is complex to user. I'd rather from user POV the
vdev_id's lifecycle is mostly the same as other iommufd objects. The
only difference should be, after idev unbind, vdev_id would be
disfunctional, IOCTLs against the vdev_id would fail except IOMMU_DESTROY.

My understanding of the tombstone idea is:

 - Before idev unbind, IOCTL(IOMMU_DESTROY, vdev_id) could free the vdev
   as normal.
 - On idev unbind, destroy the associate vdev (if exists), but still
   reserve the ictx->objects xa entry.
 - After idev unbind & on IOCTL(IOMMU_DESTROY, vdev_id), just free the
   xa entry if it's a tombstone.
 - On fops_release, free the tombstones if exist.

Thanks,
Yilun

> 
> I would assume a malfunctioning userspace is probably going to destroy
> the vdevice explicitly. If it had proper knowledge it wouldn't have
> done this in the first place :)
> 
> Jason
> 

^ permalink raw reply	[flat|nested] 24+ messages in thread

* RE: [RFC PATCH] iommufd: Destroy vdevice on device unbind
  2025-06-13  7:31     ` Tian, Kevin
  2025-06-13 12:42       ` Jason Gunthorpe
@ 2025-06-16  4:15       ` Aneesh Kumar K.V
  2025-06-16  5:21         ` Tian, Kevin
  1 sibling, 1 reply; 24+ messages in thread
From: Aneesh Kumar K.V @ 2025-06-16  4:15 UTC (permalink / raw)
  To: Tian, Kevin, Jason Gunthorpe
  Cc: iommu@lists.linux.dev, linux-kernel@vger.kernel.org, Joerg Roedel,
	Will Deacon, Robin Murphy

"Tian, Kevin" <kevin.tian@intel.com> writes:

>> From: Jason Gunthorpe <jgg@ziepe.ca>
>> Sent: Friday, June 13, 2025 1:27 AM
>> 
>> On Thu, Jun 12, 2025 at 08:05:37AM +0000, Tian, Kevin wrote:
>> > The initial v5 patch [1] from Nicolin was similar to what this
>> > patch does. Jason explained [2] why it's unsafe to destroy "userspace
>> > created" objects behind the scene. And a general rule in iommufd is
>> > to not take long term references on kernel owned objects.
>> >
>> > [1]
>> https://lore.kernel.org/all/53025c827c44d68edb6469bfd940a8e8bc6147a5.1
>> 729897278.git.nicolinc@nvidia.com/
>> > [2] https://lore.kernel.org/all/20241029184801.GW6956@nvidia.com/
>> 
>> Yes, we have a problem here where we both can't let VFIO go away while
>> the vdevice is present nor can we let the vdevice be fully deleted.
>> 
>> At that point it wasn't such a big deal, but the new stuff seems to
>> make vdevice more complicated that it cannot out live the idevice.
>> 
>> Probably the answer is to tombstone the vdevice in the xarray so the
>> ID is still there and userspace can still destroy it while destroying
>> everything linked to it?
>> 
>
> yeah that seems to be the option if the said life-cycle dependency
> cannot be removed...
>
> conceptually it's still a bit unclean as the user needs to know that
> the vdevice object is special after idevice is unbound i.e. it can only
> be destroyed instead of supporting any other kind of operations.
>
> hmm if the user needs to build certain knowledge anyway can we 
> go one step further to state that the vdevice will be destroyed
> automatically once its idevice is unbound so the user shouldn't
> attempt to explicitly destroy it again after unbind?

That is what this patch is does. ie, it automatically destroy the
vdevice while unbinding the idevice.

-aneesh

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC PATCH] iommufd: Destroy vdevice on device unbind
  2025-06-13 12:42       ` Jason Gunthorpe
  2025-06-16  3:56         ` Xu Yilun
@ 2025-06-16  4:28         ` Aneesh Kumar K.V
  2025-06-16  5:37           ` Tian, Kevin
  2025-06-16  5:19         ` Tian, Kevin
  2 siblings, 1 reply; 24+ messages in thread
From: Aneesh Kumar K.V @ 2025-06-16  4:28 UTC (permalink / raw)
  To: Jason Gunthorpe, Tian, Kevin
  Cc: iommu@lists.linux.dev, linux-kernel@vger.kernel.org, Joerg Roedel,
	Will Deacon, Robin Murphy

Jason Gunthorpe <jgg@ziepe.ca> writes:

> On Fri, Jun 13, 2025 at 07:31:48AM +0000, Tian, Kevin wrote:
>> yeah that seems to be the option if the said life-cycle dependency
>> cannot be removed...
>> 
>> conceptually it's still a bit unclean as the user needs to know that
>> the vdevice object is special after idevice is unbound i.e. it can only
>> be destroyed instead of supporting any other kind of operations.
>
> I would say userspace is somewhat malfunctioning if it destroys vfio
> before the vdevice. So the main aim here should be to contain the
> resulting mess, but still expect userspace to destroy the vdevice
> without a failure.
>

The destruction of the vdevice is triggered by the .release method of
the iommufd file operations(iommufd_fops_release())

and the destruction of the idevice is driven by the .release method of
vfio cdev.

    vfio_device_fops_release()
    → vfio_df_device_last_close()
    → vfio_iommufd_physical_unbind()
    → iommufd_device_unbind()

The vfio subsystem also retains a reference to the iommufd file descriptor through:

    vfio_df_ioctl_bind_iommufd()
    → iommufd_ctx_from_fd()

This reference prevents the vdevice from being destroyed while the idevice remains bound.

So, IIUC, the current destruction flow is: first destroy vfio, and then destroy the vdevice?

>
>> hmm if the user needs to build certain knowledge anyway can we 
>> go one step further to state that the vdevice will be destroyed
>> automatically once its idevice is unbound so the user shouldn't
>> attempt to explicitly destroy it again after unbind?
>
> I would assume a malfunctioning userspace is probably going to destroy
> the vdevice explicitly. If it had proper knowledge it wouldn't have
> done this in the first place :)
>
> Jason

-aneesh

^ permalink raw reply	[flat|nested] 24+ messages in thread

* RE: [RFC PATCH] iommufd: Destroy vdevice on device unbind
  2025-06-12  8:05 ` Tian, Kevin
  2025-06-12 17:26   ` Jason Gunthorpe
@ 2025-06-16  4:46   ` Aneesh Kumar K.V
  2025-06-16  5:48     ` Tian, Kevin
  2025-06-16  5:58     ` Xu Yilun
  1 sibling, 2 replies; 24+ messages in thread
From: Aneesh Kumar K.V @ 2025-06-16  4:46 UTC (permalink / raw)
  To: Tian, Kevin, iommu@lists.linux.dev, linux-kernel@vger.kernel.org
  Cc: Jason Gunthorpe, Joerg Roedel, Will Deacon, Robin Murphy

"Tian, Kevin" <kevin.tian@intel.com> writes:

>> From: Aneesh Kumar K.V (Arm) <aneesh.kumar@kernel.org>
>> Sent: Tuesday, June 10, 2025 2:52 PM
>> 
>> The iommufd subsystem uses the VFIO character device descriptor to bind
>
> it's the 'user' or 'vmm' initiating that operation, not the subsystem itself.
>
>> a device file to an iommufd context via the VFIO_DEVICE_BIND_IOMMUFD
>> ioctl. This binding returns a bind_id, which is then used in subsequent
>
> there is no 'bind_id' in any uAPI. let's stick to 'devid'.
>
>> iommufd ioctls such as IOMMU_HWPT_ALLOC, IOMMU_VIOMMU_ALLOC,
>> and
>> IOMMU_VDEVICE_ALLOC.
>> 
>> Among these, IOMMU_VDEVICE_ALLOC is special—the lifetime of a virtual
>> device (vdevice) should be tied to the bound state of its physical
>> device.
>> 
>> In the current kernel, there is no enforced dependency between
>> iommufd_device and iommufd_vdevice. This patch introduces such a
>> dependency: when the device is unbound, the associated vdevice is now
>> automatically destroyed.
>
> This went backward.
>
> The initial v5 patch [1] from Nicolin was similar to what this
> patch does. Jason explained [2] why it's unsafe to destroy "userspace
> created" objects behind the scene. And a general rule in iommufd is
> to not take long term references on kernel owned objects.
>
> [1] https://lore.kernel.org/all/53025c827c44d68edb6469bfd940a8e8bc6147a5.1729897278.git.nicolinc@nvidia.com/
> [2] https://lore.kernel.org/all/20241029184801.GW6956@nvidia.com/
>
>> 
>> Although there is already an implicit dependency—vdevices can only be
>> destroyed after the iommufd_device is unbound due to the VFIO cdev file
>> descriptor holding a reference to the iommu file descriptor—this patch
>
> I didn't get this part. The user owns the life cycle of its created objects.
> We don't have such restriction that a vdevice object can be destroyed
> only after device is unbound...
>

What I described above explains how the kernel manages destruction of
these objects.

The vfio subsystem retains a reference to the iommufd file descriptor
through:

    vfio_df_ioctl_bind_iommufd() → iommufd_ctx_from_fd()

This reference prevents the associated vdevice from being destroyed when
the iommufd file descriptor is closed, as long as the idevice remains
bound. While vdevice objects can still be explicitly destroyed using
iommufd_destroy(), with this patch, if the idevice is still bound,
attempting to destroy the vdevice will return EBUSY.

In effect, the change ensures that once a vdevice is created, its
lifecycle is tied to that of the idevice.

From other emails in this thread, I understand that beyond the the
changes in this patch, we want to prevent the reuse of the vdevice
object ID. We also need to ensure that any userspace-initiated operation
on this vdevice object ID results in an error after the idevice is
unbound?

-aneesh

^ permalink raw reply	[flat|nested] 24+ messages in thread

* RE: [RFC PATCH] iommufd: Destroy vdevice on device unbind
  2025-06-13 12:42       ` Jason Gunthorpe
  2025-06-16  3:56         ` Xu Yilun
  2025-06-16  4:28         ` Aneesh Kumar K.V
@ 2025-06-16  5:19         ` Tian, Kevin
  2 siblings, 0 replies; 24+ messages in thread
From: Tian, Kevin @ 2025-06-16  5:19 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Aneesh Kumar K.V (Arm), iommu@lists.linux.dev,
	linux-kernel@vger.kernel.org, Joerg Roedel, Will Deacon,
	Robin Murphy

> From: Jason Gunthorpe <jgg@ziepe.ca>
> Sent: Friday, June 13, 2025 8:42 PM
> 
> On Fri, Jun 13, 2025 at 07:31:48AM +0000, Tian, Kevin wrote:
> > yeah that seems to be the option if the said life-cycle dependency
> > cannot be removed...
> >
> > conceptually it's still a bit unclean as the user needs to know that
> > the vdevice object is special after idevice is unbound i.e. it can only
> > be destroyed instead of supporting any other kind of operations.
> 
> I would say userspace is somewhat malfunctioning if it destroys vfio
> before the vdevice. So the main aim here should be to contain the
> resulting mess, but still expect userspace to destroy the vdevice
> without a failure.
> 
> > hmm if the user needs to build certain knowledge anyway can we
> > go one step further to state that the vdevice will be destroyed
> > automatically once its idevice is unbound so the user shouldn't
> > attempt to explicitly destroy it again after unbind?
> 
> I would assume a malfunctioning userspace is probably going to destroy
> the vdevice explicitly. If it had proper knowledge it wouldn't have
> done this in the first place :)

Okay, that makes some sense, though I'm not sure how much gain
the tombstone state actually provides to a *malfunctioning' 
userspace (vs. a destroy after unbind gets -ENOENT or wrongly affects
another object reusing the vdevice id, but still with the mess contained
within the context)...

btw do you expect the tombstone state applied to only vdevice or
being a generic concept to any userspace-created objects? If the
former we may need pay attention to future new objects which 
create dependency on vdevice hence may need to inherit the
tombstone state. In that case certain mechanism might be required
to help detect miss of such state propagation.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* RE: [RFC PATCH] iommufd: Destroy vdevice on device unbind
  2025-06-16  4:15       ` Aneesh Kumar K.V
@ 2025-06-16  5:21         ` Tian, Kevin
  0 siblings, 0 replies; 24+ messages in thread
From: Tian, Kevin @ 2025-06-16  5:21 UTC (permalink / raw)
  To: Aneesh Kumar K.V, Jason Gunthorpe
  Cc: iommu@lists.linux.dev, linux-kernel@vger.kernel.org, Joerg Roedel,
	Will Deacon, Robin Murphy

> From: Aneesh Kumar K.V <aneesh.kumar@kernel.org>
> Sent: Monday, June 16, 2025 12:15 PM
> 
> "Tian, Kevin" <kevin.tian@intel.com> writes:
> 
> >> From: Jason Gunthorpe <jgg@ziepe.ca>
> >> Sent: Friday, June 13, 2025 1:27 AM
> >>
> >> On Thu, Jun 12, 2025 at 08:05:37AM +0000, Tian, Kevin wrote:
> >> > The initial v5 patch [1] from Nicolin was similar to what this
> >> > patch does. Jason explained [2] why it's unsafe to destroy "userspace
> >> > created" objects behind the scene. And a general rule in iommufd is
> >> > to not take long term references on kernel owned objects.
> >> >
> >> > [1]
> >>
> https://lore.kernel.org/all/53025c827c44d68edb6469bfd940a8e8bc6147a5.1
> >> 729897278.git.nicolinc@nvidia.com/
> >> > [2] https://lore.kernel.org/all/20241029184801.GW6956@nvidia.com/
> >>
> >> Yes, we have a problem here where we both can't let VFIO go away while
> >> the vdevice is present nor can we let the vdevice be fully deleted.
> >>
> >> At that point it wasn't such a big deal, but the new stuff seems to
> >> make vdevice more complicated that it cannot out live the idevice.
> >>
> >> Probably the answer is to tombstone the vdevice in the xarray so the
> >> ID is still there and userspace can still destroy it while destroying
> >> everything linked to it?
> >>
> >
> > yeah that seems to be the option if the said life-cycle dependency
> > cannot be removed...
> >
> > conceptually it's still a bit unclean as the user needs to know that
> > the vdevice object is special after idevice is unbound i.e. it can only
> > be destroyed instead of supporting any other kind of operations.
> >
> > hmm if the user needs to build certain knowledge anyway can we
> > go one step further to state that the vdevice will be destroyed
> > automatically once its idevice is unbound so the user shouldn't
> > attempt to explicitly destroy it again after unbind?
> 
> That is what this patch is does. ie, it automatically destroy the
> vdevice while unbinding the idevice.
> 

plus update of the uAPI description if going this route

^ permalink raw reply	[flat|nested] 24+ messages in thread

* RE: [RFC PATCH] iommufd: Destroy vdevice on device unbind
  2025-06-16  4:28         ` Aneesh Kumar K.V
@ 2025-06-16  5:37           ` Tian, Kevin
  2025-06-16 16:49             ` Jason Gunthorpe
  0 siblings, 1 reply; 24+ messages in thread
From: Tian, Kevin @ 2025-06-16  5:37 UTC (permalink / raw)
  To: Aneesh Kumar K.V, Jason Gunthorpe
  Cc: iommu@lists.linux.dev, linux-kernel@vger.kernel.org, Joerg Roedel,
	Will Deacon, Robin Murphy

> From: Aneesh Kumar K.V <aneesh.kumar@kernel.org>
> Sent: Monday, June 16, 2025 12:29 PM
> 
> Jason Gunthorpe <jgg@ziepe.ca> writes:
> 
> > On Fri, Jun 13, 2025 at 07:31:48AM +0000, Tian, Kevin wrote:
> >> yeah that seems to be the option if the said life-cycle dependency
> >> cannot be removed...
> >>
> >> conceptually it's still a bit unclean as the user needs to know that
> >> the vdevice object is special after idevice is unbound i.e. it can only
> >> be destroyed instead of supporting any other kind of operations.
> >
> > I would say userspace is somewhat malfunctioning if it destroys vfio
> > before the vdevice. So the main aim here should be to contain the
> > resulting mess, but still expect userspace to destroy the vdevice
> > without a failure.
> >
> 
> The destruction of the vdevice is triggered by the .release method of
> the iommufd file operations(iommufd_fops_release())

or by Ioctl(IOMMU_DESTROY)

> 
> and the destruction of the idevice is driven by the .release method of
> vfio cdev.
> 
>     vfio_device_fops_release()
>     → vfio_df_device_last_close()
>     → vfio_iommufd_physical_unbind()
>     → iommufd_device_unbind()
> 
> The vfio subsystem also retains a reference to the iommufd file descriptor
> through:
> 
>     vfio_df_ioctl_bind_iommufd()
>     → iommufd_ctx_from_fd()
> 
> This reference prevents the vdevice from being destroyed while the idevice
> remains bound.
> 
> So, IIUC, the current destruction flow is: first destroy vfio, and then destroy
> the vdevice?
> 

the above flow is about the userspace exiting abnormally then
the refcounting between FDs decides the destruction flow. At that
point the iommufd objects are not userspace-accessible.

the expected destruction flow from userspace is to IOMMU_DESTROY
the vdevice object before closing the vfio cdev fd which unbinds the
idevice.

now we are discussing how to handle a malfunction userspace which
violates that flow: let it be or add a tomestone state, after extending
unbind to destroy the vdevice...



^ permalink raw reply	[flat|nested] 24+ messages in thread

* RE: [RFC PATCH] iommufd: Destroy vdevice on device unbind
  2025-06-16  4:46   ` Aneesh Kumar K.V
@ 2025-06-16  5:48     ` Tian, Kevin
  2025-06-16  5:58     ` Xu Yilun
  1 sibling, 0 replies; 24+ messages in thread
From: Tian, Kevin @ 2025-06-16  5:48 UTC (permalink / raw)
  To: Aneesh Kumar K.V, iommu@lists.linux.dev,
	linux-kernel@vger.kernel.org
  Cc: Jason Gunthorpe, Joerg Roedel, Will Deacon, Robin Murphy

> From: Aneesh Kumar K.V <aneesh.kumar@kernel.org>
> Sent: Monday, June 16, 2025 12:46 PM
> 
> What I described above explains how the kernel manages destruction of
> these objects.
> 
> The vfio subsystem retains a reference to the iommufd file descriptor
> through:
> 
>     vfio_df_ioctl_bind_iommufd() → iommufd_ctx_from_fd()
> 
> This reference prevents the associated vdevice from being destroyed when
> the iommufd file descriptor is closed, as long as the idevice remains
> bound. While vdevice objects can still be explicitly destroyed using
> iommufd_destroy(), with this patch, if the idevice is still bound,
> attempting to destroy the vdevice will return EBUSY.

this is counter-intuitive. My impression from previous discussion was
that the vdevice lifecycle is gated by the idevice (i.e. cannot live
longer) once they are connected, but not that the vdevice can only
be destroyed indirectly via unbind (cannot live shorter)...

If this is a TIO specific requirement it's better to articulate it early.


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC PATCH] iommufd: Destroy vdevice on device unbind
  2025-06-16  4:46   ` Aneesh Kumar K.V
  2025-06-16  5:48     ` Tian, Kevin
@ 2025-06-16  5:58     ` Xu Yilun
  1 sibling, 0 replies; 24+ messages in thread
From: Xu Yilun @ 2025-06-16  5:58 UTC (permalink / raw)
  To: Aneesh Kumar K.V
  Cc: Tian, Kevin, iommu@lists.linux.dev, linux-kernel@vger.kernel.org,
	Jason Gunthorpe, Joerg Roedel, Will Deacon, Robin Murphy

> While vdevice objects can still be explicitly destroyed using
> iommufd_destroy(), with this patch, if the idevice is still bound,
> attempting to destroy the vdevice will return EBUSY.

I didn't get the necessity of this restriction. A vdev could not
outlive idev, but it could be ended earlier than idev, isn't it?

> 
> In effect, the change ensures that once a vdevice is created, its
> lifecycle is tied to that of the idevice.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC PATCH] iommufd: Destroy vdevice on device unbind
  2025-06-16  5:37           ` Tian, Kevin
@ 2025-06-16 16:49             ` Jason Gunthorpe
  2025-06-17  8:07               ` Aneesh Kumar K.V
  0 siblings, 1 reply; 24+ messages in thread
From: Jason Gunthorpe @ 2025-06-16 16:49 UTC (permalink / raw)
  To: Tian, Kevin
  Cc: Aneesh Kumar K.V, iommu@lists.linux.dev,
	linux-kernel@vger.kernel.org, Joerg Roedel, Will Deacon,
	Robin Murphy

On Mon, Jun 16, 2025 at 05:37:58AM +0000, Tian, Kevin wrote:

> the expected destruction flow from userspace is to IOMMU_DESTROY
> the vdevice object before closing the vfio cdev fd which unbinds the
> idevice.
> 
> now we are discussing how to handle a malfunction userspace which
> violates that flow: let it be or add a tomestone state, after extending
> unbind to destroy the vdevice...

Right, to be clear the concern is

close(vfio_cdev)
ioctl(DESTROY, vdevice_id);
close(iommufd)

Which is a possibile sequence for userspace/syzkaller to trigger.

My position has historically been that DESTROY should not destroy some
random unrelated object eg because a parallel thread did an allocation
and re-used the kernel deleted ID. ID's that belong to userspace have
to be retained right up until DESTROY.

Thus we've historically avoided creating scenarios where IDs owned by
userspace are destroyed by the kernel.

Given we can say the above is illegal use of the API we could leave
behind a tombstone in the xarray. The goal would be to prevent lookup
of the object (since it is destroyed) and prevent reallocation of the
ID.

For instance a simple thing would be to drop in XA_ZERO_ENTRY, this
will reserve the ID and fail all future operations. The userspace will
get a failure on DESTROY so they know they did something wrong. The fd
close will clean up the reserved ID.

We just need to make some decision about the above sequence.

Jason

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC PATCH] iommufd: Destroy vdevice on device unbind
  2025-06-16 16:49             ` Jason Gunthorpe
@ 2025-06-17  8:07               ` Aneesh Kumar K.V
  2025-06-17 18:34                 ` Jason Gunthorpe
  0 siblings, 1 reply; 24+ messages in thread
From: Aneesh Kumar K.V @ 2025-06-17  8:07 UTC (permalink / raw)
  To: Jason Gunthorpe, Tian, Kevin
  Cc: iommu@lists.linux.dev, linux-kernel@vger.kernel.org, Joerg Roedel,
	Will Deacon, Robin Murphy

Jason Gunthorpe <jgg@ziepe.ca> writes:

> On Mon, Jun 16, 2025 at 05:37:58AM +0000, Tian, Kevin wrote:
>
>> the expected destruction flow from userspace is to IOMMU_DESTROY
>> the vdevice object before closing the vfio cdev fd which unbinds the
>> idevice.
>> 
>> now we are discussing how to handle a malfunction userspace which
>> violates that flow: let it be or add a tomestone state, after extending
>> unbind to destroy the vdevice...
>
> Right, to be clear the concern is
>
> close(vfio_cdev)
> ioctl(DESTROY, vdevice_id);
> close(iommufd)
>
> Which is a possibile sequence for userspace/syzkaller to trigger.
>
> My position has historically been that DESTROY should not destroy some
> random unrelated object eg because a parallel thread did an allocation
> and re-used the kernel deleted ID. ID's that belong to userspace have
> to be retained right up until DESTROY.
>
> Thus we've historically avoided creating scenarios where IDs owned by
> userspace are destroyed by the kernel.
>
> Given we can say the above is illegal use of the API we could leave
> behind a tombstone in the xarray. The goal would be to prevent lookup
> of the object (since it is destroyed) and prevent reallocation of the
> ID.
>
> For instance a simple thing would be to drop in XA_ZERO_ENTRY, this
> will reserve the ID and fail all future operations. The userspace will
> get a failure on DESTROY so they know they did something wrong. The fd
> close will clean up the reserved ID.
>

How do we reclaim that object id for further reuse? 

is it that if there is a request for a iommufd_object_remove() with object
refcount > 1, we insert a XA_ZERO_ENTRY and convert that to NULL entry
on IOMMU_DESTROY?

Something like below

The below sequence will work with the changes as
 close(vfio_cdev) -> vdev destroy
 ioctl(DESTROY, vdevice_id); -> vdev object id reclaim
 close(iommufd)

 ioctl(DESTROY, vdevice_id); -> EBUSY
 close(vfio_cdev) -> vdev destroy
 close(iommufd)


diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c
index 7b3f82afd295..1f8e4fd0e240 100644
--- a/drivers/iommu/iommufd/device.c
+++ b/drivers/iommu/iommufd/device.c
@@ -289,7 +289,7 @@ void iommufd_device_unbind(struct iommufd_device *idev)
 
 	if (idev->vdev)
 		/* extra refcount taken during vdevice alloc */
-		iommufd_object_destroy_user(idev->ictx, &idev->vdev->obj);
+		__iommufd_object_destroy_user(idev->ictx, &idev->vdev->obj, REMOVE_OBJ_FORCE);
 	iommufd_object_destroy_user(idev->ictx, &idev->obj);
 }
 EXPORT_SYMBOL_NS_GPL(iommufd_device_unbind, "IOMMUFD");
diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index 9c4472df80c6..8c5fc0fe92ce 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -186,9 +186,9 @@ void iommufd_object_abort_and_destroy(struct iommufd_ctx *ictx,
 void iommufd_object_finalize(struct iommufd_ctx *ictx,
 			     struct iommufd_object *obj);
 
-enum {
-	REMOVE_WAIT_SHORTTERM = 1,
-};
+#define	REMOVE_WAIT_SHORTTERM	BIT(0)
+#define	REMOVE_OBJ_FORCE	BIT(1)
+
 int iommufd_object_remove(struct iommufd_ctx *ictx,
 			  struct iommufd_object *to_destroy, u32 id,
 			  unsigned int flags);
@@ -198,12 +198,13 @@ int iommufd_object_remove(struct iommufd_ctx *ictx,
  * point the caller has no shortterm_users reference and at least the xarray
  * will be holding one.
  */
-static inline void iommufd_object_destroy_user(struct iommufd_ctx *ictx,
-					       struct iommufd_object *obj)
+static inline void __iommufd_object_destroy_user(struct iommufd_ctx *ictx,
+						 struct iommufd_object *obj,
+						 unsigned int flags)
 {
 	int ret;
 
-	ret = iommufd_object_remove(ictx, obj, obj->id, REMOVE_WAIT_SHORTTERM);
+	ret = iommufd_object_remove(ictx, obj, obj->id, flags | REMOVE_WAIT_SHORTTERM);
 
 	/*
 	 * If there is a bug and we couldn't destroy the object then we did put
@@ -213,6 +214,12 @@ static inline void iommufd_object_destroy_user(struct iommufd_ctx *ictx,
 	WARN_ON(ret);
 }
 
+static inline void iommufd_object_destroy_user(struct iommufd_ctx *ictx,
+						 struct iommufd_object *obj)
+{
+	return __iommufd_object_destroy_user(ictx, obj, 0);
+}
+
 /*
  * The HWPT allocated by autodomains is used in possibly many devices and
  * is automatically destroyed when its refcount reaches zero.
diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c
index b7aa725e6b37..d27b61787a53 100644
--- a/drivers/iommu/iommufd/main.c
+++ b/drivers/iommu/iommufd/main.c
@@ -88,7 +88,8 @@ struct iommufd_object *iommufd_get_object(struct iommufd_ctx *ictx, u32 id,
 
 	xa_lock(&ictx->objects);
 	obj = xa_load(&ictx->objects, id);
-	if (!obj || (type != IOMMUFD_OBJ_ANY && obj->type != type) ||
+	if (!obj || xa_is_zero(obj) ||
+	    (type != IOMMUFD_OBJ_ANY && obj->type != type) ||
 	    !iommufd_lock_obj(obj))
 		obj = ERR_PTR(-ENOENT);
 	xa_unlock(&ictx->objects);
@@ -157,17 +158,27 @@ int iommufd_object_remove(struct iommufd_ctx *ictx,
 			ret = -ENOENT;
 			goto err_xa;
 		}
-	} else if (xa_is_zero(obj) || !obj) {
+	} else if (xa_is_zero(obj)) {
+		/* We can reclaim the id now. */
+		xas_store(&xas, NULL);
+		ret = 0;
+		goto err_xa;
+	} else if (!obj) {
 		ret = -ENOENT;
 		goto err_xa;
 	}
 
 	if (!refcount_dec_if_one(&obj->users)) {
-		ret = -EBUSY;
-		goto err_xa;
+		if (flags & REMOVE_OBJ_FORCE) {
+			xas_store(&xas, XA_ZERO_ENTRY);
+		} else {
+			ret = -EBUSY;
+			goto err_xa;
+		}
+	} else {
+		xas_store(&xas, NULL);
 	}
 
-	xas_store(&xas, NULL);
 	if (ictx->vfio_ioas == container_of(obj, struct iommufd_ioas, obj))
 		ictx->vfio_ioas = NULL;
 	xa_unlock(&ictx->objects);
diff --git a/drivers/iommu/iommufd/viommu.c b/drivers/iommu/iommufd/viommu.c
index d9749d9d2ffb..4fc74ada0e62 100644
--- a/drivers/iommu/iommufd/viommu.c
+++ b/drivers/iommu/iommufd/viommu.c
@@ -213,6 +213,8 @@ int iommufd_vdevice_alloc_ioctl(struct iommufd_ucmd *ucmd)
 	/* vdev lifecycle now managed by idev */
 	idev->vdev = vdev;
 	refcount_inc(&vdev->obj.users);
+	/* Increment refcount since userspace can hold the obj id */
+	refcount_inc(&vdev->obj.users);
 	goto out_put_idev_unlock;
 
 out_abort:

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* Re: [RFC PATCH] iommufd: Destroy vdevice on device unbind
  2025-06-17  8:07               ` Aneesh Kumar K.V
@ 2025-06-17 18:34                 ` Jason Gunthorpe
  2025-06-18  5:29                   ` Aneesh Kumar K.V
  0 siblings, 1 reply; 24+ messages in thread
From: Jason Gunthorpe @ 2025-06-17 18:34 UTC (permalink / raw)
  To: Aneesh Kumar K.V
  Cc: Tian, Kevin, iommu@lists.linux.dev, linux-kernel@vger.kernel.org,
	Joerg Roedel, Will Deacon, Robin Murphy

On Tue, Jun 17, 2025 at 01:37:04PM +0530, Aneesh Kumar K.V wrote:

> How do we reclaim that object id for further reuse? 

Maybe just don't? Userspace did something it shouldn't, it now leaked
8 bytes of kernel memory until the FD is closed.

> is it that if there is a request for a iommufd_object_remove() with object
> refcount > 1, we insert a XA_ZERO_ENTRY and convert that to NULL entry
> on IOMMU_DESTROY?

Oh no we can't do that, if the refcount is elevated that is a problem,
it means some thread somewhere is using that memory.

We can sleep and wait for shortterm_users to go to zero and if users
is still elevated then we are toast. WARN_ON and reatin it in the
xarray and hope for the best.

So the thread that will trigger the detruction needs to have a users
refcount of 1. Meaning users needs to be one while idle in the xarray,
and the idevice destruction will obtain a users=2 from its pointer
under some kind of lock.

> -enum {
> -	REMOVE_WAIT_SHORTTERM = 1,
> -};
> +#define	REMOVE_WAIT_SHORTTERM	BIT(0)
> +#define	REMOVE_OBJ_FORCE	BIT(1)

You can keep the enum for flags, but 'force' isn't the right name. I
would think it is 'tombstone'

> diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c
> index b7aa725e6b37..d27b61787a53 100644
> --- a/drivers/iommu/iommufd/main.c
> +++ b/drivers/iommu/iommufd/main.c
> @@ -88,7 +88,8 @@ struct iommufd_object *iommufd_get_object(struct iommufd_ctx *ictx, u32 id,
>  
>  	xa_lock(&ictx->objects);
>  	obj = xa_load(&ictx->objects, id);
> -	if (!obj || (type != IOMMUFD_OBJ_ANY && obj->type != type) ||
> +	if (!obj || xa_is_zero(obj) ||
> +	    (type != IOMMUFD_OBJ_ANY && obj->type != type) ||
>  	    !iommufd_lock_obj(obj))

xa_load can't return xa_is_zero(), xas_load() can

We already use XA_ZERO_ENTRY to hold an ID during allocation till
finalize.

I think you want to add a new API

iommufd_object_tombstone_user(idev->ictx, &idev->vdev->obj);

Which I think is the same as the existing
iommufd_object_destroy_user() except it uses tombstone..

The only thing tombstone does is:

	xas_store(&xas, (flags & REMOVE_OBJ_TOMBSTONE) ? XA_ZERO_ENTRY : NULL);

All the rest of the logic including the users and shorterm check would
be the same.

> --- a/drivers/iommu/iommufd/viommu.c
> +++ b/drivers/iommu/iommufd/viommu.c
> @@ -213,6 +213,8 @@ int iommufd_vdevice_alloc_ioctl(struct iommufd_ucmd *ucmd)
>  	/* vdev lifecycle now managed by idev */
>  	idev->vdev = vdev;
>  	refcount_inc(&vdev->obj.users);
> +	/* Increment refcount since userspace can hold the obj id */
> +	refcount_inc(&vdev->obj.users);
>  	goto out_put_idev_unlock;

I don't think this should change.. There should be no extra user refs
or userspace can't destroy it.

The pointer back from the idevice needs locking to protect it while a
refcount is obtained.

Jason

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC PATCH] iommufd: Destroy vdevice on device unbind
  2025-06-17 18:34                 ` Jason Gunthorpe
@ 2025-06-18  5:29                   ` Aneesh Kumar K.V
  2025-06-18 13:35                     ` Jason Gunthorpe
  0 siblings, 1 reply; 24+ messages in thread
From: Aneesh Kumar K.V @ 2025-06-18  5:29 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Tian, Kevin, iommu@lists.linux.dev, linux-kernel@vger.kernel.org,
	Joerg Roedel, Will Deacon, Robin Murphy

Jason Gunthorpe <jgg@ziepe.ca> writes:

> On Tue, Jun 17, 2025 at 01:37:04PM +0530, Aneesh Kumar K.V wrote:
>  
>> How do we reclaim that object id for further reuse? 
>
> Maybe just don't? Userspace did something it shouldn't, it now leaked
> 8 bytes of kernel memory until the FD is closed.
>

Between the two sequences below, Sequence 1 is the correct one, since we
want the object ID to be released after calling ioctl(DESTROY,
vdevice_id), right?

Sequence 1 (Correct):

close(vfio_cdev)          → triggers vdevice destruction  
ioctl(DESTROY, vdevice_id) → reclaims vdevice object ID  
close(iommufd)  

Sequence 2:

ioctl(DESTROY, vdevice_id) → returns EBUSY  
close(vfio_cdev)           → triggers vdevice destruction  
close(iommufd)  

Just to confirm: We agree that an EBUSY return from ioctl(DESTROY,
vdevice_id) is expected if it's called before vfio_df_unbind_iommufd(),
correct?

>> is it that if there is a request for a iommufd_object_remove() with object
>> refcount > 1, we insert a XA_ZERO_ENTRY and convert that to NULL entry
>> on IOMMU_DESTROY?
>
> Oh no we can't do that, if the refcount is elevated that is a problem,
> it means some thread somewhere is using that memory.
>
> We can sleep and wait for shortterm_users to go to zero and if users
> is still elevated then we are toast. WARN_ON and reatin it in the
> xarray and hope for the best.
>
> So the thread that will trigger the detruction needs to have a users
> refcount of 1. Meaning users needs to be one while idle in the xarray,
> and the idevice destruction will obtain a users=2 from its pointer
> under some kind of lock.
>
>> -enum {
>> -	REMOVE_WAIT_SHORTTERM = 1,
>> -};
>> +#define	REMOVE_WAIT_SHORTTERM	BIT(0)
>> +#define	REMOVE_OBJ_FORCE	BIT(1)
>
> You can keep the enum for flags, but 'force' isn't the right name. I
> would think it is 'tombstone'
>

These values represent bit flags (e.g., 1, 2, 4, ...), meaning they are
not mutually exclusive and can be combined using bitwise operations. As
such, using an enum—which is typically intended for mutually exclusive
values—is not appropriate in this case?

>
>> diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c
>> index b7aa725e6b37..d27b61787a53 100644
>> --- a/drivers/iommu/iommufd/main.c
>> +++ b/drivers/iommu/iommufd/main.c
>> @@ -88,7 +88,8 @@ struct iommufd_object *iommufd_get_object(struct iommufd_ctx *ictx, u32 id,
>>  
>>  	xa_lock(&ictx->objects);
>>  	obj = xa_load(&ictx->objects, id);
>> -	if (!obj || (type != IOMMUFD_OBJ_ANY && obj->type != type) ||
>> +	if (!obj || xa_is_zero(obj) ||
>> +	    (type != IOMMUFD_OBJ_ANY && obj->type != type) ||
>>  	    !iommufd_lock_obj(obj))
>
> xa_load can't return xa_is_zero(), xas_load() can
>
> We already use XA_ZERO_ENTRY to hold an ID during allocation till
> finalize.
>
> I think you want to add a new API
>
> iommufd_object_tombstone_user(idev->ictx, &idev->vdev->obj);
>
> Which I think is the same as the existing
> iommufd_object_destroy_user() except it uses tombstone..
>
> The only thing tombstone does is:
>
> 	xas_store(&xas, (flags & REMOVE_OBJ_TOMBSTONE) ? XA_ZERO_ENTRY : NULL);
>
> All the rest of the logic including the users and shorterm check would
> be the same.
>
>> --- a/drivers/iommu/iommufd/viommu.c
>> +++ b/drivers/iommu/iommufd/viommu.c
>> @@ -213,6 +213,8 @@ int iommufd_vdevice_alloc_ioctl(struct iommufd_ucmd *ucmd)
>>  	/* vdev lifecycle now managed by idev */
>>  	idev->vdev = vdev;
>>  	refcount_inc(&vdev->obj.users);
>> +	/* Increment refcount since userspace can hold the obj id */
>> +	refcount_inc(&vdev->obj.users);
>>  	goto out_put_idev_unlock;
>
> I don't think this should change.. There should be no extra user refs
> or userspace can't destroy it.
>
> The pointer back from the idevice needs locking to protect it while a
> refcount is obtained.
>
> Jason

-aneesh

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC PATCH] iommufd: Destroy vdevice on device unbind
  2025-06-18  5:29                   ` Aneesh Kumar K.V
@ 2025-06-18 13:35                     ` Jason Gunthorpe
  2025-06-18 14:52                       ` Aneesh Kumar K.V
  0 siblings, 1 reply; 24+ messages in thread
From: Jason Gunthorpe @ 2025-06-18 13:35 UTC (permalink / raw)
  To: Aneesh Kumar K.V
  Cc: Tian, Kevin, iommu@lists.linux.dev, linux-kernel@vger.kernel.org,
	Joerg Roedel, Will Deacon, Robin Murphy

On Wed, Jun 18, 2025 at 10:59:00AM +0530, Aneesh Kumar K.V wrote:
> Jason Gunthorpe <jgg@ziepe.ca> writes:
> 
> > On Tue, Jun 17, 2025 at 01:37:04PM +0530, Aneesh Kumar K.V wrote:
> >  
> >> How do we reclaim that object id for further reuse? 
> >
> > Maybe just don't? Userspace did something it shouldn't, it now leaked
> > 8 bytes of kernel memory until the FD is closed.
> >
> 
> Between the two sequences below, Sequence 1 is the correct one, since we
> want the object ID to be released after calling ioctl(DESTROY,
> vdevice_id), right?
> 
> Sequence 1 (Correct):
> 
> close(vfio_cdev)          → triggers vdevice destruction  
> ioctl(DESTROY, vdevice_id) → reclaims vdevice object ID  
> close(iommufd)  

This is wrong, the vdevice has outlived the idevice

> Sequence 2:
> 
> ioctl(DESTROY, vdevice_id) → returns EBUSY

It should not return EBUSY, it should destry the vdevice.

The full sequence I would expect a sane userspace to do is:

open(vfio_cdev)
ioctl(vfio_cdev, VFIO_DEVICE_BIND_IOMMUFD, iommufd)
ioctl(iommufd, IOMMUFD_CMD_VIOMMU_ALLOC)
ioctl(iommufd, IOMMUFD_CMD_VDEVICE_ALLOC)
ioctl(iommufd, IOMMUFD_CMD_VDEVICE_DEALLOC)
ioctl(iommufd, IOMMUFD_CMD_VIOMMU_DEALLOC)
close(vfio_cdev);

> > You can keep the enum for flags, but 'force' isn't the right name. I
> > would think it is 'tombstone'
> 
> These values represent bit flags (e.g., 1, 2, 4, ...), meaning they are
> not mutually exclusive and can be combined using bitwise operations. As
> such, using an enum—which is typically intended for mutually exclusive
> values—is not appropriate in this case?

Meh. It is common to use enum for holding flags too. iommufd does it in many
places. There are good reasons to prefer to use enum vs #define.

Jason

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC PATCH] iommufd: Destroy vdevice on device unbind
  2025-06-18 13:35                     ` Jason Gunthorpe
@ 2025-06-18 14:52                       ` Aneesh Kumar K.V
  2025-06-18 15:00                         ` Jason Gunthorpe
  0 siblings, 1 reply; 24+ messages in thread
From: Aneesh Kumar K.V @ 2025-06-18 14:52 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Tian, Kevin, iommu@lists.linux.dev, linux-kernel@vger.kernel.org,
	Joerg Roedel, Will Deacon, Robin Murphy

Jason Gunthorpe <jgg@ziepe.ca> writes:

> On Wed, Jun 18, 2025 at 10:59:00AM +0530, Aneesh Kumar K.V wrote:
>> Jason Gunthorpe <jgg@ziepe.ca> writes:
>> 
>> > On Tue, Jun 17, 2025 at 01:37:04PM +0530, Aneesh Kumar K.V wrote:
>> >  
>> >> How do we reclaim that object id for further reuse? 
>> >
>> > Maybe just don't? Userspace did something it shouldn't, it now leaked
>> > 8 bytes of kernel memory until the FD is closed.
>> >
>> 
>> Between the two sequences below, Sequence 1 is the correct one, since we
>> want the object ID to be released after calling ioctl(DESTROY,
>> vdevice_id), right?
>> 
>> Sequence 1 (Correct):
>> 
>> close(vfio_cdev)          → triggers vdevice destruction  
>> ioctl(DESTROY, vdevice_id) → reclaims vdevice object ID  
>> close(iommufd)  
>
> This is wrong, the vdevice has outlived the idevice
>
>> Sequence 2:
>> 
>> ioctl(DESTROY, vdevice_id) → returns EBUSY
>
> It should not return EBUSY, it should destry the vdevice.
>
> The full sequence I would expect a sane userspace to do is:
>
> open(vfio_cdev)
> ioctl(vfio_cdev, VFIO_DEVICE_BIND_IOMMUFD, iommufd)
> ioctl(iommufd, IOMMUFD_CMD_VIOMMU_ALLOC)
> ioctl(iommufd, IOMMUFD_CMD_VDEVICE_ALLOC)
> ioctl(iommufd, IOMMUFD_CMD_VDEVICE_DEALLOC)
> ioctl(iommufd, IOMMUFD_CMD_VIOMMU_DEALLOC)
> close(vfio_cdev);
>

And if the user does

open(vfio_cdev)
ioctl(vfio_cdev, VFIO_DEVICE_BIND_IOMMUFD, iommufd)
ioctl(iommufd, IOMMUFD_CMD_VIOMMU_ALLOC)
ioctl(iommufd, IOMMUFD_CMD_VDEVICE_ALLOC)
close(vfio_cdev);   -> this should call vdevice_destroy because idevice is getting destroyed here (we will put XA_ZERO_ENTRY here).
ioctl(iommufd, IOMMUFD_CMD_VDEVICE_DEALLOC) -> No error, we convert the XA_ZERO_ENTRY to NULL here?
ioctl(iommufd, IOMMUFD_CMD_VIOMMU_DEALLOC)

-aneesh

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC PATCH] iommufd: Destroy vdevice on device unbind
  2025-06-18 14:52                       ` Aneesh Kumar K.V
@ 2025-06-18 15:00                         ` Jason Gunthorpe
  2025-06-19  5:37                           ` Tian, Kevin
  2025-06-24 10:33                           ` Aneesh Kumar K.V
  0 siblings, 2 replies; 24+ messages in thread
From: Jason Gunthorpe @ 2025-06-18 15:00 UTC (permalink / raw)
  To: Aneesh Kumar K.V
  Cc: Tian, Kevin, iommu@lists.linux.dev, linux-kernel@vger.kernel.org,
	Joerg Roedel, Will Deacon, Robin Murphy

On Wed, Jun 18, 2025 at 08:22:44PM +0530, Aneesh Kumar K.V wrote:
> > The full sequence I would expect a sane userspace to do is:
> >
> > open(vfio_cdev)
> > ioctl(vfio_cdev, VFIO_DEVICE_BIND_IOMMUFD, iommufd)
> > ioctl(iommufd, IOMMUFD_CMD_VIOMMU_ALLOC)
> > ioctl(iommufd, IOMMUFD_CMD_VDEVICE_ALLOC)
> > ioctl(iommufd, IOMMUFD_CMD_VDEVICE_DEALLOC)
> > ioctl(iommufd, IOMMUFD_CMD_VIOMMU_DEALLOC)
> > close(vfio_cdev);
> >
> 
> And if the user does
> 
> open(vfio_cdev)
> ioctl(vfio_cdev, VFIO_DEVICE_BIND_IOMMUFD, iommufd)
> ioctl(iommufd, IOMMUFD_CMD_VIOMMU_ALLOC)
> ioctl(iommufd, IOMMUFD_CMD_VDEVICE_ALLOC)
> close(vfio_cdev);   -> this should call vdevice_destroy because idevice is getting destroyed here (we will put XA_ZERO_ENTRY here).

Yes, we have to destroy the vdevice internally here

> ioctl(iommufd, IOMMUFD_CMD_VDEVICE_DEALLOC) -> No error, we convert the XA_ZERO_ENTRY to NULL here?

This should probably fail since the user has done something wrong and
it would be the only way to realize it. The failure could clean up the
tombstone, or it could just leak I don't have a strong feeling.

If you leak then using XA_ZERO_ENTRY is easy, if you want to clean up
then you'd have to have a global static 'tombstone object' that sits
in the xarray.

Jason

^ permalink raw reply	[flat|nested] 24+ messages in thread

* RE: [RFC PATCH] iommufd: Destroy vdevice on device unbind
  2025-06-18 15:00                         ` Jason Gunthorpe
@ 2025-06-19  5:37                           ` Tian, Kevin
  2025-06-24 10:33                           ` Aneesh Kumar K.V
  1 sibling, 0 replies; 24+ messages in thread
From: Tian, Kevin @ 2025-06-19  5:37 UTC (permalink / raw)
  To: Jason Gunthorpe, Aneesh Kumar K.V
  Cc: iommu@lists.linux.dev, linux-kernel@vger.kernel.org, Joerg Roedel,
	Will Deacon, Robin Murphy

> From: Jason Gunthorpe <jgg@ziepe.ca>
> Sent: Wednesday, June 18, 2025 11:00 PM
> 
> On Wed, Jun 18, 2025 at 08:22:44PM +0530, Aneesh Kumar K.V wrote:
> > > The full sequence I would expect a sane userspace to do is:
> > >
> > > open(vfio_cdev)
> > > ioctl(vfio_cdev, VFIO_DEVICE_BIND_IOMMUFD, iommufd)
> > > ioctl(iommufd, IOMMUFD_CMD_VIOMMU_ALLOC)
> > > ioctl(iommufd, IOMMUFD_CMD_VDEVICE_ALLOC)
> > > ioctl(iommufd, IOMMUFD_CMD_VDEVICE_DEALLOC)
> > > ioctl(iommufd, IOMMUFD_CMD_VIOMMU_DEALLOC)
> > > close(vfio_cdev);
> > >
> >
> > And if the user does
> >
> > open(vfio_cdev)
> > ioctl(vfio_cdev, VFIO_DEVICE_BIND_IOMMUFD, iommufd)
> > ioctl(iommufd, IOMMUFD_CMD_VIOMMU_ALLOC)
> > ioctl(iommufd, IOMMUFD_CMD_VDEVICE_ALLOC)
> > close(vfio_cdev);   -> this should call vdevice_destroy because idevice is
> getting destroyed here (we will put XA_ZERO_ENTRY here).
> 
> Yes, we have to destroy the vdevice internally here
> 
> > ioctl(iommufd, IOMMUFD_CMD_VDEVICE_DEALLOC) -> No error, we
> convert the XA_ZERO_ENTRY to NULL here?
> 
> This should probably fail since the user has done something wrong and
> it would be the only way to realize it. The failure could clean up the
> tombstone, or it could just leak I don't have a strong feeling.
> 
> If you leak then using XA_ZERO_ENTRY is easy, if you want to clean up
> then you'd have to have a global static 'tombstone object' that sits
> in the xarray.
> 

I prefer to leaking as it's simpler. The only value of cleanup is to get
one free object ID, which matters little upon a failure condition.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC PATCH] iommufd: Destroy vdevice on device unbind
  2025-06-18 15:00                         ` Jason Gunthorpe
  2025-06-19  5:37                           ` Tian, Kevin
@ 2025-06-24 10:33                           ` Aneesh Kumar K.V
  2025-06-24 12:24                             ` Tian, Kevin
  1 sibling, 1 reply; 24+ messages in thread
From: Aneesh Kumar K.V @ 2025-06-24 10:33 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Tian, Kevin, iommu@lists.linux.dev, linux-kernel@vger.kernel.org,
	Joerg Roedel, Will Deacon, Robin Murphy

Jason Gunthorpe <jgg@ziepe.ca> writes:

> On Wed, Jun 18, 2025 at 08:22:44PM +0530, Aneesh Kumar K.V wrote:
>> > The full sequence I would expect a sane userspace to do is:
>> >
>> > open(vfio_cdev)
>> > ioctl(vfio_cdev, VFIO_DEVICE_BIND_IOMMUFD, iommufd)
>> > ioctl(iommufd, IOMMUFD_CMD_VIOMMU_ALLOC)
>> > ioctl(iommufd, IOMMUFD_CMD_VDEVICE_ALLOC)
>> > ioctl(iommufd, IOMMUFD_CMD_VDEVICE_DEALLOC)
>> > ioctl(iommufd, IOMMUFD_CMD_VIOMMU_DEALLOC)
>> > close(vfio_cdev);
>> >
>> 
>> And if the user does
>> 
>> open(vfio_cdev)
>> ioctl(vfio_cdev, VFIO_DEVICE_BIND_IOMMUFD, iommufd)
>> ioctl(iommufd, IOMMUFD_CMD_VIOMMU_ALLOC)
>> ioctl(iommufd, IOMMUFD_CMD_VDEVICE_ALLOC)
>> close(vfio_cdev);   -> this should call vdevice_destroy because idevice is getting destroyed here (we will put XA_ZERO_ENTRY here).
>
> Yes, we have to destroy the vdevice internally here
>
>> ioctl(iommufd, IOMMUFD_CMD_VDEVICE_DEALLOC) -> No error, we convert the XA_ZERO_ENTRY to NULL here?
>
> This should probably fail since the user has done something wrong and
> it would be the only way to realize it. The failure could clean up the
> tombstone, or it could just leak I don't have a strong feeling.
>
> If you leak then using XA_ZERO_ENTRY is easy, if you want to clean up
> then you'd have to have a global static 'tombstone object' that sits
> in the xarray.

I have a related question w.r.t iommufd_fops_release(). How is that safe
against a parallel iommufd_destroy()?

in iommufd_fops_release ()
xa_for_each(&ictx->objects, index, obj) {

                            ---> A parallel iommufd_destroy() can free the obj here ?

	if (!refcount_dec_if_one(&obj->users))
		continue;

}

-aneesh

^ permalink raw reply	[flat|nested] 24+ messages in thread

* RE: [RFC PATCH] iommufd: Destroy vdevice on device unbind
  2025-06-24 10:33                           ` Aneesh Kumar K.V
@ 2025-06-24 12:24                             ` Tian, Kevin
  0 siblings, 0 replies; 24+ messages in thread
From: Tian, Kevin @ 2025-06-24 12:24 UTC (permalink / raw)
  To: Aneesh Kumar K.V, Jason Gunthorpe
  Cc: iommu@lists.linux.dev, linux-kernel@vger.kernel.org, Joerg Roedel,
	Will Deacon, Robin Murphy

> From: Aneesh Kumar K.V <aneesh.kumar@kernel.org>
> Sent: Tuesday, June 24, 2025 6:33 PM
> Jason Gunthorpe <jgg@ziepe.ca> writes:
> 
> > On Wed, Jun 18, 2025 at 08:22:44PM +0530, Aneesh Kumar K.V wrote:
> >> > The full sequence I would expect a sane userspace to do is:
> >> >
> >> > open(vfio_cdev)
> >> > ioctl(vfio_cdev, VFIO_DEVICE_BIND_IOMMUFD, iommufd)
> >> > ioctl(iommufd, IOMMUFD_CMD_VIOMMU_ALLOC)
> >> > ioctl(iommufd, IOMMUFD_CMD_VDEVICE_ALLOC)
> >> > ioctl(iommufd, IOMMUFD_CMD_VDEVICE_DEALLOC)
> >> > ioctl(iommufd, IOMMUFD_CMD_VIOMMU_DEALLOC)
> >> > close(vfio_cdev);
> >> >
> >>
> >> And if the user does
> >>
> >> open(vfio_cdev)
> >> ioctl(vfio_cdev, VFIO_DEVICE_BIND_IOMMUFD, iommufd)
> >> ioctl(iommufd, IOMMUFD_CMD_VIOMMU_ALLOC)
> >> ioctl(iommufd, IOMMUFD_CMD_VDEVICE_ALLOC)
> >> close(vfio_cdev);   -> this should call vdevice_destroy because idevice is
> getting destroyed here (we will put XA_ZERO_ENTRY here).
> >
> > Yes, we have to destroy the vdevice internally here
> >
> >> ioctl(iommufd, IOMMUFD_CMD_VDEVICE_DEALLOC) -> No error, we
> convert the XA_ZERO_ENTRY to NULL here?
> >
> > This should probably fail since the user has done something wrong and
> > it would be the only way to realize it. The failure could clean up the
> > tombstone, or it could just leak I don't have a strong feeling.
> >
> > If you leak then using XA_ZERO_ENTRY is easy, if you want to clean up
> > then you'd have to have a global static 'tombstone object' that sits
> > in the xarray.
> 
> I have a related question w.r.t iommufd_fops_release(). How is that safe
> against a parallel iommufd_destroy()?
> 
> in iommufd_fops_release ()
> xa_for_each(&ictx->objects, index, obj) {
> 
>                             ---> A parallel iommufd_destroy() can free the obj here ?
> 
> 	if (!refcount_dec_if_one(&obj->users))
> 		continue;
> 
> }

@release is called when a fd is closed, while the fd cannot be closed
when there is ongoing ioctl on it. So when iommufd_fops_release()
is invoked there is no parallel user-initiated operation.

^ permalink raw reply	[flat|nested] 24+ messages in thread

end of thread, other threads:[~2025-06-24 12:24 UTC | newest]

Thread overview: 24+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-06-10  6:51 [RFC PATCH] iommufd: Destroy vdevice on device unbind Aneesh Kumar K.V (Arm)
2025-06-12  8:05 ` Tian, Kevin
2025-06-12 17:26   ` Jason Gunthorpe
2025-06-13  7:31     ` Tian, Kevin
2025-06-13 12:42       ` Jason Gunthorpe
2025-06-16  3:56         ` Xu Yilun
2025-06-16  4:28         ` Aneesh Kumar K.V
2025-06-16  5:37           ` Tian, Kevin
2025-06-16 16:49             ` Jason Gunthorpe
2025-06-17  8:07               ` Aneesh Kumar K.V
2025-06-17 18:34                 ` Jason Gunthorpe
2025-06-18  5:29                   ` Aneesh Kumar K.V
2025-06-18 13:35                     ` Jason Gunthorpe
2025-06-18 14:52                       ` Aneesh Kumar K.V
2025-06-18 15:00                         ` Jason Gunthorpe
2025-06-19  5:37                           ` Tian, Kevin
2025-06-24 10:33                           ` Aneesh Kumar K.V
2025-06-24 12:24                             ` Tian, Kevin
2025-06-16  5:19         ` Tian, Kevin
2025-06-16  4:15       ` Aneesh Kumar K.V
2025-06-16  5:21         ` Tian, Kevin
2025-06-16  4:46   ` Aneesh Kumar K.V
2025-06-16  5:48     ` Tian, Kevin
2025-06-16  5:58     ` Xu Yilun

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).