linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Xu Yilun <yilun.xu@linux.intel.com>
To: jgg@nvidia.com, jgg@ziepe.ca, kevin.tian@intel.com,
	will@kernel.org, aneesh.kumar@kernel.org
Cc: iommu@lists.linux.dev, linux-kernel@vger.kernel.org,
	joro@8bytes.org, robin.murphy@arm.com, shuah@kernel.org,
	nicolinc@nvidia.com, aik@amd.com, dan.j.williams@intel.com,
	baolu.lu@linux.intel.com, yilun.xu@linux.intel.com,
	yilun.xu@intel.com
Subject: [PATCH v2 3/4] iommufd: Destroy vdevice on idevice destroy
Date: Mon, 23 Jun 2025 17:49:45 +0800	[thread overview]
Message-ID: <20250623094946.1714996-4-yilun.xu@linux.intel.com> (raw)
In-Reply-To: <20250623094946.1714996-1-yilun.xu@linux.intel.com>

Destroy iommufd_vdevice(vdev) on iommufd_idevice(idev) destroy so that
vdev can't outlive idev.

iommufd_device(idev) represents the physical device bound to iommufd,
while the iommufd_vdevice(vdev) represents the virtual instance of the
physical device in the VM. The lifecycle of the vdev should not be
longer than idev. This doesn't cause real problem on existing use cases
cause vdev doesn't impact the physical device, only provides
virtualization information. But to extend vdev for Confidential
Computing(CC), there are needs to do secure configuration for the vdev,
e.g. TSM Bind/Unbind. These configurations should be rolled back on idev
destroy, or the external driver(VFIO) functionality may be impact.

Building the association between idev & vdev requires the two objects
pointing each other, but not referencing each other. This requires
proper locking. This is done by reviving some of Nicolin's patch [1].

There are 3 cases on idev destroy:

  1. vdev is already destroyed by userspace. No extra handling needed.
  2. vdev is still alive. Use iommufd_object_tombstone_user() to
     destroy vdev and tombstone the vdev ID.
  3. vdev is being destroyed by userspace. The vdev ID is already freed,
     but vdev destroy handler is not complete. The idev destroy handler
     should wait for vdev destroy completion.

[1]: https://lore.kernel.org/all/53025c827c44d68edb6469bfd940a8e8bc6147a5.1729897278.git.nicolinc@nvidia.com/

Original-by: Nicolin Chen <nicolinc@nvidia.com>
Original-by: Aneesh Kumar K.V (Arm) <aneesh.kumar@kernel.org>
Signed-off-by: Xu Yilun <yilun.xu@linux.intel.com>
---
 drivers/iommu/iommufd/device.c          | 43 +++++++++++++++++++++++++
 drivers/iommu/iommufd/iommufd_private.h | 11 +++++++
 drivers/iommu/iommufd/main.c            |  1 +
 drivers/iommu/iommufd/viommu.c          | 33 +++++++++++++++++--
 4 files changed, 85 insertions(+), 3 deletions(-)

diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c
index 86244403b532..908a94a27bab 100644
--- a/drivers/iommu/iommufd/device.c
+++ b/drivers/iommu/iommufd/device.c
@@ -137,11 +137,54 @@ static struct iommufd_group *iommufd_get_group(struct iommufd_ctx *ictx,
 	}
 }
 
+static void iommufd_device_remove_vdev(struct iommufd_device *idev)
+{
+	bool vdev_removing = false;
+
+	mutex_lock(&idev->igroup->lock);
+	if (idev->vdev) {
+		struct iommufd_vdevice *vdev;
+
+		vdev = iommufd_get_vdevice(idev->ictx, idev->vdev->obj.id);
+		if (IS_ERR(vdev)) {
+			/* vdev is removed from xarray, but is not destroyed/freed */
+			vdev_removing = true;
+			goto unlock;
+		}
+
+		/* Should never happen */
+		if (WARN_ON(vdev != idev->vdev)) {
+			idev->vdev = NULL;
+			iommufd_put_object(idev->ictx, &vdev->obj);
+			goto unlock;
+		}
+
+		/*
+		 * vdev cannot be destroyed after refcount_inc, safe to release
+		 * idev->igroup->lock and use idev->vdev afterward.
+		 */
+		refcount_inc(&idev->vdev->obj.users);
+		iommufd_put_object(idev->ictx, &idev->vdev->obj);
+	}
+unlock:
+	mutex_unlock(&idev->igroup->lock);
+
+	if (vdev_removing) {
+		if (!wait_event_timeout(idev->ictx->destroy_wait,
+					!idev->vdev,
+					msecs_to_jiffies(60000)))
+			pr_crit("Time out waiting for iommufd vdevice removed\n");
+	} else if (idev->vdev) {
+		iommufd_object_tombstone_user(idev->ictx, &idev->vdev->obj);
+	}
+}
+
 void iommufd_device_destroy(struct iommufd_object *obj)
 {
 	struct iommufd_device *idev =
 		container_of(obj, struct iommufd_device, obj);
 
+	iommufd_device_remove_vdev(idev);
 	iommu_device_release_dma_owner(idev->dev);
 	iommufd_put_group(idev->igroup);
 	if (!iommufd_selftest_is_mock_dev(idev->dev))
diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index fbc9ef78d81f..f58aa4439c53 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -446,6 +446,7 @@ struct iommufd_device {
 	/* always the physical device */
 	struct device *dev;
 	bool enforce_cache_coherency;
+	struct iommufd_vdevice *vdev;
 };
 
 static inline struct iommufd_device *
@@ -621,6 +622,7 @@ int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd);
 void iommufd_viommu_destroy(struct iommufd_object *obj);
 int iommufd_vdevice_alloc_ioctl(struct iommufd_ucmd *ucmd);
 void iommufd_vdevice_destroy(struct iommufd_object *obj);
+void iommufd_vdevice_abort(struct iommufd_object *obj);
 
 struct iommufd_vdevice {
 	struct iommufd_object obj;
@@ -628,8 +630,17 @@ struct iommufd_vdevice {
 	struct iommufd_viommu *viommu;
 	struct device *dev;
 	u64 id; /* per-vIOMMU virtual ID */
+	struct iommufd_device *idev;
 };
 
+static inline struct iommufd_vdevice *
+iommufd_get_vdevice(struct iommufd_ctx *ictx, u32 id)
+{
+	return container_of(iommufd_get_object(ictx, id,
+					       IOMMUFD_OBJ_VDEVICE),
+			    struct iommufd_vdevice, obj);
+}
+
 #ifdef CONFIG_IOMMUFD_TEST
 int iommufd_test(struct iommufd_ucmd *ucmd);
 void iommufd_selftest_destroy(struct iommufd_object *obj);
diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c
index 5fd75aba068b..3f955a123095 100644
--- a/drivers/iommu/iommufd/main.c
+++ b/drivers/iommu/iommufd/main.c
@@ -531,6 +531,7 @@ static const struct iommufd_object_ops iommufd_object_ops[] = {
 	},
 	[IOMMUFD_OBJ_VDEVICE] = {
 		.destroy = iommufd_vdevice_destroy,
+		.abort = iommufd_vdevice_abort,
 	},
 	[IOMMUFD_OBJ_VEVENTQ] = {
 		.destroy = iommufd_veventq_destroy,
diff --git a/drivers/iommu/iommufd/viommu.c b/drivers/iommu/iommufd/viommu.c
index 4577b88c8560..9b062e651ea5 100644
--- a/drivers/iommu/iommufd/viommu.c
+++ b/drivers/iommu/iommufd/viommu.c
@@ -84,16 +84,31 @@ int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd)
 	return rc;
 }
 
-void iommufd_vdevice_destroy(struct iommufd_object *obj)
+void iommufd_vdevice_abort(struct iommufd_object *obj)
 {
 	struct iommufd_vdevice *vdev =
 		container_of(obj, struct iommufd_vdevice, obj);
 	struct iommufd_viommu *viommu = vdev->viommu;
+	struct iommufd_device *idev = vdev->idev;
+
+	lockdep_assert_held(&idev->igroup->lock);
 
 	/* xa_cmpxchg is okay to fail if alloc failed xa_cmpxchg previously */
 	xa_cmpxchg(&viommu->vdevs, vdev->id, vdev, NULL, GFP_KERNEL);
 	refcount_dec(&viommu->obj.users);
 	put_device(vdev->dev);
+	idev->vdev = NULL;
+}
+
+void iommufd_vdevice_destroy(struct iommufd_object *obj)
+{
+	struct iommufd_vdevice *vdev =
+		container_of(obj, struct iommufd_vdevice, obj);
+
+	mutex_lock(&vdev->idev->igroup->lock);
+	iommufd_vdevice_abort(obj);
+	mutex_unlock(&vdev->idev->igroup->lock);
+	wake_up_interruptible_all(&vdev->ictx->destroy_wait);
 }
 
 int iommufd_vdevice_alloc_ioctl(struct iommufd_ucmd *ucmd)
@@ -124,18 +139,28 @@ int iommufd_vdevice_alloc_ioctl(struct iommufd_ucmd *ucmd)
 		goto out_put_idev;
 	}
 
+	mutex_lock(&idev->igroup->lock);
+	if (idev->vdev) {
+		rc = -EEXIST;
+		goto out_unlock_igroup;
+	}
+
 	vdev = iommufd_object_alloc(ucmd->ictx, vdev, IOMMUFD_OBJ_VDEVICE);
 	if (IS_ERR(vdev)) {
 		rc = PTR_ERR(vdev);
-		goto out_put_idev;
+		goto out_unlock_igroup;
 	}
 
+	/* vdev can't outlive idev, vdev->idev is always valid, need no refcnt */
+	vdev->idev = idev;
 	vdev->ictx = ucmd->ictx;
 	vdev->id = virt_id;
 	vdev->dev = idev->dev;
 	get_device(idev->dev);
 	vdev->viommu = viommu;
 	refcount_inc(&viommu->obj.users);
+	/* idev->vdev is protected by idev->igroup->lock, need no refcnt */
+	idev->vdev = vdev;
 
 	curr = xa_cmpxchg(&viommu->vdevs, virt_id, NULL, vdev, GFP_KERNEL);
 	if (curr) {
@@ -148,10 +173,12 @@ int iommufd_vdevice_alloc_ioctl(struct iommufd_ucmd *ucmd)
 	if (rc)
 		goto out_abort;
 	iommufd_object_finalize(ucmd->ictx, &vdev->obj);
-	goto out_put_idev;
+	goto out_unlock_igroup;
 
 out_abort:
 	iommufd_object_abort_and_destroy(ucmd->ictx, &vdev->obj);
+out_unlock_igroup:
+	mutex_unlock(&idev->igroup->lock);
 out_put_idev:
 	iommufd_put_object(ucmd->ictx, &idev->obj);
 out_put_viommu:
-- 
2.25.1


  parent reply	other threads:[~2025-06-23  9:57 UTC|newest]

Thread overview: 32+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-06-23  9:49 [PATCH v2 0/4] iommufd: Destroy vdevice on device unbind Xu Yilun
2025-06-23  9:49 ` [PATCH v2 1/4] iommufd: Add iommufd_object_tombstone_user() helper Xu Yilun
2025-06-24 13:35   ` Jason Gunthorpe
2025-06-25  7:24     ` Xu Yilun
2025-06-25  5:51   ` Aneesh Kumar K.V
2025-06-25  8:40     ` Xu Yilun
2025-06-23  9:49 ` [PATCH v2 2/4] iommufd/viommu: Fix the uninitialized iommufd_vdevice::ictx Xu Yilun
2025-06-24  3:24   ` Baolu Lu
2025-06-24  6:35     ` Xu Yilun
2025-06-23  9:49 ` Xu Yilun [this message]
2025-06-24  3:32   ` [PATCH v2 3/4] iommufd: Destroy vdevice on idevice destroy Baolu Lu
2025-06-24  8:11     ` Xu Yilun
2025-06-24  8:28       ` Tian, Kevin
2025-06-24  8:12     ` Tian, Kevin
2025-06-25  1:55       ` Baolu Lu
2025-06-24  6:47   ` Xu Yilun
2025-06-24  8:22   ` Tian, Kevin
2025-06-26  4:59     ` Xu Yilun
2025-06-24 14:53   ` Jason Gunthorpe
2025-06-24 23:57     ` Tian, Kevin
2025-06-25  1:36       ` Jason Gunthorpe
2025-06-25  2:11         ` Tian, Kevin
2025-06-25 12:33           ` Jason Gunthorpe
2025-06-25 10:06     ` Xu Yilun
2025-06-25 12:38       ` Jason Gunthorpe
2025-06-26  3:31         ` Xu Yilun
2025-06-26 14:36           ` Jason Gunthorpe
2025-06-25  6:40   ` Aneesh Kumar K.V
2025-06-25  9:38     ` Xu Yilun
2025-06-23  9:49 ` [PATCH v2 4/4] iommufd/selftest: Add coverage for vdevice tombstone Xu Yilun
2025-06-24 13:41   ` Jason Gunthorpe
2025-06-25  8:29     ` Xu Yilun

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250623094946.1714996-4-yilun.xu@linux.intel.com \
    --to=yilun.xu@linux.intel.com \
    --cc=aik@amd.com \
    --cc=aneesh.kumar@kernel.org \
    --cc=baolu.lu@linux.intel.com \
    --cc=dan.j.williams@intel.com \
    --cc=iommu@lists.linux.dev \
    --cc=jgg@nvidia.com \
    --cc=jgg@ziepe.ca \
    --cc=joro@8bytes.org \
    --cc=kevin.tian@intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=nicolinc@nvidia.com \
    --cc=robin.murphy@arm.com \
    --cc=shuah@kernel.org \
    --cc=will@kernel.org \
    --cc=yilun.xu@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).