All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH rc 0/2] Do not UAF during iommufd_put_object()
@ 2023-11-21  1:28 Jason Gunthorpe
  2023-11-21  1:28 ` [PATCH rc 1/2] iommufd: Add iommufd_ctx to iommufd_put_object() Jason Gunthorpe
  2023-11-21  1:28 ` [PATCH rc 2/2] iommufd: Do not UAF during iommufd_put_object() Jason Gunthorpe
  0 siblings, 2 replies; 6+ messages in thread
From: Jason Gunthorpe @ 2023-11-21  1:28 UTC (permalink / raw)
  To: iommu, Kevin Tian
  Cc: Lu Baolu, Eric Auger, Lixiao Yang, Matthew Rosato, Nicolin Chen,
	patches, syzbot+7574ebfe589049630608, syzbot+d31adfb277377ef8fcba,
	Yi Liu

The mixture of kernel and user space lifecycle objects continues to be
complicated inside iommufd. The obj->destroy_rwsem is used to bring order
to the kernel driver destruction sequence but it cannot be sequenced right
with the other refcounts so we end up possibly UAF'ing.

Fix it by using two refcounts and a wait queue to sequence the destruction
process.

Jason Gunthorpe (2):
  iommufd: Add iommufd_ctx to iommufd_put_object()
  iommufd: Do not UAF during iommufd_put_object()

 drivers/iommu/iommufd/device.c          |  14 +--
 drivers/iommu/iommufd/hw_pagetable.c    |   8 +-
 drivers/iommu/iommufd/ioas.c            |  14 +--
 drivers/iommu/iommufd/iommufd_private.h |  51 ++++++---
 drivers/iommu/iommufd/main.c            | 131 ++++++++++++------------
 drivers/iommu/iommufd/selftest.c        |  14 +--
 drivers/iommu/iommufd/vfio_compat.c     |  18 ++--
 7 files changed, 136 insertions(+), 114 deletions(-)


base-commit: 98b1cc82c4affc16f5598d4fa14b1858671b2263
-- 
2.42.0


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH rc 1/2] iommufd: Add iommufd_ctx to iommufd_put_object()
  2023-11-21  1:28 [PATCH rc 0/2] Do not UAF during iommufd_put_object() Jason Gunthorpe
@ 2023-11-21  1:28 ` Jason Gunthorpe
  2023-11-21  3:42   ` Tian, Kevin
  2023-11-21  1:28 ` [PATCH rc 2/2] iommufd: Do not UAF during iommufd_put_object() Jason Gunthorpe
  1 sibling, 1 reply; 6+ messages in thread
From: Jason Gunthorpe @ 2023-11-21  1:28 UTC (permalink / raw)
  To: iommu, Kevin Tian
  Cc: Lu Baolu, Eric Auger, Lixiao Yang, Matthew Rosato, Nicolin Chen,
	patches, syzbot+7574ebfe589049630608, syzbot+d31adfb277377ef8fcba,
	Yi Liu

Will be used in the next patch.

Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/iommu/iommufd/device.c          | 14 +++++++-------
 drivers/iommu/iommufd/hw_pagetable.c    |  8 ++++----
 drivers/iommu/iommufd/ioas.c            | 14 +++++++-------
 drivers/iommu/iommufd/iommufd_private.h |  3 ++-
 drivers/iommu/iommufd/selftest.c        | 14 +++++++-------
 drivers/iommu/iommufd/vfio_compat.c     | 18 +++++++++---------
 6 files changed, 36 insertions(+), 35 deletions(-)

diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c
index 59d3a07300d934..873630c111c1fc 100644
--- a/drivers/iommu/iommufd/device.c
+++ b/drivers/iommu/iommufd/device.c
@@ -571,7 +571,7 @@ iommufd_device_auto_get_domain(struct iommufd_device *idev,
 			continue;
 		destroy_hwpt = (*do_attach)(idev, hwpt);
 		if (IS_ERR(destroy_hwpt)) {
-			iommufd_put_object(&hwpt->obj);
+			iommufd_put_object(idev->ictx, &hwpt->obj);
 			/*
 			 * -EINVAL means the domain is incompatible with the
 			 * device. Other error codes should propagate to
@@ -583,7 +583,7 @@ iommufd_device_auto_get_domain(struct iommufd_device *idev,
 			goto out_unlock;
 		}
 		*pt_id = hwpt->obj.id;
-		iommufd_put_object(&hwpt->obj);
+		iommufd_put_object(idev->ictx, &hwpt->obj);
 		goto out_unlock;
 	}
 
@@ -652,7 +652,7 @@ static int iommufd_device_change_pt(struct iommufd_device *idev, u32 *pt_id,
 		destroy_hwpt = ERR_PTR(-EINVAL);
 		goto out_put_pt_obj;
 	}
-	iommufd_put_object(pt_obj);
+	iommufd_put_object(idev->ictx, pt_obj);
 
 	/* This destruction has to be after we unlock everything */
 	if (destroy_hwpt)
@@ -660,7 +660,7 @@ static int iommufd_device_change_pt(struct iommufd_device *idev, u32 *pt_id,
 	return 0;
 
 out_put_pt_obj:
-	iommufd_put_object(pt_obj);
+	iommufd_put_object(idev->ictx, pt_obj);
 	return PTR_ERR(destroy_hwpt);
 }
 
@@ -792,7 +792,7 @@ static int iommufd_access_change_ioas_id(struct iommufd_access *access, u32 id)
 	if (IS_ERR(ioas))
 		return PTR_ERR(ioas);
 	rc = iommufd_access_change_ioas(access, ioas);
-	iommufd_put_object(&ioas->obj);
+	iommufd_put_object(access->ictx, &ioas->obj);
 	return rc;
 }
 
@@ -941,7 +941,7 @@ void iommufd_access_notify_unmap(struct io_pagetable *iopt, unsigned long iova,
 
 		access->ops->unmap(access->data, iova, length);
 
-		iommufd_put_object(&access->obj);
+		iommufd_put_object(access->ictx, &access->obj);
 		xa_lock(&ioas->iopt.access_list);
 	}
 	xa_unlock(&ioas->iopt.access_list);
@@ -1243,6 +1243,6 @@ int iommufd_get_hw_info(struct iommufd_ucmd *ucmd)
 out_free:
 	kfree(data);
 out_put:
-	iommufd_put_object(&idev->obj);
+	iommufd_put_object(ucmd->ictx, &idev->obj);
 	return rc;
 }
diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c
index 2abbeafdbd22d8..cbb5df0a6c32f8 100644
--- a/drivers/iommu/iommufd/hw_pagetable.c
+++ b/drivers/iommu/iommufd/hw_pagetable.c
@@ -318,9 +318,9 @@ int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd)
 	if (ioas)
 		mutex_unlock(&ioas->mutex);
 out_put_pt:
-	iommufd_put_object(pt_obj);
+	iommufd_put_object(ucmd->ictx, pt_obj);
 out_put_idev:
-	iommufd_put_object(&idev->obj);
+	iommufd_put_object(ucmd->ictx, &idev->obj);
 	return rc;
 }
 
@@ -345,7 +345,7 @@ int iommufd_hwpt_set_dirty_tracking(struct iommufd_ucmd *ucmd)
 	rc = iopt_set_dirty_tracking(&ioas->iopt, hwpt_paging->common.domain,
 				     enable);
 
-	iommufd_put_object(&hwpt_paging->common.obj);
+	iommufd_put_object(ucmd->ictx, &hwpt_paging->common.obj);
 	return rc;
 }
 
@@ -368,6 +368,6 @@ int iommufd_hwpt_get_dirty_bitmap(struct iommufd_ucmd *ucmd)
 	rc = iopt_read_and_clear_dirty_data(
 		&ioas->iopt, hwpt_paging->common.domain, cmd->flags, cmd);
 
-	iommufd_put_object(&hwpt_paging->common.obj);
+	iommufd_put_object(ucmd->ictx, &hwpt_paging->common.obj);
 	return rc;
 }
diff --git a/drivers/iommu/iommufd/ioas.c b/drivers/iommu/iommufd/ioas.c
index d5624577f79f1b..74224827654815 100644
--- a/drivers/iommu/iommufd/ioas.c
+++ b/drivers/iommu/iommufd/ioas.c
@@ -105,7 +105,7 @@ int iommufd_ioas_iova_ranges(struct iommufd_ucmd *ucmd)
 		rc = -EMSGSIZE;
 out_put:
 	up_read(&ioas->iopt.iova_rwsem);
-	iommufd_put_object(&ioas->obj);
+	iommufd_put_object(ucmd->ictx, &ioas->obj);
 	return rc;
 }
 
@@ -175,7 +175,7 @@ int iommufd_ioas_allow_iovas(struct iommufd_ucmd *ucmd)
 		interval_tree_remove(node, &allowed_iova);
 		kfree(container_of(node, struct iopt_allowed, node));
 	}
-	iommufd_put_object(&ioas->obj);
+	iommufd_put_object(ucmd->ictx, &ioas->obj);
 	return rc;
 }
 
@@ -228,7 +228,7 @@ int iommufd_ioas_map(struct iommufd_ucmd *ucmd)
 	cmd->iova = iova;
 	rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
 out_put:
-	iommufd_put_object(&ioas->obj);
+	iommufd_put_object(ucmd->ictx, &ioas->obj);
 	return rc;
 }
 
@@ -258,7 +258,7 @@ int iommufd_ioas_copy(struct iommufd_ucmd *ucmd)
 		return PTR_ERR(src_ioas);
 	rc = iopt_get_pages(&src_ioas->iopt, cmd->src_iova, cmd->length,
 			    &pages_list);
-	iommufd_put_object(&src_ioas->obj);
+	iommufd_put_object(ucmd->ictx, &src_ioas->obj);
 	if (rc)
 		return rc;
 
@@ -279,7 +279,7 @@ int iommufd_ioas_copy(struct iommufd_ucmd *ucmd)
 	cmd->dst_iova = iova;
 	rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
 out_put_dst:
-	iommufd_put_object(&dst_ioas->obj);
+	iommufd_put_object(ucmd->ictx, &dst_ioas->obj);
 out_pages:
 	iopt_free_pages_list(&pages_list);
 	return rc;
@@ -315,7 +315,7 @@ int iommufd_ioas_unmap(struct iommufd_ucmd *ucmd)
 	rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
 
 out_put:
-	iommufd_put_object(&ioas->obj);
+	iommufd_put_object(ucmd->ictx, &ioas->obj);
 	return rc;
 }
 
@@ -393,6 +393,6 @@ int iommufd_ioas_option(struct iommufd_ucmd *ucmd)
 		rc = -EOPNOTSUPP;
 	}
 
-	iommufd_put_object(&ioas->obj);
+	iommufd_put_object(ucmd->ictx, &ioas->obj);
 	return rc;
 }
diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index a74cfefffbc6c5..f918a22f0d4800 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -154,7 +154,8 @@ static inline bool iommufd_lock_obj(struct iommufd_object *obj)
 
 struct iommufd_object *iommufd_get_object(struct iommufd_ctx *ictx, u32 id,
 					  enum iommufd_object_type type);
-static inline void iommufd_put_object(struct iommufd_object *obj)
+static inline void iommufd_put_object(struct iommufd_ctx *ictx,
+				      struct iommufd_object *obj)
 {
 	refcount_dec(&obj->users);
 	up_read(&obj->destroy_rwsem);
diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c
index 5d93434003d8ad..022ef8f55088a6 100644
--- a/drivers/iommu/iommufd/selftest.c
+++ b/drivers/iommu/iommufd/selftest.c
@@ -86,7 +86,7 @@ void iommufd_test_syz_conv_iova_id(struct iommufd_ucmd *ucmd,
 	if (IS_ERR(ioas))
 		return;
 	*iova = iommufd_test_syz_conv_iova(&ioas->iopt, iova);
-	iommufd_put_object(&ioas->obj);
+	iommufd_put_object(ucmd->ictx, &ioas->obj);
 }
 
 struct mock_iommu_domain {
@@ -500,7 +500,7 @@ get_md_pagetable(struct iommufd_ucmd *ucmd, u32 mockpt_id,
 		return hwpt;
 	if (hwpt->domain->type != IOMMU_DOMAIN_UNMANAGED ||
 	    hwpt->domain->ops != mock_ops.default_domain_ops) {
-		iommufd_put_object(&hwpt->obj);
+		iommufd_put_object(ucmd->ictx, &hwpt->obj);
 		return ERR_PTR(-EINVAL);
 	}
 	*mock = container_of(hwpt->domain, struct mock_iommu_domain, domain);
@@ -518,7 +518,7 @@ get_md_pagetable_nested(struct iommufd_ucmd *ucmd, u32 mockpt_id,
 		return hwpt;
 	if (hwpt->domain->type != IOMMU_DOMAIN_NESTED ||
 	    hwpt->domain->ops != &domain_nested_ops) {
-		iommufd_put_object(&hwpt->obj);
+		iommufd_put_object(ucmd->ictx, &hwpt->obj);
 		return ERR_PTR(-EINVAL);
 	}
 	*mock_nested = container_of(hwpt->domain,
@@ -681,7 +681,7 @@ static int iommufd_test_mock_domain_replace(struct iommufd_ucmd *ucmd,
 	rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
 
 out_dev_obj:
-	iommufd_put_object(dev_obj);
+	iommufd_put_object(ucmd->ictx, dev_obj);
 	return rc;
 }
 
@@ -699,7 +699,7 @@ static int iommufd_test_add_reserved(struct iommufd_ucmd *ucmd,
 	down_write(&ioas->iopt.iova_rwsem);
 	rc = iopt_reserve_iova(&ioas->iopt, start, start + length - 1, NULL);
 	up_write(&ioas->iopt.iova_rwsem);
-	iommufd_put_object(&ioas->obj);
+	iommufd_put_object(ucmd->ictx, &ioas->obj);
 	return rc;
 }
 
@@ -754,7 +754,7 @@ static int iommufd_test_md_check_pa(struct iommufd_ucmd *ucmd,
 	rc = 0;
 
 out_put:
-	iommufd_put_object(&hwpt->obj);
+	iommufd_put_object(ucmd->ictx, &hwpt->obj);
 	return rc;
 }
 
@@ -1233,7 +1233,7 @@ static int iommufd_test_dirty(struct iommufd_ucmd *ucmd, unsigned int mockpt_id,
 out_free:
 	kvfree(tmp);
 out_put:
-	iommufd_put_object(&hwpt->obj);
+	iommufd_put_object(ucmd->ictx, &hwpt->obj);
 	return rc;
 }
 
diff --git a/drivers/iommu/iommufd/vfio_compat.c b/drivers/iommu/iommufd/vfio_compat.c
index 538fbf76354d13..a3ad5f0b6c59dd 100644
--- a/drivers/iommu/iommufd/vfio_compat.c
+++ b/drivers/iommu/iommufd/vfio_compat.c
@@ -41,7 +41,7 @@ int iommufd_vfio_compat_ioas_get_id(struct iommufd_ctx *ictx, u32 *out_ioas_id)
 	if (IS_ERR(ioas))
 		return PTR_ERR(ioas);
 	*out_ioas_id = ioas->obj.id;
-	iommufd_put_object(&ioas->obj);
+	iommufd_put_object(ictx, &ioas->obj);
 	return 0;
 }
 EXPORT_SYMBOL_NS_GPL(iommufd_vfio_compat_ioas_get_id, IOMMUFD_VFIO);
@@ -98,7 +98,7 @@ int iommufd_vfio_compat_ioas_create(struct iommufd_ctx *ictx)
 
 	if (ictx->vfio_ioas && iommufd_lock_obj(&ictx->vfio_ioas->obj)) {
 		ret = 0;
-		iommufd_put_object(&ictx->vfio_ioas->obj);
+		iommufd_put_object(ictx, &ictx->vfio_ioas->obj);
 		goto out_abort;
 	}
 	ictx->vfio_ioas = ioas;
@@ -133,7 +133,7 @@ int iommufd_vfio_ioas(struct iommufd_ucmd *ucmd)
 		if (IS_ERR(ioas))
 			return PTR_ERR(ioas);
 		cmd->ioas_id = ioas->obj.id;
-		iommufd_put_object(&ioas->obj);
+		iommufd_put_object(ucmd->ictx, &ioas->obj);
 		return iommufd_ucmd_respond(ucmd, sizeof(*cmd));
 
 	case IOMMU_VFIO_IOAS_SET:
@@ -143,7 +143,7 @@ int iommufd_vfio_ioas(struct iommufd_ucmd *ucmd)
 		xa_lock(&ucmd->ictx->objects);
 		ucmd->ictx->vfio_ioas = ioas;
 		xa_unlock(&ucmd->ictx->objects);
-		iommufd_put_object(&ioas->obj);
+		iommufd_put_object(ucmd->ictx, &ioas->obj);
 		return 0;
 
 	case IOMMU_VFIO_IOAS_CLEAR:
@@ -190,7 +190,7 @@ static int iommufd_vfio_map_dma(struct iommufd_ctx *ictx, unsigned int cmd,
 	iova = map.iova;
 	rc = iopt_map_user_pages(ictx, &ioas->iopt, &iova, u64_to_user_ptr(map.vaddr),
 				 map.size, iommu_prot, 0);
-	iommufd_put_object(&ioas->obj);
+	iommufd_put_object(ictx, &ioas->obj);
 	return rc;
 }
 
@@ -249,7 +249,7 @@ static int iommufd_vfio_unmap_dma(struct iommufd_ctx *ictx, unsigned int cmd,
 		rc = -EFAULT;
 
 err_put:
-	iommufd_put_object(&ioas->obj);
+	iommufd_put_object(ictx, &ioas->obj);
 	return rc;
 }
 
@@ -272,7 +272,7 @@ static int iommufd_vfio_cc_iommu(struct iommufd_ctx *ictx)
 	}
 	mutex_unlock(&ioas->mutex);
 
-	iommufd_put_object(&ioas->obj);
+	iommufd_put_object(ictx, &ioas->obj);
 	return rc;
 }
 
@@ -349,7 +349,7 @@ static int iommufd_vfio_set_iommu(struct iommufd_ctx *ictx, unsigned long type)
 	 */
 	if (type == VFIO_TYPE1_IOMMU)
 		rc = iopt_disable_large_pages(&ioas->iopt);
-	iommufd_put_object(&ioas->obj);
+	iommufd_put_object(ictx, &ioas->obj);
 	return rc;
 }
 
@@ -511,7 +511,7 @@ static int iommufd_vfio_iommu_get_info(struct iommufd_ctx *ictx,
 
 out_put:
 	up_read(&ioas->iopt.iova_rwsem);
-	iommufd_put_object(&ioas->obj);
+	iommufd_put_object(ictx, &ioas->obj);
 	return rc;
 }
 
-- 
2.42.0


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH rc 2/2] iommufd: Do not UAF during iommufd_put_object()
  2023-11-21  1:28 [PATCH rc 0/2] Do not UAF during iommufd_put_object() Jason Gunthorpe
  2023-11-21  1:28 ` [PATCH rc 1/2] iommufd: Add iommufd_ctx to iommufd_put_object() Jason Gunthorpe
@ 2023-11-21  1:28 ` Jason Gunthorpe
  2023-11-21  3:49   ` Tian, Kevin
  1 sibling, 1 reply; 6+ messages in thread
From: Jason Gunthorpe @ 2023-11-21  1:28 UTC (permalink / raw)
  To: iommu, Kevin Tian
  Cc: Lu Baolu, Eric Auger, Lixiao Yang, Matthew Rosato, Nicolin Chen,
	patches, syzbot+7574ebfe589049630608, syzbot+d31adfb277377ef8fcba,
	Yi Liu

The mixture of kernel and user space lifecycle objects continues to be
complicated inside iommufd. The obj->destroy_rwsem is used to bring order
to the kernel driver destruction sequence but it cannot be sequenced right
with the other refcounts so we end up possibly UAF'ing:

  BUG: KASAN: slab-use-after-free in __up_read+0x627/0x750 kernel/locking/rwsem.c:1342
  Read of size 8 at addr ffff888073cde868 by task syz-executor934/6535

  CPU: 1 PID: 6535 Comm: syz-executor934 Not tainted 6.6.0-rc7-syzkaller-00195-g2af9b20dbb39 #0
  Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/09/2023
  Call Trace:
   <TASK>
   __dump_stack lib/dump_stack.c:88 [inline]
   dump_stack_lvl+0xd9/0x1b0 lib/dump_stack.c:106
   print_address_description mm/kasan/report.c:364 [inline]
   print_report+0xc4/0x620 mm/kasan/report.c:475
   kasan_report+0xda/0x110 mm/kasan/report.c:588
   __up_read+0x627/0x750 kernel/locking/rwsem.c:1342
   iommufd_put_object drivers/iommu/iommufd/iommufd_private.h:149 [inline]
   iommufd_vfio_ioas+0x46c/0x580 drivers/iommu/iommufd/vfio_compat.c:146
   iommufd_fops_ioctl+0x347/0x4d0 drivers/iommu/iommufd/main.c:398
   vfs_ioctl fs/ioctl.c:51 [inline]
   __do_sys_ioctl fs/ioctl.c:871 [inline]
   __se_sys_ioctl fs/ioctl.c:857 [inline]
   __x64_sys_ioctl+0x18f/0x210 fs/ioctl.c:857
   do_syscall_x64 arch/x86/entry/common.c:50 [inline]
   do_syscall_64+0x38/0xb0 arch/x86/entry/common.c:80
   entry_SYSCALL_64_after_hwframe+0x63/0xcd

There are two races here, the more obvious one:

     CPU 0                                 CPU 1
 iommufd_put_object()
                                          iommufd_destroy()
  refcount_dec(&obj->users)

 	                                   iommufd_object_remove()
					   kfree()
  up_read(&obj->destroy_rwsem) // Boom

And there is also perhaps some possibility that the rwsem could hit an
issue:

     CPU 0                                 CPU 1
 iommufd_put_object()
                                         iommufd_object_destroy_user()
  refcount_dec(&obj->users);
 	                                  down_write(&obj->destroy_rwsem)
  up_read(&obj->destroy_rwsem);
                                             atomic_long_or(RWSEM_FLAG_WAITERS, &sem->count);
      tmp = atomic_long_add_return_release()
                                             rwsem_try_write_lock()
 	                                  iommufd_object_remove()
	                                  up_write(&obj->destroy_rwsem)
					  kfree()
      clear_nonspinnable() // Boom

Fix this by reorganizing this again so that two refcounts are used to keep
track of things with a rule that users == 0 && shortterm_users == 0 means
no other threads have that memory. Put a wait_queue in the iommufd_ctx
object that is triggered when any sub object reaches a 0
shortterm_users. This allows the same wait for userspace ioctls to finish
behavior that the rwsem was providing.

This is weaker still than the prior versions:

 - There is no bias on shortterm_users so if some thread is waiting to
   destroy other threads can continue to get new read sides

 - If destruction fails, eg because of an active in-kernel user, then
   shortterm_users will have cycled to zero momentarily blocking new users

 - If userspace races destroy with other userspace operations they
   continue to get an EBUSY since we still can't intermix looking up an ID
   and sleeping for its unref

In all cases these are things that userspace brings on itself, correct
programs will not hit them.

Fixes: 99f98a7c0d69 ("iommufd: IOMMUFD_DESTROY should not increase the refcount")
Reported-by: syzbot+d31adfb277377ef8fcba@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/r/00000000000055ef9a0609336580@google.com
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/iommu/iommufd/iommufd_private.h |  48 +++++++--
 drivers/iommu/iommufd/main.c            | 131 ++++++++++++------------
 2 files changed, 100 insertions(+), 79 deletions(-)

diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index f918a22f0d4800..45ffc6d8a69955 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -21,6 +21,7 @@ struct iommufd_ctx {
 	struct file *file;
 	struct xarray objects;
 	struct xarray groups;
+	wait_queue_head_t destroy_wait;
 
 	u8 account_mode;
 	/* Compatibility with VFIO no iommu */
@@ -135,7 +136,7 @@ enum iommufd_object_type {
 
 /* Base struct for all objects with a userspace ID handle. */
 struct iommufd_object {
-	struct rw_semaphore destroy_rwsem;
+	refcount_t shortterm_users;
 	refcount_t users;
 	enum iommufd_object_type type;
 	unsigned int id;
@@ -143,12 +144,9 @@ struct iommufd_object {
 
 static inline bool iommufd_lock_obj(struct iommufd_object *obj)
 {
-	if (!down_read_trylock(&obj->destroy_rwsem))
+	if (!refcount_inc_not_zero(&obj->shortterm_users))
 		return false;
-	if (!refcount_inc_not_zero(&obj->users)) {
-		up_read(&obj->destroy_rwsem);
-		return false;
-	}
+	refcount_inc(&obj->users);
 	return true;
 }
 
@@ -157,8 +155,13 @@ struct iommufd_object *iommufd_get_object(struct iommufd_ctx *ictx, u32 id,
 static inline void iommufd_put_object(struct iommufd_ctx *ictx,
 				      struct iommufd_object *obj)
 {
+	/*
+	 * Users first, then shortterm so that REMOVE_WAIT_SHORTTERM never sees
+	 * a spurious !0 users with a 0 shortterm_users.
+	 */
 	refcount_dec(&obj->users);
-	up_read(&obj->destroy_rwsem);
+	if (refcount_dec_and_test(&obj->shortterm_users))
+		wake_up_interruptible_all(&ictx->destroy_wait);
 }
 
 void iommufd_object_abort(struct iommufd_ctx *ictx, struct iommufd_object *obj);
@@ -166,17 +169,40 @@ void iommufd_object_abort_and_destroy(struct iommufd_ctx *ictx,
 				      struct iommufd_object *obj);
 void iommufd_object_finalize(struct iommufd_ctx *ictx,
 			     struct iommufd_object *obj);
-void __iommufd_object_destroy_user(struct iommufd_ctx *ictx,
-				   struct iommufd_object *obj, bool allow_fail);
+
+enum {
+	REMOVE_WAIT_SHORTTERM = 1,
+};
+int iommufd_object_remove(struct iommufd_ctx *ictx,
+			  struct iommufd_object *to_destroy, u32 id,
+			  unsigned int flags);
+
+/*
+ * The caller holds a users refcount and wants to destroy the object. In all
+ * cases the caller no longer has a reference on obj.
+ */
 static inline void iommufd_object_destroy_user(struct iommufd_ctx *ictx,
 					       struct iommufd_object *obj)
 {
-	__iommufd_object_destroy_user(ictx, obj, false);
+	int ret;
+
+	ret = iommufd_object_remove(ictx, obj, obj->id, REMOVE_WAIT_SHORTTERM);
+
+	/*
+	 * If there is a bug and we couldn't destroy the object then we did put
+	 * back the caller's refcount and will eventually try to free it again
+	 * during close.
+	 */
+	WARN_ON(ret);
 }
+
+/*
+ * Used by autodomains to clean up the hwpt object if it was not used manually.
+ */
 static inline void iommufd_object_deref_user(struct iommufd_ctx *ictx,
 					     struct iommufd_object *obj)
 {
-	__iommufd_object_destroy_user(ictx, obj, true);
+	iommufd_object_remove(ictx, obj, obj->id, 0);
 }
 
 struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx,
diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c
index 45b9d40773b13a..2d46324ec7f340 100644
--- a/drivers/iommu/iommufd/main.c
+++ b/drivers/iommu/iommufd/main.c
@@ -33,7 +33,6 @@ struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx,
 					     size_t size,
 					     enum iommufd_object_type type)
 {
-	static struct lock_class_key obj_keys[IOMMUFD_OBJ_MAX];
 	struct iommufd_object *obj;
 	int rc;
 
@@ -41,15 +40,8 @@ struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx,
 	if (!obj)
 		return ERR_PTR(-ENOMEM);
 	obj->type = type;
-	/*
-	 * In most cases the destroy_rwsem is obtained with try so it doesn't
-	 * interact with lockdep, however on destroy we have to sleep. This
-	 * means if we have to destroy an object while holding a get on another
-	 * object it triggers lockdep. Using one locking class per object type
-	 * is a simple and reasonable way to avoid this.
-	 */
-	__init_rwsem(&obj->destroy_rwsem, "iommufd_object::destroy_rwsem",
-		     &obj_keys[type]);
+	/* Starts out bias'd by 1 until it is removed from the xarray */
+	refcount_set(&obj->shortterm_users, 1);
 	refcount_set(&obj->users, 1);
 
 	/*
@@ -131,90 +123,92 @@ struct iommufd_object *iommufd_get_object(struct iommufd_ctx *ictx, u32 id,
 
 /*
  * Remove the given object id from the xarray if the only reference to the
- * object is held by the xarray. The caller must call ops destroy().
+ * object is held by the xarray.
  */
-static struct iommufd_object *iommufd_object_remove(struct iommufd_ctx *ictx,
-						    u32 id, bool extra_put)
+int iommufd_object_remove(struct iommufd_ctx *ictx,
+			  struct iommufd_object *to_destroy, u32 id,
+			  unsigned int flags)
 {
 	struct iommufd_object *obj;
 	XA_STATE(xas, &ictx->objects, id);
+	bool zerod_shortterm = false;
+	int ret;
+
+	/*
+	 * The purpose of the shortterm_users is to ensure deterministic
+	 * destruction of objects used by external drivers and destroyed by this
+	 * function. Any temporary increment of the refcount must increment
+	 * shortterm_users, such as during ioctl execution.
+	 */
+	if (flags & REMOVE_WAIT_SHORTTERM) {
+		if (!refcount_dec_and_test(&to_destroy->shortterm_users)) {
+			if (!wait_event_timeout(
+				    ictx->destroy_wait,
+				    refcount_read(
+					    &to_destroy->shortterm_users) == 0,
+				    msecs_to_jiffies(10000))) {
+				pr_crit("Time out waiting for iommufd object to become free\n");
+				refcount_inc(&to_destroy->shortterm_users);
+				return -EBUSY;
+			}
+		}
+		zerod_shortterm = true;
+	}
 
 	xa_lock(&ictx->objects);
 	obj = xas_load(&xas);
-	if (xa_is_zero(obj) || !obj) {
-		obj = ERR_PTR(-ENOENT);
-		goto out_xa;
+	if (to_destroy) {
+		if (WARN_ON(obj != to_destroy)) {
+			ret = -ENOENT;
+			goto err_xa;
+		}
+
+		/*
+		 * If the caller is holding a ref on obj we put it here under
+		 * the spinlock.
+		 */
+		refcount_dec(&obj->users);
+	} else if (xa_is_zero(obj) || !obj) {
+		ret = -ENOENT;
+		goto err_xa;
 	}
 
-	/*
-	 * If the caller is holding a ref on obj we put it here under the
-	 * spinlock.
-	 */
-	if (extra_put)
-		refcount_dec(&obj->users);
+	/* Both refcounts must reach zero before we can continue */
+	if (!zerod_shortterm && !refcount_dec_if_one(&obj->shortterm_users)) {
+		ret = -EBUSY;
+		goto err_xa;
+	} else {
+		zerod_shortterm = true;
+	}
 
 	if (!refcount_dec_if_one(&obj->users)) {
-		obj = ERR_PTR(-EBUSY);
-		goto out_xa;
+		ret = -EBUSY;
+		goto err_xa;
 	}
 
 	xas_store(&xas, NULL);
 	if (ictx->vfio_ioas == container_of(obj, struct iommufd_ioas, obj))
 		ictx->vfio_ioas = NULL;
-
-out_xa:
 	xa_unlock(&ictx->objects);
 
-	/* The returned object reference count is zero */
-	return obj;
-}
-
-/*
- * The caller holds a users refcount and wants to destroy the object. Returns
- * true if the object was destroyed. In all cases the caller no longer has a
- * reference on obj.
- */
-void __iommufd_object_destroy_user(struct iommufd_ctx *ictx,
-				   struct iommufd_object *obj, bool allow_fail)
-{
-	struct iommufd_object *ret;
-
-	/*
-	 * The purpose of the destroy_rwsem is to ensure deterministic
-	 * destruction of objects used by external drivers and destroyed by this
-	 * function. Any temporary increment of the refcount must hold the read
-	 * side of this, such as during ioctl execution.
-	 */
-	down_write(&obj->destroy_rwsem);
-	ret = iommufd_object_remove(ictx, obj->id, true);
-	up_write(&obj->destroy_rwsem);
-
-	if (allow_fail && IS_ERR(ret))
-		return;
-
-	/*
-	 * If there is a bug and we couldn't destroy the object then we did put
-	 * back the caller's refcount and will eventually try to free it again
-	 * during close.
-	 */
-	if (WARN_ON(IS_ERR(ret)))
-		return;
-
 	iommufd_object_ops[obj->type].destroy(obj);
 	kfree(obj);
+	return 0;
+
+err_xa:
+	if (zerod_shortterm)
+		refcount_set(&obj->shortterm_users, 1);
+	xa_unlock(&ictx->objects);
+
+	/* The returned object reference count is zero */
+	return ret;
 }
 
 static int iommufd_destroy(struct iommufd_ucmd *ucmd)
 {
 	struct iommu_destroy *cmd = ucmd->cmd;
-	struct iommufd_object *obj;
 
-	obj = iommufd_object_remove(ucmd->ictx, cmd->id, false);
-	if (IS_ERR(obj))
-		return PTR_ERR(obj);
-	iommufd_object_ops[obj->type].destroy(obj);
-	kfree(obj);
-	return 0;
+	return iommufd_object_remove(ucmd->ictx, NULL, cmd->id, 0);
 }
 
 static int iommufd_fops_open(struct inode *inode, struct file *filp)
@@ -238,6 +232,7 @@ static int iommufd_fops_open(struct inode *inode, struct file *filp)
 	xa_init_flags(&ictx->objects, XA_FLAGS_ALLOC1 | XA_FLAGS_ACCOUNT);
 	xa_init(&ictx->groups);
 	ictx->file = filp;
+	init_waitqueue_head(&ictx->destroy_wait);
 	filp->private_data = ictx;
 	return 0;
 }
-- 
2.42.0


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* RE: [PATCH rc 1/2] iommufd: Add iommufd_ctx to iommufd_put_object()
  2023-11-21  1:28 ` [PATCH rc 1/2] iommufd: Add iommufd_ctx to iommufd_put_object() Jason Gunthorpe
@ 2023-11-21  3:42   ` Tian, Kevin
  0 siblings, 0 replies; 6+ messages in thread
From: Tian, Kevin @ 2023-11-21  3:42 UTC (permalink / raw)
  To: Jason Gunthorpe, iommu@lists.linux.dev
  Cc: Lu Baolu, Eric Auger, Lixiao Yang, Matthew Rosato, Nicolin Chen,
	patches@lists.linux.dev,
	syzbot+7574ebfe589049630608@syzkaller.appspotmail.com,
	syzbot+d31adfb277377ef8fcba@syzkaller.appspotmail.com, Liu, Yi L

> From: Jason Gunthorpe <jgg@nvidia.com>
> Sent: Tuesday, November 21, 2023 9:28 AM
> 
> Will be used in the next patch.
> 
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>

Reviewed-by: Kevin Tian <kevin.tian@intel.com>

^ permalink raw reply	[flat|nested] 6+ messages in thread

* RE: [PATCH rc 2/2] iommufd: Do not UAF during iommufd_put_object()
  2023-11-21  1:28 ` [PATCH rc 2/2] iommufd: Do not UAF during iommufd_put_object() Jason Gunthorpe
@ 2023-11-21  3:49   ` Tian, Kevin
  2023-11-21 13:04     ` Jason Gunthorpe
  0 siblings, 1 reply; 6+ messages in thread
From: Tian, Kevin @ 2023-11-21  3:49 UTC (permalink / raw)
  To: Jason Gunthorpe, iommu@lists.linux.dev
  Cc: Lu Baolu, Eric Auger, Lixiao Yang, Matthew Rosato, Nicolin Chen,
	patches@lists.linux.dev,
	syzbot+7574ebfe589049630608@syzkaller.appspotmail.com,
	syzbot+d31adfb277377ef8fcba@syzkaller.appspotmail.com, Liu, Yi L

> From: Jason Gunthorpe <jgg@nvidia.com>
> Sent: Tuesday, November 21, 2023 9:28 AM
> +
> +/*
> + * The caller holds a users refcount and wants to destroy the object. In all
> + * cases the caller no longer has a reference on obj.

It's unclear which refcnt is being talked here given we have two now.

Actually none of them matches this description e.g. if wait shortterm timeouts
then both users and shortterm_users are left intact.

> + */
>  static inline void iommufd_object_destroy_user(struct iommufd_ctx *ictx,
>  					       struct iommufd_object *obj)
>  {
> -	__iommufd_object_destroy_user(ictx, obj, false);
> +	int ret;
> +
> +	ret = iommufd_object_remove(ictx, obj, obj->id,
> REMOVE_WAIT_SHORTTERM);
> +
> +	/*
> +	 * If there is a bug and we couldn't destroy the object then we did put
> +	 * back the caller's refcount and will eventually try to free it again
> +	 * during close.

This also needs more clarification. obj->users is not recovered upon error.
only obj->shortterm_users is put back.

> +	 */
> +	WARN_ON(ret);
>  }
> +
> +/*
> + * Used by autodomains to clean up the hwpt object if it was not used
> manually.
> + */
>  static inline void iommufd_object_deref_user(struct iommufd_ctx *ictx,
>  					     struct iommufd_object *obj)
>  {
> -	__iommufd_object_destroy_user(ictx, obj, true);
> +	iommufd_object_remove(ictx, obj, obj->id, 0);

since there is a flag parameter now can we introduce a new flag bit
to represent extra_put? having both obj and obj->id in one invocation
as the hint for extra_put is a bit vague.


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH rc 2/2] iommufd: Do not UAF during iommufd_put_object()
  2023-11-21  3:49   ` Tian, Kevin
@ 2023-11-21 13:04     ` Jason Gunthorpe
  0 siblings, 0 replies; 6+ messages in thread
From: Jason Gunthorpe @ 2023-11-21 13:04 UTC (permalink / raw)
  To: Tian, Kevin
  Cc: iommu@lists.linux.dev, Lu Baolu, Eric Auger, Lixiao Yang,
	Matthew Rosato, Nicolin Chen, patches@lists.linux.dev,
	syzbot+7574ebfe589049630608@syzkaller.appspotmail.com,
	syzbot+d31adfb277377ef8fcba@syzkaller.appspotmail.com, Liu, Yi L

On Tue, Nov 21, 2023 at 03:49:55AM +0000, Tian, Kevin wrote:
> > From: Jason Gunthorpe <jgg@nvidia.com>
> > Sent: Tuesday, November 21, 2023 9:28 AM
> > +
> > +/*
> > + * The caller holds a users refcount and wants to destroy the object. In all
> > + * cases the caller no longer has a reference on obj.
> 
> It's unclear which refcnt is being talked here given we have two now.

It says: "users refcount"

At this point the shortterm_users refcount is held by the
xarray. Let's add a note about that

> Actually none of them matches this description e.g. if wait shortterm timeouts
> then both users and shortterm_users are left intact.

shortterm_users was decremented and the object is leaked in that
case. It is a should-never-happen flow.
 
> >  static inline void iommufd_object_destroy_user(struct iommufd_ctx *ictx,
> >  					       struct iommufd_object *obj)
> >  {
> > -	__iommufd_object_destroy_user(ictx, obj, false);
> > +	int ret;
> > +
> > +	ret = iommufd_object_remove(ictx, obj, obj->id,
> > REMOVE_WAIT_SHORTTERM);
> > +
> > +	/*
> > +	 * If there is a bug and we couldn't destroy the object then we did put
> > +	 * back the caller's refcount and will eventually try to free it again
> > +	 * during close.
> 
> This also needs more clarification. obj->users is not recovered upon error.
> only obj->shortterm_users is put back.

And it doesn't always happen anyhow.

I'll work on this some more, it is really tricky.

> > +	 */
> > +	WARN_ON(ret);
> >  }
> > +
> > +/*
> > + * Used by autodomains to clean up the hwpt object if it was not used
> > manually.
> > + */
> >  static inline void iommufd_object_deref_user(struct iommufd_ctx *ictx,
> >  					     struct iommufd_object *obj)
> >  {
> > -	__iommufd_object_destroy_user(ictx, obj, true);
> > +	iommufd_object_remove(ictx, obj, obj->id, 0);
> 
> since there is a flag parameter now can we introduce a new flag bit
> to represent extra_put? having both obj and obj->id in one invocation
> as the hint for extra_put is a bit vague.

If the user has a pointer to the object then the user must also have a
ref to the object, basic ref counting rules.

Jason

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2023-11-21 13:04 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2023-11-21  1:28 [PATCH rc 0/2] Do not UAF during iommufd_put_object() Jason Gunthorpe
2023-11-21  1:28 ` [PATCH rc 1/2] iommufd: Add iommufd_ctx to iommufd_put_object() Jason Gunthorpe
2023-11-21  3:42   ` Tian, Kevin
2023-11-21  1:28 ` [PATCH rc 2/2] iommufd: Do not UAF during iommufd_put_object() Jason Gunthorpe
2023-11-21  3:49   ` Tian, Kevin
2023-11-21 13:04     ` Jason Gunthorpe

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.