public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed
From: Lu Baolu <baolu.lu@linux.intel.com>
To: Joerg Roedel <joro@8bytes.org>, Will Deacon <will@kernel.org>,
	Robin Murphy <robin.murphy@arm.com>,
	Jason Gunthorpe <jgg@ziepe.ca>, Kevin Tian <kevin.tian@intel.com>,
	Jean-Philippe Brucker <jean-philippe@linaro.org>,
	Nicolin Chen <nicolinc@nvidia.com>
Cc: Yi Liu <yi.l.liu@intel.com>,
	Jacob Pan <jacob.jun.pan@linux.intel.com>,
	Longfang Liu <liulongfang@huawei.com>,
	Yan Zhao <yan.y.zhao@intel.com>,
	iommu@lists.linux.dev, kvm@vger.kernel.org,
	linux-kernel@vger.kernel.org, Lu Baolu <baolu.lu@linux.intel.com>,
	Jason Gunthorpe <jgg@nvidia.com>
Subject: [PATCH v8 12/12] iommu: Use refcount for fault data access
Date: Thu,  7 Dec 2023 14:43:08 +0800	[thread overview]
Message-ID: <20231207064308.313316-13-baolu.lu@linux.intel.com> (raw)
In-Reply-To: <20231207064308.313316-1-baolu.lu@linux.intel.com>

The per-device fault data structure stores information about faults
occurring on a device. Its lifetime spans from IOPF enablement to
disablement. Multiple paths, including IOPF reporting, handling, and
responding, may access it concurrently.

Previously, a mutex protected the fault data from use after free. But
this is not performance friendly due to the critical nature of IOPF
handling paths.

Refine this with a refcount-based approach. The fault data pointer is
obtained within an RCU read region with a refcount. The fault data
pointer is returned for usage only when the pointer is valid and a
refcount is successfully obtained. The fault data is freed with
kfree_rcu(), ensuring data is only freed after all RCU critical regions
complete.

Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Tested-by: Yan Zhao <yan.y.zhao@intel.com>
---
 include/linux/iommu.h      |  4 ++
 drivers/iommu/io-pgfault.c | 81 +++++++++++++++++++++++++-------------
 2 files changed, 57 insertions(+), 28 deletions(-)

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 63df77cc0b61..8020bb44a64a 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -597,6 +597,8 @@ struct iommu_device {
 /**
  * struct iommu_fault_param - per-device IOMMU fault data
  * @lock: protect pending faults list
+ * @users: user counter to manage the lifetime of the data
+ * @ruc: rcu head for kfree_rcu()
  * @dev: the device that owns this param
  * @queue: IOPF queue
  * @queue_list: index into queue->devices
@@ -606,6 +608,8 @@ struct iommu_device {
  */
 struct iommu_fault_param {
 	struct mutex lock;
+	refcount_t users;
+	struct rcu_head rcu;
 
 	struct device *dev;
 	struct iopf_queue *queue;
diff --git a/drivers/iommu/io-pgfault.c b/drivers/iommu/io-pgfault.c
index 9439eaf54928..2ace32c6d13b 100644
--- a/drivers/iommu/io-pgfault.c
+++ b/drivers/iommu/io-pgfault.c
@@ -26,6 +26,44 @@ void iopf_free_group(struct iopf_group *group)
 }
 EXPORT_SYMBOL_GPL(iopf_free_group);
 
+/*
+ * Return the fault parameter of a device if it exists. Otherwise, return NULL.
+ * On a successful return, the caller takes a reference of this parameter and
+ * should put it after use by calling iopf_put_dev_fault_param().
+ */
+static struct iommu_fault_param *iopf_get_dev_fault_param(struct device *dev)
+{
+	struct dev_iommu *param = dev->iommu;
+	struct iommu_fault_param *fault_param;
+
+	if (!param)
+		return NULL;
+
+	rcu_read_lock();
+	fault_param = param->fault_param;
+	if (fault_param && !refcount_inc_not_zero(&fault_param->users))
+		fault_param = NULL;
+	rcu_read_unlock();
+
+	return fault_param;
+}
+
+/* Caller must hold a reference of the fault parameter. */
+static void iopf_put_dev_fault_param(struct iommu_fault_param *fault_param)
+{
+	struct dev_iommu *param = fault_param->dev->iommu;
+
+	rcu_read_lock();
+	if (!refcount_dec_and_test(&fault_param->users)) {
+		rcu_read_unlock();
+		return;
+	}
+	rcu_read_unlock();
+
+	param->fault_param = NULL;
+	kfree_rcu(fault_param, rcu);
+}
+
 /**
  * iommu_handle_iopf - IO Page Fault handler
  * @fault: fault event
@@ -167,15 +205,11 @@ int iommu_report_device_fault(struct device *dev, struct iopf_fault *evt)
 {
 	struct iommu_fault_param *fault_param;
 	struct iopf_fault *evt_pending = NULL;
-	struct dev_iommu *param = dev->iommu;
 	int ret = 0;
 
-	mutex_lock(&param->lock);
-	fault_param = param->fault_param;
-	if (!fault_param) {
-		mutex_unlock(&param->lock);
+	fault_param = iopf_get_dev_fault_param(dev);
+	if (!fault_param)
 		return -EINVAL;
-	}
 
 	mutex_lock(&fault_param->lock);
 	if (evt->fault.type == IOMMU_FAULT_PAGE_REQ &&
@@ -196,7 +230,7 @@ int iommu_report_device_fault(struct device *dev, struct iopf_fault *evt)
 	}
 done_unlock:
 	mutex_unlock(&fault_param->lock);
-	mutex_unlock(&param->lock);
+	iopf_put_dev_fault_param(fault_param);
 
 	return ret;
 }
@@ -209,7 +243,6 @@ int iommu_page_response(struct device *dev,
 	int ret = -EINVAL;
 	struct iopf_fault *evt;
 	struct iommu_fault_page_request *prm;
-	struct dev_iommu *param = dev->iommu;
 	struct iommu_fault_param *fault_param;
 	const struct iommu_ops *ops = dev_iommu_ops(dev);
 	bool has_pasid = msg->flags & IOMMU_PAGE_RESP_PASID_VALID;
@@ -217,12 +250,9 @@ int iommu_page_response(struct device *dev,
 	if (!ops->page_response)
 		return -ENODEV;
 
-	mutex_lock(&param->lock);
-	fault_param = param->fault_param;
-	if (!fault_param) {
-		mutex_unlock(&param->lock);
+	fault_param = iopf_get_dev_fault_param(dev);
+	if (!fault_param)
 		return -EINVAL;
-	}
 
 	/* Only send response if there is a fault report pending */
 	mutex_lock(&fault_param->lock);
@@ -263,7 +293,8 @@ int iommu_page_response(struct device *dev,
 
 done_unlock:
 	mutex_unlock(&fault_param->lock);
-	mutex_unlock(&param->lock);
+	iopf_put_dev_fault_param(fault_param);
+
 	return ret;
 }
 EXPORT_SYMBOL_GPL(iommu_page_response);
@@ -282,22 +313,15 @@ EXPORT_SYMBOL_GPL(iommu_page_response);
  */
 int iopf_queue_flush_dev(struct device *dev)
 {
-	int ret = 0;
-	struct iommu_fault_param *iopf_param;
-	struct dev_iommu *param = dev->iommu;
+	struct iommu_fault_param *iopf_param = iopf_get_dev_fault_param(dev);
 
-	if (!param)
+	if (!iopf_param)
 		return -ENODEV;
 
-	mutex_lock(&param->lock);
-	iopf_param = param->fault_param;
-	if (iopf_param)
-		flush_workqueue(iopf_param->queue->wq);
-	else
-		ret = -ENODEV;
-	mutex_unlock(&param->lock);
+	flush_workqueue(iopf_param->queue->wq);
+	iopf_put_dev_fault_param(iopf_param);
 
-	return ret;
+	return 0;
 }
 EXPORT_SYMBOL_GPL(iopf_queue_flush_dev);
 
@@ -389,6 +413,8 @@ int iopf_queue_add_device(struct iopf_queue *queue, struct device *dev)
 	INIT_LIST_HEAD(&fault_param->faults);
 	INIT_LIST_HEAD(&fault_param->partial);
 	fault_param->dev = dev;
+	refcount_set(&fault_param->users, 1);
+	init_rcu_head(&fault_param->rcu);
 	list_add(&fault_param->queue_list, &queue->devices);
 	fault_param->queue = queue;
 
@@ -441,8 +467,7 @@ int iopf_queue_remove_device(struct iopf_queue *queue, struct device *dev)
 	list_for_each_entry_safe(iopf, next, &fault_param->partial, list)
 		kfree(iopf);
 
-	param->fault_param = NULL;
-	kfree(fault_param);
+	iopf_put_dev_fault_param(fault_param);
 unlock:
 	mutex_unlock(&param->lock);
 	mutex_unlock(&queue->lock);
-- 
2.34.1


  parent reply	other threads:[~2023-12-07  6:49 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-12-07  6:42 [PATCH v8 00/12] iommu: Prepare to deliver page faults to user space Lu Baolu
2023-12-07  6:42 ` [PATCH v8 01/12] iommu: Move iommu fault data to linux/iommu.h Lu Baolu
2023-12-07  6:42 ` [PATCH v8 02/12] iommu/arm-smmu-v3: Remove unrecoverable faults reporting Lu Baolu
2023-12-07  6:42 ` [PATCH v8 03/12] iommu: Remove unrecoverable fault data Lu Baolu
2023-12-07  6:43 ` [PATCH v8 04/12] iommu: Cleanup iopf data structure definitions Lu Baolu
2023-12-07  6:43 ` [PATCH v8 05/12] iommu: Merge iopf_device_param into iommu_fault_param Lu Baolu
2023-12-07  6:43 ` [PATCH v8 06/12] iommu: Remove iommu_[un]register_device_fault_handler() Lu Baolu
2023-12-07  6:43 ` [PATCH v8 07/12] iommu: Merge iommu_fault_event and iopf_fault Lu Baolu
2023-12-07  6:43 ` [PATCH v8 08/12] iommu: Prepare for separating SVA and IOPF Lu Baolu
2023-12-07  6:43 ` [PATCH v8 09/12] iommu: Make iommu_queue_iopf() more generic Lu Baolu
2023-12-07  6:43 ` [PATCH v8 10/12] iommu: Separate SVA and IOPF Lu Baolu
2023-12-07  6:43 ` [PATCH v8 11/12] iommu: Refine locking for per-device fault data management Lu Baolu
2023-12-11 14:50   ` Jason Gunthorpe
2023-12-07  6:43 ` Lu Baolu [this message]
2023-12-11 15:12   ` [PATCH v8 12/12] iommu: Use refcount for fault data access Jason Gunthorpe
2023-12-12  3:44     ` Baolu Lu
2023-12-12 15:18       ` Jason Gunthorpe
2023-12-11 15:24   ` Jason Gunthorpe
2023-12-12  5:07     ` Baolu Lu
2023-12-12 15:18       ` Jason Gunthorpe
2023-12-13  2:19         ` Baolu Lu
2023-12-12  5:17     ` Baolu Lu
2023-12-12 15:14       ` Jason Gunthorpe
2023-12-13  2:14         ` Baolu Lu
2023-12-12  5:23     ` Baolu Lu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20231207064308.313316-13-baolu.lu@linux.intel.com \
    --to=baolu.lu@linux.intel.com \
    --cc=iommu@lists.linux.dev \
    --cc=jacob.jun.pan@linux.intel.com \
    --cc=jean-philippe@linaro.org \
    --cc=jgg@nvidia.com \
    --cc=jgg@ziepe.ca \
    --cc=joro@8bytes.org \
    --cc=kevin.tian@intel.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=liulongfang@huawei.com \
    --cc=nicolinc@nvidia.com \
    --cc=robin.murphy@arm.com \
    --cc=will@kernel.org \
    --cc=yan.y.zhao@intel.com \
    --cc=yi.l.liu@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox