All of lore.kernel.org
 help / color / mirror / Atom feed
From: Lu Baolu <baolu.lu@linux.intel.com>
To: Jason Gunthorpe <jgg@ziepe.ca>, Kevin Tian <kevin.tian@intel.com>,
	Joerg Roedel <joro@8bytes.org>, Will Deacon <will@kernel.org>,
	Robin Murphy <robin.murphy@arm.com>,
	Jean-Philippe Brucker <jean-philippe@linaro.org>,
	Nicolin Chen <nicolinc@nvidia.com>, Yi Liu <yi.l.liu@intel.com>,
	Jacob Pan <jacob.jun.pan@linux.intel.com>
Cc: iommu@lists.linux.dev, linux-kselftest@vger.kernel.org,
	virtualization@lists.linux-foundation.org,
	linux-kernel@vger.kernel.org, Lu Baolu <baolu.lu@linux.intel.com>
Subject: [RFC PATCHES 08/17] iommufd: IO page fault delivery initialization and release
Date: Tue, 30 May 2023 13:37:15 +0800	[thread overview]
Message-ID: <20230530053724.232765-9-baolu.lu@linux.intel.com> (raw)
In-Reply-To: <20230530053724.232765-1-baolu.lu@linux.intel.com>

Add some housekeeping code for IO page fault delivery. Add a fault field
in the iommufd_hw_pagetable structure to store pending IO page faults and
other related data.

The fault field is allocated when an IOPF-capable user HWPT (indicated by
IOMMU_HWPT_ALLOC_FLAGS_IOPF_CAPABLE being set in the allocation user data)
is allocated. This field exists until the HWPT is destroyed. This also
implies that it is possible to determine whether a HWPT is IOPF capable by
checking the fault field.

When an IOPF-capable HWPT is attached to a device (could also be a PASID of
a device in the future), a fault cookie is allocated and set to the device.
The cookie is cleared and freed when the HWPT is detached from the device.

Signed-off-by: Yi Liu <yi.l.liu@intel.com>
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
---
 drivers/iommu/iommufd/iommufd_private.h | 12 +++++
 drivers/iommu/iommufd/device.c          | 61 +++++++++++++++++++++++--
 drivers/iommu/iommufd/hw_pagetable.c    | 55 ++++++++++++++++++++++
 3 files changed, 125 insertions(+), 3 deletions(-)

diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index e951815f5707..5ff139acc5c0 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -236,6 +236,13 @@ int iommufd_option_rlimit_mode(struct iommu_option *cmd,
 
 int iommufd_vfio_ioas(struct iommufd_ucmd *ucmd);
 
+struct hw_pgtable_fault {
+	struct mutex mutex;
+	struct list_head deliver;
+	struct list_head response;
+	struct eventfd_ctx *trigger;
+};
+
 /*
  * A HW pagetable is called an iommu_domain inside the kernel. This user object
  * allows directly creating and inspecting the domains. Domains that have kernel
@@ -252,6 +259,7 @@ struct iommufd_hw_pagetable {
 	bool msi_cookie : 1;
 	/* Head at iommufd_ioas::hwpt_list */
 	struct list_head hwpt_item;
+	struct hw_pgtable_fault *fault;
 };
 
 struct iommufd_hw_pagetable *
@@ -314,6 +322,10 @@ struct iommufd_device {
 	bool has_user_data;
 };
 
+struct iommufd_fault_cookie {
+	struct iommufd_device *idev;
+};
+
 static inline struct iommufd_device *
 iommufd_get_device(struct iommufd_ucmd *ucmd, u32 id)
 {
diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c
index 29b212714e2c..3408f1fc3e9f 100644
--- a/drivers/iommu/iommufd/device.c
+++ b/drivers/iommu/iommufd/device.c
@@ -374,6 +374,44 @@ static int iommufd_group_setup_msi(struct iommufd_group *igroup,
 	return 0;
 }
 
+static int iommufd_device_set_fault_cookie(struct iommufd_hw_pagetable *hwpt,
+					   struct iommufd_device *idev,
+					   ioasid_t pasid)
+{
+	struct iommufd_fault_cookie *fcookie, *curr;
+
+	if (!hwpt->fault)
+		return 0;
+
+	fcookie = kzalloc(sizeof(*fcookie), GFP_KERNEL);
+	if (!fcookie)
+		return -ENOMEM;
+	fcookie->idev = idev;
+
+	curr = iommu_set_device_fault_cookie(idev->dev, pasid, fcookie);
+	if (IS_ERR(curr)) {
+		kfree(fcookie);
+		return PTR_ERR(curr);
+	}
+	kfree(curr);
+
+	return 0;
+}
+
+static void iommufd_device_unset_fault_cookie(struct iommufd_hw_pagetable *hwpt,
+					      struct iommufd_device *idev,
+					      ioasid_t pasid)
+{
+	struct iommufd_fault_cookie *curr;
+
+	if (!hwpt->fault)
+		return;
+
+	curr = iommu_set_device_fault_cookie(idev->dev, pasid, NULL);
+	WARN_ON(IS_ERR(curr));
+	kfree(curr);
+}
+
 int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt,
 				struct iommufd_device *idev)
 {
@@ -398,6 +436,10 @@ int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt,
 	if (rc)
 		goto err_unlock;
 
+	rc = iommufd_device_set_fault_cookie(hwpt, idev, 0);
+	if (rc)
+		goto err_unresv;
+
 	/*
 	 * Only attach to the group once for the first device that is in the
 	 * group. All the other devices will follow this attachment. The user
@@ -408,17 +450,21 @@ int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt,
 	if (list_empty(&idev->igroup->device_list)) {
 		rc = iommufd_group_setup_msi(idev->igroup, hwpt);
 		if (rc)
-			goto err_unresv;
+			goto err_unset;
 
 		rc = iommu_attach_group(hwpt->domain, idev->igroup->group);
 		if (rc)
-			goto err_unresv;
+			goto err_unset;
 		idev->igroup->hwpt = hwpt;
 	}
+
 	refcount_inc(&hwpt->obj.users);
 	list_add_tail(&idev->group_item, &idev->igroup->device_list);
 	mutex_unlock(&idev->igroup->lock);
 	return 0;
+
+err_unset:
+	iommufd_device_unset_fault_cookie(hwpt, idev, 0);
 err_unresv:
 	iopt_remove_reserved_iova(&hwpt->ioas->iopt, idev->dev);
 err_unlock:
@@ -433,6 +479,7 @@ iommufd_hw_pagetable_detach(struct iommufd_device *idev)
 
 	mutex_lock(&idev->igroup->lock);
 	list_del(&idev->group_item);
+	iommufd_device_unset_fault_cookie(hwpt, idev, 0);
 	if (list_empty(&idev->igroup->device_list)) {
 		iommu_detach_group(hwpt->domain, idev->igroup->group);
 		idev->igroup->hwpt = NULL;
@@ -502,9 +549,14 @@ iommufd_device_do_replace(struct iommufd_device *idev,
 	if (rc)
 		goto err_unresv;
 
+	iommufd_device_unset_fault_cookie(old_hwpt, idev, 0);
+	rc = iommufd_device_set_fault_cookie(hwpt, idev, 0);
+	if (rc)
+		goto err_unresv;
+
 	rc = iommu_group_replace_domain(igroup->group, hwpt->domain);
 	if (rc)
-		goto err_unresv;
+		goto err_replace;
 
 	if (hwpt->ioas != old_hwpt->ioas) {
 		list_for_each_entry(cur, &igroup->device_list, group_item)
@@ -526,6 +578,9 @@ iommufd_device_do_replace(struct iommufd_device *idev,
 
 	/* Caller must destroy old_hwpt */
 	return old_hwpt;
+err_replace:
+	iommufd_device_unset_fault_cookie(hwpt, idev, 0);
+	iommufd_device_set_fault_cookie(old_hwpt, idev, 0);
 err_unresv:
 	list_for_each_entry(cur, &igroup->device_list, group_item)
 		iopt_remove_reserved_iova(&hwpt->ioas->iopt, cur->dev);
diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c
index 47ec7ddd5f5d..d6d550c3d0cc 100644
--- a/drivers/iommu/iommufd/hw_pagetable.c
+++ b/drivers/iommu/iommufd/hw_pagetable.c
@@ -3,12 +3,16 @@
  * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES
  */
 #include <linux/iommu.h>
+#include <linux/eventfd.h>
 #include <uapi/linux/iommufd.h>
 
 #include "../iommu-priv.h"
 #include "iommufd_private.h"
 #include "iommufd_test.h"
 
+static struct hw_pgtable_fault *hw_pagetable_fault_alloc(int eventfd);
+static void hw_pagetable_fault_free(struct hw_pgtable_fault *fault);
+
 void iommufd_hw_pagetable_destroy(struct iommufd_object *obj)
 {
 	struct iommufd_hw_pagetable *hwpt =
@@ -27,6 +31,9 @@ void iommufd_hw_pagetable_destroy(struct iommufd_object *obj)
 
 	if (hwpt->parent)
 		refcount_dec(&hwpt->parent->obj.users);
+
+	if (hwpt->fault)
+		hw_pagetable_fault_free(hwpt->fault);
 	refcount_dec(&hwpt->ioas->obj.users);
 }
 
@@ -255,6 +262,11 @@ int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd)
 		goto out_put_pt;
 	}
 
+	if (!parent && (cmd->flags & IOMMU_HWPT_ALLOC_FLAGS_IOPF_CAPABLE)) {
+		rc = -EINVAL;
+		goto out_put_pt;
+	}
+
 	if (klen) {
 		if (!cmd->data_len) {
 			rc = -EINVAL;
@@ -282,6 +294,14 @@ int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd)
 		goto out_unlock;
 	}
 
+	if (cmd->flags & IOMMU_HWPT_ALLOC_FLAGS_IOPF_CAPABLE) {
+		hwpt->fault = hw_pagetable_fault_alloc(cmd->event_fd);
+		if (IS_ERR(hwpt->fault)) {
+			rc = PTR_ERR(hwpt->fault);
+			goto out_hwpt;
+		}
+	}
+
 	cmd->out_hwpt_id = hwpt->obj.id;
 	rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
 	if (rc)
@@ -346,3 +366,38 @@ int iommufd_hwpt_invalidate(struct iommufd_ucmd *ucmd)
 	iommufd_put_object(&hwpt->obj);
 	return rc;
 }
+
+static struct hw_pgtable_fault *hw_pagetable_fault_alloc(int eventfd)
+{
+	struct hw_pgtable_fault *fault;
+	int rc;
+
+	fault = kzalloc(sizeof(*fault), GFP_KERNEL);
+	if (!fault)
+		return ERR_PTR(-ENOMEM);
+
+	INIT_LIST_HEAD(&fault->deliver);
+	INIT_LIST_HEAD(&fault->response);
+	mutex_init(&fault->mutex);
+
+	fault->trigger = eventfd_ctx_fdget(eventfd);
+	if (IS_ERR(fault->trigger)) {
+		rc = PTR_ERR(fault->trigger);
+		goto out_free;
+	}
+
+	return fault;
+
+out_free:
+	kfree(fault);
+	return ERR_PTR(rc);
+}
+
+static void hw_pagetable_fault_free(struct hw_pgtable_fault *fault)
+{
+	WARN_ON(!list_empty(&fault->deliver));
+	WARN_ON(!list_empty(&fault->response));
+
+	eventfd_ctx_put(fault->trigger);
+	kfree(fault);
+}
-- 
2.34.1


  parent reply	other threads:[~2023-05-30  5:38 UTC|newest]

Thread overview: 44+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-05-30  5:37 [RFC PATCHES 00/17] IOMMUFD: Deliver IO page faults to user space Lu Baolu
2023-05-30  5:37 ` [RFC PATCHES 01/17] iommu: Move iommu fault data to linux/iommu.h Lu Baolu
2023-05-30  5:37 ` [RFC PATCHES 02/17] iommu: Support asynchronous I/O page fault response Lu Baolu
2023-05-30  5:37 ` [RFC PATCHES 03/17] iommu: Add helper to set iopf handler for domain Lu Baolu
2023-05-30  5:37 ` [RFC PATCHES 04/17] iommu: Pass device parameter to iopf handler Lu Baolu
2023-05-30  5:37 ` [RFC PATCHES 05/17] iommu: Split IO page fault handling from SVA Lu Baolu
2023-05-30  5:37 ` [RFC PATCHES 06/17] iommu: Add iommu page fault cookie helpers Lu Baolu
2023-05-30  5:37 ` [RFC PATCHES 07/17] iommufd: Add iommu page fault data Lu Baolu
2023-05-30  5:37 ` Lu Baolu [this message]
2023-05-30  5:37 ` [RFC PATCHES 09/17] iommufd: Add iommufd hwpt iopf handler Lu Baolu
2023-05-30  5:37 ` [RFC PATCHES 10/17] iommufd: Add IOMMU_HWPT_ALLOC_FLAGS_USER_PASID_TABLE for hwpt_alloc Lu Baolu
2023-05-30  5:37 ` [RFC PATCHES 11/17] iommufd: Deliver fault messages to user space Lu Baolu
2023-05-30  5:37 ` [RFC PATCHES 12/17] iommufd: Add io page fault response support Lu Baolu
2023-05-30  5:37 ` [RFC PATCHES 13/17] iommufd: Add a timer for each iommufd fault data Lu Baolu
2023-05-30  5:37 ` [RFC PATCHES 14/17] iommufd: Drain all pending faults when destroying hwpt Lu Baolu
2023-05-30  5:37 ` [RFC PATCHES 15/17] iommufd: Allow new hwpt_alloc flags Lu Baolu
2023-05-30  5:37 ` [RFC PATCHES 16/17] iommufd/selftest: Add IOPF feature for mock devices Lu Baolu
2023-05-30  5:37 ` [RFC PATCHES 17/17] iommufd/selftest: Cover iopf-capable nested hwpt Lu Baolu
2023-05-30 18:50 ` [RFC PATCHES 00/17] IOMMUFD: Deliver IO page faults to user space Nicolin Chen
2023-05-31  2:10   ` Baolu Lu
2023-05-31  4:12     ` Nicolin Chen
2023-06-25  6:30   ` Baolu Lu
2023-06-25 19:21     ` Nicolin Chen
2023-06-26  3:10       ` Baolu Lu
2023-06-26 18:02         ` Nicolin Chen
2023-06-26 18:33     ` Jason Gunthorpe
2023-06-26 18:33       ` Jason Gunthorpe
2023-06-28  2:00       ` Baolu Lu
2023-06-28 12:49         ` Jason Gunthorpe
2023-06-28 12:49           ` Jason Gunthorpe
2023-06-29  1:07           ` Baolu Lu
2023-05-31  0:33 ` Jason Gunthorpe
2023-05-31  0:33   ` Jason Gunthorpe
2023-05-31  3:17   ` Baolu Lu
2023-06-23  6:18   ` Baolu Lu
2023-06-23 13:50     ` Jason Gunthorpe
2023-06-23 13:50       ` Jason Gunthorpe
2023-06-16 11:32 ` Jean-Philippe Brucker
2023-06-16 11:32   ` Jean-Philippe Brucker
2023-06-19  3:35   ` Baolu Lu
2023-06-26  9:51     ` Jean-Philippe Brucker
2023-06-26  9:51       ` Jean-Philippe Brucker
2023-06-19 12:58   ` Jason Gunthorpe
2023-06-19 12:58     ` Jason Gunthorpe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230530053724.232765-9-baolu.lu@linux.intel.com \
    --to=baolu.lu@linux.intel.com \
    --cc=iommu@lists.linux.dev \
    --cc=jacob.jun.pan@linux.intel.com \
    --cc=jean-philippe@linaro.org \
    --cc=jgg@ziepe.ca \
    --cc=joro@8bytes.org \
    --cc=kevin.tian@intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-kselftest@vger.kernel.org \
    --cc=nicolinc@nvidia.com \
    --cc=robin.murphy@arm.com \
    --cc=virtualization@lists.linux-foundation.org \
    --cc=will@kernel.org \
    --cc=yi.l.liu@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.