All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2] iommu: Handle iommu faults for a bad iopf setup
@ 2024-08-15 12:46 Pranjal Shrivastava
  2024-08-15 13:03 ` Jason Gunthorpe
  2024-08-15 14:03 ` Robin Murphy
  0 siblings, 2 replies; 4+ messages in thread
From: Pranjal Shrivastava @ 2024-08-15 12:46 UTC (permalink / raw)
  To: Joerg Roedel, Lu Baolu, Will Deacon, Robin Murphy
  Cc: Mostafa Saleh, iommu, Pranjal Shrivastava, Kunkun Jiang,
	Jason Gunthorpe

The iommu_report_device_fault function was updated to return void while
assuming that drivers only need to call iommu_report_device_fault() for
reporting an iopf. This implementation causes following problems:

1. The drivers rely on the core code to call it's page_reponse,
   however, when a fault is received and no fault capable domain is
   attached / iopf_param is NULL, the ops->page_response is NOT called
   causing the device to stall in case the fault type was PAGE_REQ.

2. The arm_smmu_v3 driver relies on the returned value to log errors
   returning void from iommu_report_device_fault causes these events to
   be missed while logging.

Modify the iommu_report_device_fault function to return -EINVAL for
cases where no fault capable domain is attached or iopf_param was NULL
and calls back to the driver (ops->page_response) in case the fault type
was IOMMU_FAULT_PAGE_REQ. The returned value can be used by the drivers
to log the fault/event as needed.

Reported-by: Kunkun Jiang <jiangkunkun@huawei.com>
Closes: https://lore.kernel.org/all/6147caf0-b9a0-30ca-795e-a1aa502a5c51@huawei.com/
Fixes: 3dfa64aecbaf ("iommu: Make iommu_report_device_fault() return void")
Signed-off-by: Pranjal Shrivastava <praan@google.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c |   2 +-
 drivers/iommu/io-pgfault.c                  | 121 ++++++++++++++------
 include/linux/iommu.h                       |   5 +-
 3 files changed, 87 insertions(+), 41 deletions(-)

diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 9bc50bded5af..8a6cd0adfcf2 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -1777,7 +1777,7 @@ static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt)
 		goto out_unlock;
 	}
 
-	iommu_report_device_fault(master->dev, &fault_evt);
+	ret = iommu_report_device_fault(master->dev, &fault_evt);
 out_unlock:
 	mutex_unlock(&smmu->streams_mutex);
 	return ret;
diff --git a/drivers/iommu/io-pgfault.c b/drivers/iommu/io-pgfault.c
index 81e9cc6e3164..540ce80228fe 100644
--- a/drivers/iommu/io-pgfault.c
+++ b/drivers/iommu/io-pgfault.c
@@ -115,6 +115,59 @@ static struct iopf_group *iopf_group_alloc(struct iommu_fault_param *iopf_param,
 	return group;
 }
 
+static struct iommu_attach_handle *find_fault_handler(struct device *dev,
+						     struct iopf_fault *evt)
+{
+	struct iommu_fault *fault = &evt->fault;
+	struct iommu_attach_handle *attach_handle;
+
+	if (fault->prm.flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID) {
+		attach_handle = iommu_attach_handle_get(dev->iommu_group,
+				fault->prm.pasid, 0);
+		if (IS_ERR(attach_handle)) {
+			const struct iommu_ops *ops = dev_iommu_ops(dev);
+
+			if (!ops->user_pasid_table)
+				return NULL;
+			/*
+			 * The iommu driver for this device supports user-
+			 * managed PASID table. Therefore page faults for
+			 * any PASID should go through the NESTING domain
+			 * attached to the device RID.
+			 */
+			attach_handle = iommu_attach_handle_get(
+					dev->iommu_group, IOMMU_NO_PASID,
+					IOMMU_DOMAIN_NESTED);
+			if (IS_ERR(attach_handle))
+				return NULL;
+		}
+	} else {
+		attach_handle = iommu_attach_handle_get(dev->iommu_group,
+				IOMMU_NO_PASID, 0);
+
+		if (IS_ERR(attach_handle))
+			return NULL;
+	}
+
+	if (!attach_handle->domain->iopf_handler)
+		return NULL;
+
+	return attach_handle;
+}
+
+static void iopf_error_response(struct device *dev, struct iopf_fault *evt)
+{
+	const struct iommu_ops *ops = dev_iommu_ops(dev);
+	struct iommu_fault *fault = &evt->fault;
+	struct iommu_page_response resp = {
+		.pasid = fault->prm.pasid,
+		.grpid = fault->prm.grpid,
+		.code = IOMMU_PAGE_RESP_INVALID
+	};
+
+	ops->page_response(dev, evt, &resp);
+}
+
 /**
  * iommu_report_device_fault() - Report fault event to device driver
  * @dev: the device
@@ -155,22 +208,37 @@ static struct iopf_group *iopf_group_alloc(struct iommu_fault_param *iopf_param,
  * hardware has been set to block the page faults) and the pending page faults
  * have been flushed.
  */
-void iommu_report_device_fault(struct device *dev, struct iopf_fault *evt)
+int iommu_report_device_fault(struct device *dev, struct iopf_fault *evt)
 {
+	struct iommu_attach_handle *attach_handle;
 	struct iommu_fault *fault = &evt->fault;
 	struct iommu_fault_param *iopf_param;
 	struct iopf_group abort_group = {};
 	struct iopf_group *group;
+	int ret = 0;
 
+	attach_handle = find_fault_handler(dev, evt);
+	if (!attach_handle) {
+		ret = -EINVAL;
+		goto err_bad_iopf;
+	}
+
+	/*
+	 * Something has gone wrong if a fault capable domain is attached but no
+	 * iopf_param is setup
+	 */
 	iopf_param = iopf_get_dev_fault_param(dev);
-	if (WARN_ON(!iopf_param))
-		return;
+	if (WARN_ON(!iopf_param)) {
+		ret = -EINVAL;
+		goto err_bad_iopf;
+	}
 
 	if (!(fault->prm.flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE)) {
-		report_partial_fault(iopf_param, fault);
+		ret = report_partial_fault(iopf_param, fault);
 		iopf_put_dev_fault_param(iopf_param);
 		/* A request that is not the last does not need to be ack'd */
-		return;
+
+		return ret;
 	}
 
 	/*
@@ -185,38 +253,7 @@ void iommu_report_device_fault(struct device *dev, struct iopf_fault *evt)
 	if (group == &abort_group)
 		goto err_abort;
 
-	if (fault->prm.flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID) {
-		group->attach_handle = iommu_attach_handle_get(dev->iommu_group,
-							       fault->prm.pasid,
-							       0);
-		if (IS_ERR(group->attach_handle)) {
-			const struct iommu_ops *ops = dev_iommu_ops(dev);
-
-			if (!ops->user_pasid_table)
-				goto err_abort;
-
-			/*
-			 * The iommu driver for this device supports user-
-			 * managed PASID table. Therefore page faults for
-			 * any PASID should go through the NESTING domain
-			 * attached to the device RID.
-			 */
-			group->attach_handle =
-				iommu_attach_handle_get(dev->iommu_group,
-							IOMMU_NO_PASID,
-							IOMMU_DOMAIN_NESTED);
-			if (IS_ERR(group->attach_handle))
-				goto err_abort;
-		}
-	} else {
-		group->attach_handle =
-			iommu_attach_handle_get(dev->iommu_group, IOMMU_NO_PASID, 0);
-		if (IS_ERR(group->attach_handle))
-			goto err_abort;
-	}
-
-	if (!group->attach_handle->domain->iopf_handler)
-		goto err_abort;
+	group->attach_handle = attach_handle;
 
 	/*
 	 * On success iopf_handler must call iopf_group_response() and
@@ -225,7 +262,7 @@ void iommu_report_device_fault(struct device *dev, struct iopf_fault *evt)
 	if (group->attach_handle->domain->iopf_handler(group))
 		goto err_abort;
 
-	return;
+	return 0;
 
 err_abort:
 	dev_warn_ratelimited(dev, "iopf with pasid %d aborted\n",
@@ -235,6 +272,14 @@ void iommu_report_device_fault(struct device *dev, struct iopf_fault *evt)
 		__iopf_free_group(group);
 	else
 		iopf_free_group(group);
+
+	return 0;
+
+err_bad_iopf:
+	if (fault->type == IOMMU_FAULT_PAGE_REQ)
+		iopf_error_response(dev, evt);
+
+	return ret;
 }
 EXPORT_SYMBOL_GPL(iommu_report_device_fault);
 
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 04cbdae0052e..bd722f473635 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -1563,7 +1563,7 @@ struct iopf_queue *iopf_queue_alloc(const char *name);
 void iopf_queue_free(struct iopf_queue *queue);
 int iopf_queue_discard_partial(struct iopf_queue *queue);
 void iopf_free_group(struct iopf_group *group);
-void iommu_report_device_fault(struct device *dev, struct iopf_fault *evt);
+int iommu_report_device_fault(struct device *dev, struct iopf_fault *evt);
 void iopf_group_response(struct iopf_group *group,
 			 enum iommu_page_response_code status);
 #else
@@ -1601,9 +1601,10 @@ static inline void iopf_free_group(struct iopf_group *group)
 {
 }
 
-static inline void
+static inline int
 iommu_report_device_fault(struct device *dev, struct iopf_fault *evt)
 {
+	return -ENODEV;
 }
 
 static inline void iopf_group_response(struct iopf_group *group,
-- 
2.46.0.184.g6999bdac58-goog


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH v2] iommu: Handle iommu faults for a bad iopf setup
  2024-08-15 12:46 [PATCH v2] iommu: Handle iommu faults for a bad iopf setup Pranjal Shrivastava
@ 2024-08-15 13:03 ` Jason Gunthorpe
  2024-08-15 14:03 ` Robin Murphy
  1 sibling, 0 replies; 4+ messages in thread
From: Jason Gunthorpe @ 2024-08-15 13:03 UTC (permalink / raw)
  To: Pranjal Shrivastava
  Cc: Joerg Roedel, Lu Baolu, Will Deacon, Robin Murphy, Mostafa Saleh,
	iommu, Kunkun Jiang

On Thu, Aug 15, 2024 at 12:46:02PM +0000, Pranjal Shrivastava wrote:
> The iommu_report_device_fault function was updated to return void while
> assuming that drivers only need to call iommu_report_device_fault() for
> reporting an iopf. This implementation causes following problems:
> 
> 1. The drivers rely on the core code to call it's page_reponse,
>    however, when a fault is received and no fault capable domain is
>    attached / iopf_param is NULL, the ops->page_response is NOT called
>    causing the device to stall in case the fault type was PAGE_REQ.
> 
> 2. The arm_smmu_v3 driver relies on the returned value to log errors
>    returning void from iommu_report_device_fault causes these events to
>    be missed while logging.
> 
> Modify the iommu_report_device_fault function to return -EINVAL for
> cases where no fault capable domain is attached or iopf_param was NULL
> and calls back to the driver (ops->page_response) in case the fault type
> was IOMMU_FAULT_PAGE_REQ. The returned value can be used by the drivers
> to log the fault/event as needed.
> 
> Reported-by: Kunkun Jiang <jiangkunkun@huawei.com>
> Closes: https://lore.kernel.org/all/6147caf0-b9a0-30ca-795e-a1aa502a5c51@huawei.com/
> Fixes: 3dfa64aecbaf ("iommu: Make iommu_report_device_fault() return void")
> Signed-off-by: Pranjal Shrivastava <praan@google.com>
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>

Pedantically these two Signed-off's should be swapped (I wouldn't
resend for that)

> ---
>  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c |   2 +-
>  drivers/iommu/io-pgfault.c                  | 121 ++++++++++++++------
>  include/linux/iommu.h                       |   5 +-
>  3 files changed, 87 insertions(+), 41 deletions(-)

Looks OK to me

Joerg, this would beed to be for-rc

Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>

Thanks,
Jason

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v2] iommu: Handle iommu faults for a bad iopf setup
  2024-08-15 12:46 [PATCH v2] iommu: Handle iommu faults for a bad iopf setup Pranjal Shrivastava
  2024-08-15 13:03 ` Jason Gunthorpe
@ 2024-08-15 14:03 ` Robin Murphy
  2024-08-15 18:14   ` Pranjal Shrivastava
  1 sibling, 1 reply; 4+ messages in thread
From: Robin Murphy @ 2024-08-15 14:03 UTC (permalink / raw)
  To: Pranjal Shrivastava, Joerg Roedel, Lu Baolu, Will Deacon
  Cc: Mostafa Saleh, iommu, Kunkun Jiang, Jason Gunthorpe

On 15/08/2024 1:46 pm, Pranjal Shrivastava wrote:
[...]> @@ -155,22 +208,37 @@ static struct iopf_group 
*iopf_group_alloc(struct iommu_fault_param *iopf_param,
>    * hardware has been set to block the page faults) and the pending page faults
>    * have been flushed.
>    */
> -void iommu_report_device_fault(struct device *dev, struct iopf_fault *evt)
> +int iommu_report_device_fault(struct device *dev, struct iopf_fault *evt)
>   {
> +	struct iommu_attach_handle *attach_handle;
>   	struct iommu_fault *fault = &evt->fault;
>   	struct iommu_fault_param *iopf_param;
>   	struct iopf_group abort_group = {};
>   	struct iopf_group *group;
> +	int ret = 0;
>   
> +	attach_handle = find_fault_handler(dev, evt);
> +	if (!attach_handle) {
> +		ret = -EINVAL;
> +		goto err_bad_iopf;
> +	}
> +
> +	/*
> +	 * Something has gone wrong if a fault capable domain is attached but no
> +	 * iopf_param is setup
> +	 */
>   	iopf_param = iopf_get_dev_fault_param(dev);
> -	if (WARN_ON(!iopf_param))
> -		return;
> +	if (WARN_ON(!iopf_param)) {
> +		ret = -EINVAL;
> +		goto err_bad_iopf;

Nit: the err_bad_iopf path is only ever returning -EINVAL, so you could 
simply hard-code that there...

> +	}
>   
>   	if (!(fault->prm.flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE)) {
> -		report_partial_fault(iopf_param, fault);
> +		ret = report_partial_fault(iopf_param, fault);

...and declare "ret" here in the one scope it's actually needed.

Thanks,
Robin.

>   		iopf_put_dev_fault_param(iopf_param);
>   		/* A request that is not the last does not need to be ack'd */
> -		return;
> +
> +		return ret;
>   	}
>   
>   	/*

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v2] iommu: Handle iommu faults for a bad iopf setup
  2024-08-15 14:03 ` Robin Murphy
@ 2024-08-15 18:14   ` Pranjal Shrivastava
  0 siblings, 0 replies; 4+ messages in thread
From: Pranjal Shrivastava @ 2024-08-15 18:14 UTC (permalink / raw)
  To: Robin Murphy
  Cc: Joerg Roedel, Lu Baolu, Will Deacon, Mostafa Saleh, iommu,
	Kunkun Jiang, Jason Gunthorpe

On Thu, Aug 15, 2024 at 03:03:07PM +0100, Robin Murphy wrote:
> On 15/08/2024 1:46 pm, Pranjal Shrivastava wrote:
> [...]> @@ -155,22 +208,37 @@ static struct iopf_group
> *iopf_group_alloc(struct iommu_fault_param *iopf_param,
> >    * hardware has been set to block the page faults) and the pending page faults
> >    * have been flushed.
> >    */
> > -void iommu_report_device_fault(struct device *dev, struct iopf_fault *evt)
> > +int iommu_report_device_fault(struct device *dev, struct iopf_fault *evt)
> >   {
> > +	struct iommu_attach_handle *attach_handle;
> >   	struct iommu_fault *fault = &evt->fault;
> >   	struct iommu_fault_param *iopf_param;
> >   	struct iopf_group abort_group = {};
> >   	struct iopf_group *group;
> > +	int ret = 0;
> > +	attach_handle = find_fault_handler(dev, evt);
> > +	if (!attach_handle) {
> > +		ret = -EINVAL;
> > +		goto err_bad_iopf;
> > +	}
> > +
> > +	/*
> > +	 * Something has gone wrong if a fault capable domain is attached but no
> > +	 * iopf_param is setup
> > +	 */
> >   	iopf_param = iopf_get_dev_fault_param(dev);
> > -	if (WARN_ON(!iopf_param))
> > -		return;
> > +	if (WARN_ON(!iopf_param)) {
> > +		ret = -EINVAL;
> > +		goto err_bad_iopf;
> 
> Nit: the err_bad_iopf path is only ever returning -EINVAL, so you could
> simply hard-code that there...
> 
> > +	}
> >   	if (!(fault->prm.flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE)) {
> > -		report_partial_fault(iopf_param, fault);
> > +		ret = report_partial_fault(iopf_param, fault);
> 
> ...and declare "ret" here in the one scope it's actually needed.

Sure, will refactor this in the next version. Let me also swap the
signed-off-bys and prefix with rc as suggested by Jason in v3.

> 
> Thanks,
> Robin.
> 
> >   		iopf_put_dev_fault_param(iopf_param);
> >   		/* A request that is not the last does not need to be ack'd */
> > -		return;
> > +
> > +		return ret;
> >   	}
> >   	/*

Thanks,
Pranjal

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2024-08-15 18:14 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-08-15 12:46 [PATCH v2] iommu: Handle iommu faults for a bad iopf setup Pranjal Shrivastava
2024-08-15 13:03 ` Jason Gunthorpe
2024-08-15 14:03 ` Robin Murphy
2024-08-15 18:14   ` Pranjal Shrivastava

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.