* [PATCH v4 01/10] iommu: Move iommu fault data to linux/iommu.h
2023-08-25 2:30 [PATCH v4 00/10] iommu: Prepare to deliver page faults to user space Lu Baolu
@ 2023-08-25 2:30 ` Lu Baolu
2023-08-25 7:52 ` Tian, Kevin
2023-08-25 2:30 ` [PATCH v4 02/10] iommu/arm-smmu-v3: Remove unrecoverable faults reporting Lu Baolu
` (8 subsequent siblings)
9 siblings, 1 reply; 48+ messages in thread
From: Lu Baolu @ 2023-08-25 2:30 UTC (permalink / raw)
To: Joerg Roedel, Will Deacon, Robin Murphy, Jason Gunthorpe,
Kevin Tian, Jean-Philippe Brucker, Nicolin Chen
Cc: Yi Liu, Jacob Pan, iommu, kvm, linux-kernel, Lu Baolu,
Jason Gunthorpe
The iommu fault data is currently defined in uapi/linux/iommu.h, but is
only used inside the iommu subsystem. Move it to linux/iommu.h, where it
will be more accessible to kernel drivers.
With this done, uapi/linux/iommu.h becomes empty and can be removed from
the tree.
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
---
include/linux/iommu.h | 152 +++++++++++++++++++++++++++++++++-
include/uapi/linux/iommu.h | 161 -------------------------------------
MAINTAINERS | 1 -
3 files changed, 151 insertions(+), 163 deletions(-)
delete mode 100644 include/uapi/linux/iommu.h
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 389fffc0b3a2..18103045bf44 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -13,7 +13,6 @@
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/of.h>
-#include <uapi/linux/iommu.h>
#define IOMMU_READ (1 << 0)
#define IOMMU_WRITE (1 << 1)
@@ -42,6 +41,157 @@ struct iommu_sva;
struct iommu_fault_event;
struct iommu_dma_cookie;
+#define IOMMU_FAULT_PERM_READ (1 << 0) /* read */
+#define IOMMU_FAULT_PERM_WRITE (1 << 1) /* write */
+#define IOMMU_FAULT_PERM_EXEC (1 << 2) /* exec */
+#define IOMMU_FAULT_PERM_PRIV (1 << 3) /* privileged */
+
+/* Generic fault types, can be expanded IRQ remapping fault */
+enum iommu_fault_type {
+ IOMMU_FAULT_DMA_UNRECOV = 1, /* unrecoverable fault */
+ IOMMU_FAULT_PAGE_REQ, /* page request fault */
+};
+
+enum iommu_fault_reason {
+ IOMMU_FAULT_REASON_UNKNOWN = 0,
+
+ /* Could not access the PASID table (fetch caused external abort) */
+ IOMMU_FAULT_REASON_PASID_FETCH,
+
+ /* PASID entry is invalid or has configuration errors */
+ IOMMU_FAULT_REASON_BAD_PASID_ENTRY,
+
+ /*
+ * PASID is out of range (e.g. exceeds the maximum PASID
+ * supported by the IOMMU) or disabled.
+ */
+ IOMMU_FAULT_REASON_PASID_INVALID,
+
+ /*
+ * An external abort occurred fetching (or updating) a translation
+ * table descriptor
+ */
+ IOMMU_FAULT_REASON_WALK_EABT,
+
+ /*
+ * Could not access the page table entry (Bad address),
+ * actual translation fault
+ */
+ IOMMU_FAULT_REASON_PTE_FETCH,
+
+ /* Protection flag check failed */
+ IOMMU_FAULT_REASON_PERMISSION,
+
+ /* access flag check failed */
+ IOMMU_FAULT_REASON_ACCESS,
+
+ /* Output address of a translation stage caused Address Size fault */
+ IOMMU_FAULT_REASON_OOR_ADDRESS,
+};
+
+/**
+ * struct iommu_fault_unrecoverable - Unrecoverable fault data
+ * @reason: reason of the fault, from &enum iommu_fault_reason
+ * @flags: parameters of this fault (IOMMU_FAULT_UNRECOV_* values)
+ * @pasid: Process Address Space ID
+ * @perm: requested permission access using by the incoming transaction
+ * (IOMMU_FAULT_PERM_* values)
+ * @addr: offending page address
+ * @fetch_addr: address that caused a fetch abort, if any
+ */
+struct iommu_fault_unrecoverable {
+ __u32 reason;
+#define IOMMU_FAULT_UNRECOV_PASID_VALID (1 << 0)
+#define IOMMU_FAULT_UNRECOV_ADDR_VALID (1 << 1)
+#define IOMMU_FAULT_UNRECOV_FETCH_ADDR_VALID (1 << 2)
+ __u32 flags;
+ __u32 pasid;
+ __u32 perm;
+ __u64 addr;
+ __u64 fetch_addr;
+};
+
+/**
+ * struct iommu_fault_page_request - Page Request data
+ * @flags: encodes whether the corresponding fields are valid and whether this
+ * is the last page in group (IOMMU_FAULT_PAGE_REQUEST_* values).
+ * When IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID is set, the page response
+ * must have the same PASID value as the page request. When it is clear,
+ * the page response should not have a PASID.
+ * @pasid: Process Address Space ID
+ * @grpid: Page Request Group Index
+ * @perm: requested page permissions (IOMMU_FAULT_PERM_* values)
+ * @addr: page address
+ * @private_data: device-specific private information
+ */
+struct iommu_fault_page_request {
+#define IOMMU_FAULT_PAGE_REQUEST_PASID_VALID (1 << 0)
+#define IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE (1 << 1)
+#define IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA (1 << 2)
+#define IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID (1 << 3)
+ __u32 flags;
+ __u32 pasid;
+ __u32 grpid;
+ __u32 perm;
+ __u64 addr;
+ __u64 private_data[2];
+};
+
+/**
+ * struct iommu_fault - Generic fault data
+ * @type: fault type from &enum iommu_fault_type
+ * @padding: reserved for future use (should be zero)
+ * @event: fault event, when @type is %IOMMU_FAULT_DMA_UNRECOV
+ * @prm: Page Request message, when @type is %IOMMU_FAULT_PAGE_REQ
+ * @padding2: sets the fault size to allow for future extensions
+ */
+struct iommu_fault {
+ __u32 type;
+ __u32 padding;
+ union {
+ struct iommu_fault_unrecoverable event;
+ struct iommu_fault_page_request prm;
+ __u8 padding2[56];
+ };
+};
+
+/**
+ * enum iommu_page_response_code - Return status of fault handlers
+ * @IOMMU_PAGE_RESP_SUCCESS: Fault has been handled and the page tables
+ * populated, retry the access. This is "Success" in PCI PRI.
+ * @IOMMU_PAGE_RESP_FAILURE: General error. Drop all subsequent faults from
+ * this device if possible. This is "Response Failure" in PCI PRI.
+ * @IOMMU_PAGE_RESP_INVALID: Could not handle this fault, don't retry the
+ * access. This is "Invalid Request" in PCI PRI.
+ */
+enum iommu_page_response_code {
+ IOMMU_PAGE_RESP_SUCCESS = 0,
+ IOMMU_PAGE_RESP_INVALID,
+ IOMMU_PAGE_RESP_FAILURE,
+};
+
+/**
+ * struct iommu_page_response - Generic page response information
+ * @argsz: User filled size of this data
+ * @version: API version of this structure
+ * @flags: encodes whether the corresponding fields are valid
+ * (IOMMU_FAULT_PAGE_RESPONSE_* values)
+ * @pasid: Process Address Space ID
+ * @grpid: Page Request Group Index
+ * @code: response code from &enum iommu_page_response_code
+ */
+struct iommu_page_response {
+ __u32 argsz;
+#define IOMMU_PAGE_RESP_VERSION_1 1
+ __u32 version;
+#define IOMMU_PAGE_RESP_PASID_VALID (1 << 0)
+ __u32 flags;
+ __u32 pasid;
+ __u32 grpid;
+ __u32 code;
+};
+
+
/* iommu fault flags */
#define IOMMU_FAULT_READ 0x0
#define IOMMU_FAULT_WRITE 0x1
diff --git a/include/uapi/linux/iommu.h b/include/uapi/linux/iommu.h
deleted file mode 100644
index 65d8b0234f69..000000000000
--- a/include/uapi/linux/iommu.h
+++ /dev/null
@@ -1,161 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/*
- * IOMMU user API definitions
- */
-
-#ifndef _UAPI_IOMMU_H
-#define _UAPI_IOMMU_H
-
-#include <linux/types.h>
-
-#define IOMMU_FAULT_PERM_READ (1 << 0) /* read */
-#define IOMMU_FAULT_PERM_WRITE (1 << 1) /* write */
-#define IOMMU_FAULT_PERM_EXEC (1 << 2) /* exec */
-#define IOMMU_FAULT_PERM_PRIV (1 << 3) /* privileged */
-
-/* Generic fault types, can be expanded IRQ remapping fault */
-enum iommu_fault_type {
- IOMMU_FAULT_DMA_UNRECOV = 1, /* unrecoverable fault */
- IOMMU_FAULT_PAGE_REQ, /* page request fault */
-};
-
-enum iommu_fault_reason {
- IOMMU_FAULT_REASON_UNKNOWN = 0,
-
- /* Could not access the PASID table (fetch caused external abort) */
- IOMMU_FAULT_REASON_PASID_FETCH,
-
- /* PASID entry is invalid or has configuration errors */
- IOMMU_FAULT_REASON_BAD_PASID_ENTRY,
-
- /*
- * PASID is out of range (e.g. exceeds the maximum PASID
- * supported by the IOMMU) or disabled.
- */
- IOMMU_FAULT_REASON_PASID_INVALID,
-
- /*
- * An external abort occurred fetching (or updating) a translation
- * table descriptor
- */
- IOMMU_FAULT_REASON_WALK_EABT,
-
- /*
- * Could not access the page table entry (Bad address),
- * actual translation fault
- */
- IOMMU_FAULT_REASON_PTE_FETCH,
-
- /* Protection flag check failed */
- IOMMU_FAULT_REASON_PERMISSION,
-
- /* access flag check failed */
- IOMMU_FAULT_REASON_ACCESS,
-
- /* Output address of a translation stage caused Address Size fault */
- IOMMU_FAULT_REASON_OOR_ADDRESS,
-};
-
-/**
- * struct iommu_fault_unrecoverable - Unrecoverable fault data
- * @reason: reason of the fault, from &enum iommu_fault_reason
- * @flags: parameters of this fault (IOMMU_FAULT_UNRECOV_* values)
- * @pasid: Process Address Space ID
- * @perm: requested permission access using by the incoming transaction
- * (IOMMU_FAULT_PERM_* values)
- * @addr: offending page address
- * @fetch_addr: address that caused a fetch abort, if any
- */
-struct iommu_fault_unrecoverable {
- __u32 reason;
-#define IOMMU_FAULT_UNRECOV_PASID_VALID (1 << 0)
-#define IOMMU_FAULT_UNRECOV_ADDR_VALID (1 << 1)
-#define IOMMU_FAULT_UNRECOV_FETCH_ADDR_VALID (1 << 2)
- __u32 flags;
- __u32 pasid;
- __u32 perm;
- __u64 addr;
- __u64 fetch_addr;
-};
-
-/**
- * struct iommu_fault_page_request - Page Request data
- * @flags: encodes whether the corresponding fields are valid and whether this
- * is the last page in group (IOMMU_FAULT_PAGE_REQUEST_* values).
- * When IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID is set, the page response
- * must have the same PASID value as the page request. When it is clear,
- * the page response should not have a PASID.
- * @pasid: Process Address Space ID
- * @grpid: Page Request Group Index
- * @perm: requested page permissions (IOMMU_FAULT_PERM_* values)
- * @addr: page address
- * @private_data: device-specific private information
- */
-struct iommu_fault_page_request {
-#define IOMMU_FAULT_PAGE_REQUEST_PASID_VALID (1 << 0)
-#define IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE (1 << 1)
-#define IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA (1 << 2)
-#define IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID (1 << 3)
- __u32 flags;
- __u32 pasid;
- __u32 grpid;
- __u32 perm;
- __u64 addr;
- __u64 private_data[2];
-};
-
-/**
- * struct iommu_fault - Generic fault data
- * @type: fault type from &enum iommu_fault_type
- * @padding: reserved for future use (should be zero)
- * @event: fault event, when @type is %IOMMU_FAULT_DMA_UNRECOV
- * @prm: Page Request message, when @type is %IOMMU_FAULT_PAGE_REQ
- * @padding2: sets the fault size to allow for future extensions
- */
-struct iommu_fault {
- __u32 type;
- __u32 padding;
- union {
- struct iommu_fault_unrecoverable event;
- struct iommu_fault_page_request prm;
- __u8 padding2[56];
- };
-};
-
-/**
- * enum iommu_page_response_code - Return status of fault handlers
- * @IOMMU_PAGE_RESP_SUCCESS: Fault has been handled and the page tables
- * populated, retry the access. This is "Success" in PCI PRI.
- * @IOMMU_PAGE_RESP_FAILURE: General error. Drop all subsequent faults from
- * this device if possible. This is "Response Failure" in PCI PRI.
- * @IOMMU_PAGE_RESP_INVALID: Could not handle this fault, don't retry the
- * access. This is "Invalid Request" in PCI PRI.
- */
-enum iommu_page_response_code {
- IOMMU_PAGE_RESP_SUCCESS = 0,
- IOMMU_PAGE_RESP_INVALID,
- IOMMU_PAGE_RESP_FAILURE,
-};
-
-/**
- * struct iommu_page_response - Generic page response information
- * @argsz: User filled size of this data
- * @version: API version of this structure
- * @flags: encodes whether the corresponding fields are valid
- * (IOMMU_FAULT_PAGE_RESPONSE_* values)
- * @pasid: Process Address Space ID
- * @grpid: Page Request Group Index
- * @code: response code from &enum iommu_page_response_code
- */
-struct iommu_page_response {
- __u32 argsz;
-#define IOMMU_PAGE_RESP_VERSION_1 1
- __u32 version;
-#define IOMMU_PAGE_RESP_PASID_VALID (1 << 0)
- __u32 flags;
- __u32 pasid;
- __u32 grpid;
- __u32 code;
-};
-
-#endif /* _UAPI_IOMMU_H */
diff --git a/MAINTAINERS b/MAINTAINERS
index 416e0e7599d8..09b67766523a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10889,7 +10889,6 @@ F: drivers/iommu/
F: include/linux/iommu.h
F: include/linux/iova.h
F: include/linux/of_iommu.h
-F: include/uapi/linux/iommu.h
IOMMUFD
M: Jason Gunthorpe <jgg@nvidia.com>
--
2.34.1
^ permalink raw reply related [flat|nested] 48+ messages in thread* RE: [PATCH v4 01/10] iommu: Move iommu fault data to linux/iommu.h
2023-08-25 2:30 ` [PATCH v4 01/10] iommu: Move iommu fault data to linux/iommu.h Lu Baolu
@ 2023-08-25 7:52 ` Tian, Kevin
0 siblings, 0 replies; 48+ messages in thread
From: Tian, Kevin @ 2023-08-25 7:52 UTC (permalink / raw)
To: Lu Baolu, Joerg Roedel, Will Deacon, Robin Murphy,
Jason Gunthorpe, Jean-Philippe Brucker, Nicolin Chen
Cc: Liu, Yi L, Jacob Pan, iommu@lists.linux.dev, kvm@vger.kernel.org,
linux-kernel@vger.kernel.org, Jason Gunthorpe
> From: Lu Baolu <baolu.lu@linux.intel.com>
> Sent: Friday, August 25, 2023 10:30 AM
>
> The iommu fault data is currently defined in uapi/linux/iommu.h, but is
> only used inside the iommu subsystem. Move it to linux/iommu.h, where it
> will be more accessible to kernel drivers.
>
> With this done, uapi/linux/iommu.h becomes empty and can be removed
> from
> the tree.
>
> Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
> Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
^ permalink raw reply [flat|nested] 48+ messages in thread
* [PATCH v4 02/10] iommu/arm-smmu-v3: Remove unrecoverable faults reporting
2023-08-25 2:30 [PATCH v4 00/10] iommu: Prepare to deliver page faults to user space Lu Baolu
2023-08-25 2:30 ` [PATCH v4 01/10] iommu: Move iommu fault data to linux/iommu.h Lu Baolu
@ 2023-08-25 2:30 ` Lu Baolu
2023-08-25 7:53 ` Tian, Kevin
2023-08-25 2:30 ` [PATCH v4 03/10] iommu: Remove unrecoverable fault data Lu Baolu
` (7 subsequent siblings)
9 siblings, 1 reply; 48+ messages in thread
From: Lu Baolu @ 2023-08-25 2:30 UTC (permalink / raw)
To: Joerg Roedel, Will Deacon, Robin Murphy, Jason Gunthorpe,
Kevin Tian, Jean-Philippe Brucker, Nicolin Chen
Cc: Yi Liu, Jacob Pan, iommu, kvm, linux-kernel, Lu Baolu
No device driver registers a fault handler to handle the reported
unrecoverable faults. Remove it to avoid dead code.
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
---
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 46 ++++++---------------
1 file changed, 13 insertions(+), 33 deletions(-)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index e82bf1c449a3..90b281eda73f 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -1469,7 +1469,6 @@ arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid)
static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt)
{
int ret;
- u32 reason;
u32 perm = 0;
struct arm_smmu_master *master;
bool ssid_valid = evt[0] & EVTQ_0_SSV;
@@ -1479,16 +1478,9 @@ static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt)
switch (FIELD_GET(EVTQ_0_ID, evt[0])) {
case EVT_ID_TRANSLATION_FAULT:
- reason = IOMMU_FAULT_REASON_PTE_FETCH;
- break;
case EVT_ID_ADDR_SIZE_FAULT:
- reason = IOMMU_FAULT_REASON_OOR_ADDRESS;
- break;
case EVT_ID_ACCESS_FAULT:
- reason = IOMMU_FAULT_REASON_ACCESS;
- break;
case EVT_ID_PERMISSION_FAULT:
- reason = IOMMU_FAULT_REASON_PERMISSION;
break;
default:
return -EOPNOTSUPP;
@@ -1498,6 +1490,9 @@ static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt)
if (evt[1] & EVTQ_1_S2)
return -EFAULT;
+ if (!(evt[1] & EVTQ_1_STALL))
+ return -EOPNOTSUPP;
+
if (evt[1] & EVTQ_1_RnW)
perm |= IOMMU_FAULT_PERM_READ;
else
@@ -1509,32 +1504,17 @@ static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt)
if (evt[1] & EVTQ_1_PnU)
perm |= IOMMU_FAULT_PERM_PRIV;
- if (evt[1] & EVTQ_1_STALL) {
- flt->type = IOMMU_FAULT_PAGE_REQ;
- flt->prm = (struct iommu_fault_page_request) {
- .flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE,
- .grpid = FIELD_GET(EVTQ_1_STAG, evt[1]),
- .perm = perm,
- .addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
- };
+ flt->type = IOMMU_FAULT_PAGE_REQ;
+ flt->prm = (struct iommu_fault_page_request) {
+ .flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE,
+ .grpid = FIELD_GET(EVTQ_1_STAG, evt[1]),
+ .perm = perm,
+ .addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
+ };
- if (ssid_valid) {
- flt->prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
- flt->prm.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
- }
- } else {
- flt->type = IOMMU_FAULT_DMA_UNRECOV;
- flt->event = (struct iommu_fault_unrecoverable) {
- .reason = reason,
- .flags = IOMMU_FAULT_UNRECOV_ADDR_VALID,
- .perm = perm,
- .addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
- };
-
- if (ssid_valid) {
- flt->event.flags |= IOMMU_FAULT_UNRECOV_PASID_VALID;
- flt->event.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
- }
+ if (ssid_valid) {
+ flt->prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
+ flt->prm.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
}
mutex_lock(&smmu->streams_mutex);
--
2.34.1
^ permalink raw reply related [flat|nested] 48+ messages in thread* RE: [PATCH v4 02/10] iommu/arm-smmu-v3: Remove unrecoverable faults reporting
2023-08-25 2:30 ` [PATCH v4 02/10] iommu/arm-smmu-v3: Remove unrecoverable faults reporting Lu Baolu
@ 2023-08-25 7:53 ` Tian, Kevin
0 siblings, 0 replies; 48+ messages in thread
From: Tian, Kevin @ 2023-08-25 7:53 UTC (permalink / raw)
To: Lu Baolu, Joerg Roedel, Will Deacon, Robin Murphy,
Jason Gunthorpe, Jean-Philippe Brucker, Nicolin Chen
Cc: Liu, Yi L, Jacob Pan, iommu@lists.linux.dev, kvm@vger.kernel.org,
linux-kernel@vger.kernel.org
> From: Lu Baolu <baolu.lu@linux.intel.com>
> Sent: Friday, August 25, 2023 10:30 AM
>
> No device driver registers a fault handler to handle the reported
> unrecoverable faults. Remove it to avoid dead code.
>
> Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
^ permalink raw reply [flat|nested] 48+ messages in thread
* [PATCH v4 03/10] iommu: Remove unrecoverable fault data
2023-08-25 2:30 [PATCH v4 00/10] iommu: Prepare to deliver page faults to user space Lu Baolu
2023-08-25 2:30 ` [PATCH v4 01/10] iommu: Move iommu fault data to linux/iommu.h Lu Baolu
2023-08-25 2:30 ` [PATCH v4 02/10] iommu/arm-smmu-v3: Remove unrecoverable faults reporting Lu Baolu
@ 2023-08-25 2:30 ` Lu Baolu
2023-08-25 7:53 ` Tian, Kevin
2023-08-25 2:30 ` [PATCH v4 04/10] iommu: Cleanup iopf data structure definitions Lu Baolu
` (6 subsequent siblings)
9 siblings, 1 reply; 48+ messages in thread
From: Lu Baolu @ 2023-08-25 2:30 UTC (permalink / raw)
To: Joerg Roedel, Will Deacon, Robin Murphy, Jason Gunthorpe,
Kevin Tian, Jean-Philippe Brucker, Nicolin Chen
Cc: Yi Liu, Jacob Pan, iommu, kvm, linux-kernel, Lu Baolu,
Jason Gunthorpe
The unrecoverable fault data is not used anywhere. Remove it to avoid
dead code.
Suggested-by: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
---
include/linux/iommu.h | 70 +------------------------------------------
1 file changed, 1 insertion(+), 69 deletions(-)
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 18103045bf44..1c8f33d0a4c1 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -48,69 +48,9 @@ struct iommu_dma_cookie;
/* Generic fault types, can be expanded IRQ remapping fault */
enum iommu_fault_type {
- IOMMU_FAULT_DMA_UNRECOV = 1, /* unrecoverable fault */
IOMMU_FAULT_PAGE_REQ, /* page request fault */
};
-enum iommu_fault_reason {
- IOMMU_FAULT_REASON_UNKNOWN = 0,
-
- /* Could not access the PASID table (fetch caused external abort) */
- IOMMU_FAULT_REASON_PASID_FETCH,
-
- /* PASID entry is invalid or has configuration errors */
- IOMMU_FAULT_REASON_BAD_PASID_ENTRY,
-
- /*
- * PASID is out of range (e.g. exceeds the maximum PASID
- * supported by the IOMMU) or disabled.
- */
- IOMMU_FAULT_REASON_PASID_INVALID,
-
- /*
- * An external abort occurred fetching (or updating) a translation
- * table descriptor
- */
- IOMMU_FAULT_REASON_WALK_EABT,
-
- /*
- * Could not access the page table entry (Bad address),
- * actual translation fault
- */
- IOMMU_FAULT_REASON_PTE_FETCH,
-
- /* Protection flag check failed */
- IOMMU_FAULT_REASON_PERMISSION,
-
- /* access flag check failed */
- IOMMU_FAULT_REASON_ACCESS,
-
- /* Output address of a translation stage caused Address Size fault */
- IOMMU_FAULT_REASON_OOR_ADDRESS,
-};
-
-/**
- * struct iommu_fault_unrecoverable - Unrecoverable fault data
- * @reason: reason of the fault, from &enum iommu_fault_reason
- * @flags: parameters of this fault (IOMMU_FAULT_UNRECOV_* values)
- * @pasid: Process Address Space ID
- * @perm: requested permission access using by the incoming transaction
- * (IOMMU_FAULT_PERM_* values)
- * @addr: offending page address
- * @fetch_addr: address that caused a fetch abort, if any
- */
-struct iommu_fault_unrecoverable {
- __u32 reason;
-#define IOMMU_FAULT_UNRECOV_PASID_VALID (1 << 0)
-#define IOMMU_FAULT_UNRECOV_ADDR_VALID (1 << 1)
-#define IOMMU_FAULT_UNRECOV_FETCH_ADDR_VALID (1 << 2)
- __u32 flags;
- __u32 pasid;
- __u32 perm;
- __u64 addr;
- __u64 fetch_addr;
-};
-
/**
* struct iommu_fault_page_request - Page Request data
* @flags: encodes whether the corresponding fields are valid and whether this
@@ -140,19 +80,11 @@ struct iommu_fault_page_request {
/**
* struct iommu_fault - Generic fault data
* @type: fault type from &enum iommu_fault_type
- * @padding: reserved for future use (should be zero)
- * @event: fault event, when @type is %IOMMU_FAULT_DMA_UNRECOV
* @prm: Page Request message, when @type is %IOMMU_FAULT_PAGE_REQ
- * @padding2: sets the fault size to allow for future extensions
*/
struct iommu_fault {
__u32 type;
- __u32 padding;
- union {
- struct iommu_fault_unrecoverable event;
- struct iommu_fault_page_request prm;
- __u8 padding2[56];
- };
+ struct iommu_fault_page_request prm;
};
/**
--
2.34.1
^ permalink raw reply related [flat|nested] 48+ messages in thread* RE: [PATCH v4 03/10] iommu: Remove unrecoverable fault data
2023-08-25 2:30 ` [PATCH v4 03/10] iommu: Remove unrecoverable fault data Lu Baolu
@ 2023-08-25 7:53 ` Tian, Kevin
0 siblings, 0 replies; 48+ messages in thread
From: Tian, Kevin @ 2023-08-25 7:53 UTC (permalink / raw)
To: Lu Baolu, Joerg Roedel, Will Deacon, Robin Murphy,
Jason Gunthorpe, Jean-Philippe Brucker, Nicolin Chen
Cc: Liu, Yi L, Jacob Pan, iommu@lists.linux.dev, kvm@vger.kernel.org,
linux-kernel@vger.kernel.org, Jason Gunthorpe
> From: Lu Baolu <baolu.lu@linux.intel.com>
> Sent: Friday, August 25, 2023 10:30 AM
>
> The unrecoverable fault data is not used anywhere. Remove it to avoid
> dead code.
>
> Suggested-by: Kevin Tian <kevin.tian@intel.com>
> Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
> Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
^ permalink raw reply [flat|nested] 48+ messages in thread
* [PATCH v4 04/10] iommu: Cleanup iopf data structure definitions
2023-08-25 2:30 [PATCH v4 00/10] iommu: Prepare to deliver page faults to user space Lu Baolu
` (2 preceding siblings ...)
2023-08-25 2:30 ` [PATCH v4 03/10] iommu: Remove unrecoverable fault data Lu Baolu
@ 2023-08-25 2:30 ` Lu Baolu
2023-08-25 7:57 ` Tian, Kevin
2023-08-25 2:30 ` [PATCH v4 05/10] iommu: Merge iopf_device_param into iommu_fault_param Lu Baolu
` (5 subsequent siblings)
9 siblings, 1 reply; 48+ messages in thread
From: Lu Baolu @ 2023-08-25 2:30 UTC (permalink / raw)
To: Joerg Roedel, Will Deacon, Robin Murphy, Jason Gunthorpe,
Kevin Tian, Jean-Philippe Brucker, Nicolin Chen
Cc: Yi Liu, Jacob Pan, iommu, kvm, linux-kernel, Lu Baolu,
Jason Gunthorpe
struct iommu_fault_page_request and struct iommu_page_response are not
part of uAPI anymore. Convert them to data structures for kAPI.
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
---
include/linux/iommu.h | 27 +++++++++++----------------
drivers/iommu/io-pgfault.c | 1 -
drivers/iommu/iommu.c | 4 ----
3 files changed, 11 insertions(+), 21 deletions(-)
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 1c8f33d0a4c1..2dc8017d444c 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -69,12 +69,12 @@ struct iommu_fault_page_request {
#define IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE (1 << 1)
#define IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA (1 << 2)
#define IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID (1 << 3)
- __u32 flags;
- __u32 pasid;
- __u32 grpid;
- __u32 perm;
- __u64 addr;
- __u64 private_data[2];
+ u32 flags;
+ u32 pasid;
+ u32 grpid;
+ u32 perm;
+ u64 addr;
+ u64 private_data[2];
};
/**
@@ -83,7 +83,7 @@ struct iommu_fault_page_request {
* @prm: Page Request message, when @type is %IOMMU_FAULT_PAGE_REQ
*/
struct iommu_fault {
- __u32 type;
+ u32 type;
struct iommu_fault_page_request prm;
};
@@ -104,8 +104,6 @@ enum iommu_page_response_code {
/**
* struct iommu_page_response - Generic page response information
- * @argsz: User filled size of this data
- * @version: API version of this structure
* @flags: encodes whether the corresponding fields are valid
* (IOMMU_FAULT_PAGE_RESPONSE_* values)
* @pasid: Process Address Space ID
@@ -113,14 +111,11 @@ enum iommu_page_response_code {
* @code: response code from &enum iommu_page_response_code
*/
struct iommu_page_response {
- __u32 argsz;
-#define IOMMU_PAGE_RESP_VERSION_1 1
- __u32 version;
#define IOMMU_PAGE_RESP_PASID_VALID (1 << 0)
- __u32 flags;
- __u32 pasid;
- __u32 grpid;
- __u32 code;
+ u32 flags;
+ u32 pasid;
+ u32 grpid;
+ u32 code;
};
diff --git a/drivers/iommu/io-pgfault.c b/drivers/iommu/io-pgfault.c
index e5b8b9110c13..24b5545352ae 100644
--- a/drivers/iommu/io-pgfault.c
+++ b/drivers/iommu/io-pgfault.c
@@ -56,7 +56,6 @@ static int iopf_complete_group(struct device *dev, struct iopf_fault *iopf,
enum iommu_page_response_code status)
{
struct iommu_page_response resp = {
- .version = IOMMU_PAGE_RESP_VERSION_1,
.pasid = iopf->fault.prm.pasid,
.grpid = iopf->fault.prm.grpid,
.code = status,
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 39601fbfd0e0..f654fa8ae280 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -1414,10 +1414,6 @@ int iommu_page_response(struct device *dev,
if (!param || !param->fault_param)
return -EINVAL;
- if (msg->version != IOMMU_PAGE_RESP_VERSION_1 ||
- msg->flags & ~IOMMU_PAGE_RESP_PASID_VALID)
- return -EINVAL;
-
/* Only send response if there is a fault report pending */
mutex_lock(¶m->fault_param->lock);
if (list_empty(¶m->fault_param->faults)) {
--
2.34.1
^ permalink raw reply related [flat|nested] 48+ messages in thread* RE: [PATCH v4 04/10] iommu: Cleanup iopf data structure definitions
2023-08-25 2:30 ` [PATCH v4 04/10] iommu: Cleanup iopf data structure definitions Lu Baolu
@ 2023-08-25 7:57 ` Tian, Kevin
0 siblings, 0 replies; 48+ messages in thread
From: Tian, Kevin @ 2023-08-25 7:57 UTC (permalink / raw)
To: Lu Baolu, Joerg Roedel, Will Deacon, Robin Murphy,
Jason Gunthorpe, Jean-Philippe Brucker, Nicolin Chen
Cc: Liu, Yi L, Jacob Pan, iommu@lists.linux.dev, kvm@vger.kernel.org,
linux-kernel@vger.kernel.org, Jason Gunthorpe
> From: Lu Baolu <baolu.lu@linux.intel.com>
> Sent: Friday, August 25, 2023 10:30 AM
>
> struct iommu_fault_page_request and struct iommu_page_response are not
> part of uAPI anymore. Convert them to data structures for kAPI.
>
> Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
> Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
^ permalink raw reply [flat|nested] 48+ messages in thread
* [PATCH v4 05/10] iommu: Merge iopf_device_param into iommu_fault_param
2023-08-25 2:30 [PATCH v4 00/10] iommu: Prepare to deliver page faults to user space Lu Baolu
` (3 preceding siblings ...)
2023-08-25 2:30 ` [PATCH v4 04/10] iommu: Cleanup iopf data structure definitions Lu Baolu
@ 2023-08-25 2:30 ` Lu Baolu
2023-08-25 8:00 ` Tian, Kevin
2023-08-25 2:30 ` [PATCH v4 06/10] iommu: Remove iommu_[un]register_device_fault_handler() Lu Baolu
` (4 subsequent siblings)
9 siblings, 1 reply; 48+ messages in thread
From: Lu Baolu @ 2023-08-25 2:30 UTC (permalink / raw)
To: Joerg Roedel, Will Deacon, Robin Murphy, Jason Gunthorpe,
Kevin Tian, Jean-Philippe Brucker, Nicolin Chen
Cc: Yi Liu, Jacob Pan, iommu, kvm, linux-kernel, Lu Baolu,
Jason Gunthorpe
The struct dev_iommu contains two pointers, fault_param and iopf_param.
The fault_param pointer points to a data structure that is used to store
pending faults that are awaiting responses. The iopf_param pointer points
to a data structure that is used to store partial faults that are part of
a Page Request Group.
The fault_param and iopf_param pointers are essentially duplicate. This
causes memory waste. Merge the iopf_device_param pointer into the
iommu_fault_param pointer to consolidate the code and save memory. The
consolidated pointer would be allocated on demand when the device driver
enables the iopf on device, and would be freed after iopf is disabled.
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
---
include/linux/iommu.h | 18 ++++--
drivers/iommu/io-pgfault.c | 113 ++++++++++++++++++-------------------
drivers/iommu/iommu.c | 34 ++---------
3 files changed, 75 insertions(+), 90 deletions(-)
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 2dc8017d444c..f7971311ef43 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -40,6 +40,7 @@ struct notifier_block;
struct iommu_sva;
struct iommu_fault_event;
struct iommu_dma_cookie;
+struct iopf_queue;
#define IOMMU_FAULT_PERM_READ (1 << 0) /* read */
#define IOMMU_FAULT_PERM_WRITE (1 << 1) /* write */
@@ -468,21 +469,31 @@ struct iommu_fault_event {
* struct iommu_fault_param - per-device IOMMU fault data
* @handler: Callback function to handle IOMMU faults at device level
* @data: handler private data
- * @faults: holds the pending faults which needs response
* @lock: protect pending faults list
+ * @dev: the device that owns this param
+ * @queue: IOPF queue
+ * @queue_list: index into queue->devices
+ * @partial: faults that are part of a Page Request Group for which the last
+ * request hasn't been submitted yet.
+ * @faults: holds the pending faults which needs response
*/
struct iommu_fault_param {
iommu_dev_fault_handler_t handler;
void *data;
+ struct mutex lock;
+
+ struct device *dev;
+ struct iopf_queue *queue;
+ struct list_head queue_list;
+
+ struct list_head partial;
struct list_head faults;
- struct mutex lock;
};
/**
* struct dev_iommu - Collection of per-device IOMMU data
*
* @fault_param: IOMMU detected device fault reporting data
- * @iopf_param: I/O Page Fault queue and data
* @fwspec: IOMMU fwspec data
* @iommu_dev: IOMMU device this device is linked to
* @priv: IOMMU Driver private data
@@ -497,7 +508,6 @@ struct iommu_fault_param {
struct dev_iommu {
struct mutex lock;
struct iommu_fault_param *fault_param;
- struct iopf_device_param *iopf_param;
struct iommu_fwspec *fwspec;
struct iommu_device *iommu_dev;
void *priv;
diff --git a/drivers/iommu/io-pgfault.c b/drivers/iommu/io-pgfault.c
index 24b5545352ae..b1cf28055525 100644
--- a/drivers/iommu/io-pgfault.c
+++ b/drivers/iommu/io-pgfault.c
@@ -25,21 +25,6 @@ struct iopf_queue {
struct mutex lock;
};
-/**
- * struct iopf_device_param - IO Page Fault data attached to a device
- * @dev: the device that owns this param
- * @queue: IOPF queue
- * @queue_list: index into queue->devices
- * @partial: faults that are part of a Page Request Group for which the last
- * request hasn't been submitted yet.
- */
-struct iopf_device_param {
- struct device *dev;
- struct iopf_queue *queue;
- struct list_head queue_list;
- struct list_head partial;
-};
-
struct iopf_fault {
struct iommu_fault fault;
struct list_head list;
@@ -144,7 +129,7 @@ int iommu_queue_iopf(struct iommu_fault *fault, void *cookie)
int ret;
struct iopf_group *group;
struct iopf_fault *iopf, *next;
- struct iopf_device_param *iopf_param;
+ struct iommu_fault_param *iopf_param;
struct device *dev = cookie;
struct dev_iommu *param = dev->iommu;
@@ -159,7 +144,7 @@ int iommu_queue_iopf(struct iommu_fault *fault, void *cookie)
* As long as we're holding param->lock, the queue can't be unlinked
* from the device and therefore cannot disappear.
*/
- iopf_param = param->iopf_param;
+ iopf_param = param->fault_param;
if (!iopf_param)
return -ENODEV;
@@ -229,14 +214,14 @@ EXPORT_SYMBOL_GPL(iommu_queue_iopf);
int iopf_queue_flush_dev(struct device *dev)
{
int ret = 0;
- struct iopf_device_param *iopf_param;
+ struct iommu_fault_param *iopf_param;
struct dev_iommu *param = dev->iommu;
if (!param)
return -ENODEV;
mutex_lock(&param->lock);
- iopf_param = param->iopf_param;
+ iopf_param = param->fault_param;
if (iopf_param)
flush_workqueue(iopf_param->queue->wq);
else
@@ -260,7 +245,7 @@ EXPORT_SYMBOL_GPL(iopf_queue_flush_dev);
int iopf_queue_discard_partial(struct iopf_queue *queue)
{
struct iopf_fault *iopf, *next;
- struct iopf_device_param *iopf_param;
+ struct iommu_fault_param *iopf_param;
if (!queue)
return -EINVAL;
@@ -287,34 +272,38 @@ EXPORT_SYMBOL_GPL(iopf_queue_discard_partial);
*/
int iopf_queue_add_device(struct iopf_queue *queue, struct device *dev)
{
- int ret = -EBUSY;
- struct iopf_device_param *iopf_param;
+ int ret = 0;
struct dev_iommu *param = dev->iommu;
-
- if (!param)
- return -ENODEV;
-
- iopf_param = kzalloc(sizeof(*iopf_param), GFP_KERNEL);
- if (!iopf_param)
- return -ENOMEM;
-
- INIT_LIST_HEAD(&iopf_param->partial);
- iopf_param->queue = queue;
- iopf_param->dev = dev;
+ struct iommu_fault_param *fault_param;
mutex_lock(&queue->lock);
mutex_lock(&param->lock);
- if (!param->iopf_param) {
- list_add(&iopf_param->queue_list, &queue->devices);
- param->iopf_param = iopf_param;
- ret = 0;
+ if (param->fault_param) {
+ ret = -EBUSY;
+ goto done_unlock;
}
+
+ get_device(dev);
+ fault_param = kzalloc(sizeof(*fault_param), GFP_KERNEL);
+ if (!fault_param) {
+ put_device(dev);
+ ret = -ENOMEM;
+ goto done_unlock;
+ }
+
+ mutex_init(&fault_param->lock);
+ INIT_LIST_HEAD(&fault_param->faults);
+ INIT_LIST_HEAD(&fault_param->partial);
+ fault_param->dev = dev;
+ list_add(&fault_param->queue_list, &queue->devices);
+ fault_param->queue = queue;
+
+ param->fault_param = fault_param;
+
+done_unlock:
mutex_unlock(&param->lock);
mutex_unlock(&queue->lock);
- if (ret)
- kfree(iopf_param);
-
return ret;
}
EXPORT_SYMBOL_GPL(iopf_queue_add_device);
@@ -330,34 +319,42 @@ EXPORT_SYMBOL_GPL(iopf_queue_add_device);
*/
int iopf_queue_remove_device(struct iopf_queue *queue, struct device *dev)
{
- int ret = -EINVAL;
+ int ret = 0;
struct iopf_fault *iopf, *next;
- struct iopf_device_param *iopf_param;
struct dev_iommu *param = dev->iommu;
-
- if (!param || !queue)
- return -EINVAL;
+ struct iommu_fault_param *fault_param = param->fault_param;
mutex_lock(&queue->lock);
mutex_lock(&param->lock);
- iopf_param = param->iopf_param;
- if (iopf_param && iopf_param->queue == queue) {
- list_del(&iopf_param->queue_list);
- param->iopf_param = NULL;
- ret = 0;
+ if (!fault_param) {
+ ret = -ENODEV;
+ goto unlock;
}
- mutex_unlock(&param->lock);
- mutex_unlock(&queue->lock);
- if (ret)
- return ret;
+
+ if (fault_param->queue != queue) {
+ ret = -EINVAL;
+ goto unlock;
+ }
+
+ if (!list_empty(&fault_param->faults)) {
+ ret = -EBUSY;
+ goto unlock;
+ }
+
+ list_del(&fault_param->queue_list);
/* Just in case some faults are still stuck */
- list_for_each_entry_safe(iopf, next, &iopf_param->partial, list)
+ list_for_each_entry_safe(iopf, next, &fault_param->partial, list)
kfree(iopf);
- kfree(iopf_param);
+ param->fault_param = NULL;
+ kfree(fault_param);
+ put_device(dev);
+unlock:
+ mutex_unlock(&param->lock);
+ mutex_unlock(&queue->lock);
- return 0;
+ return ret;
}
EXPORT_SYMBOL_GPL(iopf_queue_remove_device);
@@ -403,7 +400,7 @@ EXPORT_SYMBOL_GPL(iopf_queue_alloc);
*/
void iopf_queue_free(struct iopf_queue *queue)
{
- struct iopf_device_param *iopf_param, *next;
+ struct iommu_fault_param *iopf_param, *next;
if (!queue)
return;
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index f654fa8ae280..579e155a65c0 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -1275,27 +1275,18 @@ int iommu_register_device_fault_handler(struct device *dev,
struct dev_iommu *param = dev->iommu;
int ret = 0;
- if (!param)
+ if (!param || !param->fault_param)
return -EINVAL;
mutex_lock(&param->lock);
/* Only allow one fault handler registered for each device */
- if (param->fault_param) {
+ if (param->fault_param->handler) {
ret = -EBUSY;
goto done_unlock;
}
- get_device(dev);
- param->fault_param = kzalloc(sizeof(*param->fault_param), GFP_KERNEL);
- if (!param->fault_param) {
- put_device(dev);
- ret = -ENOMEM;
- goto done_unlock;
- }
param->fault_param->handler = handler;
param->fault_param->data = data;
- mutex_init(&param->fault_param->lock);
- INIT_LIST_HEAD(&param->fault_param->faults);
done_unlock:
mutex_unlock(&param->lock);
@@ -1316,29 +1307,16 @@ EXPORT_SYMBOL_GPL(iommu_register_device_fault_handler);
int iommu_unregister_device_fault_handler(struct device *dev)
{
struct dev_iommu *param = dev->iommu;
- int ret = 0;
- if (!param)
+ if (!param || !param->fault_param)
return -EINVAL;
mutex_lock(&param->lock);
-
- if (!param->fault_param)
- goto unlock;
-
- /* we cannot unregister handler if there are pending faults */
- if (!list_empty(¶m->fault_param->faults)) {
- ret = -EBUSY;
- goto unlock;
- }
-
- kfree(param->fault_param);
- param->fault_param = NULL;
- put_device(dev);
-unlock:
+ param->fault_param->handler = NULL;
+ param->fault_param->data = NULL;
mutex_unlock(&param->lock);
- return ret;
+ return 0;
}
EXPORT_SYMBOL_GPL(iommu_unregister_device_fault_handler);
--
2.34.1
^ permalink raw reply related [flat|nested] 48+ messages in thread* RE: [PATCH v4 05/10] iommu: Merge iopf_device_param into iommu_fault_param
2023-08-25 2:30 ` [PATCH v4 05/10] iommu: Merge iopf_device_param into iommu_fault_param Lu Baolu
@ 2023-08-25 8:00 ` Tian, Kevin
0 siblings, 0 replies; 48+ messages in thread
From: Tian, Kevin @ 2023-08-25 8:00 UTC (permalink / raw)
To: Lu Baolu, Joerg Roedel, Will Deacon, Robin Murphy,
Jason Gunthorpe, Jean-Philippe Brucker, Nicolin Chen
Cc: Liu, Yi L, Jacob Pan, iommu@lists.linux.dev, kvm@vger.kernel.org,
linux-kernel@vger.kernel.org, Jason Gunthorpe
> From: Lu Baolu <baolu.lu@linux.intel.com>
> Sent: Friday, August 25, 2023 10:30 AM
>
> @@ -468,21 +469,31 @@ struct iommu_fault_event {
> * struct iommu_fault_param - per-device IOMMU fault data
> * @handler: Callback function to handle IOMMU faults at device level
> * @data: handler private data
> - * @faults: holds the pending faults which needs response
> * @lock: protect pending faults list
> + * @dev: the device that owns this param
> + * @queue: IOPF queue
> + * @queue_list: index into queue->devices
> + * @partial: faults that are part of a Page Request Group for which the last
> + * request hasn't been submitted yet.
could clarify that it's protected by dev_iommu lock.
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
^ permalink raw reply [flat|nested] 48+ messages in thread
* [PATCH v4 06/10] iommu: Remove iommu_[un]register_device_fault_handler()
2023-08-25 2:30 [PATCH v4 00/10] iommu: Prepare to deliver page faults to user space Lu Baolu
` (4 preceding siblings ...)
2023-08-25 2:30 ` [PATCH v4 05/10] iommu: Merge iopf_device_param into iommu_fault_param Lu Baolu
@ 2023-08-25 2:30 ` Lu Baolu
2023-08-25 2:30 ` [PATCH v4 07/10] iommu: Merge iommu_fault_event and iopf_fault Lu Baolu
` (3 subsequent siblings)
9 siblings, 0 replies; 48+ messages in thread
From: Lu Baolu @ 2023-08-25 2:30 UTC (permalink / raw)
To: Joerg Roedel, Will Deacon, Robin Murphy, Jason Gunthorpe,
Kevin Tian, Jean-Philippe Brucker, Nicolin Chen
Cc: Yi Liu, Jacob Pan, iommu, kvm, linux-kernel, Lu Baolu,
Jason Gunthorpe
The individual iommu driver reports the iommu page faults by calling
iommu_report_device_fault(), where a pre-registered device fault handler
is called to route the fault to another fault handler installed on the
corresponding iommu domain.
The pre-registered device fault handler is static and won't be dynamic
as the fault handler is eventually per iommu domain. Replace calling
device fault handler with iommu_queue_iopf().
After this replacement, the registering and unregistering fault handler
interfaces are not needed anywhere. Remove the interfaces and the related
data structures to avoid dead code.
Convert cookie parameter of iommu_queue_iopf() into a device pointer that
is really passed.
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
---
include/linux/iommu.h | 23 ------
drivers/iommu/iommu-sva.h | 4 +-
.../iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c | 13 +---
drivers/iommu/intel/iommu.c | 24 ++----
drivers/iommu/io-pgfault.c | 6 +-
drivers/iommu/iommu.c | 76 +------------------
6 files changed, 13 insertions(+), 133 deletions(-)
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index f7971311ef43..3488da75d949 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -126,7 +126,6 @@ struct iommu_page_response {
typedef int (*iommu_fault_handler_t)(struct iommu_domain *,
struct device *, unsigned long, int, void *);
-typedef int (*iommu_dev_fault_handler_t)(struct iommu_fault *, void *);
struct iommu_domain_geometry {
dma_addr_t aperture_start; /* First address that can be mapped */
@@ -467,8 +466,6 @@ struct iommu_fault_event {
/**
* struct iommu_fault_param - per-device IOMMU fault data
- * @handler: Callback function to handle IOMMU faults at device level
- * @data: handler private data
* @lock: protect pending faults list
* @dev: the device that owns this param
* @queue: IOPF queue
@@ -478,8 +475,6 @@ struct iommu_fault_event {
* @faults: holds the pending faults which needs response
*/
struct iommu_fault_param {
- iommu_dev_fault_handler_t handler;
- void *data;
struct mutex lock;
struct device *dev;
@@ -611,11 +606,6 @@ extern int iommu_group_for_each_dev(struct iommu_group *group, void *data,
extern struct iommu_group *iommu_group_get(struct device *dev);
extern struct iommu_group *iommu_group_ref_get(struct iommu_group *group);
extern void iommu_group_put(struct iommu_group *group);
-extern int iommu_register_device_fault_handler(struct device *dev,
- iommu_dev_fault_handler_t handler,
- void *data);
-
-extern int iommu_unregister_device_fault_handler(struct device *dev);
extern int iommu_report_device_fault(struct device *dev,
struct iommu_fault_event *evt);
@@ -999,19 +989,6 @@ static inline void iommu_group_put(struct iommu_group *group)
{
}
-static inline
-int iommu_register_device_fault_handler(struct device *dev,
- iommu_dev_fault_handler_t handler,
- void *data)
-{
- return -ENODEV;
-}
-
-static inline int iommu_unregister_device_fault_handler(struct device *dev)
-{
- return 0;
-}
-
static inline
int iommu_report_device_fault(struct device *dev, struct iommu_fault_event *evt)
{
diff --git a/drivers/iommu/iommu-sva.h b/drivers/iommu/iommu-sva.h
index 54946b5a7caf..de7819c796ce 100644
--- a/drivers/iommu/iommu-sva.h
+++ b/drivers/iommu/iommu-sva.h
@@ -13,7 +13,7 @@ struct iommu_fault;
struct iopf_queue;
#ifdef CONFIG_IOMMU_SVA
-int iommu_queue_iopf(struct iommu_fault *fault, void *cookie);
+int iommu_queue_iopf(struct iommu_fault *fault, struct device *dev);
int iopf_queue_add_device(struct iopf_queue *queue, struct device *dev);
int iopf_queue_remove_device(struct iopf_queue *queue,
@@ -26,7 +26,7 @@ enum iommu_page_response_code
iommu_sva_handle_iopf(struct iommu_fault *fault, void *data);
#else /* CONFIG_IOMMU_SVA */
-static inline int iommu_queue_iopf(struct iommu_fault *fault, void *cookie)
+static inline int iommu_queue_iopf(struct iommu_fault *fault, struct device *dev)
{
return -ENODEV;
}
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
index 5e6b39881c04..7748ee30f8c0 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
@@ -437,7 +437,6 @@ bool arm_smmu_master_sva_enabled(struct arm_smmu_master *master)
static int arm_smmu_master_sva_enable_iopf(struct arm_smmu_master *master)
{
- int ret;
struct device *dev = master->dev;
/*
@@ -450,16 +449,7 @@ static int arm_smmu_master_sva_enable_iopf(struct arm_smmu_master *master)
if (!master->iopf_enabled)
return -EINVAL;
- ret = iopf_queue_add_device(master->smmu->evtq.iopf, dev);
- if (ret)
- return ret;
-
- ret = iommu_register_device_fault_handler(dev, iommu_queue_iopf, dev);
- if (ret) {
- iopf_queue_remove_device(master->smmu->evtq.iopf, dev);
- return ret;
- }
- return 0;
+ return iopf_queue_add_device(master->smmu->evtq.iopf, dev);
}
static void arm_smmu_master_sva_disable_iopf(struct arm_smmu_master *master)
@@ -469,7 +459,6 @@ static void arm_smmu_master_sva_disable_iopf(struct arm_smmu_master *master)
if (!master->iopf_enabled)
return;
- iommu_unregister_device_fault_handler(dev);
iopf_queue_remove_device(master->smmu->evtq.iopf, dev);
}
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index dd8ff358867d..35839ef69089 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -4574,23 +4574,15 @@ static int intel_iommu_enable_iopf(struct device *dev)
if (ret)
return ret;
- ret = iommu_register_device_fault_handler(dev, iommu_queue_iopf, dev);
- if (ret)
- goto iopf_remove_device;
-
ret = pci_enable_pri(pdev, PRQ_DEPTH);
- if (ret)
- goto iopf_unregister_handler;
+ if (ret) {
+ iopf_queue_remove_device(iommu->iopf_queue, dev);
+ return ret;
+ }
+
info->pri_enabled = 1;
return 0;
-
-iopf_unregister_handler:
- iommu_unregister_device_fault_handler(dev);
-iopf_remove_device:
- iopf_queue_remove_device(iommu->iopf_queue, dev);
-
- return ret;
}
static int intel_iommu_disable_iopf(struct device *dev)
@@ -4613,11 +4605,9 @@ static int intel_iommu_disable_iopf(struct device *dev)
info->pri_enabled = 0;
/*
- * With PRI disabled and outstanding PRQs drained, unregistering
- * fault handler and removing device from iopf queue should never
- * fail.
+ * With PRI disabled and outstanding PRQs drained, removing device
+ * from iopf queue should never fail.
*/
- WARN_ON(iommu_unregister_device_fault_handler(dev));
WARN_ON(iopf_queue_remove_device(iommu->iopf_queue, dev));
return 0;
diff --git a/drivers/iommu/io-pgfault.c b/drivers/iommu/io-pgfault.c
index b1cf28055525..31832aeacdba 100644
--- a/drivers/iommu/io-pgfault.c
+++ b/drivers/iommu/io-pgfault.c
@@ -87,7 +87,7 @@ static void iopf_handler(struct work_struct *work)
/**
* iommu_queue_iopf - IO Page Fault handler
* @fault: fault event
- * @cookie: struct device, passed to iommu_register_device_fault_handler.
+ * @dev: struct device.
*
* Add a fault to the device workqueue, to be handled by mm.
*
@@ -124,14 +124,12 @@ static void iopf_handler(struct work_struct *work)
*
* Return: 0 on success and <0 on error.
*/
-int iommu_queue_iopf(struct iommu_fault *fault, void *cookie)
+int iommu_queue_iopf(struct iommu_fault *fault, struct device *dev)
{
int ret;
struct iopf_group *group;
struct iopf_fault *iopf, *next;
struct iommu_fault_param *iopf_param;
-
- struct device *dev = cookie;
struct dev_iommu *param = dev->iommu;
lockdep_assert_held(&param->lock);
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 579e155a65c0..8c541af107e8 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -1250,76 +1250,6 @@ void iommu_group_put(struct iommu_group *group)
}
EXPORT_SYMBOL_GPL(iommu_group_put);
-/**
- * iommu_register_device_fault_handler() - Register a device fault handler
- * @dev: the device
- * @handler: the fault handler
- * @data: private data passed as argument to the handler
- *
- * When an IOMMU fault event is received, this handler gets called with the
- * fault event and data as argument. The handler should return 0 on success. If
- * the fault is recoverable (IOMMU_FAULT_PAGE_REQ), the consumer should also
- * complete the fault by calling iommu_page_response() with one of the following
- * response code:
- * - IOMMU_PAGE_RESP_SUCCESS: retry the translation
- * - IOMMU_PAGE_RESP_INVALID: terminate the fault
- * - IOMMU_PAGE_RESP_FAILURE: terminate the fault and stop reporting
- * page faults if possible.
- *
- * Return 0 if the fault handler was installed successfully, or an error.
- */
-int iommu_register_device_fault_handler(struct device *dev,
- iommu_dev_fault_handler_t handler,
- void *data)
-{
- struct dev_iommu *param = dev->iommu;
- int ret = 0;
-
- if (!param || !param->fault_param)
- return -EINVAL;
-
- mutex_lock(&param->lock);
- /* Only allow one fault handler registered for each device */
- if (param->fault_param->handler) {
- ret = -EBUSY;
- goto done_unlock;
- }
-
- param->fault_param->handler = handler;
- param->fault_param->data = data;
-
-done_unlock:
- mutex_unlock(&param->lock);
-
- return ret;
-}
-EXPORT_SYMBOL_GPL(iommu_register_device_fault_handler);
-
-/**
- * iommu_unregister_device_fault_handler() - Unregister the device fault handler
- * @dev: the device
- *
- * Remove the device fault handler installed with
- * iommu_register_device_fault_handler().
- *
- * Return 0 on success, or an error.
- */
-int iommu_unregister_device_fault_handler(struct device *dev)
-{
- struct dev_iommu *param = dev->iommu;
-
- if (!param || !param->fault_param)
- return -EINVAL;
-
- mutex_lock(&param->lock);
- param->fault_param->handler = NULL;
- param->fault_param->data = NULL;
- mutex_unlock(&param->lock);
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(iommu_unregister_device_fault_handler);
-
/**
* iommu_report_device_fault() - Report fault event to device driver
* @dev: the device
@@ -1344,10 +1274,6 @@ int iommu_report_device_fault(struct device *dev, struct iommu_fault_event *evt)
/* we only report device fault if there is a handler registered */
mutex_lock(&param->lock);
fparam = param->fault_param;
- if (!fparam || !fparam->handler) {
- ret = -EINVAL;
- goto done_unlock;
- }
if (evt->fault.type == IOMMU_FAULT_PAGE_REQ &&
(evt->fault.prm.flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE)) {
@@ -1362,7 +1288,7 @@ int iommu_report_device_fault(struct device *dev, struct iommu_fault_event *evt)
mutex_unlock(&fparam->lock);
}
- ret = fparam->handler(&evt->fault, fparam->data);
+ ret = iommu_queue_iopf(&evt->fault, dev);
if (ret && evt_pending) {
mutex_lock(&fparam->lock);
list_del(&evt_pending->list);
--
2.34.1
^ permalink raw reply related [flat|nested] 48+ messages in thread* [PATCH v4 07/10] iommu: Merge iommu_fault_event and iopf_fault
2023-08-25 2:30 [PATCH v4 00/10] iommu: Prepare to deliver page faults to user space Lu Baolu
` (5 preceding siblings ...)
2023-08-25 2:30 ` [PATCH v4 06/10] iommu: Remove iommu_[un]register_device_fault_handler() Lu Baolu
@ 2023-08-25 2:30 ` Lu Baolu
2023-08-25 8:03 ` Tian, Kevin
2023-08-25 2:30 ` [PATCH v4 08/10] iommu: Prepare for separating SVA and IOPF Lu Baolu
` (2 subsequent siblings)
9 siblings, 1 reply; 48+ messages in thread
From: Lu Baolu @ 2023-08-25 2:30 UTC (permalink / raw)
To: Joerg Roedel, Will Deacon, Robin Murphy, Jason Gunthorpe,
Kevin Tian, Jean-Philippe Brucker, Nicolin Chen
Cc: Yi Liu, Jacob Pan, iommu, kvm, linux-kernel, Lu Baolu
The iommu_fault_event and iopf_fault data structures store the same
information about an iopf fault. They are also used in the same way.
Merge these two data structures into a single one to make the code
more concise and easier to maintain.
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
---
include/linux/iommu.h | 27 ++++++---------------
drivers/iommu/intel/iommu.h | 2 +-
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 4 +--
drivers/iommu/intel/svm.c | 5 ++--
drivers/iommu/io-pgfault.c | 5 ----
drivers/iommu/iommu.c | 8 +++---
6 files changed, 17 insertions(+), 34 deletions(-)
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 3488da75d949..7c5781732665 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -38,7 +38,6 @@ struct iommu_domain;
struct iommu_domain_ops;
struct notifier_block;
struct iommu_sva;
-struct iommu_fault_event;
struct iommu_dma_cookie;
struct iopf_queue;
@@ -119,6 +118,11 @@ struct iommu_page_response {
u32 code;
};
+struct iopf_fault {
+ struct iommu_fault fault;
+ /* node for pending lists */
+ struct list_head list;
+};
/* iommu fault flags */
#define IOMMU_FAULT_READ 0x0
@@ -357,7 +361,7 @@ struct iommu_ops {
int (*dev_disable_feat)(struct device *dev, enum iommu_dev_features f);
int (*page_response)(struct device *dev,
- struct iommu_fault_event *evt,
+ struct iopf_fault *evt,
struct iommu_page_response *msg);
int (*def_domain_type)(struct device *dev);
@@ -450,20 +454,6 @@ struct iommu_device {
u32 max_pasids;
};
-/**
- * struct iommu_fault_event - Generic fault event
- *
- * Can represent recoverable faults such as a page requests or
- * unrecoverable faults such as DMA or IRQ remapping faults.
- *
- * @fault: fault descriptor
- * @list: pending fault event list, used for tracking responses
- */
-struct iommu_fault_event {
- struct iommu_fault fault;
- struct list_head list;
-};
-
/**
* struct iommu_fault_param - per-device IOMMU fault data
* @lock: protect pending faults list
@@ -607,8 +597,7 @@ extern struct iommu_group *iommu_group_get(struct device *dev);
extern struct iommu_group *iommu_group_ref_get(struct iommu_group *group);
extern void iommu_group_put(struct iommu_group *group);
-extern int iommu_report_device_fault(struct device *dev,
- struct iommu_fault_event *evt);
+extern int iommu_report_device_fault(struct device *dev, struct iopf_fault *evt);
extern int iommu_page_response(struct device *dev,
struct iommu_page_response *msg);
@@ -990,7 +979,7 @@ static inline void iommu_group_put(struct iommu_group *group)
}
static inline
-int iommu_report_device_fault(struct device *dev, struct iommu_fault_event *evt)
+int iommu_report_device_fault(struct device *dev, struct iopf_fault *evt)
{
return -ENODEV;
}
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index c18fb699c87a..66221bb206f4 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -847,7 +847,7 @@ struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn);
void intel_svm_check(struct intel_iommu *iommu);
int intel_svm_enable_prq(struct intel_iommu *iommu);
int intel_svm_finish_prq(struct intel_iommu *iommu);
-int intel_svm_page_response(struct device *dev, struct iommu_fault_event *evt,
+int intel_svm_page_response(struct device *dev, struct iopf_fault *evt,
struct iommu_page_response *msg);
struct iommu_domain *intel_svm_domain_alloc(void);
void intel_svm_remove_dev_pasid(struct device *dev, ioasid_t pasid);
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 90b281eda73f..e537b6c046c5 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -922,7 +922,7 @@ static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
}
static int arm_smmu_page_response(struct device *dev,
- struct iommu_fault_event *unused,
+ struct iopf_fault *unused,
struct iommu_page_response *resp)
{
struct arm_smmu_cmdq_ent cmd = {0};
@@ -1473,7 +1473,7 @@ static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt)
struct arm_smmu_master *master;
bool ssid_valid = evt[0] & EVTQ_0_SSV;
u32 sid = FIELD_GET(EVTQ_0_SID, evt[0]);
- struct iommu_fault_event fault_evt = { };
+ struct iopf_fault fault_evt = { };
struct iommu_fault *flt = &fault_evt.fault;
switch (FIELD_GET(EVTQ_0_ID, evt[0])) {
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index 9fbae9af6615..9e8d4d75d293 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -543,13 +543,12 @@ static int prq_to_iommu_prot(struct page_req_dsc *req)
static int intel_svm_prq_report(struct intel_iommu *iommu, struct device *dev,
struct page_req_dsc *desc)
{
- struct iommu_fault_event event;
+ struct iopf_fault event = { };
if (!dev || !dev_is_pci(dev))
return -ENODEV;
/* Fill in event data for device specific processing */
- memset(&event, 0, sizeof(struct iommu_fault_event));
event.fault.type = IOMMU_FAULT_PAGE_REQ;
event.fault.prm.addr = (u64)desc->addr << VTD_PAGE_SHIFT;
event.fault.prm.pasid = desc->pasid;
@@ -721,7 +720,7 @@ static irqreturn_t prq_event_thread(int irq, void *d)
}
int intel_svm_page_response(struct device *dev,
- struct iommu_fault_event *evt,
+ struct iopf_fault *evt,
struct iommu_page_response *msg)
{
struct iommu_fault_page_request *prm;
diff --git a/drivers/iommu/io-pgfault.c b/drivers/iommu/io-pgfault.c
index 31832aeacdba..c45977bb7da3 100644
--- a/drivers/iommu/io-pgfault.c
+++ b/drivers/iommu/io-pgfault.c
@@ -25,11 +25,6 @@ struct iopf_queue {
struct mutex lock;
};
-struct iopf_fault {
- struct iommu_fault fault;
- struct list_head list;
-};
-
struct iopf_group {
struct iopf_fault last_fault;
struct list_head faults;
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 8c541af107e8..a7f6d0ec0400 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -1261,10 +1261,10 @@ EXPORT_SYMBOL_GPL(iommu_group_put);
*
* Return 0 on success, or an error.
*/
-int iommu_report_device_fault(struct device *dev, struct iommu_fault_event *evt)
+int iommu_report_device_fault(struct device *dev, struct iopf_fault *evt)
{
struct dev_iommu *param = dev->iommu;
- struct iommu_fault_event *evt_pending = NULL;
+ struct iopf_fault *evt_pending = NULL;
struct iommu_fault_param *fparam;
int ret = 0;
@@ -1277,7 +1277,7 @@ int iommu_report_device_fault(struct device *dev, struct iommu_fault_event *evt)
if (evt->fault.type == IOMMU_FAULT_PAGE_REQ &&
(evt->fault.prm.flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE)) {
- evt_pending = kmemdup(evt, sizeof(struct iommu_fault_event),
+ evt_pending = kmemdup(evt, sizeof(struct iopf_fault),
GFP_KERNEL);
if (!evt_pending) {
ret = -ENOMEM;
@@ -1306,7 +1306,7 @@ int iommu_page_response(struct device *dev,
{
bool needs_pasid;
int ret = -EINVAL;
- struct iommu_fault_event *evt;
+ struct iopf_fault *evt;
struct iommu_fault_page_request *prm;
struct dev_iommu *param = dev->iommu;
const struct iommu_ops *ops = dev_iommu_ops(dev);
--
2.34.1
^ permalink raw reply related [flat|nested] 48+ messages in thread* RE: [PATCH v4 07/10] iommu: Merge iommu_fault_event and iopf_fault
2023-08-25 2:30 ` [PATCH v4 07/10] iommu: Merge iommu_fault_event and iopf_fault Lu Baolu
@ 2023-08-25 8:03 ` Tian, Kevin
2023-08-26 7:02 ` Baolu Lu
0 siblings, 1 reply; 48+ messages in thread
From: Tian, Kevin @ 2023-08-25 8:03 UTC (permalink / raw)
To: Lu Baolu, Joerg Roedel, Will Deacon, Robin Murphy,
Jason Gunthorpe, Jean-Philippe Brucker, Nicolin Chen
Cc: Liu, Yi L, Jacob Pan, iommu@lists.linux.dev, kvm@vger.kernel.org,
linux-kernel@vger.kernel.org
> From: Lu Baolu <baolu.lu@linux.intel.com>
> Sent: Friday, August 25, 2023 10:30 AM
>
> -/**
> - * struct iommu_fault_event - Generic fault event
> - *
> - * Can represent recoverable faults such as a page requests or
> - * unrecoverable faults such as DMA or IRQ remapping faults.
> - *
> - * @fault: fault descriptor
> - * @list: pending fault event list, used for tracking responses
> - */
> -struct iommu_fault_event {
> - struct iommu_fault fault;
> - struct list_head list;
> -};
> -
iommu_fault_event is more forward-looking if unrecoverable fault
will be supported in future. From this angle it might make more
sense to keep it to replace iopf_fault.
^ permalink raw reply [flat|nested] 48+ messages in thread* Re: [PATCH v4 07/10] iommu: Merge iommu_fault_event and iopf_fault
2023-08-25 8:03 ` Tian, Kevin
@ 2023-08-26 7:02 ` Baolu Lu
2023-08-30 7:33 ` Tian, Kevin
0 siblings, 1 reply; 48+ messages in thread
From: Baolu Lu @ 2023-08-26 7:02 UTC (permalink / raw)
To: Tian, Kevin, Joerg Roedel, Will Deacon, Robin Murphy,
Jason Gunthorpe, Jean-Philippe Brucker, Nicolin Chen
Cc: baolu.lu, Liu, Yi L, Jacob Pan, iommu@lists.linux.dev,
kvm@vger.kernel.org, linux-kernel@vger.kernel.org
On 8/25/23 4:03 PM, Tian, Kevin wrote:
>> From: Lu Baolu <baolu.lu@linux.intel.com>
>> Sent: Friday, August 25, 2023 10:30 AM
>>
>> -/**
>> - * struct iommu_fault_event - Generic fault event
>> - *
>> - * Can represent recoverable faults such as a page requests or
>> - * unrecoverable faults such as DMA or IRQ remapping faults.
>> - *
>> - * @fault: fault descriptor
>> - * @list: pending fault event list, used for tracking responses
>> - */
>> -struct iommu_fault_event {
>> - struct iommu_fault fault;
>> - struct list_head list;
>> -};
>> -
>
> iommu_fault_event is more forward-looking if unrecoverable fault
> will be supported in future. From this angle it might make more
> sense to keep it to replace iopf_fault.
Currently IOMMU drivers use
int report_iommu_fault(struct iommu_domain *domain, struct device *dev,
unsigned long iova, int flags)
to report unrecoverable faults. There is no need for a generic fault
event structure.
So alternatively, we can use iopf_fault for now and consolidate a
generic fault data structure when there is a real need.
Best regards,
baolu
^ permalink raw reply [flat|nested] 48+ messages in thread* RE: [PATCH v4 07/10] iommu: Merge iommu_fault_event and iopf_fault
2023-08-26 7:02 ` Baolu Lu
@ 2023-08-30 7:33 ` Tian, Kevin
0 siblings, 0 replies; 48+ messages in thread
From: Tian, Kevin @ 2023-08-30 7:33 UTC (permalink / raw)
To: Baolu Lu, Joerg Roedel, Will Deacon, Robin Murphy,
Jason Gunthorpe, Jean-Philippe Brucker, Nicolin Chen
Cc: Liu, Yi L, Jacob Pan, iommu@lists.linux.dev, kvm@vger.kernel.org,
linux-kernel@vger.kernel.org
> From: Baolu Lu <baolu.lu@linux.intel.com>
> Sent: Saturday, August 26, 2023 3:02 PM
>
> On 8/25/23 4:03 PM, Tian, Kevin wrote:
> >> From: Lu Baolu <baolu.lu@linux.intel.com>
> >> Sent: Friday, August 25, 2023 10:30 AM
> >>
> >> -/**
> >> - * struct iommu_fault_event - Generic fault event
> >> - *
> >> - * Can represent recoverable faults such as a page requests or
> >> - * unrecoverable faults such as DMA or IRQ remapping faults.
> >> - *
> >> - * @fault: fault descriptor
> >> - * @list: pending fault event list, used for tracking responses
> >> - */
> >> -struct iommu_fault_event {
> >> - struct iommu_fault fault;
> >> - struct list_head list;
> >> -};
> >> -
> >
> > iommu_fault_event is more forward-looking if unrecoverable fault
> > will be supported in future. From this angle it might make more
> > sense to keep it to replace iopf_fault.
>
> Currently IOMMU drivers use
>
> int report_iommu_fault(struct iommu_domain *domain, struct device *dev,
> unsigned long iova, int flags)
>
> to report unrecoverable faults. There is no need for a generic fault
> event structure.
>
> So alternatively, we can use iopf_fault for now and consolidate a
> generic fault data structure when there is a real need.
>
Jean suggested to deprecate report_iommu_fault() and instead use
iommu_report_device_fault() for unrecoverable faults.
but not big deal if you prefer to iopf_fault for now.
^ permalink raw reply [flat|nested] 48+ messages in thread
* [PATCH v4 08/10] iommu: Prepare for separating SVA and IOPF
2023-08-25 2:30 [PATCH v4 00/10] iommu: Prepare to deliver page faults to user space Lu Baolu
` (6 preceding siblings ...)
2023-08-25 2:30 ` [PATCH v4 07/10] iommu: Merge iommu_fault_event and iopf_fault Lu Baolu
@ 2023-08-25 2:30 ` Lu Baolu
2023-08-25 8:05 ` Tian, Kevin
2023-08-25 2:30 ` [PATCH v4 09/10] iommu: Make iommu_queue_iopf() more generic Lu Baolu
2023-08-25 2:30 ` [PATCH v4 10/10] iommu: Separate SVA and IOPF Lu Baolu
9 siblings, 1 reply; 48+ messages in thread
From: Lu Baolu @ 2023-08-25 2:30 UTC (permalink / raw)
To: Joerg Roedel, Will Deacon, Robin Murphy, Jason Gunthorpe,
Kevin Tian, Jean-Philippe Brucker, Nicolin Chen
Cc: Yi Liu, Jacob Pan, iommu, kvm, linux-kernel, Lu Baolu,
Jason Gunthorpe
Move iopf_group data structure to iommu.h to make it a minimal set of
faults that a domain's page fault handler should handle.
Add a new function, iopf_free_group(), to free a fault group after all
faults in the group are handled. This function will be made global so
that it can be called from other files, such as iommu-sva.c.
Move iopf_queue data structure to iommu.h to allow the workqueue to be
scheduled out of this file.
This will simplify the sequential patches.
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
---
include/linux/iommu.h | 20 +++++++++++++++++++-
drivers/iommu/io-pgfault.c | 37 +++++++++++++------------------------
2 files changed, 32 insertions(+), 25 deletions(-)
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 7c5781732665..19420ccd43a7 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -39,7 +39,6 @@ struct iommu_domain_ops;
struct notifier_block;
struct iommu_sva;
struct iommu_dma_cookie;
-struct iopf_queue;
#define IOMMU_FAULT_PERM_READ (1 << 0) /* read */
#define IOMMU_FAULT_PERM_WRITE (1 << 1) /* write */
@@ -124,6 +123,25 @@ struct iopf_fault {
struct list_head list;
};
+struct iopf_group {
+ struct iopf_fault last_fault;
+ struct list_head faults;
+ struct work_struct work;
+ struct device *dev;
+};
+
+/**
+ * struct iopf_queue - IO Page Fault queue
+ * @wq: the fault workqueue
+ * @devices: devices attached to this queue
+ * @lock: protects the device list
+ */
+struct iopf_queue {
+ struct workqueue_struct *wq;
+ struct list_head devices;
+ struct mutex lock;
+};
+
/* iommu fault flags */
#define IOMMU_FAULT_READ 0x0
#define IOMMU_FAULT_WRITE 0x1
diff --git a/drivers/iommu/io-pgfault.c b/drivers/iommu/io-pgfault.c
index c45977bb7da3..09e05f483b4f 100644
--- a/drivers/iommu/io-pgfault.c
+++ b/drivers/iommu/io-pgfault.c
@@ -13,24 +13,17 @@
#include "iommu-sva.h"
-/**
- * struct iopf_queue - IO Page Fault queue
- * @wq: the fault workqueue
- * @devices: devices attached to this queue
- * @lock: protects the device list
- */
-struct iopf_queue {
- struct workqueue_struct *wq;
- struct list_head devices;
- struct mutex lock;
-};
+static void iopf_free_group(struct iopf_group *group)
+{
+ struct iopf_fault *iopf, *next;
-struct iopf_group {
- struct iopf_fault last_fault;
- struct list_head faults;
- struct work_struct work;
- struct device *dev;
-};
+ list_for_each_entry_safe(iopf, next, &group->faults, list) {
+ if (!(iopf->fault.prm.flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE))
+ kfree(iopf);
+ }
+
+ kfree(group);
+}
static int iopf_complete_group(struct device *dev, struct iopf_fault *iopf,
enum iommu_page_response_code status)
@@ -50,9 +43,9 @@ static int iopf_complete_group(struct device *dev, struct iopf_fault *iopf,
static void iopf_handler(struct work_struct *work)
{
+ struct iopf_fault *iopf;
struct iopf_group *group;
struct iommu_domain *domain;
- struct iopf_fault *iopf, *next;
enum iommu_page_response_code status = IOMMU_PAGE_RESP_SUCCESS;
group = container_of(work, struct iopf_group, work);
@@ -61,7 +54,7 @@ static void iopf_handler(struct work_struct *work)
if (!domain || !domain->iopf_handler)
status = IOMMU_PAGE_RESP_INVALID;
- list_for_each_entry_safe(iopf, next, &group->faults, list) {
+ list_for_each_entry(iopf, &group->faults, list) {
/*
* For the moment, errors are sticky: don't handle subsequent
* faults in the group if there is an error.
@@ -69,14 +62,10 @@ static void iopf_handler(struct work_struct *work)
if (status == IOMMU_PAGE_RESP_SUCCESS)
status = domain->iopf_handler(&iopf->fault,
domain->fault_data);
-
- if (!(iopf->fault.prm.flags &
- IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE))
- kfree(iopf);
}
iopf_complete_group(group->dev, &group->last_fault, status);
- kfree(group);
+ iopf_free_group(group);
}
/**
--
2.34.1
^ permalink raw reply related [flat|nested] 48+ messages in thread* RE: [PATCH v4 08/10] iommu: Prepare for separating SVA and IOPF
2023-08-25 2:30 ` [PATCH v4 08/10] iommu: Prepare for separating SVA and IOPF Lu Baolu
@ 2023-08-25 8:05 ` Tian, Kevin
0 siblings, 0 replies; 48+ messages in thread
From: Tian, Kevin @ 2023-08-25 8:05 UTC (permalink / raw)
To: Lu Baolu, Joerg Roedel, Will Deacon, Robin Murphy,
Jason Gunthorpe, Jean-Philippe Brucker, Nicolin Chen
Cc: Liu, Yi L, Jacob Pan, iommu@lists.linux.dev, kvm@vger.kernel.org,
linux-kernel@vger.kernel.org, Jason Gunthorpe
> From: Lu Baolu <baolu.lu@linux.intel.com>
> Sent: Friday, August 25, 2023 10:30 AM
>
> - list_for_each_entry_safe(iopf, next, &group->faults, list) {
> + list_for_each_entry(iopf, &group->faults, list) {
> /*
> * For the moment, errors are sticky: don't handle
> subsequent
> * faults in the group if there is an error.
> @@ -69,14 +62,10 @@ static void iopf_handler(struct work_struct *work)
> if (status == IOMMU_PAGE_RESP_SUCCESS)
> status = domain->iopf_handler(&iopf->fault,
> domain->fault_data);
> -
> - if (!(iopf->fault.prm.flags &
> - IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE))
> - kfree(iopf);
> }
then no need to continue if status is not success. Yes this is fixed
in next patch but logically the change more suits here.
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
^ permalink raw reply [flat|nested] 48+ messages in thread
* [PATCH v4 09/10] iommu: Make iommu_queue_iopf() more generic
2023-08-25 2:30 [PATCH v4 00/10] iommu: Prepare to deliver page faults to user space Lu Baolu
` (7 preceding siblings ...)
2023-08-25 2:30 ` [PATCH v4 08/10] iommu: Prepare for separating SVA and IOPF Lu Baolu
@ 2023-08-25 2:30 ` Lu Baolu
2023-08-25 8:17 ` Tian, Kevin
2023-08-25 2:30 ` [PATCH v4 10/10] iommu: Separate SVA and IOPF Lu Baolu
9 siblings, 1 reply; 48+ messages in thread
From: Lu Baolu @ 2023-08-25 2:30 UTC (permalink / raw)
To: Joerg Roedel, Will Deacon, Robin Murphy, Jason Gunthorpe,
Kevin Tian, Jean-Philippe Brucker, Nicolin Chen
Cc: Yi Liu, Jacob Pan, iommu, kvm, linux-kernel, Lu Baolu
Make iommu_queue_iopf() more generic by making the iopf_group a minimal
set of iopf's that an iopf handler of domain should handle and respond
to. Add domain parameter to struct iopf_group so that the handler can
retrieve and use it directly.
Change iommu_queue_iopf() to forward groups of iopf's to the domain's
iopf handler. This is also a necessary step to decouple the sva iopf
handling code from this interface.
The iopf handling framework in the core requires that domain's lifetime
should cover all possible iopf. This has been documented in the comments
for iommu_queue_iopf(), which is the entry point of the framework. Add
some debugging code to enforce this requirement.
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
---
include/linux/iommu.h | 4 ++--
drivers/iommu/iommu-sva.h | 6 ++---
drivers/iommu/io-pgfault.c | 49 ++++++++++++++++++++++++++++----------
drivers/iommu/iommu-sva.c | 3 +--
drivers/iommu/iommu.c | 23 ++++++++++++++++++
5 files changed, 65 insertions(+), 20 deletions(-)
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 19420ccd43a7..687dfde2c874 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -128,6 +128,7 @@ struct iopf_group {
struct list_head faults;
struct work_struct work;
struct device *dev;
+ struct iommu_domain *domain;
};
/**
@@ -197,8 +198,7 @@ struct iommu_domain {
unsigned long pgsize_bitmap; /* Bitmap of page sizes in use */
struct iommu_domain_geometry geometry;
struct iommu_dma_cookie *iova_cookie;
- enum iommu_page_response_code (*iopf_handler)(struct iommu_fault *fault,
- void *data);
+ int (*iopf_handler)(struct iopf_group *group);
void *fault_data;
union {
struct {
diff --git a/drivers/iommu/iommu-sva.h b/drivers/iommu/iommu-sva.h
index de7819c796ce..27c8da115b41 100644
--- a/drivers/iommu/iommu-sva.h
+++ b/drivers/iommu/iommu-sva.h
@@ -22,8 +22,7 @@ int iopf_queue_flush_dev(struct device *dev);
struct iopf_queue *iopf_queue_alloc(const char *name);
void iopf_queue_free(struct iopf_queue *queue);
int iopf_queue_discard_partial(struct iopf_queue *queue);
-enum iommu_page_response_code
-iommu_sva_handle_iopf(struct iommu_fault *fault, void *data);
+int iommu_sva_handle_iopf(struct iopf_group *group);
#else /* CONFIG_IOMMU_SVA */
static inline int iommu_queue_iopf(struct iommu_fault *fault, struct device *dev)
@@ -62,8 +61,7 @@ static inline int iopf_queue_discard_partial(struct iopf_queue *queue)
return -ENODEV;
}
-static inline enum iommu_page_response_code
-iommu_sva_handle_iopf(struct iommu_fault *fault, void *data)
+static inline int iommu_sva_handle_iopf(struct iopf_group *group)
{
return IOMMU_PAGE_RESP_INVALID;
}
diff --git a/drivers/iommu/io-pgfault.c b/drivers/iommu/io-pgfault.c
index 09e05f483b4f..06330d922925 100644
--- a/drivers/iommu/io-pgfault.c
+++ b/drivers/iommu/io-pgfault.c
@@ -13,6 +13,9 @@
#include "iommu-sva.h"
+enum iommu_page_response_code
+iommu_sva_handle_mm(struct iommu_fault *fault, struct mm_struct *mm);
+
static void iopf_free_group(struct iopf_group *group)
{
struct iopf_fault *iopf, *next;
@@ -45,23 +48,18 @@ static void iopf_handler(struct work_struct *work)
{
struct iopf_fault *iopf;
struct iopf_group *group;
- struct iommu_domain *domain;
enum iommu_page_response_code status = IOMMU_PAGE_RESP_SUCCESS;
group = container_of(work, struct iopf_group, work);
- domain = iommu_get_domain_for_dev_pasid(group->dev,
- group->last_fault.fault.prm.pasid, 0);
- if (!domain || !domain->iopf_handler)
- status = IOMMU_PAGE_RESP_INVALID;
-
list_for_each_entry(iopf, &group->faults, list) {
/*
* For the moment, errors are sticky: don't handle subsequent
* faults in the group if there is an error.
*/
- if (status == IOMMU_PAGE_RESP_SUCCESS)
- status = domain->iopf_handler(&iopf->fault,
- domain->fault_data);
+ if (status != IOMMU_PAGE_RESP_SUCCESS)
+ break;
+
+ status = iommu_sva_handle_mm(&iopf->fault, group->domain->mm);
}
iopf_complete_group(group->dev, &group->last_fault, status);
@@ -112,6 +110,7 @@ int iommu_queue_iopf(struct iommu_fault *fault, struct device *dev)
{
int ret;
struct iopf_group *group;
+ struct iommu_domain *domain;
struct iopf_fault *iopf, *next;
struct iommu_fault_param *iopf_param;
struct dev_iommu *param = dev->iommu;
@@ -143,6 +142,19 @@ int iommu_queue_iopf(struct iommu_fault *fault, struct device *dev)
return 0;
}
+ if (fault->prm.flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID)
+ domain = iommu_get_domain_for_dev_pasid(dev, fault->prm.pasid, 0);
+ else
+ domain = iommu_get_domain_for_dev(dev);
+
+ if (!domain || !domain->iopf_handler) {
+ dev_warn_ratelimited(dev,
+ "iopf from pasid %d received without handler installed\n",
+ fault->prm.pasid);
+ ret = -ENODEV;
+ goto cleanup_partial;
+ }
+
group = kzalloc(sizeof(*group), GFP_KERNEL);
if (!group) {
/*
@@ -157,8 +169,8 @@ int iommu_queue_iopf(struct iommu_fault *fault, struct device *dev)
group->dev = dev;
group->last_fault.fault = *fault;
INIT_LIST_HEAD(&group->faults);
+ group->domain = domain;
list_add(&group->last_fault.list, &group->faults);
- INIT_WORK(&group->work, iopf_handler);
/* See if we have partial faults for this group */
list_for_each_entry_safe(iopf, next, &iopf_param->partial, list) {
@@ -167,9 +179,11 @@ int iommu_queue_iopf(struct iommu_fault *fault, struct device *dev)
list_move(&iopf->list, &group->faults);
}
- queue_work(iopf_param->queue->wq, &group->work);
- return 0;
+ ret = domain->iopf_handler(group);
+ if (ret)
+ iopf_free_group(group);
+ return ret;
cleanup_partial:
list_for_each_entry_safe(iopf, next, &iopf_param->partial, list) {
if (iopf->fault.prm.grpid == fault->prm.grpid) {
@@ -181,6 +195,17 @@ int iommu_queue_iopf(struct iommu_fault *fault, struct device *dev)
}
EXPORT_SYMBOL_GPL(iommu_queue_iopf);
+int iommu_sva_handle_iopf(struct iopf_group *group)
+{
+ struct iommu_fault_param *fault_param = group->dev->iommu->fault_param;
+
+ INIT_WORK(&group->work, iopf_handler);
+ if (!queue_work(fault_param->queue->wq, &group->work))
+ return -EBUSY;
+
+ return 0;
+}
+
/**
* iopf_queue_flush_dev - Ensure that all queued faults have been processed
* @dev: the endpoint whose faults need to be flushed.
diff --git a/drivers/iommu/iommu-sva.c b/drivers/iommu/iommu-sva.c
index b78671a8a914..ba0d5b7e106a 100644
--- a/drivers/iommu/iommu-sva.c
+++ b/drivers/iommu/iommu-sva.c
@@ -149,11 +149,10 @@ EXPORT_SYMBOL_GPL(iommu_sva_get_pasid);
* I/O page fault handler for SVA
*/
enum iommu_page_response_code
-iommu_sva_handle_iopf(struct iommu_fault *fault, void *data)
+iommu_sva_handle_mm(struct iommu_fault *fault, struct mm_struct *mm)
{
vm_fault_t ret;
struct vm_area_struct *vma;
- struct mm_struct *mm = data;
unsigned int access_flags = 0;
unsigned int fault_flags = FAULT_FLAG_REMOTE;
struct iommu_fault_page_request *prm = &fault->prm;
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index a7f6d0ec0400..0704a0f36349 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -1951,6 +1951,27 @@ int iommu_deferred_attach(struct device *dev, struct iommu_domain *domain)
return 0;
}
+static void assert_no_pending_iopf(struct device *dev, ioasid_t pasid)
+{
+ struct iommu_fault_param *iopf_param = dev->iommu->fault_param;
+ struct iopf_fault *iopf;
+
+ if (!iopf_param)
+ return;
+
+ mutex_lock(&iopf_param->lock);
+ list_for_each_entry(iopf, &iopf_param->partial, list) {
+ if (WARN_ON(iopf->fault.prm.pasid == pasid))
+ break;
+ }
+
+ list_for_each_entry(iopf, &iopf_param->faults, list) {
+ if (WARN_ON(iopf->fault.prm.pasid == pasid))
+ break;
+ }
+ mutex_unlock(&iopf_param->lock);
+}
+
void iommu_detach_device(struct iommu_domain *domain, struct device *dev)
{
struct iommu_group *group;
@@ -1959,6 +1980,7 @@ void iommu_detach_device(struct iommu_domain *domain, struct device *dev)
if (!group)
return;
+ assert_no_pending_iopf(dev, IOMMU_NO_PASID);
mutex_lock(&group->mutex);
if (WARN_ON(domain != group->domain) ||
WARN_ON(list_count_nodes(&group->devices) != 1))
@@ -3269,6 +3291,7 @@ void iommu_detach_device_pasid(struct iommu_domain *domain, struct device *dev,
{
struct iommu_group *group = iommu_group_get(dev);
+ assert_no_pending_iopf(dev, pasid);
mutex_lock(&group->mutex);
__iommu_remove_group_pasid(group, pasid);
WARN_ON(xa_erase(&group->pasid_array, pasid) != domain);
--
2.34.1
^ permalink raw reply related [flat|nested] 48+ messages in thread* RE: [PATCH v4 09/10] iommu: Make iommu_queue_iopf() more generic
2023-08-25 2:30 ` [PATCH v4 09/10] iommu: Make iommu_queue_iopf() more generic Lu Baolu
@ 2023-08-25 8:17 ` Tian, Kevin
2023-08-26 7:32 ` Baolu Lu
` (3 more replies)
0 siblings, 4 replies; 48+ messages in thread
From: Tian, Kevin @ 2023-08-25 8:17 UTC (permalink / raw)
To: Lu Baolu, Joerg Roedel, Will Deacon, Robin Murphy,
Jason Gunthorpe, Jean-Philippe Brucker, Nicolin Chen
Cc: Liu, Yi L, Jacob Pan, iommu@lists.linux.dev, kvm@vger.kernel.org,
linux-kernel@vger.kernel.org
> From: Lu Baolu <baolu.lu@linux.intel.com>
> Sent: Friday, August 25, 2023 10:30 AM
>
> + if (fault->prm.flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID)
> + domain = iommu_get_domain_for_dev_pasid(dev, fault-
> >prm.pasid, 0);
> + else
> + domain = iommu_get_domain_for_dev(dev);
> +
> + if (!domain || !domain->iopf_handler) {
> + dev_warn_ratelimited(dev,
> + "iopf from pasid %d received without handler
> installed\n",
"without domain attached or handler installed"
>
> +int iommu_sva_handle_iopf(struct iopf_group *group)
> +{
> + struct iommu_fault_param *fault_param = group->dev->iommu-
> >fault_param;
> +
> + INIT_WORK(&group->work, iopf_handler);
> + if (!queue_work(fault_param->queue->wq, &group->work))
> + return -EBUSY;
> +
> + return 0;
> +}
this function is generic so the name should not tie to 'sva'.
> +
> /**
> * iopf_queue_flush_dev - Ensure that all queued faults have been
> processed
> * @dev: the endpoint whose faults need to be flushed.
Presumably we also need a flush callback per domain given now
the use of workqueue is optional then flush_workqueue() might
not be sufficient.
>
> +static void assert_no_pending_iopf(struct device *dev, ioasid_t pasid)
> +{
> + struct iommu_fault_param *iopf_param = dev->iommu-
> >fault_param;
> + struct iopf_fault *iopf;
> +
> + if (!iopf_param)
> + return;
> +
> + mutex_lock(&iopf_param->lock);
> + list_for_each_entry(iopf, &iopf_param->partial, list) {
> + if (WARN_ON(iopf->fault.prm.pasid == pasid))
> + break;
> + }
partial list is protected by dev_iommu lock.
> +
> + list_for_each_entry(iopf, &iopf_param->faults, list) {
> + if (WARN_ON(iopf->fault.prm.pasid == pasid))
> + break;
> + }
> + mutex_unlock(&iopf_param->lock);
> +}
> +
> void iommu_detach_device(struct iommu_domain *domain, struct device
> *dev)
> {
> struct iommu_group *group;
> @@ -1959,6 +1980,7 @@ void iommu_detach_device(struct iommu_domain
> *domain, struct device *dev)
> if (!group)
> return;
>
> + assert_no_pending_iopf(dev, IOMMU_NO_PASID);
> mutex_lock(&group->mutex);
> if (WARN_ON(domain != group->domain) ||
> WARN_ON(list_count_nodes(&group->devices) != 1))
> @@ -3269,6 +3291,7 @@ void iommu_detach_device_pasid(struct
> iommu_domain *domain, struct device *dev,
> {
> struct iommu_group *group = iommu_group_get(dev);
>
> + assert_no_pending_iopf(dev, pasid);
this doesn't look correct. A sane driver will stop triggering new
page requests before calling detach, but there are still pending ones
not drained until iopf_queue_flush_dev() is called by
ops->remove_dev_pasid().
Then this check will cause a false warning.
> mutex_lock(&group->mutex);
> __iommu_remove_group_pasid(group, pasid);
> WARN_ON(xa_erase(&group->pasid_array, pasid) != domain);
> --
> 2.34.1
^ permalink raw reply [flat|nested] 48+ messages in thread* Re: [PATCH v4 09/10] iommu: Make iommu_queue_iopf() more generic
2023-08-25 8:17 ` Tian, Kevin
@ 2023-08-26 7:32 ` Baolu Lu
2023-08-30 7:34 ` Tian, Kevin
2023-08-26 8:01 ` Baolu Lu
` (2 subsequent siblings)
3 siblings, 1 reply; 48+ messages in thread
From: Baolu Lu @ 2023-08-26 7:32 UTC (permalink / raw)
To: Tian, Kevin, Joerg Roedel, Will Deacon, Robin Murphy,
Jason Gunthorpe, Jean-Philippe Brucker, Nicolin Chen
Cc: baolu.lu, Liu, Yi L, Jacob Pan, iommu@lists.linux.dev,
kvm@vger.kernel.org, linux-kernel@vger.kernel.org
On 8/25/23 4:17 PM, Tian, Kevin wrote:
>> From: Lu Baolu <baolu.lu@linux.intel.com>
>> Sent: Friday, August 25, 2023 10:30 AM
>>
>> + if (fault->prm.flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID)
>> + domain = iommu_get_domain_for_dev_pasid(dev, fault-
>>> prm.pasid, 0);
>> + else
>> + domain = iommu_get_domain_for_dev(dev);
>> +
>> + if (!domain || !domain->iopf_handler) {
>> + dev_warn_ratelimited(dev,
>> + "iopf from pasid %d received without handler
>> installed\n",
>
> "without domain attached or handler installed"
Okay.
>
>>
>> +int iommu_sva_handle_iopf(struct iopf_group *group)
>> +{
>> + struct iommu_fault_param *fault_param = group->dev->iommu-
>>> fault_param;
>> +
>> + INIT_WORK(&group->work, iopf_handler);
>> + if (!queue_work(fault_param->queue->wq, &group->work))
>> + return -EBUSY;
>> +
>> + return 0;
>> +}
>
> this function is generic so the name should not tie to 'sva'.
Currently only sva uses it. It's fine to make it generic later when any
new use comes. Does it work to you?
Best regards,
baolu
^ permalink raw reply [flat|nested] 48+ messages in thread* RE: [PATCH v4 09/10] iommu: Make iommu_queue_iopf() more generic
2023-08-26 7:32 ` Baolu Lu
@ 2023-08-30 7:34 ` Tian, Kevin
0 siblings, 0 replies; 48+ messages in thread
From: Tian, Kevin @ 2023-08-30 7:34 UTC (permalink / raw)
To: Baolu Lu, Joerg Roedel, Will Deacon, Robin Murphy,
Jason Gunthorpe, Jean-Philippe Brucker, Nicolin Chen
Cc: Liu, Yi L, Jacob Pan, iommu@lists.linux.dev, kvm@vger.kernel.org,
linux-kernel@vger.kernel.org
> From: Baolu Lu <baolu.lu@linux.intel.com>
> Sent: Saturday, August 26, 2023 3:32 PM
>
> On 8/25/23 4:17 PM, Tian, Kevin wrote:
> >> From: Lu Baolu <baolu.lu@linux.intel.com>
> >> Sent: Friday, August 25, 2023 10:30 AM
> >>
> >> +int iommu_sva_handle_iopf(struct iopf_group *group)
> >> +{
> >> + struct iommu_fault_param *fault_param = group->dev->iommu-
> >>> fault_param;
> >> +
> >> + INIT_WORK(&group->work, iopf_handler);
> >> + if (!queue_work(fault_param->queue->wq, &group->work))
> >> + return -EBUSY;
> >> +
> >> + return 0;
> >> +}
> >
> > this function is generic so the name should not tie to 'sva'.
>
> Currently only sva uses it. It's fine to make it generic later when any
> new use comes. Does it work to you?
>
It's fine given you moved this function to sva.c in next patch.
^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: [PATCH v4 09/10] iommu: Make iommu_queue_iopf() more generic
2023-08-25 8:17 ` Tian, Kevin
2023-08-26 7:32 ` Baolu Lu
@ 2023-08-26 8:01 ` Baolu Lu
2023-08-30 7:43 ` Tian, Kevin
[not found] ` <BN9PR11MB527624F1CC4A545FBAE3C9C98CE6A@BN9PR11MB5276.namprd11.prod.outlook.com>
2023-08-26 8:04 ` Baolu Lu
2023-08-26 8:08 ` Baolu Lu
3 siblings, 2 replies; 48+ messages in thread
From: Baolu Lu @ 2023-08-26 8:01 UTC (permalink / raw)
To: Tian, Kevin, Joerg Roedel, Will Deacon, Robin Murphy,
Jason Gunthorpe, Jean-Philippe Brucker, Nicolin Chen
Cc: baolu.lu, Liu, Yi L, Jacob Pan, iommu@lists.linux.dev,
kvm@vger.kernel.org, linux-kernel@vger.kernel.org
On 8/25/23 4:17 PM, Tian, Kevin wrote:
>> +
>> /**
>> * iopf_queue_flush_dev - Ensure that all queued faults have been
>> processed
>> * @dev: the endpoint whose faults need to be flushed.
> Presumably we also need a flush callback per domain given now
> the use of workqueue is optional then flush_workqueue() might
> not be sufficient.
>
The iopf_queue_flush_dev() function flushes all pending faults from the
IOMMU queue for a specific device. It has no means to flush fault queues
out of iommu core.
The iopf_queue_flush_dev() function is typically called when a domain is
detaching from a PASID. Hence it's necessary to flush the pending faults
from top to bottom. For example, iommufd should flush pending faults in
its fault queues after detaching the domain from the pasid.
The fault_param->lock mutex is sufficient to avoid the race condition if
the workqueue is not used. However, if the workqueue is used, then it is
possible for a workqueue thread to be in the middle of delivering a
fault while the fault queue is being flushed.
Best regards,
baolu
^ permalink raw reply [flat|nested] 48+ messages in thread* RE: [PATCH v4 09/10] iommu: Make iommu_queue_iopf() more generic
2023-08-26 8:01 ` Baolu Lu
@ 2023-08-30 7:43 ` Tian, Kevin
2023-08-30 11:02 ` Vasant Hegde
2023-08-31 9:27 ` Baolu Lu
[not found] ` <BN9PR11MB527624F1CC4A545FBAE3C9C98CE6A@BN9PR11MB5276.namprd11.prod.outlook.com>
1 sibling, 2 replies; 48+ messages in thread
From: Tian, Kevin @ 2023-08-30 7:43 UTC (permalink / raw)
To: Baolu Lu, Joerg Roedel, Will Deacon, Robin Murphy,
Jason Gunthorpe, Jean-Philippe Brucker, Nicolin Chen
Cc: Liu, Yi L, Jacob Pan, iommu@lists.linux.dev, kvm@vger.kernel.org,
linux-kernel@vger.kernel.org
> From: Baolu Lu <baolu.lu@linux.intel.com>
> Sent: Saturday, August 26, 2023 4:01 PM
>
> On 8/25/23 4:17 PM, Tian, Kevin wrote:
> >> +
> >> /**
> >> * iopf_queue_flush_dev - Ensure that all queued faults have been
> >> processed
> >> * @dev: the endpoint whose faults need to be flushed.
> > Presumably we also need a flush callback per domain given now
> > the use of workqueue is optional then flush_workqueue() might
> > not be sufficient.
> >
>
> The iopf_queue_flush_dev() function flushes all pending faults from the
> IOMMU queue for a specific device. It has no means to flush fault queues
> out of iommu core.
>
> The iopf_queue_flush_dev() function is typically called when a domain is
> detaching from a PASID. Hence it's necessary to flush the pending faults
> from top to bottom. For example, iommufd should flush pending faults in
> its fault queues after detaching the domain from the pasid.
>
Is there an ordering problem? The last step of intel_svm_drain_prq()
in the detaching path issues a set of descriptors to drain page requests
and responses in hardware. It cannot complete if not all software queues
are drained and it's counter-intuitive to drain a software queue after
the hardware draining has already been completed.
btw just flushing requests is probably insufficient in iommufd case since
the responses are received asynchronously. It requires an interface to
drain both requests and responses (presumably with timeouts in case
of a malicious guest which never responds) in the detach path.
it's not a problem for sva as responses are synchronously delivered after
handling mm fault. So fine to not touch it in this series but certainly
this area needs more work when moving to support iommufd. 😊
btw why is iopf_queue_flush_dev() called only in intel-iommu driver?
Isn't it a common requirement for all sva-capable drivers?
^ permalink raw reply [flat|nested] 48+ messages in thread* Re: [PATCH v4 09/10] iommu: Make iommu_queue_iopf() more generic
2023-08-30 7:43 ` Tian, Kevin
@ 2023-08-30 11:02 ` Vasant Hegde
2023-08-30 12:49 ` Jean-Philippe Brucker
2023-08-31 9:27 ` Baolu Lu
1 sibling, 1 reply; 48+ messages in thread
From: Vasant Hegde @ 2023-08-30 11:02 UTC (permalink / raw)
To: Tian, Kevin, Baolu Lu, Joerg Roedel, Will Deacon, Robin Murphy,
Jason Gunthorpe, Jean-Philippe Brucker, Nicolin Chen
Cc: Liu, Yi L, Jacob Pan, iommu@lists.linux.dev, kvm@vger.kernel.org,
linux-kernel@vger.kernel.org
Tian, Baolu,
On 8/30/2023 1:13 PM, Tian, Kevin wrote:
>> From: Baolu Lu <baolu.lu@linux.intel.com>
>> Sent: Saturday, August 26, 2023 4:01 PM
>>
>> On 8/25/23 4:17 PM, Tian, Kevin wrote:
>>>> +
>>>> /**
>>>> * iopf_queue_flush_dev - Ensure that all queued faults have been
>>>> processed
>>>> * @dev: the endpoint whose faults need to be flushed.
>>> Presumably we also need a flush callback per domain given now
>>> the use of workqueue is optional then flush_workqueue() might
>>> not be sufficient.
>>>
>>
>> The iopf_queue_flush_dev() function flushes all pending faults from the
>> IOMMU queue for a specific device. It has no means to flush fault queues
>> out of iommu core.
>>
>> The iopf_queue_flush_dev() function is typically called when a domain is
>> detaching from a PASID. Hence it's necessary to flush the pending faults
>> from top to bottom. For example, iommufd should flush pending faults in
>> its fault queues after detaching the domain from the pasid.
>>
>
> Is there an ordering problem? The last step of intel_svm_drain_prq()
> in the detaching path issues a set of descriptors to drain page requests
> and responses in hardware. It cannot complete if not all software queues
> are drained and it's counter-intuitive to drain a software queue after
> the hardware draining has already been completed.
>
> btw just flushing requests is probably insufficient in iommufd case since
> the responses are received asynchronously. It requires an interface to
> drain both requests and responses (presumably with timeouts in case
> of a malicious guest which never responds) in the detach path.
>
> it's not a problem for sva as responses are synchrounsly delivered after
> handling mm fault. So fine to not touch it in this series but certainly
> this area needs more work when moving to support iommufd. 😊
>
> btw why is iopf_queue_flush_dev() called only in intel-iommu driver?
> Isn't it a common requirement for all sva-capable drivers?
I had the same question when we did the SVA implementation for AMD IOMMU [1]. Currently
we call queue_flush from the remove_dev_pasid() path. Since PASID can be enabled
without ATS/PRI, I thought it was the individual driver's responsibility.
But looking at this series, does it make sense to handle queue_flush in the core layer?
[1]
https://lore.kernel.org/linux-iommu/20230823140415.729050-1-vasant.hegde@amd.com/T/#t
-Vasant
^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: [PATCH v4 09/10] iommu: Make iommu_queue_iopf() more generic
2023-08-30 11:02 ` Vasant Hegde
@ 2023-08-30 12:49 ` Jean-Philippe Brucker
2023-08-31 6:57 ` Vasant Hegde
0 siblings, 1 reply; 48+ messages in thread
From: Jean-Philippe Brucker @ 2023-08-30 12:49 UTC (permalink / raw)
To: Vasant Hegde
Cc: Tian, Kevin, Baolu Lu, Joerg Roedel, Will Deacon, Robin Murphy,
Jason Gunthorpe, Nicolin Chen, Liu, Yi L, Jacob Pan,
iommu@lists.linux.dev, kvm@vger.kernel.org,
linux-kernel@vger.kernel.org
On Wed, Aug 30, 2023 at 04:32:47PM +0530, Vasant Hegde wrote:
> Tian, Baolu,
>
> On 8/30/2023 1:13 PM, Tian, Kevin wrote:
> >> From: Baolu Lu <baolu.lu@linux.intel.com>
> >> Sent: Saturday, August 26, 2023 4:01 PM
> >>
> >> On 8/25/23 4:17 PM, Tian, Kevin wrote:
> >>>> +
> >>>> /**
> >>>> * iopf_queue_flush_dev - Ensure that all queued faults have been
> >>>> processed
> >>>> * @dev: the endpoint whose faults need to be flushed.
> >>> Presumably we also need a flush callback per domain given now
> >>> the use of workqueue is optional then flush_workqueue() might
> >>> not be sufficient.
> >>>
> >>
> >> The iopf_queue_flush_dev() function flushes all pending faults from the
> >> IOMMU queue for a specific device. It has no means to flush fault queues
> >> out of iommu core.
> >>
> >> The iopf_queue_flush_dev() function is typically called when a domain is
> >> detaching from a PASID. Hence it's necessary to flush the pending faults
> >> from top to bottom. For example, iommufd should flush pending faults in
> >> its fault queues after detaching the domain from the pasid.
> >>
> >
> > Is there an ordering problem? The last step of intel_svm_drain_prq()
> > in the detaching path issues a set of descriptors to drain page requests
> > and responses in hardware. It cannot complete if not all software queues
> > are drained and it's counter-intuitive to drain a software queue after
> > the hardware draining has already been completed.
> >
> > btw just flushing requests is probably insufficient in iommufd case since
> > the responses are received asynchronously. It requires an interface to
> > drain both requests and responses (presumably with timeouts in case
> > of a malicious guest which never responds) in the detach path.
> >
> > it's not a problem for sva as responses are synchronously delivered after
> > handling mm fault. So fine to not touch it in this series but certainly
> > this area needs more work when moving to support iommufd. 😊
> >
> > btw why is iopf_queue_flush_dev() called only in intel-iommu driver?
> > Isn't it a common requirement for all sva-capable drivers?
It's not needed by the SMMUv3 driver because it doesn't implement PRI yet,
only the Arm-specific stall fault model where DMA transactions are held in
the SMMU while waiting for the OS to handle IOPFs. Since a device driver
must complete all DMA transactions before calling unbind(), with the stall
model there are no pending IOPFs to flush on unbind(). PRI support with
Stop Markers would add a call to iopf_queue_flush_dev() after flushing the
SMMU PRI queue [2].
Moving the flush to the core shouldn't be a problem, as long as the driver
gets a chance to flush the hardware queue first.
Thanks,
Jean
[2] https://jpbrucker.net/git/linux/commit/?h=sva/2020-12-14&id=bba76fb4ec631bec96f98f14a6cd13b2df81e5ce
>
> I had same question when we did SVA implementation for AMD IOMMU [1]. Currently
> we call queue_flush from remove_dev_pasid() path. Since PASID can be enabled
> without ATS/PRI, I thought its individual drivers responsibility.
> But looking this series, does it make sense to handle queue_flush in core layer?
>
> [1]
> https://lore.kernel.org/linux-iommu/20230823140415.729050-1-vasant.hegde@amd.com/T/#t
>
> -Vasant
>
>
^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: [PATCH v4 09/10] iommu: Make iommu_queue_iopf() more generic
2023-08-30 12:49 ` Jean-Philippe Brucker
@ 2023-08-31 6:57 ` Vasant Hegde
0 siblings, 0 replies; 48+ messages in thread
From: Vasant Hegde @ 2023-08-31 6:57 UTC (permalink / raw)
To: Jean-Philippe Brucker
Cc: Tian, Kevin, Baolu Lu, Joerg Roedel, Will Deacon, Robin Murphy,
Jason Gunthorpe, Nicolin Chen, Liu, Yi L, Jacob Pan,
iommu@lists.linux.dev, kvm@vger.kernel.org,
linux-kernel@vger.kernel.org
Hi Jean,
On 8/30/2023 6:19 PM, Jean-Philippe Brucker wrote:
> On Wed, Aug 30, 2023 at 04:32:47PM +0530, Vasant Hegde wrote:
>> Tian, Baolu,
>>
>> On 8/30/2023 1:13 PM, Tian, Kevin wrote:
>>>> From: Baolu Lu <baolu.lu@linux.intel.com>
>>>> Sent: Saturday, August 26, 2023 4:01 PM
>>>>
>>>> On 8/25/23 4:17 PM, Tian, Kevin wrote:
>>>>>> +
>>>>>> /**
>>>>>> * iopf_queue_flush_dev - Ensure that all queued faults have been
>>>>>> processed
>>>>>> * @dev: the endpoint whose faults need to be flushed.
>>>>> Presumably we also need a flush callback per domain given now
>>>>> the use of workqueue is optional then flush_workqueue() might
>>>>> not be sufficient.
>>>>>
>>>>
>>>> The iopf_queue_flush_dev() function flushes all pending faults from the
>>>> IOMMU queue for a specific device. It has no means to flush fault queues
>>>> out of iommu core.
>>>>
>>>> The iopf_queue_flush_dev() function is typically called when a domain is
>>>> detaching from a PASID. Hence it's necessary to flush the pending faults
>>>> from top to bottom. For example, iommufd should flush pending faults in
>>>> its fault queues after detaching the domain from the pasid.
>>>>
>>>
>>> Is there an ordering problem? The last step of intel_svm_drain_prq()
>>> in the detaching path issues a set of descriptors to drain page requests
>>> and responses in hardware. It cannot complete if not all software queues
>>> are drained and it's counter-intuitive to drain a software queue after
>>> the hardware draining has already been completed.
>>>
>>> btw just flushing requests is probably insufficient in iommufd case since
>>> the responses are received asynchronously. It requires an interface to
>>> drain both requests and responses (presumably with timeouts in case
>>> of a malicious guest which never responds) in the detach path.
>>>
>>> it's not a problem for sva as responses are synchronously delivered after
>>> handling mm fault. So fine to not touch it in this series but certainly
>>> this area needs more work when moving to support iommufd. 😊
>>>
>>> btw why is iopf_queue_flush_dev() called only in intel-iommu driver?
>>> Isn't it a common requirement for all sva-capable drivers?
>
> It's not needed by the SMMUv3 driver because it doesn't implement PRI yet,
> only the Arm-specific stall fault model where DMA transactions are held in
> the SMMU while waiting for the OS to handle IOPFs. Since a device driver
> must complete all DMA transactions before calling unbind(), with the stall
> model there are no pending IOPFs to flush on unbind(). PRI support with
> Stop Markers would add a call to iopf_queue_flush_dev() after flushing the
> SMMU PRI queue [2].
>
Thanks for the explanation.
> Moving the flush to the core shouldn't be a problem, as long as the driver
> gets a chance to flush the hardware queue first.
I am fine with keeping it as is. I can call iopf_queue_flush_dev() from AMD driver.
-Vasant
>
> Thanks,
> Jean
>
> [2] https://jpbrucker.net/git/linux/commit/?h=sva/2020-12-14&id=bba76fb4ec631bec96f98f14a6cd13b2df81e5ce
>
>>
>> I had same question when we did SVA implementation for AMD IOMMU [1]. Currently
>> we call queue_flush from remove_dev_pasid() path. Since PASID can be enabled
>> without ATS/PRI, I thought it was the individual driver's responsibility.
>> But looking this series, does it make sense to handle queue_flush in core layer?
>>
>> [1]
>> https://lore.kernel.org/linux-iommu/20230823140415.729050-1-vasant.hegde@amd.com/T/#t
>>
>> -Vasant
>>
>>
^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: [PATCH v4 09/10] iommu: Make iommu_queue_iopf() more generic
2023-08-30 7:43 ` Tian, Kevin
2023-08-30 11:02 ` Vasant Hegde
@ 2023-08-31 9:27 ` Baolu Lu
2023-09-01 2:49 ` Tian, Kevin
1 sibling, 1 reply; 48+ messages in thread
From: Baolu Lu @ 2023-08-31 9:27 UTC (permalink / raw)
To: Tian, Kevin, Joerg Roedel, Will Deacon, Robin Murphy,
Jason Gunthorpe, Jean-Philippe Brucker, Nicolin Chen
Cc: baolu.lu, Liu, Yi L, Jacob Pan, iommu@lists.linux.dev,
kvm@vger.kernel.org, linux-kernel@vger.kernel.org
On 2023/8/30 15:43, Tian, Kevin wrote:
>> From: Baolu Lu <baolu.lu@linux.intel.com>
>> Sent: Saturday, August 26, 2023 4:01 PM
>>
>> On 8/25/23 4:17 PM, Tian, Kevin wrote:
>>>> +
>>>> /**
>>>> * iopf_queue_flush_dev - Ensure that all queued faults have been
>>>> processed
>>>> * @dev: the endpoint whose faults need to be flushed.
>>> Presumably we also need a flush callback per domain given now
>>> the use of workqueue is optional then flush_workqueue() might
>>> not be sufficient.
>>>
>>
>> The iopf_queue_flush_dev() function flushes all pending faults from the
>> IOMMU queue for a specific device. It has no means to flush fault queues
>> out of iommu core.
>>
>> The iopf_queue_flush_dev() function is typically called when a domain is
>> detaching from a PASID. Hence it's necessary to flush the pending faults
>> from top to bottom. For example, iommufd should flush pending faults in
>> its fault queues after detaching the domain from the pasid.
>>
>
> Is there an ordering problem? The last step of intel_svm_drain_prq()
> in the detaching path issues a set of descriptors to drain page requests
> and responses in hardware. It cannot complete if not all software queues
> are drained and it's counter-intuitive to drain a software queue after
> the hardware draining has already been completed.
>
> btw just flushing requests is probably insufficient in iommufd case since
> the responses are received asynchronously. It requires an interface to
> drain both requests and responses (presumably with timeouts in case
> of a malicious guest which never responds) in the detach path.
You are right. Good catch.
To put it simply, iopf_queue_flush_dev() is insufficient to support the
case of forwarding iopf's over iommufd. Do I understand it right?
Perhaps we should drain the partial list and the response pending list?
With these two lists drained, no more iopf's for the specific pasid will
be forwarded up, and page response from upper layer will be dropped.
>
> it's not a problem for sva as responses are synchronously delivered after
> handling mm fault. So fine to not touch it in this series but certainly
> this area needs more work when moving to support iommufd. 😊
Yes, SVA is not affected. The flush_workqueue() is enough for it. As a
preparation series, I hope we can solve this in it. :-)
>
> btw why is iopf_queue_flush_dev() called only in intel-iommu driver?
> Isn't it a common requirement for all sva-capable drivers?
Jean answered this.
Best regards,
baolu
^ permalink raw reply [flat|nested] 48+ messages in thread
* RE: [PATCH v4 09/10] iommu: Make iommu_queue_iopf() more generic
2023-08-31 9:27 ` Baolu Lu
@ 2023-09-01 2:49 ` Tian, Kevin
2023-09-05 5:19 ` Baolu Lu
0 siblings, 1 reply; 48+ messages in thread
From: Tian, Kevin @ 2023-09-01 2:49 UTC (permalink / raw)
To: Baolu Lu, Joerg Roedel, Will Deacon, Robin Murphy,
Jason Gunthorpe, Jean-Philippe Brucker, Nicolin Chen
Cc: Liu, Yi L, Jacob Pan, iommu@lists.linux.dev, kvm@vger.kernel.org,
linux-kernel@vger.kernel.org
> From: Baolu Lu <baolu.lu@linux.intel.com>
> Sent: Thursday, August 31, 2023 5:28 PM
>
> On 2023/8/30 15:43, Tian, Kevin wrote:
> >> From: Baolu Lu <baolu.lu@linux.intel.com>
> >> Sent: Saturday, August 26, 2023 4:01 PM
> >>
> >> On 8/25/23 4:17 PM, Tian, Kevin wrote:
> >>>> +
> >>>> /**
> >>>> * iopf_queue_flush_dev - Ensure that all queued faults have been
> >>>> processed
> >>>> * @dev: the endpoint whose faults need to be flushed.
> >>> Presumably we also need a flush callback per domain given now
> >>> the use of workqueue is optional then flush_workqueue() might
> >>> not be sufficient.
> >>>
> >>
> >> The iopf_queue_flush_dev() function flushes all pending faults from the
> >> IOMMU queue for a specific device. It has no means to flush fault queues
> >> out of iommu core.
> >>
> >> The iopf_queue_flush_dev() function is typically called when a domain is
> >> detaching from a PASID. Hence it's necessary to flush the pending faults
> >> from top to bottom. For example, iommufd should flush pending faults in
> >> its fault queues after detaching the domain from the pasid.
> >>
> >
> > Is there an ordering problem? The last step of intel_svm_drain_prq()
> > in the detaching path issues a set of descriptors to drain page requests
> > and responses in hardware. It cannot complete if not all software queues
> > are drained and it's counter-intuitive to drain a software queue after
> > the hardware draining has already been completed.
> >
> > btw just flushing requests is probably insufficient in iommufd case since
> > the responses are received asynchronously. It requires an interface to
> > drain both requests and responses (presumably with timeouts in case
> > of a malicious guest which never responds) in the detach path.
>
> You are right. Good catch.
>
> To put it simply, iopf_queue_flush_dev() is insufficient to support the
> case of forwarding iopf's over iommufd. Do I understand it right?
yes
>
> Perhaps we should drain the partial list and the response pending list?
> With these two lists drained, no more iopf's for the specific pasid will
> be forwarded up, and page response from upper layer will be dropped.
>
correct.
^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: [PATCH v4 09/10] iommu: Make iommu_queue_iopf() more generic
2023-09-01 2:49 ` Tian, Kevin
@ 2023-09-05 5:19 ` Baolu Lu
2023-09-11 6:35 ` Tian, Kevin
0 siblings, 1 reply; 48+ messages in thread
From: Baolu Lu @ 2023-09-05 5:19 UTC (permalink / raw)
To: Tian, Kevin, Joerg Roedel, Will Deacon, Robin Murphy,
Jason Gunthorpe, Jean-Philippe Brucker, Nicolin Chen
Cc: baolu.lu, Liu, Yi L, Jacob Pan, iommu@lists.linux.dev,
kvm@vger.kernel.org, linux-kernel@vger.kernel.org
On 2023/9/1 10:49, Tian, Kevin wrote:
>> From: Baolu Lu<baolu.lu@linux.intel.com>
>> Sent: Thursday, August 31, 2023 5:28 PM
>>
>> On 2023/8/30 15:43, Tian, Kevin wrote:
>>>> From: Baolu Lu<baolu.lu@linux.intel.com>
>>>> Sent: Saturday, August 26, 2023 4:01 PM
>>>>
>>>> On 8/25/23 4:17 PM, Tian, Kevin wrote:
>>>>>> +
>>>>>> /**
>>>>>> * iopf_queue_flush_dev - Ensure that all queued faults have been
>>>>>> processed
>>>>>> * @dev: the endpoint whose faults need to be flushed.
>>>>> Presumably we also need a flush callback per domain given now
>>>>> the use of workqueue is optional then flush_workqueue() might
>>>>> not be sufficient.
>>>>>
>>>> The iopf_queue_flush_dev() function flushes all pending faults from the
>>>> IOMMU queue for a specific device. It has no means to flush fault queues
>>>> out of iommu core.
>>>>
>>>> The iopf_queue_flush_dev() function is typically called when a domain is
>>>> detaching from a PASID. Hence it's necessary to flush the pending faults
>>>> from top to bottom. For example, iommufd should flush pending faults in
>>>> its fault queues after detaching the domain from the pasid.
>>>>
>>> Is there an ordering problem? The last step of intel_svm_drain_prq()
>>> in the detaching path issues a set of descriptors to drain page requests
>>> and responses in hardware. It cannot complete if not all software queues
>>> are drained and it's counter-intuitive to drain a software queue after
>>> the hardware draining has already been completed.
>>>
>>> btw just flushing requests is probably insufficient in iommufd case since
>>> the responses are received asynchronously. It requires an interface to
>>> drain both requests and responses (presumably with timeouts in case
>>> of a malicious guest which never responds) in the detach path.
>> You are right. Good catch.
>>
>> To put it simply, iopf_queue_flush_dev() is insufficient to support the
>> case of forwarding iopf's over iommufd. Do I understand it right?
> yes
I added below patch to address the iopf_queue_flush_dev() issue. What do
you think of this?
iommu: Improve iopf_queue_flush_dev()
The iopf_queue_flush_dev() is called by the iommu driver before releasing
a PASID. It ensures that all pending faults for this PASID have been
handled or cancelled, and won't hit the address space that reuses this
PASID. The driver must make sure that no new fault is added to the queue.
The SMMUv3 driver doesn't use it because it only implements the
Arm-specific stall fault model where DMA transactions are held in the SMMU
while waiting for the OS to handle iopf's. Since a device driver must
complete all DMA transactions before detaching domain, there are no
pending iopf's with the stall model. PRI support requires adding a call to
iopf_queue_flush_dev() after flushing the hardware page fault queue.
The current implementation of iopf_queue_flush_dev() is a simplified
version. It is only suitable for SVA case in which the processing of iopf
is implemented in the inner loop of the iommu subsystem.
Improve this interface to make it also work for handling iopf out of the
iommu core.
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
---
include/linux/iommu.h | 4 ++--
drivers/iommu/intel/svm.c | 2 +-
drivers/iommu/io-pgfault.c | 40 ++++++++++++++++++++++++++++++++++++--
3 files changed, 41 insertions(+), 5 deletions(-)
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 77ad33ffe3ac..465e23e945d0 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -1275,7 +1275,7 @@ iommu_sva_domain_alloc(struct device *dev, struct
mm_struct *mm)
#ifdef CONFIG_IOMMU_IOPF
int iopf_queue_add_device(struct iopf_queue *queue, struct device *dev);
int iopf_queue_remove_device(struct iopf_queue *queue, struct device
*dev);
-int iopf_queue_flush_dev(struct device *dev);
+int iopf_queue_flush_dev(struct device *dev, ioasid_t pasid);
struct iopf_queue *iopf_queue_alloc(const char *name);
void iopf_queue_free(struct iopf_queue *queue);
int iopf_queue_discard_partial(struct iopf_queue *queue);
@@ -1295,7 +1295,7 @@ iopf_queue_remove_device(struct iopf_queue *queue,
struct device *dev)
return -ENODEV;
}
-static inline int iopf_queue_flush_dev(struct device *dev)
+static inline int iopf_queue_flush_dev(struct device *dev, ioasid_t pasid)
{
return -ENODEV;
}
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index 780c5bd73ec2..4c3f4533e337 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -495,7 +495,7 @@ void intel_drain_pasid_prq(struct device *dev, u32
pasid)
goto prq_retry;
}
- iopf_queue_flush_dev(dev);
+ iopf_queue_flush_dev(dev, pasid);
/*
* Perform steps described in VT-d spec CH7.10 to drain page
diff --git a/drivers/iommu/io-pgfault.c b/drivers/iommu/io-pgfault.c
index 3e6845bc5902..84728fb89ac7 100644
--- a/drivers/iommu/io-pgfault.c
+++ b/drivers/iommu/io-pgfault.c
@@ -309,17 +309,53 @@ EXPORT_SYMBOL_GPL(iommu_page_response);
*
* Return: 0 on success and <0 on error.
*/
-int iopf_queue_flush_dev(struct device *dev)
+int iopf_queue_flush_dev(struct device *dev, ioasid_t pasid)
{
struct iommu_fault_param *iopf_param = iopf_get_dev_fault_param(dev);
+ const struct iommu_ops *ops = dev_iommu_ops(dev);
+ struct iommu_page_response resp;
+ struct iopf_fault *iopf, *next;
+ int ret = 0;
if (!iopf_param)
return -ENODEV;
flush_workqueue(iopf_param->queue->wq);
+
+ mutex_lock(&iopf_param->lock);
+ list_for_each_entry_safe(iopf, next, &iopf_param->partial, list) {
+ if (!(iopf->fault.prm.flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID) ||
+ iopf->fault.prm.pasid != pasid)
+ break;
+
+ list_del(&iopf->list);
+ kfree(iopf);
+ }
+
+ list_for_each_entry_safe(iopf, next, &iopf_param->faults, list) {
+ if (!(iopf->fault.prm.flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID) ||
+ iopf->fault.prm.pasid != pasid)
+ continue;
+
+ memset(&resp, 0, sizeof(struct iommu_page_response));
+ resp.pasid = iopf->fault.prm.pasid;
+ resp.grpid = iopf->fault.prm.grpid;
+ resp.code = IOMMU_PAGE_RESP_INVALID;
+
+ if (iopf->fault.prm.flags & IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID)
+ resp.flags = IOMMU_PAGE_RESP_PASID_VALID;
+
+ ret = ops->page_response(dev, iopf, &resp);
+ if (ret)
+ break;
+
+ list_del(&iopf->list);
+ kfree(iopf);
+ }
+ mutex_unlock(&iopf_param->lock);
iopf_put_dev_fault_param(iopf_param);
- return 0;
+ return ret;
}
EXPORT_SYMBOL_GPL(iopf_queue_flush_dev);
Best regards,
baolu
^ permalink raw reply related [flat|nested] 48+ messages in thread* RE: [PATCH v4 09/10] iommu: Make iommu_queue_iopf() more generic
2023-09-05 5:19 ` Baolu Lu
@ 2023-09-11 6:35 ` Tian, Kevin
2023-09-11 12:26 ` Baolu Lu
0 siblings, 1 reply; 48+ messages in thread
From: Tian, Kevin @ 2023-09-11 6:35 UTC (permalink / raw)
To: Baolu Lu, Joerg Roedel, Will Deacon, Robin Murphy,
Jason Gunthorpe, Jean-Philippe Brucker, Nicolin Chen
Cc: Liu, Yi L, Jacob Pan, iommu@lists.linux.dev, kvm@vger.kernel.org,
linux-kernel@vger.kernel.org
> From: Baolu Lu <baolu.lu@linux.intel.com>
> Sent: Tuesday, September 5, 2023 1:20 PM
>
> I added below patch to address the iopf_queue_flush_dev() issue. What do
> you think of this?
>
> iommu: Improve iopf_queue_flush_dev()
>
> The iopf_queue_flush_dev() is called by the iommu driver before releasing
> a PASID. It ensures that all pending faults for this PASID have been
> handled or cancelled, and won't hit the address space that reuses this
> PASID. The driver must make sure that no new fault is added to the queue.
>
> The SMMUv3 driver doesn't use it because it only implements the
> Arm-specific stall fault model where DMA transactions are held in the SMMU
> while waiting for the OS to handle iopf's. Since a device driver must
> complete all DMA transactions before detaching domain, there are no
> pending iopf's with the stall model. PRI support requires adding a call to
> iopf_queue_flush_dev() after flushing the hardware page fault queue.
>
> The current implementation of iopf_queue_flush_dev() is a simplified
> version. It is only suitable for SVA case in which the processing of iopf
> is implemented in the inner loop of the iommu subsystem.
>
> Improve this interface to make it also work for handling iopf out of the
> iommu core.
>
> Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
> ---
> include/linux/iommu.h | 4 ++--
> drivers/iommu/intel/svm.c | 2 +-
> drivers/iommu/io-pgfault.c | 40
> ++++++++++++++++++++++++++++++++++++--
> 3 files changed, 41 insertions(+), 5 deletions(-)
>
> diff --git a/include/linux/iommu.h b/include/linux/iommu.h
> index 77ad33ffe3ac..465e23e945d0 100644
> --- a/include/linux/iommu.h
> +++ b/include/linux/iommu.h
> @@ -1275,7 +1275,7 @@ iommu_sva_domain_alloc(struct device *dev,
> struct
> mm_struct *mm)
> #ifdef CONFIG_IOMMU_IOPF
> int iopf_queue_add_device(struct iopf_queue *queue, struct device *dev);
> int iopf_queue_remove_device(struct iopf_queue *queue, struct device
> *dev);
> -int iopf_queue_flush_dev(struct device *dev);
> +int iopf_queue_flush_dev(struct device *dev, ioasid_t pasid);
> struct iopf_queue *iopf_queue_alloc(const char *name);
> void iopf_queue_free(struct iopf_queue *queue);
> int iopf_queue_discard_partial(struct iopf_queue *queue);
> @@ -1295,7 +1295,7 @@ iopf_queue_remove_device(struct iopf_queue
> *queue,
> struct device *dev)
> return -ENODEV;
> }
>
> -static inline int iopf_queue_flush_dev(struct device *dev)
> +static inline int iopf_queue_flush_dev(struct device *dev, ioasid_t pasid)
> {
> return -ENODEV;
> }
> diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
> index 780c5bd73ec2..4c3f4533e337 100644
> --- a/drivers/iommu/intel/svm.c
> +++ b/drivers/iommu/intel/svm.c
> @@ -495,7 +495,7 @@ void intel_drain_pasid_prq(struct device *dev, u32
> pasid)
> goto prq_retry;
> }
>
> - iopf_queue_flush_dev(dev);
> + iopf_queue_flush_dev(dev, pasid);
>
> /*
> * Perform steps described in VT-d spec CH7.10 to drain page
> diff --git a/drivers/iommu/io-pgfault.c b/drivers/iommu/io-pgfault.c
> index 3e6845bc5902..84728fb89ac7 100644
> --- a/drivers/iommu/io-pgfault.c
> +++ b/drivers/iommu/io-pgfault.c
> @@ -309,17 +309,53 @@ EXPORT_SYMBOL_GPL(iommu_page_response);
> *
> * Return: 0 on success and <0 on error.
> */
> -int iopf_queue_flush_dev(struct device *dev)
> +int iopf_queue_flush_dev(struct device *dev, ioasid_t pasid)
> {
> struct iommu_fault_param *iopf_param =
> iopf_get_dev_fault_param(dev);
> + const struct iommu_ops *ops = dev_iommu_ops(dev);
> + struct iommu_page_response resp;
> + struct iopf_fault *iopf, *next;
> + int ret = 0;
>
> if (!iopf_param)
> return -ENODEV;
>
> flush_workqueue(iopf_param->queue->wq);
> +
> + mutex_lock(&iopf_param->lock);
> + list_for_each_entry_safe(iopf, next, &iopf_param->partial, list) {
> + if (!(iopf->fault.prm.flags &
> IOMMU_FAULT_PAGE_REQUEST_PASID_VALID) ||
> + iopf->fault.prm.pasid != pasid)
> + break;
> +
> + list_del(&iopf->list);
> + kfree(iopf);
> + }
> +
> + list_for_each_entry_safe(iopf, next, &iopf_param->faults, list) {
> + if (!(iopf->fault.prm.flags &
> IOMMU_FAULT_PAGE_REQUEST_PASID_VALID) ||
> + iopf->fault.prm.pasid != pasid)
> + continue;
> +
> + memset(&resp, 0, sizeof(struct iommu_page_response));
> + resp.pasid = iopf->fault.prm.pasid;
> + resp.grpid = iopf->fault.prm.grpid;
> + resp.code = IOMMU_PAGE_RESP_INVALID;
> +
> + if (iopf->fault.prm.flags &
> IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID)
> + resp.flags = IOMMU_PAGE_RESP_PASID_VALID;
Out of curiosity. Is it a valid configuration which has REQUEST_PASID_VALID
set but RESP_PASID_VALID cleared? I'm unclear why another response
flag is required beyond what the request flag has told...
> +
> + ret = ops->page_response(dev, iopf, &resp);
> + if (ret)
> + break;
> +
> + list_del(&iopf->list);
> + kfree(iopf);
> + }
> + mutex_unlock(&iopf_param->lock);
> iopf_put_dev_fault_param(iopf_param);
>
> - return 0;
> + return ret;
> }
> EXPORT_SYMBOL_GPL(iopf_queue_flush_dev);
>
This looks OK. Another nit is that the warning of "no pending PRQ"
in iommu_page_response() should be removed given with above
change it's expected for iommufd responses to be received after this
flush operation in iommu core.
^ permalink raw reply [flat|nested] 48+ messages in thread* Re: [PATCH v4 09/10] iommu: Make iommu_queue_iopf() more generic
2023-09-11 6:35 ` Tian, Kevin
@ 2023-09-11 12:26 ` Baolu Lu
2023-09-13 2:25 ` Tian, Kevin
0 siblings, 1 reply; 48+ messages in thread
From: Baolu Lu @ 2023-09-11 12:26 UTC (permalink / raw)
To: Tian, Kevin, Joerg Roedel, Will Deacon, Robin Murphy,
Jason Gunthorpe, Jean-Philippe Brucker, Nicolin Chen
Cc: baolu.lu, Liu, Yi L, Jacob Pan, iommu@lists.linux.dev,
kvm@vger.kernel.org, linux-kernel@vger.kernel.org
Hi Kevin,
Thanks for looking at my patches.
On 2023/9/11 14:35, Tian, Kevin wrote:
>> From: Baolu Lu <baolu.lu@linux.intel.com>
>> Sent: Tuesday, September 5, 2023 1:20 PM
>>
>> I added below patch to address the iopf_queue_flush_dev() issue. What do
>> you think of this?
>>
>> iommu: Improve iopf_queue_flush_dev()
>>
>> The iopf_queue_flush_dev() is called by the iommu driver before releasing
>> a PASID. It ensures that all pending faults for this PASID have been
>> handled or cancelled, and won't hit the address space that reuses this
>> PASID. The driver must make sure that no new fault is added to the queue.
>>
>> The SMMUv3 driver doesn't use it because it only implements the
>> Arm-specific stall fault model where DMA transactions are held in the SMMU
>> while waiting for the OS to handle iopf's. Since a device driver must
>> complete all DMA transactions before detaching domain, there are no
>> pending iopf's with the stall model. PRI support requires adding a call to
>> iopf_queue_flush_dev() after flushing the hardware page fault queue.
>>
>> The current implementation of iopf_queue_flush_dev() is a simplified
>> version. It is only suitable for SVA case in which the processing of iopf
>> is implemented in the inner loop of the iommu subsystem.
>>
>> Improve this interface to make it also work for handling iopf out of the
>> iommu core.
>>
>> Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
>> ---
>> include/linux/iommu.h | 4 ++--
>> drivers/iommu/intel/svm.c | 2 +-
>> drivers/iommu/io-pgfault.c | 40
>> ++++++++++++++++++++++++++++++++++++--
>> 3 files changed, 41 insertions(+), 5 deletions(-)
>>
>> diff --git a/include/linux/iommu.h b/include/linux/iommu.h
>> index 77ad33ffe3ac..465e23e945d0 100644
>> --- a/include/linux/iommu.h
>> +++ b/include/linux/iommu.h
>> @@ -1275,7 +1275,7 @@ iommu_sva_domain_alloc(struct device *dev,
>> struct
>> mm_struct *mm)
>> #ifdef CONFIG_IOMMU_IOPF
>> int iopf_queue_add_device(struct iopf_queue *queue, struct device *dev);
>> int iopf_queue_remove_device(struct iopf_queue *queue, struct device
>> *dev);
>> -int iopf_queue_flush_dev(struct device *dev);
>> +int iopf_queue_flush_dev(struct device *dev, ioasid_t pasid);
>> struct iopf_queue *iopf_queue_alloc(const char *name);
>> void iopf_queue_free(struct iopf_queue *queue);
>> int iopf_queue_discard_partial(struct iopf_queue *queue);
>> @@ -1295,7 +1295,7 @@ iopf_queue_remove_device(struct iopf_queue
>> *queue,
>> struct device *dev)
>> return -ENODEV;
>> }
>>
>> -static inline int iopf_queue_flush_dev(struct device *dev)
>> +static inline int iopf_queue_flush_dev(struct device *dev, ioasid_t pasid)
>> {
>> return -ENODEV;
>> }
>> diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
>> index 780c5bd73ec2..4c3f4533e337 100644
>> --- a/drivers/iommu/intel/svm.c
>> +++ b/drivers/iommu/intel/svm.c
>> @@ -495,7 +495,7 @@ void intel_drain_pasid_prq(struct device *dev, u32
>> pasid)
>> goto prq_retry;
>> }
>>
>> - iopf_queue_flush_dev(dev);
>> + iopf_queue_flush_dev(dev, pasid);
>>
>> /*
>> * Perform steps described in VT-d spec CH7.10 to drain page
>> diff --git a/drivers/iommu/io-pgfault.c b/drivers/iommu/io-pgfault.c
>> index 3e6845bc5902..84728fb89ac7 100644
>> --- a/drivers/iommu/io-pgfault.c
>> +++ b/drivers/iommu/io-pgfault.c
>> @@ -309,17 +309,53 @@ EXPORT_SYMBOL_GPL(iommu_page_response);
>> *
>> * Return: 0 on success and <0 on error.
>> */
>> -int iopf_queue_flush_dev(struct device *dev)
>> +int iopf_queue_flush_dev(struct device *dev, ioasid_t pasid)
>> {
>> struct iommu_fault_param *iopf_param =
>> iopf_get_dev_fault_param(dev);
>> + const struct iommu_ops *ops = dev_iommu_ops(dev);
>> + struct iommu_page_response resp;
>> + struct iopf_fault *iopf, *next;
>> + int ret = 0;
>>
>> if (!iopf_param)
>> return -ENODEV;
>>
>> flush_workqueue(iopf_param->queue->wq);
>> +
>> + mutex_lock(&iopf_param->lock);
>> + list_for_each_entry_safe(iopf, next, &iopf_param->partial, list) {
>> + if (!(iopf->fault.prm.flags &
>> IOMMU_FAULT_PAGE_REQUEST_PASID_VALID) ||
>> + iopf->fault.prm.pasid != pasid)
>> + break;
>> +
>> + list_del(&iopf->list);
>> + kfree(iopf);
>> + }
>> +
>> + list_for_each_entry_safe(iopf, next, &iopf_param->faults, list) {
>> + if (!(iopf->fault.prm.flags &
>> IOMMU_FAULT_PAGE_REQUEST_PASID_VALID) ||
>> + iopf->fault.prm.pasid != pasid)
>> + continue;
>> +
>> + memset(&resp, 0, sizeof(struct iommu_page_response));
>> + resp.pasid = iopf->fault.prm.pasid;
>> + resp.grpid = iopf->fault.prm.grpid;
>> + resp.code = IOMMU_PAGE_RESP_INVALID;
>> +
>> + if (iopf->fault.prm.flags &
>> IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID)
>> + resp.flags = IOMMU_PAGE_RESP_PASID_VALID;
>
> Out of curiosity. Is it a valid configuration which has REQUEST_PASID_VALID
> set but RESP_PASID_VALID cleared? I'm unclear why another response
> flag is required beyond what the request flag has told...
This seems to have uncovered a bug in VT-d driver.
The PCIe spec (Section 10.4.2.2) states:
"
If a Page Request has a PASID, the corresponding PRG Response Message
may optionally contain one as well.
If the PRG Response PASID Required bit is Clear, PRG Response Messages
do not have a PASID. If the PRG Response PASID Required bit is Set, PRG
Response Messages have a PASID if the Page Request also had one. The
Function is permitted to use the PASID value from the prefix in
conjunction with the PRG Index to match requests and responses.
"
The "PRG Response PASID Required bit" is a read-only field in the PCI
page request status register. It is represented by
"pdev->pasid_required".
So below code in VT-d driver is not correct:
542 static int intel_svm_prq_report(struct intel_iommu *iommu, struct
device *dev,
543 struct page_req_dsc *desc)
544 {
[...]
556
557 if (desc->lpig)
558 event.fault.prm.flags |=
IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
559 if (desc->pasid_present) {
560 event.fault.prm.flags |=
IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
561 event.fault.prm.flags |=
IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID;
562 }
[...]
The right logic should be
if (pdev->pasid_required)
event.fault.prm.flags |= IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID;
Thoughts?
>> +
>> + ret = ops->page_response(dev, iopf, &resp);
>> + if (ret)
>> + break;
>> +
>> + list_del(&iopf->list);
>> + kfree(iopf);
>> + }
>> + mutex_unlock(&iopf_param->lock);
>> iopf_put_dev_fault_param(iopf_param);
>>
>> - return 0;
>> + return ret;
>> }
>> EXPORT_SYMBOL_GPL(iopf_queue_flush_dev);
>>
>
> This looks OK. Another nit is that the warning of "no pending PRQ"
> in iommu_page_response() should be removed given with above
> change it's expected for iommufd responses to be received after this
> flush operation in iommu core.
Yeah! Addressed.
Best regards,
baolu
^ permalink raw reply [flat|nested] 48+ messages in thread* RE: [PATCH v4 09/10] iommu: Make iommu_queue_iopf() more generic
2023-09-11 12:26 ` Baolu Lu
@ 2023-09-13 2:25 ` Tian, Kevin
2023-09-13 2:44 ` Baolu Lu
0 siblings, 1 reply; 48+ messages in thread
From: Tian, Kevin @ 2023-09-13 2:25 UTC (permalink / raw)
To: Baolu Lu, Joerg Roedel, Will Deacon, Robin Murphy,
Jason Gunthorpe, Jean-Philippe Brucker, Nicolin Chen
Cc: Liu, Yi L, Jacob Pan, iommu@lists.linux.dev, kvm@vger.kernel.org,
linux-kernel@vger.kernel.org
> From: Baolu Lu <baolu.lu@linux.intel.com>
> Sent: Monday, September 11, 2023 8:27 PM
>
> >
> > Out of curiosity. Is it a valid configuration which has
> REQUEST_PASID_VALID
> > set but RESP_PASID_VALID cleared? I'm unclear why another response
> > flag is required beyond what the request flag has told...
>
> This seems to have uncovered a bug in VT-d driver.
>
> The PCIe spec (Section 10.4.2.2) states:
>
> "
> If a Page Request has a PASID, the corresponding PRG Response Message
> may optionally contain one as well.
>
> If the PRG Response PASID Required bit is Clear, PRG Response Messages
> do not have a PASID. If the PRG Response PASID Required bit is Set, PRG
> Response Messages have a PASID if the Page Request also had one. The
> Function is permitted to use the PASID value from the prefix in
> conjunction with the PRG Index to match requests and responses.
> "
>
> The "PRG Response PASID Required bit" is a read-only field in the PCI
> page request status register. It is represented by
> "pdev->pasid_required".
>
> So below code in VT-d driver is not correct:
>
> 542 static int intel_svm_prq_report(struct intel_iommu *iommu, struct
> device *dev,
> 543 struct page_req_dsc *desc)
> 544 {
>
> [...]
>
> 556
> 557 if (desc->lpig)
> 558 event.fault.prm.flags |=
> IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
> 559 if (desc->pasid_present) {
> 560 event.fault.prm.flags |=
> IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
> 561 event.fault.prm.flags |=
> IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID;
> 562 }
> [...]
>
> The right logic should be
>
> if (pdev->pasid_required)
> event.fault.prm.flags |=
> IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID;
>
> Thoughts?
>
yes, it's the right fix. We haven't seen any bug report probably because
all SVM-capable devices have pasid_required set? 😊
^ permalink raw reply [flat|nested] 48+ messages in thread* Re: [PATCH v4 09/10] iommu: Make iommu_queue_iopf() more generic
2023-09-13 2:25 ` Tian, Kevin
@ 2023-09-13 2:44 ` Baolu Lu
0 siblings, 0 replies; 48+ messages in thread
From: Baolu Lu @ 2023-09-13 2:44 UTC (permalink / raw)
To: Tian, Kevin, Joerg Roedel, Will Deacon, Robin Murphy,
Jason Gunthorpe, Jean-Philippe Brucker, Nicolin Chen
Cc: baolu.lu, Liu, Yi L, Jacob Pan, iommu@lists.linux.dev,
kvm@vger.kernel.org, linux-kernel@vger.kernel.org
On 9/13/23 10:25 AM, Tian, Kevin wrote:
>> From: Baolu Lu <baolu.lu@linux.intel.com>
>> Sent: Monday, September 11, 2023 8:27 PM
>>
>>>
>>> Out of curiosity. Is it a valid configuration which has
>> REQUEST_PASID_VALID
>>> set but RESP_PASID_VALID cleared? I'm unclear why another response
>>> flag is required beyond what the request flag has told...
>>
>> This seems to have uncovered a bug in VT-d driver.
>>
>> The PCIe spec (Section 10.4.2.2) states:
>>
>> "
>> If a Page Request has a PASID, the corresponding PRG Response Message
>> may optionally contain one as well.
>>
>> If the PRG Response PASID Required bit is Clear, PRG Response Messages
>> do not have a PASID. If the PRG Response PASID Required bit is Set, PRG
>> Response Messages have a PASID if the Page Request also had one. The
>> Function is permitted to use the PASID value from the prefix in
>> conjunction with the PRG Index to match requests and responses.
>> "
>>
>> The "PRG Response PASID Required bit" is a read-only field in the PCI
>> page request status register. It is represented by
>> "pdev->pasid_required".
>>
>> So below code in VT-d driver is not correct:
>>
>> 542 static int intel_svm_prq_report(struct intel_iommu *iommu, struct
>> device *dev,
>> 543 struct page_req_dsc *desc)
>> 544 {
>>
>> [...]
>>
>> 556
>> 557 if (desc->lpig)
>> 558 event.fault.prm.flags |=
>> IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
>> 559 if (desc->pasid_present) {
>> 560 event.fault.prm.flags |=
>> IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
>> 561 event.fault.prm.flags |=
>> IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID;
>> 562 }
>> [...]
>>
>> The right logic should be
>>
>> if (pdev->pasid_required)
>> event.fault.prm.flags |=
>> IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID;
>>
>> Thoughts?
>>
>
> yes, it's the right fix. We haven't seen any bug report probably because
> all SVM-capable devices have pasid_required set? 😊
More precisely, the idxd devices have pasid_required set. :-)
Anyway, I will post a formal fix for this.
Best regards,
baolu
^ permalink raw reply [flat|nested] 48+ messages in thread
[parent not found: <BN9PR11MB527624F1CC4A545FBAE3C9C98CE6A@BN9PR11MB5276.namprd11.prod.outlook.com>]
* RE: [PATCH v4 09/10] iommu: Make iommu_queue_iopf() more generic
[not found] ` <BN9PR11MB527624F1CC4A545FBAE3C9C98CE6A@BN9PR11MB5276.namprd11.prod.outlook.com>
@ 2023-08-30 8:50 ` Tian, Kevin
2023-08-31 9:42 ` Baolu Lu
0 siblings, 1 reply; 48+ messages in thread
From: Tian, Kevin @ 2023-08-30 8:50 UTC (permalink / raw)
To: Baolu Lu, Joerg Roedel, Will Deacon, Robin Murphy,
Jason Gunthorpe, Jean-Philippe Brucker, Nicolin Chen
Cc: Liu, Yi L, Jacob Pan, iommu@lists.linux.dev, kvm@vger.kernel.org,
linux-kernel@vger.kernel.org
> From: Tian, Kevin
> Sent: Wednesday, August 30, 2023 3:44 PM
>
> > From: Baolu Lu <baolu.lu@linux.intel.com>
> > Sent: Saturday, August 26, 2023 4:01 PM
> >
> > On 8/25/23 4:17 PM, Tian, Kevin wrote:
> > >> +
> > >> /**
> > >> * iopf_queue_flush_dev - Ensure that all queued faults have been
> > >> processed
> > >> * @dev: the endpoint whose faults need to be flushed.
> > > Presumably we also need a flush callback per domain given now
> > > the use of workqueue is optional then flush_workqueue() might
> > > not be sufficient.
> > >
> >
> > The iopf_queue_flush_dev() function flushes all pending faults from the
> > IOMMU queue for a specific device. It has no means to flush fault queues
> > out of iommu core.
> >
> > The iopf_queue_flush_dev() function is typically called when a domain is
> > detaching from a PASID. Hence it's necessary to flush the pending faults
> > from top to bottom. For example, iommufd should flush pending faults in
> > its fault queues after detaching the domain from the pasid.
> >
>
> Is there an ordering problem? The last step of intel_svm_drain_prq()
> in the detaching path issues a set of descriptors to drain page requests
> and responses in hardware. It cannot complete if not all software queues
> are drained and it's counter-intuitive to drain a software queue after
> the hardware draining has already been completed.
to be clear it's correct to drain request queues from bottom to top as the
lower level queue is the input to the higher level queue. But for response
the lowest draining needs to wait for response from higher levels. It's
interesting that intel-iommu driver combines draining hw page requests
and responses in one step in intel_svm_drain_prq(). this also needs some
consideration regarding to iommufd...
>
> btw just flushing requests is probably insufficient in iommufd case since
> the responses are received asynchronously. It requires an interface to
> drain both requests and responses (presumably with timeouts in case
> of a malicious guest which never responds) in the detach path.
>
> it's not a problem for sva as responses are synchronously delivered after
> handling mm fault. So fine to not touch it in this series but certainly
> this area needs more work when moving to support iommufd. 😊
>
> btw why is iopf_queue_flush_dev() called only in intel-iommu driver?
> Isn't it a common requirement for all sva-capable drivers?
^ permalink raw reply [flat|nested] 48+ messages in thread* Re: [PATCH v4 09/10] iommu: Make iommu_queue_iopf() more generic
2023-08-30 8:50 ` Tian, Kevin
@ 2023-08-31 9:42 ` Baolu Lu
0 siblings, 0 replies; 48+ messages in thread
From: Baolu Lu @ 2023-08-31 9:42 UTC (permalink / raw)
To: Tian, Kevin, Joerg Roedel, Will Deacon, Robin Murphy,
Jason Gunthorpe, Jean-Philippe Brucker, Nicolin Chen
Cc: baolu.lu, Liu, Yi L, Jacob Pan, iommu@lists.linux.dev,
kvm@vger.kernel.org, linux-kernel@vger.kernel.org
On 2023/8/30 16:50, Tian, Kevin wrote:
>> From: Tian, Kevin
>> Sent: Wednesday, August 30, 2023 3:44 PM
>>
>>> From: Baolu Lu<baolu.lu@linux.intel.com>
>>> Sent: Saturday, August 26, 2023 4:01 PM
>>>
>>> On 8/25/23 4:17 PM, Tian, Kevin wrote:
>>>>> +
>>>>> /**
>>>>> * iopf_queue_flush_dev - Ensure that all queued faults have been
>>>>> processed
>>>>> * @dev: the endpoint whose faults need to be flushed.
>>>> Presumably we also need a flush callback per domain given now
>>>> the use of workqueue is optional then flush_workqueue() might
>>>> not be sufficient.
>>>>
>>> The iopf_queue_flush_dev() function flushes all pending faults from the
>>> IOMMU queue for a specific device. It has no means to flush fault queues
>>> out of iommu core.
>>>
>>> The iopf_queue_flush_dev() function is typically called when a domain is
>>> detaching from a PASID. Hence it's necessary to flush the pending faults
>>> from top to bottom. For example, iommufd should flush pending faults in
>>> its fault queues after detaching the domain from the pasid.
>>>
>> Is there an ordering problem? The last step of intel_svm_drain_prq()
>> in the detaching path issues a set of descriptors to drain page requests
>> and responses in hardware. It cannot complete if not all software queues
>> are drained and it's counter-intuitive to drain a software queue after
>> the hardware draining has already been completed.
> to be clear it's correct to drain request queues from bottom to top as the
> lower level queue is the input to the higher level queue. But for response
> the lowest draining needs to wait for response from higher levels. It's
> interesting that intel-iommu driver combines draining hw page requests
> and responses in one step in intel_svm_drain_prq(). this also needs some
> consideration regarding to iommufd...
>
I agree with you. For the responses, we can iterate over the list of
page requests pending to respond. If any fault matches the pasid and the
device, we can drain it by responding IOMMU_PAGE_RESP_INVALID to the
device.
After that the responses for the drained faults will be dropped by the
iommu_page_response() interface.
Best regards,
baolu
^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: [PATCH v4 09/10] iommu: Make iommu_queue_iopf() more generic
2023-08-25 8:17 ` Tian, Kevin
2023-08-26 7:32 ` Baolu Lu
2023-08-26 8:01 ` Baolu Lu
@ 2023-08-26 8:04 ` Baolu Lu
2023-08-30 7:55 ` Tian, Kevin
2023-08-26 8:08 ` Baolu Lu
3 siblings, 1 reply; 48+ messages in thread
From: Baolu Lu @ 2023-08-26 8:04 UTC (permalink / raw)
To: Tian, Kevin, Joerg Roedel, Will Deacon, Robin Murphy,
Jason Gunthorpe, Jean-Philippe Brucker, Nicolin Chen
Cc: baolu.lu, Liu, Yi L, Jacob Pan, iommu@lists.linux.dev,
kvm@vger.kernel.org, linux-kernel@vger.kernel.org
On 8/25/23 4:17 PM, Tian, Kevin wrote:
>> +static void assert_no_pending_iopf(struct device *dev, ioasid_t pasid)
>> +{
>> + struct iommu_fault_param *iopf_param = dev->iommu-
>>> fault_param;
>> + struct iopf_fault *iopf;
>> +
>> + if (!iopf_param)
>> + return;
>> +
>> + mutex_lock(&iopf_param->lock);
>> + list_for_each_entry(iopf, &iopf_param->partial, list) {
>> + if (WARN_ON(iopf->fault.prm.pasid == pasid))
>> + break;
>> + }
> partial list is protected by dev_iommu lock.
>
Ah, do you mind elaborating a bit more? In my mind, partial list is
protected by dev_iommu->fault_param->lock.
Best regards,
baolu
^ permalink raw reply [flat|nested] 48+ messages in thread* RE: [PATCH v4 09/10] iommu: Make iommu_queue_iopf() more generic
2023-08-26 8:04 ` Baolu Lu
@ 2023-08-30 7:55 ` Tian, Kevin
2023-08-31 11:24 ` Baolu Lu
0 siblings, 1 reply; 48+ messages in thread
From: Tian, Kevin @ 2023-08-30 7:55 UTC (permalink / raw)
To: Baolu Lu, Joerg Roedel, Will Deacon, Robin Murphy,
Jason Gunthorpe, Jean-Philippe Brucker, Nicolin Chen
Cc: Liu, Yi L, Jacob Pan, iommu@lists.linux.dev, kvm@vger.kernel.org,
linux-kernel@vger.kernel.org
> From: Baolu Lu <baolu.lu@linux.intel.com>
> Sent: Saturday, August 26, 2023 4:04 PM
>
> On 8/25/23 4:17 PM, Tian, Kevin wrote:
> >> +static void assert_no_pending_iopf(struct device *dev, ioasid_t pasid)
> >> +{
> >> + struct iommu_fault_param *iopf_param = dev->iommu-
> >>> fault_param;
> >> + struct iopf_fault *iopf;
> >> +
> >> + if (!iopf_param)
> >> + return;
> >> +
> >> + mutex_lock(&iopf_param->lock);
> >> + list_for_each_entry(iopf, &iopf_param->partial, list) {
> >> + if (WARN_ON(iopf->fault.prm.pasid == pasid))
> >> + break;
> >> + }
> > partial list is protected by dev_iommu lock.
> >
>
> Ah, do you mind elaborating a bit more? In my mind, partial list is
> protected by dev_iommu->fault_param->lock.
>
well, it's not how the code is currently written. iommu_queue_iopf()
doesn't hold dev_iommu->fault_param->lock to update the partial
list.
while at it looks there is also a mislocking in iopf_queue_discard_partial()
which only acquires queue->lock.
So we have three places touching the partial list all with different locks:
- iommu_queue_iopf() relies on dev_iommu->lock
- iopf_queue_discard_partial() relies on queue->lock
- this new assert function uses dev_iommu->fault_param->lock
^ permalink raw reply [flat|nested] 48+ messages in thread* Re: [PATCH v4 09/10] iommu: Make iommu_queue_iopf() more generic
2023-08-30 7:55 ` Tian, Kevin
@ 2023-08-31 11:24 ` Baolu Lu
2023-09-01 2:50 ` Tian, Kevin
0 siblings, 1 reply; 48+ messages in thread
From: Baolu Lu @ 2023-08-31 11:24 UTC (permalink / raw)
To: Tian, Kevin, Joerg Roedel, Will Deacon, Robin Murphy,
Jason Gunthorpe, Jean-Philippe Brucker, Nicolin Chen
Cc: baolu.lu, Liu, Yi L, Jacob Pan, iommu@lists.linux.dev,
kvm@vger.kernel.org, linux-kernel@vger.kernel.org
On 2023/8/30 15:55, Tian, Kevin wrote:
>> From: Baolu Lu <baolu.lu@linux.intel.com>
>> Sent: Saturday, August 26, 2023 4:04 PM
>>
>> On 8/25/23 4:17 PM, Tian, Kevin wrote:
>>>> +static void assert_no_pending_iopf(struct device *dev, ioasid_t pasid)
>>>> +{
>>>> + struct iommu_fault_param *iopf_param = dev->iommu-
>>>>> fault_param;
>>>> + struct iopf_fault *iopf;
>>>> +
>>>> + if (!iopf_param)
>>>> + return;
>>>> +
>>>> + mutex_lock(&iopf_param->lock);
>>>> + list_for_each_entry(iopf, &iopf_param->partial, list) {
>>>> + if (WARN_ON(iopf->fault.prm.pasid == pasid))
>>>> + break;
>>>> + }
>>> partial list is protected by dev_iommu lock.
>>>
>>
>> Ah, do you mind elaborating a bit more? In my mind, partial list is
>> protected by dev_iommu->fault_param->lock.
>>
>
> well, it's not how the code is currently written. iommu_queue_iopf()
> doesn't hold dev_iommu->fault_param->lock to update the partial
> list.
>
> while at it looks there is also a mislocking in iopf_queue_discard_partial()
> which only acquires queue->lock.
>
> So we have three places touching the partial list all with different locks:
>
> - iommu_queue_iopf() relies on dev_iommu->lock
> - iopf_queue_discard_partial() relies on queue->lock
> - this new assert function uses dev_iommu->fault_param->lock
Yeah, I see your point now. Thanks for the explanation.
So, my understanding is that dev_iommu->lock protects the whole
pointer of dev_iommu->fault_param, while dev_iommu->fault_param->lock
protects the lists inside it.
Is this locking mechanism different from what you think?
Best regards,
baolu
^ permalink raw reply [flat|nested] 48+ messages in thread* RE: [PATCH v4 09/10] iommu: Make iommu_queue_iopf() more generic
2023-08-31 11:24 ` Baolu Lu
@ 2023-09-01 2:50 ` Tian, Kevin
2023-09-05 5:24 ` Baolu Lu
0 siblings, 1 reply; 48+ messages in thread
From: Tian, Kevin @ 2023-09-01 2:50 UTC (permalink / raw)
To: Baolu Lu, Joerg Roedel, Will Deacon, Robin Murphy,
Jason Gunthorpe, Jean-Philippe Brucker, Nicolin Chen
Cc: Liu, Yi L, Jacob Pan, iommu@lists.linux.dev, kvm@vger.kernel.org,
linux-kernel@vger.kernel.org
> From: Baolu Lu <baolu.lu@linux.intel.com>
> Sent: Thursday, August 31, 2023 7:25 PM
>
> On 2023/8/30 15:55, Tian, Kevin wrote:
> >> From: Baolu Lu <baolu.lu@linux.intel.com>
> >> Sent: Saturday, August 26, 2023 4:04 PM
> >>
> >> On 8/25/23 4:17 PM, Tian, Kevin wrote:
> >>>> +static void assert_no_pending_iopf(struct device *dev, ioasid_t pasid)
> >>>> +{
> >>>> + struct iommu_fault_param *iopf_param = dev->iommu-
> >>>>> fault_param;
> >>>> + struct iopf_fault *iopf;
> >>>> +
> >>>> + if (!iopf_param)
> >>>> + return;
> >>>> +
> >>>> + mutex_lock(&iopf_param->lock);
> >>>> + list_for_each_entry(iopf, &iopf_param->partial, list) {
> >>>> + if (WARN_ON(iopf->fault.prm.pasid == pasid))
> >>>> + break;
> >>>> + }
> >>> partial list is protected by dev_iommu lock.
> >>>
> >>
> >> Ah, do you mind elaborating a bit more? In my mind, partial list is
> >> protected by dev_iommu->fault_param->lock.
> >>
> >
> > well, it's not how the code is currently written. iommu_queue_iopf()
> > doesn't hold dev_iommu->fault_param->lock to update the partial
> > list.
> >
> > while at it looks there is also a mislocking in iopf_queue_discard_partial()
> > which only acquires queue->lock.
> >
> > So we have three places touching the partial list all with different locks:
> >
> > - iommu_queue_iopf() relies on dev_iommu->lock
> > - iopf_queue_discard_partial() relies on queue->lock
> > - this new assert function uses dev_iommu->fault_param->lock
>
> Yeah, I see your point now. Thanks for the explanation.
>
> So, my understanding is that dev_iommu->lock protects the whole
> pointer of dev_iommu->fault_param, while dev_iommu->fault_param->lock
> protects the lists inside it.
>
yes. let's use fault_param->lock consistently for those lists in all paths.
^ permalink raw reply [flat|nested] 48+ messages in thread* Re: [PATCH v4 09/10] iommu: Make iommu_queue_iopf() more generic
2023-09-01 2:50 ` Tian, Kevin
@ 2023-09-05 5:24 ` Baolu Lu
2023-09-11 6:57 ` Tian, Kevin
0 siblings, 1 reply; 48+ messages in thread
From: Baolu Lu @ 2023-09-05 5:24 UTC (permalink / raw)
To: Tian, Kevin, Joerg Roedel, Will Deacon, Robin Murphy,
Jason Gunthorpe, Jean-Philippe Brucker, Nicolin Chen
Cc: baolu.lu, Liu, Yi L, Jacob Pan, iommu@lists.linux.dev,
kvm@vger.kernel.org, linux-kernel@vger.kernel.org
On 2023/9/1 10:50, Tian, Kevin wrote:
>> From: Baolu Lu <baolu.lu@linux.intel.com>
>> Sent: Thursday, August 31, 2023 7:25 PM
>>
>> On 2023/8/30 15:55, Tian, Kevin wrote:
>>>> From: Baolu Lu <baolu.lu@linux.intel.com>
>>>> Sent: Saturday, August 26, 2023 4:04 PM
>>>>
>>>> On 8/25/23 4:17 PM, Tian, Kevin wrote:
>>>>>> +static void assert_no_pending_iopf(struct device *dev, ioasid_t pasid)
>>>>>> +{
>>>>>> + struct iommu_fault_param *iopf_param = dev->iommu-
>>>>>>> fault_param;
>>>>>> + struct iopf_fault *iopf;
>>>>>> +
>>>>>> + if (!iopf_param)
>>>>>> + return;
>>>>>> +
>>>>>> + mutex_lock(&iopf_param->lock);
>>>>>> + list_for_each_entry(iopf, &iopf_param->partial, list) {
>>>>>> + if (WARN_ON(iopf->fault.prm.pasid == pasid))
>>>>>> + break;
>>>>>> + }
>>>>> partial list is protected by dev_iommu lock.
>>>>>
>>>>
>>>> Ah, do you mind elaborating a bit more? In my mind, partial list is
>>>> protected by dev_iommu->fault_param->lock.
>>>>
>>>
>>> well, it's not how the code is currently written. iommu_queue_iopf()
>>> doesn't hold dev_iommu->fault_param->lock to update the partial
>>> list.
>>>
>>> while at it looks there is also a mislocking in iopf_queue_discard_partial()
>>> which only acquires queue->lock.
>>>
>>> So we have three places touching the partial list all with different locks:
>>>
>>> - iommu_queue_iopf() relies on dev_iommu->lock
>>> - iopf_queue_discard_partial() relies on queue->lock
>>> - this new assert function uses dev_iommu->fault_param->lock
>>
>> Yeah, I see your point now. Thanks for the explanation.
>>
>> So, my understanding is that dev_iommu->lock protects the whole
>> pointer of dev_iommu->fault_param, while dev_iommu->fault_param->lock
>> protects the lists inside it.
>>
>
> yes. let's use fault_param->lock consistently for those lists in all paths.
Hi Kevin,
I am trying to address this issue in below patch. Does it looks sane to
you?
iommu: Consolidate per-device fault data management
The per-device fault data is a data structure that is used to store
information about faults that occur on a device. This data is allocated
when IOPF is enabled on the device and freed when IOPF is disabled. The
data is used in the paths of iopf reporting, handling, responding, and
draining.
The fault data is protected by two locks:
- dev->iommu->lock: This lock is used to protect the allocation and
freeing of the fault data.
- dev->iommu->fault_parameter->lock: This lock is used to protect the
fault data itself.
Improve the iopf code to enforce this lock mechanism and add a reference
counter in the fault data to avoid use-after-free issue.
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
---
include/linux/iommu.h | 3 +
drivers/iommu/io-pgfault.c | 122 +++++++++++++++++++++++--------------
2 files changed, 79 insertions(+), 46 deletions(-)
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 1697ac168f05..77ad33ffe3ac 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -480,6 +480,8 @@ struct iommu_device {
/**
* struct iommu_fault_param - per-device IOMMU fault data
* @lock: protect pending faults list
+ * @users: user counter to manage the lifetime of the data, this field
+ * is protected by dev->iommu->lock.
* @dev: the device that owns this param
* @queue: IOPF queue
* @queue_list: index into queue->devices
@@ -489,6 +491,7 @@ struct iommu_device {
*/
struct iommu_fault_param {
struct mutex lock;
+ int users;
struct device *dev;
struct iopf_queue *queue;
diff --git a/drivers/iommu/io-pgfault.c b/drivers/iommu/io-pgfault.c
index 7e5c6798ce24..3e6845bc5902 100644
--- a/drivers/iommu/io-pgfault.c
+++ b/drivers/iommu/io-pgfault.c
@@ -26,6 +26,49 @@ void iopf_free_group(struct iopf_group *group)
}
EXPORT_SYMBOL_GPL(iopf_free_group);
+/*
+ * Return the fault parameter of a device if it exists. Otherwise,
return NULL.
+ * On a successful return, the caller takes a reference of this
parameter and
+ * should put it after use by calling iopf_put_dev_fault_param().
+ */
+static struct iommu_fault_param *iopf_get_dev_fault_param(struct device
*dev)
+{
+ struct dev_iommu *param = dev->iommu;
+ struct iommu_fault_param *fault_param;
+
+ if (!param)
+ return NULL;
+
+ mutex_lock(&param->lock);
+ fault_param = param->fault_param;
+ if (fault_param)
+ fault_param->users++;
+ mutex_unlock(&param->lock);
+
+ return fault_param;
+}
+
+/* Caller must hold a reference of the fault parameter. */
+static void iopf_put_dev_fault_param(struct iommu_fault_param *fault_param)
+{
+ struct device *dev = fault_param->dev;
+ struct dev_iommu *param = dev->iommu;
+
+ mutex_lock(&param->lock);
+ if (WARN_ON(fault_param->users <= 0 ||
+ fault_param != param->fault_param)) {
+ mutex_unlock(&param->lock);
+ return;
+ }
+
+ if (--fault_param->users == 0) {
+ param->fault_param = NULL;
+ kfree(fault_param);
+ put_device(dev);
+ }
+ mutex_unlock(&param->lock);
+}
+
/**
* iommu_handle_iopf - IO Page Fault handler
* @fault: fault event
@@ -72,23 +115,14 @@ static int iommu_handle_iopf(struct iommu_fault
*fault, struct device *dev)
struct iopf_group *group;
struct iommu_domain *domain;
struct iopf_fault *iopf, *next;
- struct iommu_fault_param *iopf_param;
- struct dev_iommu *param = dev->iommu;
+ struct iommu_fault_param *iopf_param = dev->iommu->fault_param;
- lockdep_assert_held(&param->lock);
+ lockdep_assert_held(&iopf_param->lock);
if (fault->type != IOMMU_FAULT_PAGE_REQ)
/* Not a recoverable page fault */
return -EOPNOTSUPP;
- /*
- * As long as we're holding param->lock, the queue can't be unlinked
- * from the device and therefore cannot disappear.
- */
- iopf_param = param->fault_param;
- if (!iopf_param)
- return -ENODEV;
-
if (!(fault->prm.flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE)) {
iopf = kzalloc(sizeof(*iopf), GFP_KERNEL);
if (!iopf)
@@ -167,18 +201,15 @@ static int iommu_handle_iopf(struct iommu_fault
*fault, struct device *dev)
*/
int iommu_report_device_fault(struct device *dev, struct iopf_fault *evt)
{
- struct dev_iommu *param = dev->iommu;
+ struct iommu_fault_param *fault_param;
struct iopf_fault *evt_pending = NULL;
- struct iommu_fault_param *fparam;
int ret = 0;
- if (!param || !evt)
+ fault_param = iopf_get_dev_fault_param(dev);
+ if (!fault_param)
return -EINVAL;
- /* we only report device fault if there is a handler registered */
- mutex_lock(&param->lock);
- fparam = param->fault_param;
-
+ mutex_lock(&fault_param->lock);
if (evt->fault.type == IOMMU_FAULT_PAGE_REQ &&
(evt->fault.prm.flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE)) {
evt_pending = kmemdup(evt, sizeof(struct iopf_fault),
@@ -187,20 +218,18 @@ int iommu_report_device_fault(struct device *dev,
struct iopf_fault *evt)
ret = -ENOMEM;
goto done_unlock;
}
- mutex_lock(&fparam->lock);
- list_add_tail(&evt_pending->list, &fparam->faults);
- mutex_unlock(&fparam->lock);
+ list_add_tail(&evt_pending->list, &fault_param->faults);
}
ret = iommu_handle_iopf(&evt->fault, dev);
if (ret && evt_pending) {
- mutex_lock(&fparam->lock);
list_del(&evt_pending->list);
- mutex_unlock(&fparam->lock);
kfree(evt_pending);
}
done_unlock:
- mutex_unlock(&param->lock);
+ mutex_unlock(&fault_param->lock);
+ iopf_put_dev_fault_param(fault_param);
+
return ret;
}
EXPORT_SYMBOL_GPL(iommu_report_device_fault);
@@ -212,19 +241,20 @@ int iommu_page_response(struct device *dev,
int ret = -EINVAL;
struct iopf_fault *evt;
struct iommu_fault_page_request *prm;
- struct dev_iommu *param = dev->iommu;
+ struct iommu_fault_param *fault_param;
const struct iommu_ops *ops = dev_iommu_ops(dev);
bool has_pasid = msg->flags & IOMMU_PAGE_RESP_PASID_VALID;
if (!ops->page_response)
return -ENODEV;
- if (!param || !param->fault_param)
- return -EINVAL;
+ fault_param = iopf_get_dev_fault_param(dev);
+ if (!fault_param)
+ return -ENODEV;
/* Only send response if there is a fault report pending */
- mutex_lock(&param->fault_param->lock);
- if (list_empty(&param->fault_param->faults)) {
+ mutex_lock(&fault_param->lock);
+ if (list_empty(&fault_param->faults)) {
dev_warn_ratelimited(dev, "no pending PRQ, drop response\n");
goto done_unlock;
}
@@ -232,7 +262,7 @@ int iommu_page_response(struct device *dev,
* Check if we have a matching page request pending to respond,
* otherwise return -EINVAL
*/
- list_for_each_entry(evt, &param->fault_param->faults, list) {
+ list_for_each_entry(evt, &fault_param->faults, list) {
prm = &evt->fault.prm;
if (prm->grpid != msg->grpid)
continue;
@@ -260,7 +290,9 @@ int iommu_page_response(struct device *dev,
}
done_unlock:
- mutex_unlock(&param->fault_param->lock);
+ mutex_unlock(&fault_param->lock);
+ iopf_put_dev_fault_param(fault_param);
+
return ret;
}
EXPORT_SYMBOL_GPL(iommu_page_response);
@@ -279,22 +311,15 @@ EXPORT_SYMBOL_GPL(iommu_page_response);
*/
int iopf_queue_flush_dev(struct device *dev)
{
- int ret = 0;
- struct iommu_fault_param *iopf_param;
- struct dev_iommu *param = dev->iommu;
+ struct iommu_fault_param *iopf_param = iopf_get_dev_fault_param(dev);
- if (!param)
+ if (!iopf_param)
return -ENODEV;
- mutex_lock(&param->lock);
- iopf_param = param->fault_param;
- if (iopf_param)
- flush_workqueue(iopf_param->queue->wq);
- else
- ret = -ENODEV;
- mutex_unlock(&param->lock);
+ flush_workqueue(iopf_param->queue->wq);
+ iopf_put_dev_fault_param(iopf_param);
- return ret;
+ return 0;
}
EXPORT_SYMBOL_GPL(iopf_queue_flush_dev);
@@ -318,11 +343,13 @@ int iopf_queue_discard_partial(struct iopf_queue
*queue)
mutex_lock(&queue->lock);
list_for_each_entry(iopf_param, &queue->devices, queue_list) {
+ mutex_lock(&iopf_param->lock);
list_for_each_entry_safe(iopf, next, &iopf_param->partial,
list) {
list_del(&iopf->list);
kfree(iopf);
}
+ mutex_unlock(&iopf_param->lock);
}
mutex_unlock(&queue->lock);
return 0;
@@ -361,6 +388,7 @@ int iopf_queue_add_device(struct iopf_queue *queue,
struct device *dev)
INIT_LIST_HEAD(&fault_param->faults);
INIT_LIST_HEAD(&fault_param->partial);
fault_param->dev = dev;
+ fault_param->users = 1;
list_add(&fault_param->queue_list, &queue->devices);
fault_param->queue = queue;
@@ -413,9 +441,11 @@ int iopf_queue_remove_device(struct iopf_queue
*queue, struct device *dev)
list_for_each_entry_safe(iopf, next, &fault_param->partial, list)
kfree(iopf);
- param->fault_param = NULL;
- kfree(fault_param);
- put_device(dev);
+ if (--fault_param->users == 0) {
+ param->fault_param = NULL;
+ kfree(fault_param);
+ put_device(dev);
+ }
unlock:
mutex_unlock(&param->lock);
mutex_unlock(&queue->lock);
--
2.34.1
Best regards,
baolu
^ permalink raw reply related [flat|nested] 48+ messages in thread* RE: [PATCH v4 09/10] iommu: Make iommu_queue_iopf() more generic
2023-09-05 5:24 ` Baolu Lu
@ 2023-09-11 6:57 ` Tian, Kevin
2023-09-11 12:46 ` Baolu Lu
0 siblings, 1 reply; 48+ messages in thread
From: Tian, Kevin @ 2023-09-11 6:57 UTC (permalink / raw)
To: Baolu Lu, Joerg Roedel, Will Deacon, Robin Murphy,
Jason Gunthorpe, Jean-Philippe Brucker, Nicolin Chen
Cc: Liu, Yi L, Jacob Pan, iommu@lists.linux.dev, kvm@vger.kernel.org,
linux-kernel@vger.kernel.org
> From: Baolu Lu <baolu.lu@linux.intel.com>
> Sent: Tuesday, September 5, 2023 1:24 PM
>
> Hi Kevin,
>
> I am trying to address this issue in below patch. Does it looks sane to
> you?
>
> iommu: Consolidate per-device fault data management
>
> The per-device fault data is a data structure that is used to store
> information about faults that occur on a device. This data is allocated
> when IOPF is enabled on the device and freed when IOPF is disabled. The
> data is used in the paths of iopf reporting, handling, responding, and
> draining.
>
> The fault data is protected by two locks:
>
> - dev->iommu->lock: This lock is used to protect the allocation and
> freeing of the fault data.
> - dev->iommu->fault_parameter->lock: This lock is used to protect the
> fault data itself.
>
> Improve the iopf code to enforce this lock mechanism and add a reference
> counter in the fault data to avoid use-after-free issue.
>
Can you elaborate the use-after-free issue and why a new user count
is required?
btw a Fix tag is required given this mislocking issue has been there for
quite some time...
^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: [PATCH v4 09/10] iommu: Make iommu_queue_iopf() more generic
2023-09-11 6:57 ` Tian, Kevin
@ 2023-09-11 12:46 ` Baolu Lu
2023-09-13 2:34 ` Tian, Kevin
0 siblings, 1 reply; 48+ messages in thread
From: Baolu Lu @ 2023-09-11 12:46 UTC (permalink / raw)
To: Tian, Kevin, Joerg Roedel, Will Deacon, Robin Murphy,
Jason Gunthorpe, Jean-Philippe Brucker, Nicolin Chen
Cc: baolu.lu, Liu, Yi L, Jacob Pan, iommu@lists.linux.dev,
kvm@vger.kernel.org, linux-kernel@vger.kernel.org
On 2023/9/11 14:57, Tian, Kevin wrote:
>> From: Baolu Lu <baolu.lu@linux.intel.com>
>> Sent: Tuesday, September 5, 2023 1:24 PM
>>
>> Hi Kevin,
>>
>> I am trying to address this issue in below patch. Does it looks sane to
>> you?
>>
>> iommu: Consolidate per-device fault data management
>>
>> The per-device fault data is a data structure that is used to store
>> information about faults that occur on a device. This data is allocated
>> when IOPF is enabled on the device and freed when IOPF is disabled. The
>> data is used in the paths of iopf reporting, handling, responding, and
>> draining.
>>
>> The fault data is protected by two locks:
>>
>> - dev->iommu->lock: This lock is used to protect the allocation and
>> freeing of the fault data.
>> - dev->iommu->fault_parameter->lock: This lock is used to protect the
>> fault data itself.
>>
>> Improve the iopf code to enforce this lock mechanism and add a reference
>> counter in the fault data to avoid use-after-free issue.
>>
>
> Can you elaborate the use-after-free issue and why a new user count
> is required?
I was concerned that when iommufd uses iopf, page fault report/response
may occur simultaneously with enable/disable PRI.
Currently, this is not an issue as the enable/disable PRI is in its own
path. In the future, we may discard this interface and enable PRI when
attaching the first PRI-capable domain, and disable it when detaching
the last PRI-capable domain.
>
> btw a Fix tag is required given this mislocking issue has been there for
> quite some time...
I don't see any real issue fixed by this change. It's only a lock
refactoring after the code refactoring and preparing it for iommufd use.
Perhaps I missed anything?
Best regards,
baolu
^ permalink raw reply [flat|nested] 48+ messages in thread
* RE: [PATCH v4 09/10] iommu: Make iommu_queue_iopf() more generic
2023-09-11 12:46 ` Baolu Lu
@ 2023-09-13 2:34 ` Tian, Kevin
2023-09-13 4:23 ` Baolu Lu
2023-09-13 6:18 ` Baolu Lu
0 siblings, 2 replies; 48+ messages in thread
From: Tian, Kevin @ 2023-09-13 2:34 UTC (permalink / raw)
To: Baolu Lu, Joerg Roedel, Will Deacon, Robin Murphy,
Jason Gunthorpe, Jean-Philippe Brucker, Nicolin Chen
Cc: Liu, Yi L, Jacob Pan, iommu@lists.linux.dev, kvm@vger.kernel.org,
linux-kernel@vger.kernel.org
> From: Baolu Lu <baolu.lu@linux.intel.com>
> Sent: Monday, September 11, 2023 8:46 PM
>
> On 2023/9/11 14:57, Tian, Kevin wrote:
> >> From: Baolu Lu <baolu.lu@linux.intel.com>
> >> Sent: Tuesday, September 5, 2023 1:24 PM
> >>
> >> Hi Kevin,
> >>
> >> I am trying to address this issue in below patch. Does it looks sane to
> >> you?
> >>
> >> iommu: Consolidate per-device fault data management
> >>
> >> The per-device fault data is a data structure that is used to store
> >> information about faults that occur on a device. This data is allocated
> >> when IOPF is enabled on the device and freed when IOPF is disabled. The
> >> data is used in the paths of iopf reporting, handling, responding, and
> >> draining.
> >>
> >> The fault data is protected by two locks:
> >>
> >> - dev->iommu->lock: This lock is used to protect the allocation and
> >> freeing of the fault data.
> >> - dev->iommu->fault_parameter->lock: This lock is used to protect the
> >> fault data itself.
> >>
> >> Improve the iopf code to enforce this lock mechanism and add a
> reference
> >> counter in the fault data to avoid use-after-free issue.
> >>
> >
> > Can you elaborate the use-after-free issue and why a new user count
> > is required?
>
> I was concerned that when iommufd uses iopf, page fault report/response
> may occur simultaneously with enable/disable PRI.
>
> Currently, this is not an issue as the enable/disable PRI is in its own
> path. In the future, we may discard this interface and enable PRI when
> attaching the first PRI-capable domain, and disable it when detaching
> the last PRI-capable domain.
Then let's not do it now until there is a real need after you have a
thorough design for iommufd.
>
> >
> > btw a Fix tag is required given this mislocking issue has been there for
> > quite some time...
>
> I don't see any real issue fixed by this change. It's only a lock
> refactoring after the code refactoring and preparing it for iommufd use.
> Perhaps I missed anything?
>
mislocking already exists today for the partial list:
- iommu_queue_iopf() uses dev->iommu->lock;
- iopf_queue_discard_partial() uses queue->lock;
^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: [PATCH v4 09/10] iommu: Make iommu_queue_iopf() more generic
2023-09-13 2:34 ` Tian, Kevin
@ 2023-09-13 4:23 ` Baolu Lu
2023-09-13 6:18 ` Baolu Lu
1 sibling, 0 replies; 48+ messages in thread
From: Baolu Lu @ 2023-09-13 4:23 UTC (permalink / raw)
To: Tian, Kevin, Joerg Roedel, Will Deacon, Robin Murphy,
Jason Gunthorpe, Jean-Philippe Brucker, Nicolin Chen
Cc: baolu.lu, Liu, Yi L, Jacob Pan, iommu@lists.linux.dev,
kvm@vger.kernel.org, linux-kernel@vger.kernel.org
On 9/13/23 10:34 AM, Tian, Kevin wrote:
>> From: Baolu Lu <baolu.lu@linux.intel.com>
>> Sent: Monday, September 11, 2023 8:46 PM
>>
>> On 2023/9/11 14:57, Tian, Kevin wrote:
>>>> From: Baolu Lu <baolu.lu@linux.intel.com>
>>>> Sent: Tuesday, September 5, 2023 1:24 PM
>>>>
>>>> Hi Kevin,
>>>>
>>>> I am trying to address this issue in below patch. Does it looks sane to
>>>> you?
>>>>
>>>> iommu: Consolidate per-device fault data management
>>>>
>>>> The per-device fault data is a data structure that is used to store
>>>> information about faults that occur on a device. This data is allocated
>>>> when IOPF is enabled on the device and freed when IOPF is disabled. The
>>>> data is used in the paths of iopf reporting, handling, responding, and
>>>> draining.
>>>>
>>>> The fault data is protected by two locks:
>>>>
>>>> - dev->iommu->lock: This lock is used to protect the allocation and
>>>> freeing of the fault data.
>>>> - dev->iommu->fault_parameter->lock: This lock is used to protect the
>>>> fault data itself.
>>>>
>>>> Improve the iopf code to enforce this lock mechanism and add a
>> reference
>>>> counter in the fault data to avoid use-after-free issue.
>>>>
>>>
>>> Can you elaborate the use-after-free issue and why a new user count
>>> is required?
>>
>> I was concerned that when iommufd uses iopf, page fault report/response
>> may occur simultaneously with enable/disable PRI.
>>
>> Currently, this is not an issue as the enable/disable PRI is in its own
>> path. In the future, we may discard this interface and enable PRI when
>> attaching the first PRI-capable domain, and disable it when detaching
>> the last PRI-capable domain.
>
> Then let's not do it now until there is a real need after you have a
> thorough design for iommufd.
Okay, fair enough.
>
>>
>>>
>>> btw a Fix tag is required given this mislocking issue has been there for
>>> quite some time...
>>
>> I don't see any real issue fixed by this change. It's only a lock
>> refactoring after the code refactoring and preparing it for iommufd use.
>> Perhaps I missed anything?
>>
>
> mislocking already exists today for the partial list:
>
> - iommu_queue_iopf() uses dev->iommu->lock;
> - iopf_queue_discard_partial() uses queue->lock;
So, if it's worth it, let me try splitting a fix patch.
Best regards,
baolu
^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: [PATCH v4 09/10] iommu: Make iommu_queue_iopf() more generic
2023-09-13 2:34 ` Tian, Kevin
2023-09-13 4:23 ` Baolu Lu
@ 2023-09-13 6:18 ` Baolu Lu
1 sibling, 0 replies; 48+ messages in thread
From: Baolu Lu @ 2023-09-13 6:18 UTC (permalink / raw)
To: Tian, Kevin, Joerg Roedel, Will Deacon, Robin Murphy,
Jason Gunthorpe, Jean-Philippe Brucker, Nicolin Chen
Cc: baolu.lu, Liu, Yi L, Jacob Pan, iommu@lists.linux.dev,
kvm@vger.kernel.org, linux-kernel@vger.kernel.org
On 2023/9/13 10:34, Tian, Kevin wrote:
>> From: Baolu Lu<baolu.lu@linux.intel.com>
>> Sent: Monday, September 11, 2023 8:46 PM
>>
>> On 2023/9/11 14:57, Tian, Kevin wrote:
>>>> From: Baolu Lu<baolu.lu@linux.intel.com>
>>>> Sent: Tuesday, September 5, 2023 1:24 PM
>>>>
>>>> Hi Kevin,
>>>>
>>>> I am trying to address this issue in below patch. Does it looks sane to
>>>> you?
>>>>
>>>> iommu: Consolidate per-device fault data management
>>>>
>>>> The per-device fault data is a data structure that is used to store
>>>> information about faults that occur on a device. This data is allocated
>>>> when IOPF is enabled on the device and freed when IOPF is disabled. The
>>>> data is used in the paths of iopf reporting, handling, responding, and
>>>> draining.
>>>>
>>>> The fault data is protected by two locks:
>>>>
>>>> - dev->iommu->lock: This lock is used to protect the allocation and
>>>> freeing of the fault data.
>>>> - dev->iommu->fault_parameter->lock: This lock is used to protect the
>>>> fault data itself.
>>>>
>>>> Improve the iopf code to enforce this lock mechanism and add a
>> reference
>>>> counter in the fault data to avoid use-after-free issue.
>>>>
>>> Can you elaborate the use-after-free issue and why a new user count
>>> is required?
>> I was concerned that when iommufd uses iopf, page fault report/response
>> may occur simultaneously with enable/disable PRI.
>>
>> Currently, this is not an issue as the enable/disable PRI is in its own
>> path. In the future, we may discard this interface and enable PRI when
>> attaching the first PRI-capable domain, and disable it when detaching
>> the last PRI-capable domain.
> Then let's not do it now until there is a real need after you have a
> thorough design for iommufd.
I revisited this part of the code and found that it's still valuable to
make the code clean and simple. The fault parameter is accessed in
various paths, such as reporting an iopf, responding to an iopf,
draining iopfs, and adding or removing a queue. In each path, we need to
repeat the locking code below:
mutex_lock(&dev->iommu->lock);
fault_param = dev->iommu->fault_param;
if (!fault_param) {
mutex_unlock(&dev->iommu->lock);
return -ENODEV;
}
/* use the fault parameter */
... ...
mutex_unlock(&dev->iommu->lock);
The order of the locks is also important. Otherwise, a possible deadlock
issue will be reported by lockdep.
By consolidating the above code into iopf_get/put_dev_fault_param()
helpers, it can be simplified to:
fault_param = iopf_get_dev_fault_param(dev);
if (!fault_param)
return -ENODEV;
/* use the fault parameter */
... ...
iopf_put_dev_fault_param(fault_param);
The lock order issue is removed. And it will make the code simpler and
easier for maintenance.
Best regards,
baolu
^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: [PATCH v4 09/10] iommu: Make iommu_queue_iopf() more generic
2023-08-25 8:17 ` Tian, Kevin
` (2 preceding siblings ...)
2023-08-26 8:04 ` Baolu Lu
@ 2023-08-26 8:08 ` Baolu Lu
3 siblings, 0 replies; 48+ messages in thread
From: Baolu Lu @ 2023-08-26 8:08 UTC (permalink / raw)
To: Tian, Kevin, Joerg Roedel, Will Deacon, Robin Murphy,
Jason Gunthorpe, Jean-Philippe Brucker, Nicolin Chen
Cc: baolu.lu, Liu, Yi L, Jacob Pan, iommu@lists.linux.dev,
kvm@vger.kernel.org, linux-kernel@vger.kernel.org
On 8/25/23 4:17 PM, Tian, Kevin wrote:
>> +
>> + list_for_each_entry(iopf, &iopf_param->faults, list) {
>> + if (WARN_ON(iopf->fault.prm.pasid == pasid))
>> + break;
>> + }
>> + mutex_unlock(&iopf_param->lock);
>> +}
>> +
>> void iommu_detach_device(struct iommu_domain *domain, struct device
>> *dev)
>> {
>> struct iommu_group *group;
>> @@ -1959,6 +1980,7 @@ void iommu_detach_device(struct iommu_domain
>> *domain, struct device *dev)
>> if (!group)
>> return;
>>
>> + assert_no_pending_iopf(dev, IOMMU_NO_PASID);
>> mutex_lock(&group->mutex);
>> if (WARN_ON(domain != group->domain) ||
>> WARN_ON(list_count_nodes(&group->devices) != 1))
>> @@ -3269,6 +3291,7 @@ void iommu_detach_device_pasid(struct
>> iommu_domain *domain, struct device *dev,
>> {
>> struct iommu_group *group = iommu_group_get(dev);
>>
>> + assert_no_pending_iopf(dev, pasid);
> this doesn't look correct. A sane driver will stop triggering new
> page request before calling detach but there are still pending ones
> not drained until iopf_queue_flush_dev() called by
> ops->remove_dev_pasid().
>
> then this check will cause false warning.
>
You are right. It is not only incorrect but also pointless. The iommu
driver should flush the iopf queues in the path of detaching domains. I
will remove it if no objection.
Best regards,
baolu
^ permalink raw reply [flat|nested] 48+ messages in thread
* [PATCH v4 10/10] iommu: Separate SVA and IOPF
2023-08-25 2:30 [PATCH v4 00/10] iommu: Prepare to deliver page faults to user space Lu Baolu
` (8 preceding siblings ...)
2023-08-25 2:30 ` [PATCH v4 09/10] iommu: Make iommu_queue_iopf() more generic Lu Baolu
@ 2023-08-25 2:30 ` Lu Baolu
9 siblings, 0 replies; 48+ messages in thread
From: Lu Baolu @ 2023-08-25 2:30 UTC (permalink / raw)
To: Joerg Roedel, Will Deacon, Robin Murphy, Jason Gunthorpe,
Kevin Tian, Jean-Philippe Brucker, Nicolin Chen
Cc: Yi Liu, Jacob Pan, iommu, kvm, linux-kernel, Lu Baolu,
Jason Gunthorpe
Add CONFIG_IOMMU_IOPF for page fault handling framework and select it
from its real consumer. Move iopf function declaration from iommu-sva.h
to iommu.h and remove iommu-sva.h as it's empty now.
Consolidate all SVA related code into iommu-sva.c:
- Move iommu_sva_domain_alloc() from iommu.c to iommu-sva.c.
- Move sva iopf handling code from io-pgfault.c to iommu-sva.c.
Consolidate iommu_report_device_fault() and iommu_page_response() into
io-pgfault.c.
Export iopf_free_group() for iopf handlers implemented in modules. Some
functions are renamed with more meaningful names. No other intentional
functionality changes.
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
---
include/linux/iommu.h | 90 +++++++---
drivers/iommu/iommu-sva.h | 69 --------
.../iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c | 1 -
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 1 -
drivers/iommu/intel/iommu.c | 1 -
drivers/iommu/intel/svm.c | 1 -
drivers/iommu/io-pgfault.c | 163 ++++++++++++------
drivers/iommu/iommu-sva.c | 79 ++++++++-
drivers/iommu/iommu.c | 132 --------------
drivers/iommu/Kconfig | 4 +
drivers/iommu/Makefile | 3 +-
drivers/iommu/intel/Kconfig | 1 +
12 files changed, 255 insertions(+), 290 deletions(-)
delete mode 100644 drivers/iommu/iommu-sva.h
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 687dfde2c874..a1ca70d298ee 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -615,10 +615,6 @@ extern struct iommu_group *iommu_group_get(struct device *dev);
extern struct iommu_group *iommu_group_ref_get(struct iommu_group *group);
extern void iommu_group_put(struct iommu_group *group);
-extern int iommu_report_device_fault(struct device *dev, struct iopf_fault *evt);
-extern int iommu_page_response(struct device *dev,
- struct iommu_page_response *msg);
-
extern int iommu_group_id(struct iommu_group *group);
extern struct iommu_domain *iommu_group_default_domain(struct iommu_group *);
@@ -808,8 +804,6 @@ bool iommu_group_dma_owner_claimed(struct iommu_group *group);
int iommu_device_claim_dma_owner(struct device *dev, void *owner);
void iommu_device_release_dma_owner(struct device *dev);
-struct iommu_domain *iommu_sva_domain_alloc(struct device *dev,
- struct mm_struct *mm);
int iommu_attach_device_pasid(struct iommu_domain *domain,
struct device *dev, ioasid_t pasid);
void iommu_detach_device_pasid(struct iommu_domain *domain,
@@ -996,18 +990,6 @@ static inline void iommu_group_put(struct iommu_group *group)
{
}
-static inline
-int iommu_report_device_fault(struct device *dev, struct iopf_fault *evt)
-{
- return -ENODEV;
-}
-
-static inline int iommu_page_response(struct device *dev,
- struct iommu_page_response *msg)
-{
- return -ENODEV;
-}
-
static inline int iommu_group_id(struct iommu_group *group)
{
return -ENODEV;
@@ -1144,12 +1126,6 @@ static inline int iommu_device_claim_dma_owner(struct device *dev, void *owner)
return -ENODEV;
}
-static inline struct iommu_domain *
-iommu_sva_domain_alloc(struct device *dev, struct mm_struct *mm)
-{
- return NULL;
-}
-
static inline int iommu_attach_device_pasid(struct iommu_domain *domain,
struct device *dev, ioasid_t pasid)
{
@@ -1271,6 +1247,8 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev,
struct mm_struct *mm);
void iommu_sva_unbind_device(struct iommu_sva *handle);
u32 iommu_sva_get_pasid(struct iommu_sva *handle);
+struct iommu_domain *iommu_sva_domain_alloc(struct device *dev,
+ struct mm_struct *mm);
#else
static inline struct iommu_sva *
iommu_sva_bind_device(struct device *dev, struct mm_struct *mm)
@@ -1289,6 +1267,70 @@ static inline u32 iommu_sva_get_pasid(struct iommu_sva *handle)
static inline void mm_pasid_init(struct mm_struct *mm) {}
static inline bool mm_valid_pasid(struct mm_struct *mm) { return false; }
static inline void mm_pasid_drop(struct mm_struct *mm) {}
+
+static inline struct iommu_domain *
+iommu_sva_domain_alloc(struct device *dev, struct mm_struct *mm)
+{
+ return NULL;
+}
#endif /* CONFIG_IOMMU_SVA */
+#ifdef CONFIG_IOMMU_IOPF
+int iopf_queue_add_device(struct iopf_queue *queue, struct device *dev);
+int iopf_queue_remove_device(struct iopf_queue *queue, struct device *dev);
+int iopf_queue_flush_dev(struct device *dev);
+struct iopf_queue *iopf_queue_alloc(const char *name);
+void iopf_queue_free(struct iopf_queue *queue);
+int iopf_queue_discard_partial(struct iopf_queue *queue);
+void iopf_free_group(struct iopf_group *group);
+int iommu_report_device_fault(struct device *dev, struct iopf_fault *evt);
+int iommu_page_response(struct device *dev, struct iommu_page_response *msg);
+#else
+static inline int
+iopf_queue_add_device(struct iopf_queue *queue, struct device *dev)
+{
+ return -ENODEV;
+}
+
+static inline int
+iopf_queue_remove_device(struct iopf_queue *queue, struct device *dev)
+{
+ return -ENODEV;
+}
+
+static inline int iopf_queue_flush_dev(struct device *dev)
+{
+ return -ENODEV;
+}
+
+static inline struct iopf_queue *iopf_queue_alloc(const char *name)
+{
+ return NULL;
+}
+
+static inline void iopf_queue_free(struct iopf_queue *queue)
+{
+}
+
+static inline int iopf_queue_discard_partial(struct iopf_queue *queue)
+{
+ return -ENODEV;
+}
+
+static inline void iopf_free_group(struct iopf_group *group)
+{
+}
+
+static inline int
+iommu_report_device_fault(struct device *dev, struct iopf_fault *evt)
+{
+ return -ENODEV;
+}
+
+static inline int
+iommu_page_response(struct device *dev, struct iommu_page_response *msg)
+{
+ return -ENODEV;
+}
+#endif /* CONFIG_IOMMU_IOPF */
#endif /* __LINUX_IOMMU_H */
diff --git a/drivers/iommu/iommu-sva.h b/drivers/iommu/iommu-sva.h
deleted file mode 100644
index 27c8da115b41..000000000000
--- a/drivers/iommu/iommu-sva.h
+++ /dev/null
@@ -1,69 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * SVA library for IOMMU drivers
- */
-#ifndef _IOMMU_SVA_H
-#define _IOMMU_SVA_H
-
-#include <linux/mm_types.h>
-
-/* I/O Page fault */
-struct device;
-struct iommu_fault;
-struct iopf_queue;
-
-#ifdef CONFIG_IOMMU_SVA
-int iommu_queue_iopf(struct iommu_fault *fault, struct device *dev);
-
-int iopf_queue_add_device(struct iopf_queue *queue, struct device *dev);
-int iopf_queue_remove_device(struct iopf_queue *queue,
- struct device *dev);
-int iopf_queue_flush_dev(struct device *dev);
-struct iopf_queue *iopf_queue_alloc(const char *name);
-void iopf_queue_free(struct iopf_queue *queue);
-int iopf_queue_discard_partial(struct iopf_queue *queue);
-int iommu_sva_handle_iopf(struct iopf_group *group);
-
-#else /* CONFIG_IOMMU_SVA */
-static inline int iommu_queue_iopf(struct iommu_fault *fault, struct device *dev)
-{
- return -ENODEV;
-}
-
-static inline int iopf_queue_add_device(struct iopf_queue *queue,
- struct device *dev)
-{
- return -ENODEV;
-}
-
-static inline int iopf_queue_remove_device(struct iopf_queue *queue,
- struct device *dev)
-{
- return -ENODEV;
-}
-
-static inline int iopf_queue_flush_dev(struct device *dev)
-{
- return -ENODEV;
-}
-
-static inline struct iopf_queue *iopf_queue_alloc(const char *name)
-{
- return NULL;
-}
-
-static inline void iopf_queue_free(struct iopf_queue *queue)
-{
-}
-
-static inline int iopf_queue_discard_partial(struct iopf_queue *queue)
-{
- return -ENODEV;
-}
-
-static inline int iommu_sva_handle_iopf(struct iopf_group *group)
-{
- return IOMMU_PAGE_RESP_INVALID;
-}
-#endif /* CONFIG_IOMMU_SVA */
-#endif /* _IOMMU_SVA_H */
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
index 7748ee30f8c0..fad7de25d28a 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
@@ -10,7 +10,6 @@
#include <linux/slab.h>
#include "arm-smmu-v3.h"
-#include "../../iommu-sva.h"
#include "../../io-pgtable-arm.h"
struct arm_smmu_mmu_notifier {
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index e537b6c046c5..d0d349bfdba3 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -29,7 +29,6 @@
#include "arm-smmu-v3.h"
#include "../../dma-iommu.h"
-#include "../../iommu-sva.h"
static bool disable_bypass = true;
module_param(disable_bypass, bool, 0444);
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 35839ef69089..bddc060aa615 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -26,7 +26,6 @@
#include "iommu.h"
#include "../dma-iommu.h"
#include "../irq_remapping.h"
-#include "../iommu-sva.h"
#include "pasid.h"
#include "cap_audit.h"
#include "perfmon.h"
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index 9e8d4d75d293..3bfdcba841e7 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -22,7 +22,6 @@
#include "iommu.h"
#include "pasid.h"
#include "perf.h"
-#include "../iommu-sva.h"
#include "trace.h"
static irqreturn_t prq_event_thread(int irq, void *d);
diff --git a/drivers/iommu/io-pgfault.c b/drivers/iommu/io-pgfault.c
index 06330d922925..f307e592547e 100644
--- a/drivers/iommu/io-pgfault.c
+++ b/drivers/iommu/io-pgfault.c
@@ -11,12 +11,7 @@
#include <linux/slab.h>
#include <linux/workqueue.h>
-#include "iommu-sva.h"
-
-enum iommu_page_response_code
-iommu_sva_handle_mm(struct iommu_fault *fault, struct mm_struct *mm);
-
-static void iopf_free_group(struct iopf_group *group)
+void iopf_free_group(struct iopf_group *group)
{
struct iopf_fault *iopf, *next;
@@ -27,47 +22,10 @@ static void iopf_free_group(struct iopf_group *group)
kfree(group);
}
-
-static int iopf_complete_group(struct device *dev, struct iopf_fault *iopf,
- enum iommu_page_response_code status)
-{
- struct iommu_page_response resp = {
- .pasid = iopf->fault.prm.pasid,
- .grpid = iopf->fault.prm.grpid,
- .code = status,
- };
-
- if ((iopf->fault.prm.flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID) &&
- (iopf->fault.prm.flags & IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID))
- resp.flags = IOMMU_PAGE_RESP_PASID_VALID;
-
- return iommu_page_response(dev, &resp);
-}
-
-static void iopf_handler(struct work_struct *work)
-{
- struct iopf_fault *iopf;
- struct iopf_group *group;
- enum iommu_page_response_code status = IOMMU_PAGE_RESP_SUCCESS;
-
- group = container_of(work, struct iopf_group, work);
- list_for_each_entry(iopf, &group->faults, list) {
- /*
- * For the moment, errors are sticky: don't handle subsequent
- * faults in the group if there is an error.
- */
- if (status != IOMMU_PAGE_RESP_SUCCESS)
- break;
-
- status = iommu_sva_handle_mm(&iopf->fault, group->domain->mm);
- }
-
- iopf_complete_group(group->dev, &group->last_fault, status);
- iopf_free_group(group);
-}
+EXPORT_SYMBOL_GPL(iopf_free_group);
/**
- * iommu_queue_iopf - IO Page Fault handler
+ * iommu_handle_iopf - IO Page Fault handler
* @fault: fault event
* @dev: struct device.
*
@@ -106,7 +64,7 @@ static void iopf_handler(struct work_struct *work)
*
* Return: 0 on success and <0 on error.
*/
-int iommu_queue_iopf(struct iommu_fault *fault, struct device *dev)
+static int iommu_handle_iopf(struct iommu_fault *fault, struct device *dev)
{
int ret;
struct iopf_group *group;
@@ -193,18 +151,117 @@ int iommu_queue_iopf(struct iommu_fault *fault, struct device *dev)
}
return ret;
}
-EXPORT_SYMBOL_GPL(iommu_queue_iopf);
-int iommu_sva_handle_iopf(struct iopf_group *group)
+/**
+ * iommu_report_device_fault() - Report fault event to device driver
+ * @dev: the device
+ * @evt: fault event data
+ *
+ * Called by IOMMU drivers when a fault is detected, typically in a threaded IRQ
+ * handler. When this function fails and the fault is recoverable, it is the
+ * caller's responsibility to complete the fault.
+ *
+ * Return 0 on success, or an error.
+ */
+int iommu_report_device_fault(struct device *dev, struct iopf_fault *evt)
{
- struct iommu_fault_param *fault_param = group->dev->iommu->fault_param;
+ struct dev_iommu *param = dev->iommu;
+ struct iopf_fault *evt_pending = NULL;
+ struct iommu_fault_param *fparam;
+ int ret = 0;
- INIT_WORK(&group->work, iopf_handler);
- if (!queue_work(fault_param->queue->wq, &group->work))
- return -EBUSY;
+ if (!param || !evt)
+ return -EINVAL;
- return 0;
+ /* we only report device fault if there is a handler registered */
+ mutex_lock(&param->lock);
+ fparam = param->fault_param;
+
+ if (evt->fault.type == IOMMU_FAULT_PAGE_REQ &&
+ (evt->fault.prm.flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE)) {
+ evt_pending = kmemdup(evt, sizeof(struct iopf_fault),
+ GFP_KERNEL);
+ if (!evt_pending) {
+ ret = -ENOMEM;
+ goto done_unlock;
+ }
+ mutex_lock(&fparam->lock);
+ list_add_tail(&evt_pending->list, &fparam->faults);
+ mutex_unlock(&fparam->lock);
+ }
+
+ ret = iommu_handle_iopf(&evt->fault, dev);
+ if (ret && evt_pending) {
+ mutex_lock(&fparam->lock);
+ list_del(&evt_pending->list);
+ mutex_unlock(&fparam->lock);
+ kfree(evt_pending);
+ }
+done_unlock:
+ mutex_unlock(&param->lock);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(iommu_report_device_fault);
+
+int iommu_page_response(struct device *dev,
+ struct iommu_page_response *msg)
+{
+ bool needs_pasid;
+ int ret = -EINVAL;
+ struct iopf_fault *evt;
+ struct iommu_fault_page_request *prm;
+ struct dev_iommu *param = dev->iommu;
+ const struct iommu_ops *ops = dev_iommu_ops(dev);
+ bool has_pasid = msg->flags & IOMMU_PAGE_RESP_PASID_VALID;
+
+ if (!ops->page_response)
+ return -ENODEV;
+
+ if (!param || !param->fault_param)
+ return -EINVAL;
+
+ /* Only send response if there is a fault report pending */
+ mutex_lock(&param->fault_param->lock);
+ if (list_empty(&param->fault_param->faults)) {
+ dev_warn_ratelimited(dev, "no pending PRQ, drop response\n");
+ goto done_unlock;
+ }
+ /*
+ * Check if we have a matching page request pending to respond,
+ * otherwise return -EINVAL
+ */
+ list_for_each_entry(evt, &param->fault_param->faults, list) {
+ prm = &evt->fault.prm;
+ if (prm->grpid != msg->grpid)
+ continue;
+
+ /*
+ * If the PASID is required, the corresponding request is
+ * matched using the group ID, the PASID valid bit and the PASID
+ * value. Otherwise only the group ID matches request and
+ * response.
+ */
+ needs_pasid = prm->flags & IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID;
+ if (needs_pasid && (!has_pasid || msg->pasid != prm->pasid))
+ continue;
+
+ if (!needs_pasid && has_pasid) {
+ /* No big deal, just clear it. */
+ msg->flags &= ~IOMMU_PAGE_RESP_PASID_VALID;
+ msg->pasid = 0;
+ }
+
+ ret = ops->page_response(dev, evt, msg);
+ list_del(&evt->list);
+ kfree(evt);
+ break;
+ }
+
+done_unlock:
+ mutex_unlock(&param->fault_param->lock);
+ return ret;
}
+EXPORT_SYMBOL_GPL(iommu_page_response);
/**
* iopf_queue_flush_dev - Ensure that all queued faults have been processed
diff --git a/drivers/iommu/iommu-sva.c b/drivers/iommu/iommu-sva.c
index ba0d5b7e106a..277a25472658 100644
--- a/drivers/iommu/iommu-sva.c
+++ b/drivers/iommu/iommu-sva.c
@@ -7,8 +7,6 @@
#include <linux/sched/mm.h>
#include <linux/iommu.h>
-#include "iommu-sva.h"
-
static DEFINE_MUTEX(iommu_sva_lock);
/* Allocate a PASID for the mm within range (inclusive) */
@@ -145,10 +143,18 @@ u32 iommu_sva_get_pasid(struct iommu_sva *handle)
}
EXPORT_SYMBOL_GPL(iommu_sva_get_pasid);
+void mm_pasid_drop(struct mm_struct *mm)
+{
+ if (likely(!mm_valid_pasid(mm)))
+ return;
+
+ iommu_free_global_pasid(mm->pasid);
+}
+
/*
* I/O page fault handler for SVA
*/
-enum iommu_page_response_code
+static enum iommu_page_response_code
iommu_sva_handle_mm(struct iommu_fault *fault, struct mm_struct *mm)
{
vm_fault_t ret;
@@ -202,10 +208,69 @@ iommu_sva_handle_mm(struct iommu_fault *fault, struct mm_struct *mm)
return status;
}
-void mm_pasid_drop(struct mm_struct *mm)
+static int iommu_sva_complete_group(struct device *dev, struct iopf_fault *iopf,
+ enum iommu_page_response_code status)
{
- if (likely(!mm_valid_pasid(mm)))
- return;
+ struct iommu_page_response resp = {
+ .pasid = iopf->fault.prm.pasid,
+ .grpid = iopf->fault.prm.grpid,
+ .code = status,
+ };
- iommu_free_global_pasid(mm->pasid);
+ if ((iopf->fault.prm.flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID) &&
+ (iopf->fault.prm.flags & IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID))
+ resp.flags = IOMMU_PAGE_RESP_PASID_VALID;
+
+ return iommu_page_response(dev, &resp);
+}
+
+static void iommu_sva_handle_iopf(struct work_struct *work)
+{
+ struct iopf_fault *iopf;
+ struct iopf_group *group;
+ enum iommu_page_response_code status = IOMMU_PAGE_RESP_SUCCESS;
+
+ group = container_of(work, struct iopf_group, work);
+ list_for_each_entry(iopf, &group->faults, list) {
+ /*
+ * For the moment, errors are sticky: don't handle subsequent
+ * faults in the group if there is an error.
+ */
+ if (status != IOMMU_PAGE_RESP_SUCCESS)
+ break;
+
+ status = iommu_sva_handle_mm(&iopf->fault, group->domain->mm);
+ }
+
+ iommu_sva_complete_group(group->dev, &group->last_fault, status);
+ iopf_free_group(group);
+}
+
+static int iommu_sva_iopf_handler(struct iopf_group *group)
+{
+ struct iommu_fault_param *fault_param = group->dev->iommu->fault_param;
+
+ INIT_WORK(&group->work, iommu_sva_handle_iopf);
+ if (!queue_work(fault_param->queue->wq, &group->work))
+ return -EBUSY;
+
+ return 0;
+}
+
+struct iommu_domain *iommu_sva_domain_alloc(struct device *dev,
+ struct mm_struct *mm)
+{
+ const struct iommu_ops *ops = dev_iommu_ops(dev);
+ struct iommu_domain *domain;
+
+ domain = ops->domain_alloc(IOMMU_DOMAIN_SVA);
+ if (!domain)
+ return NULL;
+
+ domain->type = IOMMU_DOMAIN_SVA;
+ mmgrab(mm);
+ domain->mm = mm;
+ domain->iopf_handler = iommu_sva_iopf_handler;
+
+ return domain;
}
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 0704a0f36349..34ac7768d8e3 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -35,8 +35,6 @@
#include "dma-iommu.h"
-#include "iommu-sva.h"
-
static struct kset *iommu_group_kset;
static DEFINE_IDA(iommu_group_ida);
static DEFINE_IDA(iommu_global_pasid_ida);
@@ -1250,117 +1248,6 @@ void iommu_group_put(struct iommu_group *group)
}
EXPORT_SYMBOL_GPL(iommu_group_put);
-/**
- * iommu_report_device_fault() - Report fault event to device driver
- * @dev: the device
- * @evt: fault event data
- *
- * Called by IOMMU drivers when a fault is detected, typically in a threaded IRQ
- * handler. When this function fails and the fault is recoverable, it is the
- * caller's responsibility to complete the fault.
- *
- * Return 0 on success, or an error.
- */
-int iommu_report_device_fault(struct device *dev, struct iopf_fault *evt)
-{
- struct dev_iommu *param = dev->iommu;
- struct iopf_fault *evt_pending = NULL;
- struct iommu_fault_param *fparam;
- int ret = 0;
-
- if (!param || !evt)
- return -EINVAL;
-
- /* we only report device fault if there is a handler registered */
- mutex_lock(&param->lock);
- fparam = param->fault_param;
-
- if (evt->fault.type == IOMMU_FAULT_PAGE_REQ &&
- (evt->fault.prm.flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE)) {
- evt_pending = kmemdup(evt, sizeof(struct iopf_fault),
- GFP_KERNEL);
- if (!evt_pending) {
- ret = -ENOMEM;
- goto done_unlock;
- }
- mutex_lock(&fparam->lock);
- list_add_tail(&evt_pending->list, &fparam->faults);
- mutex_unlock(&fparam->lock);
- }
-
- ret = iommu_queue_iopf(&evt->fault, dev);
- if (ret && evt_pending) {
- mutex_lock(&fparam->lock);
- list_del(&evt_pending->list);
- mutex_unlock(&fparam->lock);
- kfree(evt_pending);
- }
-done_unlock:
- mutex_unlock(&param->lock);
- return ret;
-}
-EXPORT_SYMBOL_GPL(iommu_report_device_fault);
-
-int iommu_page_response(struct device *dev,
- struct iommu_page_response *msg)
-{
- bool needs_pasid;
- int ret = -EINVAL;
- struct iopf_fault *evt;
- struct iommu_fault_page_request *prm;
- struct dev_iommu *param = dev->iommu;
- const struct iommu_ops *ops = dev_iommu_ops(dev);
- bool has_pasid = msg->flags & IOMMU_PAGE_RESP_PASID_VALID;
-
- if (!ops->page_response)
- return -ENODEV;
-
- if (!param || !param->fault_param)
- return -EINVAL;
-
- /* Only send response if there is a fault report pending */
- mutex_lock(&param->fault_param->lock);
- if (list_empty(&param->fault_param->faults)) {
- dev_warn_ratelimited(dev, "no pending PRQ, drop response\n");
- goto done_unlock;
- }
- /*
- * Check if we have a matching page request pending to respond,
- * otherwise return -EINVAL
- */
- list_for_each_entry(evt, &param->fault_param->faults, list) {
- prm = &evt->fault.prm;
- if (prm->grpid != msg->grpid)
- continue;
-
- /*
- * If the PASID is required, the corresponding request is
- * matched using the group ID, the PASID valid bit and the PASID
- * value. Otherwise only the group ID matches request and
- * response.
- */
- needs_pasid = prm->flags & IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID;
- if (needs_pasid && (!has_pasid || msg->pasid != prm->pasid))
- continue;
-
- if (!needs_pasid && has_pasid) {
- /* No big deal, just clear it. */
- msg->flags &= ~IOMMU_PAGE_RESP_PASID_VALID;
- msg->pasid = 0;
- }
-
- ret = ops->page_response(dev, evt, msg);
- list_del(&evt->list);
- kfree(evt);
- break;
- }
-
-done_unlock:
- mutex_unlock(&param->fault_param->lock);
- return ret;
-}
-EXPORT_SYMBOL_GPL(iommu_page_response);
-
/**
* iommu_group_id - Return ID for a group
* @group: the group to ID
@@ -3337,25 +3224,6 @@ struct iommu_domain *iommu_get_domain_for_dev_pasid(struct device *dev,
}
EXPORT_SYMBOL_GPL(iommu_get_domain_for_dev_pasid);
-struct iommu_domain *iommu_sva_domain_alloc(struct device *dev,
- struct mm_struct *mm)
-{
- const struct iommu_ops *ops = dev_iommu_ops(dev);
- struct iommu_domain *domain;
-
- domain = ops->domain_alloc(IOMMU_DOMAIN_SVA);
- if (!domain)
- return NULL;
-
- domain->type = IOMMU_DOMAIN_SVA;
- mmgrab(mm);
- domain->mm = mm;
- domain->iopf_handler = iommu_sva_handle_iopf;
- domain->fault_data = mm;
-
- return domain;
-}
-
ioasid_t iommu_alloc_global_pasid(struct device *dev)
{
int ret;
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index 2b12b583ef4b..e7b87ec525a7 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -158,6 +158,9 @@ config IOMMU_DMA
config IOMMU_SVA
bool
+config IOMMU_IOPF
+ bool
+
config FSL_PAMU
bool "Freescale IOMMU support"
depends on PCI
@@ -404,6 +407,7 @@ config ARM_SMMU_V3_SVA
bool "Shared Virtual Addressing support for the ARM SMMUv3"
depends on ARM_SMMU_V3
select IOMMU_SVA
+ select IOMMU_IOPF
select MMU_NOTIFIER
help
Support for sharing process address spaces with devices using the
diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
index 769e43d780ce..6d114fe7ae89 100644
--- a/drivers/iommu/Makefile
+++ b/drivers/iommu/Makefile
@@ -27,6 +27,7 @@ obj-$(CONFIG_FSL_PAMU) += fsl_pamu.o fsl_pamu_domain.o
obj-$(CONFIG_S390_IOMMU) += s390-iommu.o
obj-$(CONFIG_HYPERV_IOMMU) += hyperv-iommu.o
obj-$(CONFIG_VIRTIO_IOMMU) += virtio-iommu.o
-obj-$(CONFIG_IOMMU_SVA) += iommu-sva.o io-pgfault.o
+obj-$(CONFIG_IOMMU_SVA) += iommu-sva.o
+obj-$(CONFIG_IOMMU_IOPF) += io-pgfault.o
obj-$(CONFIG_SPRD_IOMMU) += sprd-iommu.o
obj-$(CONFIG_APPLE_DART) += apple-dart.o
diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig
index 2e56bd79f589..613f149510a7 100644
--- a/drivers/iommu/intel/Kconfig
+++ b/drivers/iommu/intel/Kconfig
@@ -50,6 +50,7 @@ config INTEL_IOMMU_SVM
depends on X86_64
select MMU_NOTIFIER
select IOMMU_SVA
+ select IOMMU_IOPF
help
Shared Virtual Memory (SVM) provides a facility for devices
to access DMA resources through process address space by
--
2.34.1
^ permalink raw reply related [flat|nested] 48+ messages in thread