public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 0/2] iommu/vt-d: Refactor PRI enable/disable steps
@ 2024-06-27  2:31 Lu Baolu
  2024-06-27  2:31 ` [PATCH v2 1/2] iommu/vt-d: Add helper to flush caches for context change Lu Baolu
  2024-06-27  2:31 ` [PATCH v2 2/2] iommu/vt-d: Refactor PCI PRI enabling/disabling callbacks Lu Baolu
  0 siblings, 2 replies; 10+ messages in thread
From: Lu Baolu @ 2024-06-27  2:31 UTC (permalink / raw)
  To: Joerg Roedel, Will Deacon, Robin Murphy, Jason Gunthorpe,
	Kevin Tian
  Cc: iommu, linux-kernel, Lu Baolu

The page fault handling framework within the iommu core has defined the
PRI enable and disable flows in the comments for the
iopf_queue_remove_device() interface. This series aims to refactor the
PRI enable/disable steps in the Intel iommu driver to align with these
definitions.

Change log:

v2:
 - The cache invalidation for a context entry change should not affect
   the devices not related to the entry. Fix this by always using
   device-selective cache invalidation.
v1:
 - https://lore.kernel.org/linux-iommu/20240606034019.42795-1-baolu.lu@linux.intel.com/

Lu Baolu (2):
  iommu/vt-d: Add helper to flush caches for context change
  iommu/vt-d: Refactor PCI PRI enabling/disabling callbacks

 drivers/iommu/intel/iommu.h |  13 +++++
 drivers/iommu/intel/iommu.c |  89 ++++++++++++++++++-------------
 drivers/iommu/intel/pasid.c | 103 ++++++++++++++++++++++++++++--------
 3 files changed, 148 insertions(+), 57 deletions(-)

-- 
2.34.1


^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH v2 1/2] iommu/vt-d: Add helper to flush caches for context change
  2024-06-27  2:31 [PATCH v2 0/2] iommu/vt-d: Refactor PRI enable/disable steps Lu Baolu
@ 2024-06-27  2:31 ` Lu Baolu
  2024-06-27  6:08   ` Tian, Kevin
  2024-07-02  0:23   ` Jacob Pan
  2024-06-27  2:31 ` [PATCH v2 2/2] iommu/vt-d: Refactor PCI PRI enabling/disabling callbacks Lu Baolu
  1 sibling, 2 replies; 10+ messages in thread
From: Lu Baolu @ 2024-06-27  2:31 UTC (permalink / raw)
  To: Joerg Roedel, Will Deacon, Robin Murphy, Jason Gunthorpe,
	Kevin Tian
  Cc: iommu, linux-kernel, Lu Baolu

This helper is used to flush the related caches following a change in a
context table entry that was previously present. The VT-d specification
provides guidance for such invalidations in section 6.5.3.3.

This helper replaces the existing open code in the code paths where a
present context entry is being torn down.

Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
---
 drivers/iommu/intel/iommu.h |   4 ++
 drivers/iommu/intel/iommu.c |  32 +-----------
 drivers/iommu/intel/pasid.c | 101 +++++++++++++++++++++++++++++-------
 3 files changed, 87 insertions(+), 50 deletions(-)

diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index 9a3b064126de..63eb3306c025 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -1143,6 +1143,10 @@ void cache_tag_flush_all(struct dmar_domain *domain);
 void cache_tag_flush_range_np(struct dmar_domain *domain, unsigned long start,
 			      unsigned long end);
 
+void intel_context_flush_present(struct device_domain_info *info,
+				 struct context_entry *context,
+				 bool affect_domains);
+
 #ifdef CONFIG_INTEL_IOMMU_SVM
 void intel_svm_check(struct intel_iommu *iommu);
 int intel_svm_enable_prq(struct intel_iommu *iommu);
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 6c682c9defee..89f489372138 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -1359,21 +1359,6 @@ static void iommu_disable_pci_caps(struct device_domain_info *info)
 	}
 }
 
-static void __iommu_flush_dev_iotlb(struct device_domain_info *info,
-				    u64 addr, unsigned int mask)
-{
-	u16 sid, qdep;
-
-	if (!info || !info->ats_enabled)
-		return;
-
-	sid = info->bus << 8 | info->devfn;
-	qdep = info->ats_qdep;
-	qi_flush_dev_iotlb(info->iommu, sid, info->pfsid,
-			   qdep, addr, mask);
-	quirk_extra_dev_tlb_flush(info, addr, mask, IOMMU_NO_PASID, qdep);
-}
-
 static void intel_flush_iotlb_all(struct iommu_domain *domain)
 {
 	cache_tag_flush_all(to_dmar_domain(domain));
@@ -1959,7 +1944,6 @@ static void domain_context_clear_one(struct device_domain_info *info, u8 bus, u8
 {
 	struct intel_iommu *iommu = info->iommu;
 	struct context_entry *context;
-	u16 did_old;
 
 	spin_lock(&iommu->lock);
 	context = iommu_context_addr(iommu, bus, devfn, 0);
@@ -1968,24 +1952,10 @@ static void domain_context_clear_one(struct device_domain_info *info, u8 bus, u8
 		return;
 	}
 
-	did_old = context_domain_id(context);
-
 	context_clear_entry(context);
 	__iommu_flush_cache(iommu, context, sizeof(*context));
+	intel_context_flush_present(info, context, true);
 	spin_unlock(&iommu->lock);
-	iommu->flush.flush_context(iommu,
-				   did_old,
-				   (((u16)bus) << 8) | devfn,
-				   DMA_CCMD_MASK_NOBIT,
-				   DMA_CCMD_DEVICE_INVL);
-
-	iommu->flush.flush_iotlb(iommu,
-				 did_old,
-				 0,
-				 0,
-				 DMA_TLB_DSI_FLUSH);
-
-	__iommu_flush_dev_iotlb(info, 0, MAX_AGAW_PFN_WIDTH);
 }
 
 static int domain_setup_first_level(struct intel_iommu *iommu,
diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c
index aabcdf756581..aef4b4afb873 100644
--- a/drivers/iommu/intel/pasid.c
+++ b/drivers/iommu/intel/pasid.c
@@ -690,26 +690,8 @@ static void device_pasid_table_teardown(struct device *dev, u8 bus, u8 devfn)
 
 	context_clear_entry(context);
 	__iommu_flush_cache(iommu, context, sizeof(*context));
+	intel_context_flush_present(info, context, false);
 	spin_unlock(&iommu->lock);
-
-	/*
-	 * Cache invalidation for changes to a scalable-mode context table
-	 * entry.
-	 *
-	 * Section 6.5.3.3 of the VT-d spec:
-	 * - Device-selective context-cache invalidation;
-	 * - Domain-selective PASID-cache invalidation to affected domains
-	 *   (can be skipped if all PASID entries were not-present);
-	 * - Domain-selective IOTLB invalidation to affected domains;
-	 * - Global Device-TLB invalidation to affected functions.
-	 *
-	 * The iommu has been parked in the blocking state. All domains have
-	 * been detached from the device or PASID. The PASID and IOTLB caches
-	 * have been invalidated during the domain detach path.
-	 */
-	iommu->flush.flush_context(iommu, 0, PCI_DEVID(bus, devfn),
-				   DMA_CCMD_MASK_NOBIT, DMA_CCMD_DEVICE_INVL);
-	devtlb_invalidation_with_pasid(iommu, dev, IOMMU_NO_PASID);
 }
 
 static int pci_pasid_table_teardown(struct pci_dev *pdev, u16 alias, void *data)
@@ -871,3 +853,84 @@ int intel_pasid_setup_sm_context(struct device *dev)
 
 	return pci_for_each_dma_alias(to_pci_dev(dev), pci_pasid_table_setup, dev);
 }
+
+/*
+ * Global Device-TLB invalidation following changes in a context entry which
+ * was present.
+ */
+static void __context_flush_dev_iotlb(struct device_domain_info *info)
+{
+	if (!info->ats_enabled)
+		return;
+
+	qi_flush_dev_iotlb(info->iommu, PCI_DEVID(info->bus, info->devfn),
+			   info->pfsid, info->ats_qdep, 0, MAX_AGAW_PFN_WIDTH);
+
+	/*
+	 * There is no guarantee that the device DMA is stopped when it reaches
+	 * here. Therefore, always attempt the extra device TLB invalidation
+	 * quirk. The impact on performance is acceptable since this is not a
+	 * performance-critical path.
+	 */
+	quirk_extra_dev_tlb_flush(info, 0, MAX_AGAW_PFN_WIDTH, IOMMU_NO_PASID,
+				  info->ats_qdep);
+}
+
+/*
+ * Cache invalidations after change in a context table entry that was present
+ * according to the Spec 6.5.3.3 (Guidance to Software for Invalidations). If
+ * IOMMU is in scalable mode and all PASID table entries of the device were
+ * non-present, set affect_domains to true. Otherwise, false.
+ */
+void intel_context_flush_present(struct device_domain_info *info,
+				 struct context_entry *context,
+				 bool affect_domains)
+{
+	struct intel_iommu *iommu = info->iommu;
+	u16 did = context_domain_id(context);
+	struct pasid_entry *pte;
+	int i;
+
+	assert_spin_locked(&iommu->lock);
+
+	/*
+	 * Device-selective context-cache invalidation. The Domain-ID field
+	 * of the Context-cache Invalidate Descriptor is ignored by hardware
+	 * when operating in scalable mode. Therefore the @did value doesn't
+	 * matter in scalable mode.
+	 */
+	iommu->flush.flush_context(iommu, did, PCI_DEVID(info->bus, info->devfn),
+				   DMA_CCMD_MASK_NOBIT, DMA_CCMD_DEVICE_INVL);
+
+	/*
+	 * For legacy mode:
+	 * - Domain-selective IOTLB invalidation
+	 * - Global Device-TLB invalidation to all affected functions
+	 */
+	if (!sm_supported(iommu)) {
+		iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
+		__context_flush_dev_iotlb(info);
+
+		return;
+	}
+
+	/*
+	 * For scalable mode:
+	 * - Domain-selective PASID-cache invalidation to affected domains
+	 * - Domain-selective IOTLB invalidation to affected domains
+	 * - Global Device-TLB invalidation to affected functions
+	 */
+	if (affect_domains) {
+		for (i = 0; i < info->pasid_table->max_pasid; i++) {
+			pte = intel_pasid_get_entry(info->dev, i);
+			if (!pte || !pasid_pte_is_present(pte))
+				continue;
+
+			did = pasid_get_domain_id(pte);
+			qi_flush_pasid_cache(iommu, did, QI_PC_ALL_PASIDS, 0);
+			iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
+		}
+	}
+
+	__context_flush_dev_iotlb(info);
+}
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH v2 2/2] iommu/vt-d: Refactor PCI PRI enabling/disabling callbacks
  2024-06-27  2:31 [PATCH v2 0/2] iommu/vt-d: Refactor PRI enable/disable steps Lu Baolu
  2024-06-27  2:31 ` [PATCH v2 1/2] iommu/vt-d: Add helper to flush caches for context change Lu Baolu
@ 2024-06-27  2:31 ` Lu Baolu
  1 sibling, 0 replies; 10+ messages in thread
From: Lu Baolu @ 2024-06-27  2:31 UTC (permalink / raw)
  To: Joerg Roedel, Will Deacon, Robin Murphy, Jason Gunthorpe,
	Kevin Tian
  Cc: iommu, linux-kernel, Lu Baolu

Commit 0095bf83554f8 ("iommu: Improve iopf_queue_remove_device()")
specified the flow for disabling the PRI on a device. Refactor the
PRI callbacks in the Intel iommu driver to better manage PRI
enabling and disabling and align them with the device queue interfaces
in the iommu core.

Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
---
 drivers/iommu/intel/iommu.h |  9 ++++++
 drivers/iommu/intel/iommu.c | 57 +++++++++++++++++++++++++++++++++----
 drivers/iommu/intel/pasid.c |  2 --
 3 files changed, 61 insertions(+), 7 deletions(-)

diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index 63eb3306c025..b67c14da1240 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -1045,6 +1045,15 @@ static inline void context_set_sm_pre(struct context_entry *context)
 	context->lo |= BIT_ULL(4);
 }
 
+/*
+ * Clear the PRE(Page Request Enable) field of a scalable mode context
+ * entry.
+ */
+static inline void context_clear_sm_pre(struct context_entry *context)
+{
+	context->lo &= ~BIT_ULL(4);
+}
+
 /* Returns a number of VTD pages, but aligned to MM page size */
 static inline unsigned long aligned_nrpages(unsigned long host_addr, size_t size)
 {
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 89f489372138..d51f840715e5 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -4241,6 +4241,37 @@ static int intel_iommu_enable_sva(struct device *dev)
 	return 0;
 }
 
+static int context_flip_pri(struct device_domain_info *info, bool enable)
+{
+	struct intel_iommu *iommu = info->iommu;
+	u8 bus = info->bus, devfn = info->devfn;
+	struct context_entry *context;
+
+	spin_lock(&iommu->lock);
+	if (context_copied(iommu, bus, devfn)) {
+		spin_unlock(&iommu->lock);
+		return -EINVAL;
+	}
+
+	context = iommu_context_addr(iommu, bus, devfn, false);
+	if (!context || !context_present(context)) {
+		spin_unlock(&iommu->lock);
+		return -ENODEV;
+	}
+
+	if (enable)
+		context_set_sm_pre(context);
+	else
+		context_clear_sm_pre(context);
+
+	if (!ecap_coherent(iommu->ecap))
+		clflush_cache_range(context, sizeof(*context));
+	intel_context_flush_present(info, context, true);
+	spin_unlock(&iommu->lock);
+
+	return 0;
+}
+
 static int intel_iommu_enable_iopf(struct device *dev)
 {
 	struct pci_dev *pdev = dev_is_pci(dev) ? to_pci_dev(dev) : NULL;
@@ -4270,15 +4301,23 @@ static int intel_iommu_enable_iopf(struct device *dev)
 	if (ret)
 		return ret;
 
+	ret = context_flip_pri(info, true);
+	if (ret)
+		goto err_remove_device;
+
 	ret = pci_enable_pri(pdev, PRQ_DEPTH);
-	if (ret) {
-		iopf_queue_remove_device(iommu->iopf_queue, dev);
-		return ret;
-	}
+	if (ret)
+		goto err_clear_pri;
 
 	info->pri_enabled = 1;
 
 	return 0;
+err_clear_pri:
+	context_flip_pri(info, false);
+err_remove_device:
+	iopf_queue_remove_device(iommu->iopf_queue, dev);
+
+	return ret;
 }
 
 static int intel_iommu_disable_iopf(struct device *dev)
@@ -4289,6 +4328,15 @@ static int intel_iommu_disable_iopf(struct device *dev)
 	if (!info->pri_enabled)
 		return -EINVAL;
 
+	/* Disable new PRI reception: */
+	context_flip_pri(info, false);
+
+	/*
+	 * Remove device from fault queue and acknowledge all outstanding
+	 * PRQs to the device:
+	 */
+	iopf_queue_remove_device(iommu->iopf_queue, dev);
+
 	/*
 	 * PCIe spec states that by clearing PRI enable bit, the Page
 	 * Request Interface will not issue new page requests, but has
@@ -4299,7 +4347,6 @@ static int intel_iommu_disable_iopf(struct device *dev)
 	 */
 	pci_disable_pri(to_pci_dev(dev));
 	info->pri_enabled = 0;
-	iopf_queue_remove_device(iommu->iopf_queue, dev);
 
 	return 0;
 }
diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c
index aef4b4afb873..b1ec9660bc11 100644
--- a/drivers/iommu/intel/pasid.c
+++ b/drivers/iommu/intel/pasid.c
@@ -749,8 +749,6 @@ static int context_entry_set_pasid_table(struct context_entry *context,
 
 	if (info->ats_supported)
 		context_set_sm_dte(context);
-	if (info->pri_supported)
-		context_set_sm_pre(context);
 	if (info->pasid_supported)
 		context_set_pasid(context);
 
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* RE: [PATCH v2 1/2] iommu/vt-d: Add helper to flush caches for context change
  2024-06-27  2:31 ` [PATCH v2 1/2] iommu/vt-d: Add helper to flush caches for context change Lu Baolu
@ 2024-06-27  6:08   ` Tian, Kevin
  2024-06-27  8:21     ` Baolu Lu
  2024-07-02  0:23   ` Jacob Pan
  1 sibling, 1 reply; 10+ messages in thread
From: Tian, Kevin @ 2024-06-27  6:08 UTC (permalink / raw)
  To: Lu Baolu, Joerg Roedel, Will Deacon, Robin Murphy,
	Jason Gunthorpe
  Cc: iommu@lists.linux.dev, linux-kernel@vger.kernel.org

> From: Lu Baolu <baolu.lu@linux.intel.com>
> Sent: Thursday, June 27, 2024 10:31 AM
> 
> +/*
> + * Cache invalidations after change in a context table entry that was present
> + * according to the Spec 6.5.3.3 (Guidance to Software for Invalidations). If
> + * IOMMU is in scalable mode and all PASID table entries of the device were
> + * non-present, set affect_domains to true. Otherwise, false.

if no PASID is present then the flag should be false.

s/affect_domains/flush_domains/

> + */
> +void intel_context_flush_present(struct device_domain_info *info,
> +				 struct context_entry *context,
> +				 bool affect_domains)
> +{
> +	struct intel_iommu *iommu = info->iommu;
> +	u16 did = context_domain_id(context);
> +	struct pasid_entry *pte;
> +	int i;
> +
> +	assert_spin_locked(&iommu->lock);
> +
> +	/*
> +	 * Device-selective context-cache invalidation. The Domain-ID field
> +	 * of the Context-cache Invalidate Descriptor is ignored by hardware
> +	 * when operating in scalable mode. Therefore the @did value
> doesn't
> +	 * matter in scalable mode.
> +	 */
> +	iommu->flush.flush_context(iommu, did, PCI_DEVID(info->bus, info-
> >devfn),
> +				   DMA_CCMD_MASK_NOBIT,
> DMA_CCMD_DEVICE_INVL);
> +
> +	/*
> +	 * For legacy mode:
> +	 * - Domain-selective IOTLB invalidation
> +	 * - Global Device-TLB invalidation to all affected functions
> +	 */
> +	if (!sm_supported(iommu)) {
> +		iommu->flush.flush_iotlb(iommu, did, 0, 0,
> DMA_TLB_DSI_FLUSH);
> +		__context_flush_dev_iotlb(info);
> +
> +		return;
> +	}
> +
> +	/*
> +	 * For scalable mode:
> +	 * - Domain-selective PASID-cache invalidation to affected domains
> +	 * - Domain-selective IOTLB invalidation to affected domains
> +	 * - Global Device-TLB invalidation to affected functions
> +	 */
> +	if (affect_domains) {
> +		for (i = 0; i < info->pasid_table->max_pasid; i++) {
> +			pte = intel_pasid_get_entry(info->dev, i);
> +			if (!pte || !pasid_pte_is_present(pte))
> +				continue;
> +
> +			did = pasid_get_domain_id(pte);
> +			qi_flush_pasid_cache(iommu, did,
> QI_PC_ALL_PASIDS, 0);
> +			iommu->flush.flush_iotlb(iommu, did, 0, 0,
> DMA_TLB_DSI_FLUSH);
> +		}
> +	}
> +
> +	__context_flush_dev_iotlb(info);
> +}
> --
> 2.34.1
> 

this change moves the entire cache invalidation flow inside
iommu->lock. Though the directly-affected operations are not in
critical path the indirect impact applies to all other paths acquiring
iommu->lock given it'll be held unnecessarily longer after this
change.

If the only requirement of holding iommu->lock is to walk the pasid
table, probably we can collect a bitmap of DID's in the locked walk
and then invalidate each in a loop outside of iommu->lock. Here
we only care about DIDs associated with the old context entry at
this point. New pasid attach will hit new context entry. Concurrent
pasid detach then may just come with duplicated invalidations.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 1/2] iommu/vt-d: Add helper to flush caches for context change
  2024-06-27  6:08   ` Tian, Kevin
@ 2024-06-27  8:21     ` Baolu Lu
  2024-06-28  7:43       ` Tian, Kevin
  0 siblings, 1 reply; 10+ messages in thread
From: Baolu Lu @ 2024-06-27  8:21 UTC (permalink / raw)
  To: Tian, Kevin, Joerg Roedel, Will Deacon, Robin Murphy,
	Jason Gunthorpe
  Cc: baolu.lu, iommu@lists.linux.dev, linux-kernel@vger.kernel.org

On 2024/6/27 14:08, Tian, Kevin wrote:
>> From: Lu Baolu <baolu.lu@linux.intel.com>
>> Sent: Thursday, June 27, 2024 10:31 AM
>>
>> +/*
>> + * Cache invalidations after change in a context table entry that was present
>> + * according to the Spec 6.5.3.3 (Guidance to Software for Invalidations). If
>> + * IOMMU is in scalable mode and all PASID table entries of the device were
>> + * non-present, set affect_domains to true. Otherwise, false.
> 
> if no PASID is present then the flag should be false.
> 
> s/affect_domains/flush_domains/

Yes.

> 
>> + */
>> +void intel_context_flush_present(struct device_domain_info *info,
>> +				 struct context_entry *context,
>> +				 bool affect_domains)
>> +{
>> +	struct intel_iommu *iommu = info->iommu;
>> +	u16 did = context_domain_id(context);
>> +	struct pasid_entry *pte;
>> +	int i;
>> +
>> +	assert_spin_locked(&iommu->lock);
>> +
>> +	/*
>> +	 * Device-selective context-cache invalidation. The Domain-ID field
>> +	 * of the Context-cache Invalidate Descriptor is ignored by hardware
>> +	 * when operating in scalable mode. Therefore the @did value
>> doesn't
>> +	 * matter in scalable mode.
>> +	 */
>> +	iommu->flush.flush_context(iommu, did, PCI_DEVID(info->bus, info-
>>> devfn),
>> +				   DMA_CCMD_MASK_NOBIT,
>> DMA_CCMD_DEVICE_INVL);
>> +
>> +	/*
>> +	 * For legacy mode:
>> +	 * - Domain-selective IOTLB invalidation
>> +	 * - Global Device-TLB invalidation to all affected functions
>> +	 */
>> +	if (!sm_supported(iommu)) {
>> +		iommu->flush.flush_iotlb(iommu, did, 0, 0,
>> DMA_TLB_DSI_FLUSH);
>> +		__context_flush_dev_iotlb(info);
>> +
>> +		return;
>> +	}
>> +
>> +	/*
>> +	 * For scalable mode:
>> +	 * - Domain-selective PASID-cache invalidation to affected domains
>> +	 * - Domain-selective IOTLB invalidation to affected domains
>> +	 * - Global Device-TLB invalidation to affected functions
>> +	 */
>> +	if (affect_domains) {
>> +		for (i = 0; i < info->pasid_table->max_pasid; i++) {
>> +			pte = intel_pasid_get_entry(info->dev, i);
>> +			if (!pte || !pasid_pte_is_present(pte))
>> +				continue;
>> +
>> +			did = pasid_get_domain_id(pte);
>> +			qi_flush_pasid_cache(iommu, did,
>> QI_PC_ALL_PASIDS, 0);
>> +			iommu->flush.flush_iotlb(iommu, did, 0, 0,
>> DMA_TLB_DSI_FLUSH);
>> +		}
>> +	}
>> +
>> +	__context_flush_dev_iotlb(info);
>> +}
>> --
>> 2.34.1
>>
> 
> this change moves the entire cache invalidation flow inside
> iommu->lock. Though the directly-affected operations are not in
> critical path the indirect impact applies to all other paths acquiring
> iommu->lock given it'll be held unnecessarily longer after this
> change.
> 
> If the only requirement of holding iommu->lock is to walk the pasid
> table, probably we can collect a bitmap of DID's in the locked walk
> and then invalidate each in a loop outside of iommu->lock. Here
> we only care about DIDs associated with the old context entry at
> this point. New pasid attach will hit new context entry. Concurrent
> pasid detach then may just come with duplicated invalidations.

The iommu->lock is not only for the PASID table walk. The basic
schematic here is that once a present context table entry is being
changed, all PASID entries should not be altered until all the related
caches have been flushed. This is because the configuration of the
context entry might also impact PASID translation.

Previously, we did not apply this lock because all those cases involved
changing the context entry from present to non-present, and we were
certain that all PASID entries were empty. Now, as we are making it a
generic helper that also serves scenarios where the entry remains
present after the change, we need this lock to ensure that no PASID
entry changes occur at the same time.

Best regards,
baolu

^ permalink raw reply	[flat|nested] 10+ messages in thread

* RE: [PATCH v2 1/2] iommu/vt-d: Add helper to flush caches for context change
  2024-06-27  8:21     ` Baolu Lu
@ 2024-06-28  7:43       ` Tian, Kevin
  2024-06-28 11:24         ` Baolu Lu
  0 siblings, 1 reply; 10+ messages in thread
From: Tian, Kevin @ 2024-06-28  7:43 UTC (permalink / raw)
  To: Baolu Lu, Joerg Roedel, Will Deacon, Robin Murphy,
	Jason Gunthorpe
  Cc: iommu@lists.linux.dev, linux-kernel@vger.kernel.org

> From: Baolu Lu <baolu.lu@linux.intel.com>
> Sent: Thursday, June 27, 2024 4:22 PM
> 
> On 2024/6/27 14:08, Tian, Kevin wrote:
> >> From: Lu Baolu <baolu.lu@linux.intel.com>
> >> Sent: Thursday, June 27, 2024 10:31 AM
> >>
> >> + */
> >> +void intel_context_flush_present(struct device_domain_info *info,
> >> +				 struct context_entry *context,
> >> +				 bool affect_domains)
> >> +{
> >> +	struct intel_iommu *iommu = info->iommu;
> >> +	u16 did = context_domain_id(context);
> >> +	struct pasid_entry *pte;
> >> +	int i;
> >> +
> >> +	assert_spin_locked(&iommu->lock);
> >> +
> >> +	/*
> >> +	 * Device-selective context-cache invalidation. The Domain-ID field
> >> +	 * of the Context-cache Invalidate Descriptor is ignored by hardware
> >> +	 * when operating in scalable mode. Therefore the @did value
> >> doesn't
> >> +	 * matter in scalable mode.
> >> +	 */
> >> +	iommu->flush.flush_context(iommu, did, PCI_DEVID(info->bus, info-
> >>> devfn),
> >> +				   DMA_CCMD_MASK_NOBIT,
> >> DMA_CCMD_DEVICE_INVL);
> >> +
> >> +	/*
> >> +	 * For legacy mode:
> >> +	 * - Domain-selective IOTLB invalidation
> >> +	 * - Global Device-TLB invalidation to all affected functions
> >> +	 */
> >> +	if (!sm_supported(iommu)) {
> >> +		iommu->flush.flush_iotlb(iommu, did, 0, 0,
> >> DMA_TLB_DSI_FLUSH);
> >> +		__context_flush_dev_iotlb(info);
> >> +
> >> +		return;
> >> +	}
> >> +
> >> +	/*
> >> +	 * For scalable mode:
> >> +	 * - Domain-selective PASID-cache invalidation to affected domains
> >> +	 * - Domain-selective IOTLB invalidation to affected domains
> >> +	 * - Global Device-TLB invalidation to affected functions
> >> +	 */
> >> +	if (affect_domains) {
> >> +		for (i = 0; i < info->pasid_table->max_pasid; i++) {
> >> +			pte = intel_pasid_get_entry(info->dev, i);
> >> +			if (!pte || !pasid_pte_is_present(pte))
> >> +				continue;
> >> +
> >> +			did = pasid_get_domain_id(pte);
> >> +			qi_flush_pasid_cache(iommu, did,
> >> QI_PC_ALL_PASIDS, 0);
> >> +			iommu->flush.flush_iotlb(iommu, did, 0, 0,
> >> DMA_TLB_DSI_FLUSH);
> >> +		}
> >> +	}
> >> +
> >> +	__context_flush_dev_iotlb(info);
> >> +}
> >> --
> >> 2.34.1
> >>
> >
> > this change moves the entire cache invalidation flow inside
> > iommu->lock. Though the directly-affected operations are not in
> > critical path the indirect impact applies to all other paths acquiring
> > iommu->lock given it'll be held unnecessarily longer after this
> > change.
> >
> > If the only requirement of holding iommu->lock is to walk the pasid
> > table, probably we can collect a bitmap of DID's in the locked walk
> > and then invalidate each in a loop outside of iommu->lock. Here
> > we only care about DIDs associated with the old context entry at
> > this point. New pasid attach will hit new context entry. Concurrent
> > pasid detach then may just come with duplicated invalidations.
> 
> The iommu->lock is not only for the PASID table walk. The basic
> schematic here is that once a present context table entry is being
> changed, all PASID entries should not be altered until all the related
> caches have been flushed. This is because the configuration of the
> context entry might also impact PASID translation.

Is that what the spec explicitly requires? My impression was that we
need to invalidate any cache which may be associated with the old
context entry, which is not the same as prohibiting PASID entries from
being changed at the same time (as long as those changes won't
cause a stale cache entry to remain active).

e.g. let's say this helper collects valid pasid entries (2, 3, 4) and
associated DIDs (x, y, z) in a locked walk of the pasid table and
then follows the spec sequence to invalidate the pasid cache
for (2, 3, 4) and the iotlb for (x, y, z) outside of the lock.

there could be several concurrent scenarios if iommu->lock is
only guarded on the pasid table walking:

1) pasid entry #1 is torn down before the locked walk. The
teardown path will invalidate its pasid cache and iotlb properly.
2) pasid entry #4 is torn down after the locked walk. Then we may
see duplicated invalidations both in this helper and in the
teardown path.
3) new pasid attach before locked walk may be associated with
either old or new context entry, depending on whether it's passed
the context cache invalidation. In any case it will be captured by
locked walk and then have related cache invalidated in the helper.
4) new pasid attach after locked walk is always safe as related
cache will only be associated with the new context entry.

> 
> Previously, we did not apply this lock because all those cases involved
> changing the context entry from present to non-present, and we were
> certain that all PASID entries were empty. Now, as we are making it a
> generic helper that also serves scenarios where the entry remains
> present after the change, we need this lock to ensure that no PASID
> entry changes occur at the same time.
> 

Even if we want to do conservative locking, I prefer not to apply
it to existing paths which clearly have no need for the extended lock.

Then probably making a specific helper for pri flip makes more sense
than generalizing code to incur unnecessary lock overhead on existing
paths...

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 1/2] iommu/vt-d: Add helper to flush caches for context change
  2024-06-28  7:43       ` Tian, Kevin
@ 2024-06-28 11:24         ` Baolu Lu
  2024-07-01  6:39           ` Tian, Kevin
  0 siblings, 1 reply; 10+ messages in thread
From: Baolu Lu @ 2024-06-28 11:24 UTC (permalink / raw)
  To: Tian, Kevin, Joerg Roedel, Will Deacon, Robin Murphy,
	Jason Gunthorpe
  Cc: baolu.lu, iommu@lists.linux.dev, linux-kernel@vger.kernel.org

On 2024/6/28 15:43, Tian, Kevin wrote:
>> From: Baolu Lu <baolu.lu@linux.intel.com>
>> Sent: Thursday, June 27, 2024 4:22 PM
>>
>> On 2024/6/27 14:08, Tian, Kevin wrote:
>>>> From: Lu Baolu <baolu.lu@linux.intel.com>
>>>> Sent: Thursday, June 27, 2024 10:31 AM
>>>>
>>>> + */
>>>> +void intel_context_flush_present(struct device_domain_info *info,
>>>> +				 struct context_entry *context,
>>>> +				 bool affect_domains)
>>>> +{
>>>> +	struct intel_iommu *iommu = info->iommu;
>>>> +	u16 did = context_domain_id(context);
>>>> +	struct pasid_entry *pte;
>>>> +	int i;
>>>> +
>>>> +	assert_spin_locked(&iommu->lock);
>>>> +
>>>> +	/*
>>>> +	 * Device-selective context-cache invalidation. The Domain-ID field
>>>> +	 * of the Context-cache Invalidate Descriptor is ignored by hardware
>>>> +	 * when operating in scalable mode. Therefore the @did value
>>>> doesn't
>>>> +	 * matter in scalable mode.
>>>> +	 */
>>>> +	iommu->flush.flush_context(iommu, did, PCI_DEVID(info->bus, info-
>>>>> devfn),
>>>> +				   DMA_CCMD_MASK_NOBIT,
>>>> DMA_CCMD_DEVICE_INVL);
>>>> +
>>>> +	/*
>>>> +	 * For legacy mode:
>>>> +	 * - Domain-selective IOTLB invalidation
>>>> +	 * - Global Device-TLB invalidation to all affected functions
>>>> +	 */
>>>> +	if (!sm_supported(iommu)) {
>>>> +		iommu->flush.flush_iotlb(iommu, did, 0, 0,
>>>> DMA_TLB_DSI_FLUSH);
>>>> +		__context_flush_dev_iotlb(info);
>>>> +
>>>> +		return;
>>>> +	}
>>>> +
>>>> +	/*
>>>> +	 * For scalable mode:
>>>> +	 * - Domain-selective PASID-cache invalidation to affected domains
>>>> +	 * - Domain-selective IOTLB invalidation to affected domains
>>>> +	 * - Global Device-TLB invalidation to affected functions
>>>> +	 */
>>>> +	if (affect_domains) {
>>>> +		for (i = 0; i < info->pasid_table->max_pasid; i++) {
>>>> +			pte = intel_pasid_get_entry(info->dev, i);
>>>> +			if (!pte || !pasid_pte_is_present(pte))
>>>> +				continue;
>>>> +
>>>> +			did = pasid_get_domain_id(pte);
>>>> +			qi_flush_pasid_cache(iommu, did,
>>>> QI_PC_ALL_PASIDS, 0);
>>>> +			iommu->flush.flush_iotlb(iommu, did, 0, 0,
>>>> DMA_TLB_DSI_FLUSH);
>>>> +		}
>>>> +	}
>>>> +
>>>> +	__context_flush_dev_iotlb(info);
>>>> +}
>>>> --
>>>> 2.34.1
>>>>
>>>
>>> this change moves the entire cache invalidation flow inside
>>> iommu->lock. Though the directly-affected operations are not in
>>> critical path the indirect impact applies to all other paths acquiring
>>> iommu->lock given it'll be held unnecessarily longer after this
>>> change.
>>>
>>> If the only requirement of holding iommu->lock is to walk the pasid
>>> table, probably we can collect a bitmap of DID's in the locked walk
>>> and then invalidate each in a loop outside of iommu->lock. Here
>>> we only care about DIDs associated with the old context entry at
>>> this point. New pasid attach will hit new context entry. Concurrent
>>> pasid detach then may just come with duplicated invalidations.
>>
>> The iommu->lock is not only for the PASID table walk. The basic
>> schematic here is that once a present context table entry is being
>> changed, all PASID entries should not be altered until all the related
>> caches have been flushed. This is because the configuration of the
>> context entry might also impact PASID translation.
> 
> Is that what the spec explicitly requires? My impression was that we
> need to invalidate any cache which may be associated with the old
> context entry, which is not the same as prohibiting PASID entries from
> being changed at the same time (as long as those changes won't
> cause a stale cache entry to remain active).

No, I didn't find that the VT-d spec explicitly requires this. But
conceptually, the context entry controls features of the whole device,
while pasid entries control the translation on a pasid. The change in
context entry potentially impacts the pasid translation ...

> 
> e.g. let's say this helper collects valid pasid entries (2, 3, 4) and
> associated DIDs (x, y, z) in a locked walk of the pasid table and
> then follows the spec sequence to invalidate the pasid cache
> for (2, 3, 4) and the iotlb for (x, y, z) outside of the lock.
> 
> there could be several concurrent scenarios if iommu->lock is
> only guarded on the pasid table walking:
> 
> 1) pasid entry #1 is torn down before the locked walk. The
> teardown path will invalidate its pasid cache and iotlb properly.
> 2) pasid entry #4 is torn down after the locked walk. Then we may
> see duplicated invalidations both in this helper and in the
> teardown path.
> 3) new pasid attach before locked walk may be associated with
> either old or new context entry, depending on whether it's passed
> the context cache invalidation. In any case it will be captured by
> locked walk and then have related cache invalidated in the helper.
> 4) new pasid attach after locked walk is always safe as related
> cache will only be associated with the new context entry.

... hence, from the driver's point of view, let's start from simplicity
unless there's any real use case.

>>
>> Previously, we did not apply this lock because all those cases involved
>> changing the context entry from present to non-present, and we were
>> certain that all PASID entries were empty. Now, as we are making it a
>> generic helper that also serves scenarios where the entry remains
>> present after the change, we need this lock to ensure that no PASID
>> entry changes occur at the same time.
>>
> 
> Even if we want to do conservative locking, I prefer not to apply
> it to existing paths which clearly have no need for the extended lock.
> 
> Then probably making a specific helper for pri flip makes more sense
> than generalizing code to incur unnecessary lock overhead on existing
> paths...

Yes, agreed with you. We don't need to extend this lock mechanism change
to the existing cases where it's unnecessary.

How about below additional changes?

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index c5d9c2283977..523407f6f6b2 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -1954,8 +1954,8 @@ static void domain_context_clear_one(struct 
device_domain_info *info, u8 bus, u8

  	context_clear_entry(context);
  	__iommu_flush_cache(iommu, context, sizeof(*context));
-	intel_context_flush_present(info, context, true);
  	spin_unlock(&iommu->lock);
+	intel_context_flush_present(info, context, true);
  }

  static int domain_setup_first_level(struct intel_iommu *iommu,
diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c
index 01156135e60f..9a7b5668c723 100644
--- a/drivers/iommu/intel/pasid.c
+++ b/drivers/iommu/intel/pasid.c
@@ -690,8 +690,8 @@ static void device_pasid_table_teardown(struct 
device *dev, u8 bus, u8 devfn)

  	context_clear_entry(context);
  	__iommu_flush_cache(iommu, context, sizeof(*context));
+	spin_unlock(&iommu->lock);
  	intel_context_flush_present(info, context, false);
-	spin_unlock(&iommu->lock);
  }

  static int pci_pasid_table_teardown(struct pci_dev *pdev, u16 alias, 
void *data)
@@ -889,8 +889,6 @@ void intel_context_flush_present(struct 
device_domain_info *info,
  	struct pasid_entry *pte;
  	int i;

-	assert_spin_locked(&iommu->lock);
-
  	/*
  	 * Device-selective context-cache invalidation. The Domain-ID field
  	 * of the Context-cache Invalidate Descriptor is ignored by hardware
@@ -919,6 +917,13 @@ void intel_context_flush_present(struct 
device_domain_info *info,
  	 * - Global Device-TLB invalidation to affected functions
  	 */
  	if (flush_domains) {
+		/*
+		 * If the IOMMU is running in scalable mode and there might
+		 * be potential PASID translations, the caller should hold
+		 * the lock to ensure that context changes and cache flushes
+		 * are atomic.
+		 */
+		assert_spin_locked(&iommu->lock);
  		for (i = 0; i < info->pasid_table->max_pasid; i++) {
  			pte = intel_pasid_get_entry(info->dev, i);
  			if (!pte || !pasid_pte_is_present(pte))

Best regards,
baolu

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* RE: [PATCH v2 1/2] iommu/vt-d: Add helper to flush caches for context change
  2024-06-28 11:24         ` Baolu Lu
@ 2024-07-01  6:39           ` Tian, Kevin
  0 siblings, 0 replies; 10+ messages in thread
From: Tian, Kevin @ 2024-07-01  6:39 UTC (permalink / raw)
  To: Baolu Lu, Joerg Roedel, Will Deacon, Robin Murphy,
	Jason Gunthorpe
  Cc: iommu@lists.linux.dev, linux-kernel@vger.kernel.org

> From: Baolu Lu <baolu.lu@linux.intel.com>
> Sent: Friday, June 28, 2024 7:25 PM
> 
> On 2024/6/28 15:43, Tian, Kevin wrote:
> >> From: Baolu Lu <baolu.lu@linux.intel.com>
> >> Sent: Thursday, June 27, 2024 4:22 PM
> >>
> >> On 2024/6/27 14:08, Tian, Kevin wrote:
> >>>> From: Lu Baolu <baolu.lu@linux.intel.com>
> >>>> Sent: Thursday, June 27, 2024 10:31 AM
> >>>>
> >>>> + */
> >>>> +void intel_context_flush_present(struct device_domain_info *info,
> >>>> +				 struct context_entry *context,
> >>>> +				 bool affect_domains)
> >>>> +{
> >>>> +	struct intel_iommu *iommu = info->iommu;
> >>>> +	u16 did = context_domain_id(context);
> >>>> +	struct pasid_entry *pte;
> >>>> +	int i;
> >>>> +
> >>>> +	assert_spin_locked(&iommu->lock);
> >>>> +
> >>>> +	/*
> >>>> +	 * Device-selective context-cache invalidation. The Domain-ID field
> >>>> +	 * of the Context-cache Invalidate Descriptor is ignored by hardware
> >>>> +	 * when operating in scalable mode. Therefore the @did value
> >>>> doesn't
> >>>> +	 * matter in scalable mode.
> >>>> +	 */
> >>>> +	iommu->flush.flush_context(iommu, did, PCI_DEVID(info->bus, info-
> >>>>> devfn),
> >>>> +				   DMA_CCMD_MASK_NOBIT,
> >>>> DMA_CCMD_DEVICE_INVL);
> >>>> +
> >>>> +	/*
> >>>> +	 * For legacy mode:
> >>>> +	 * - Domain-selective IOTLB invalidation
> >>>> +	 * - Global Device-TLB invalidation to all affected functions
> >>>> +	 */
> >>>> +	if (!sm_supported(iommu)) {
> >>>> +		iommu->flush.flush_iotlb(iommu, did, 0, 0,
> >>>> DMA_TLB_DSI_FLUSH);
> >>>> +		__context_flush_dev_iotlb(info);
> >>>> +
> >>>> +		return;
> >>>> +	}
> >>>> +
> >>>> +	/*
> >>>> +	 * For scalable mode:
> >>>> +	 * - Domain-selective PASID-cache invalidation to affected domains
> >>>> +	 * - Domain-selective IOTLB invalidation to affected domains
> >>>> +	 * - Global Device-TLB invalidation to affected functions
> >>>> +	 */
> >>>> +	if (affect_domains) {
> >>>> +		for (i = 0; i < info->pasid_table->max_pasid; i++) {
> >>>> +			pte = intel_pasid_get_entry(info->dev, i);
> >>>> +			if (!pte || !pasid_pte_is_present(pte))
> >>>> +				continue;
> >>>> +
> >>>> +			did = pasid_get_domain_id(pte);
> >>>> +			qi_flush_pasid_cache(iommu, did,
> >>>> QI_PC_ALL_PASIDS, 0);
> >>>> +			iommu->flush.flush_iotlb(iommu, did, 0, 0,
> >>>> DMA_TLB_DSI_FLUSH);
> >>>> +		}
> >>>> +	}
> >>>> +
> >>>> +	__context_flush_dev_iotlb(info);
> >>>> +}
> >>>> --
> >>>> 2.34.1
> >>>>
> >>>
> >>> this change moves the entire cache invalidation flow inside
> >>> iommu->lock. Though the directly-affected operations are not in
> >>> critical path the indirect impact applies to all other paths acquiring
> >>> iommu->lock given it'll be held unnecessarily longer after this
> >>> change.
> >>>
> >>> If the only requirement of holding iommu->lock is to walk the pasid
> >>> table, probably we can collect a bitmap of DID's in the locked walk
> >>> and then invalidate each in a loop outside of iommu->lock. Here
> >>> we only care about DIDs associated with the old context entry at
> >>> this point. New pasid attach will hit new context entry. Concurrent
> >>> pasid detach then may just come with duplicated invalidations.
> >>
> >> The iommu->lock is not only for the PASID table walk. The basic
> >> schematic here is that once a present context table entry is being
> >> changed, all PASID entries should not be altered until all the related
> >> caches have been flushed. This is because the configuration of the
> >> context entry might also impact PASID translation.
> >
> > Is it what the spec explicitly requires? My impression was that we
> > need to invalidate any cache which may be associated with the old
> > context entry, which is not equal to prohibiting PASID entries from
> > being changed at the same time (as long as those changes won't
> > cause a stale cache entry being active).
> 
> No, I didn't find that the VT-d spec explicitly requires this. But
> conceptually, the context entry controls features of the whole device,
> while pasid entries control the translation on a pasid. The change in
> context entry potentially impacts the pasid translation ...
> 
> >
> > e.g. let's say this helper collects valid pasid entries (2, 3, 4) and
> > associated DIDs (x, y, z) in a locked walk of the pasid table and
> > then follows the spec sequence to invalidate the pasid cache
> > for (2, 3, 4) and the iotlb for (x, y, z) outside of the lock.
> >
> > there could be several concurrent scenarios if iommu->lock is
> > only guarded on the pasid table walking:
> >
> > 1) pasid entry #1 is torn down before the locked walk. The
> > teardown path will invalidate its pasid cache and iotlb properly.
> > 2) pasid entry #4 is torn down after the locked walk. Then we may
> > see duplicated invalidations both in this helper and in the
> > teardown path.
> > 3) new pasid attach before locked walk may be associated with
> > either old or new context entry, depending on whether it's passed
> > the context cache invalidation. In any case it will be captured by
> > locked walk and then have related cache invalidated in the helper.
> > 4) new pasid attach after locked walk is always safe as related
> > cache will only be associated with the new context entry.
> 
> ... hence, from the driver's point of view, let's start from simplicity
> unless there's any real use case.
> 
> >>
> >> Previously, we did not apply this lock because all those cases involved
> >> changing the context entry from present to non-present, and we were
> >> certain that all PASID entries were empty. Now, as we are making it a
> >> generic helper that also serves scenarios where the entry remains
> >> present after the change, we need this lock to ensure that no PASID
> >> entry changes occur at the same time.
> >>
> >
> > Even if we want to do a conservative locking I prefer to not applying
> > it to existing paths which clearly have no need for extended lock.
> >
> > Then probably making a specific helper for pri flip makes more sense
> > than generalizing code to incur unnecessary lock overhead on existing
> > paths...
> 
> Yes, agreed with you. We don't need to extend this lock mechanism change
> to the existing cases where it's unnecessary.
> 
> How about below additional changes?

it's better

> 
> diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
> index c5d9c2283977..523407f6f6b2 100644
> --- a/drivers/iommu/intel/iommu.c
> +++ b/drivers/iommu/intel/iommu.c
> @@ -1954,8 +1954,8 @@ static void domain_context_clear_one(struct
> device_domain_info *info, u8 bus, u8
> 
>   	context_clear_entry(context);
>   	__iommu_flush_cache(iommu, context, sizeof(*context));
> -	intel_context_flush_present(info, context, true);
>   	spin_unlock(&iommu->lock);
> +	intel_context_flush_present(info, context, true);
>   }
> 
>   static int domain_setup_first_level(struct intel_iommu *iommu,
> diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c
> index 01156135e60f..9a7b5668c723 100644
> --- a/drivers/iommu/intel/pasid.c
> +++ b/drivers/iommu/intel/pasid.c
> @@ -690,8 +690,8 @@ static void device_pasid_table_teardown(struct
> device *dev, u8 bus, u8 devfn)
> 
>   	context_clear_entry(context);
>   	__iommu_flush_cache(iommu, context, sizeof(*context));
> +	spin_unlock(&iommu->lock);
>   	intel_context_flush_present(info, context, false);
> -	spin_unlock(&iommu->lock);
>   }
> 
>   static int pci_pasid_table_teardown(struct pci_dev *pdev, u16 alias,
> void *data)
> @@ -889,8 +889,6 @@ void intel_context_flush_present(struct
> device_domain_info *info,
>   	struct pasid_entry *pte;
>   	int i;
> 
> -	assert_spin_locked(&iommu->lock);
> -
>   	/*
>   	 * Device-selective context-cache invalidation. The Domain-ID field
>   	 * of the Context-cache Invalidate Descriptor is ignored by hardware
> @@ -919,6 +917,13 @@ void intel_context_flush_present(struct
> device_domain_info *info,
>   	 * - Global Device-TLB invalidation to affected functions
>   	 */
>   	if (flush_domains) {
> +		/*
> +		 * If the IOMMU is running in scalable mode and there might
> +		 * be potential PASID translations, the caller should hold
> +		 * the lock to ensure that context changes and cache flushes
> +		 * are atomic.
> +		 */
> +		assert_spin_locked(&iommu->lock);
>   		for (i = 0; i < info->pasid_table->max_pasid; i++) {
>   			pte = intel_pasid_get_entry(info->dev, i);
>   			if (!pte || !pasid_pte_is_present(pte))
> 
> Best regards,
> baolu

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 1/2] iommu/vt-d: Add helper to flush caches for context change
  2024-06-27  2:31 ` [PATCH v2 1/2] iommu/vt-d: Add helper to flush caches for context change Lu Baolu
  2024-06-27  6:08   ` Tian, Kevin
@ 2024-07-02  0:23   ` Jacob Pan
  2024-07-02  2:54     ` Baolu Lu
  1 sibling, 1 reply; 10+ messages in thread
From: Jacob Pan @ 2024-07-02  0:23 UTC (permalink / raw)
  To: Lu Baolu
  Cc: Joerg Roedel, Will Deacon, Robin Murphy, Jason Gunthorpe,
	Kevin Tian, iommu, linux-kernel, jacob.jun.pan


On Thu, 27 Jun 2024 10:31:20 +0800, Lu Baolu <baolu.lu@linux.intel.com>
wrote:

> +/*
> + * Cache invalidations after change in a context table entry that was
> present
> + * according to the Spec 6.5.3.3 (Guidance to Software for
> Invalidations). If
> + * IOMMU is in scalable mode and all PASID table entries of the device
> were
> + * non-present, set affect_domains to true. Otherwise, false.
> + */
The spec says:
"Domain-selective PASID-cache invalidation to affected domains (can be
skipped if all PASID entries were not-present and CM=0)"

So we should skip PASID cache invalidation if affect_domain is true
according to this comment.

> +void intel_context_flush_present(struct device_domain_info *info,
> +				 struct context_entry *context,
> +				 bool affect_domains)
> +{
> +	struct intel_iommu *iommu = info->iommu;
> +	u16 did = context_domain_id(context);
> +	struct pasid_entry *pte;
> +	int i;
> +
> +	assert_spin_locked(&iommu->lock);
> +
> +	/*
> +	 * Device-selective context-cache invalidation. The Domain-ID
> field
> +	 * of the Context-cache Invalidate Descriptor is ignored by
> hardware
> +	 * when operating in scalable mode. Therefore the @did value
> doesn't
> +	 * matter in scalable mode.
> +	 */
> +	iommu->flush.flush_context(iommu, did, PCI_DEVID(info->bus,
> info->devfn),
> +				   DMA_CCMD_MASK_NOBIT,
> DMA_CCMD_DEVICE_INVL); +
> +	/*
> +	 * For legacy mode:
> +	 * - Domain-selective IOTLB invalidation
> +	 * - Global Device-TLB invalidation to all affected functions
> +	 */
> +	if (!sm_supported(iommu)) {
> +		iommu->flush.flush_iotlb(iommu, did, 0, 0,
> DMA_TLB_DSI_FLUSH);
> +		__context_flush_dev_iotlb(info);
> +
> +		return;
> +	}
> +
> +	/*
> +	 * For scalable mode:
> +	 * - Domain-selective PASID-cache invalidation to affected
> domains
> +	 * - Domain-selective IOTLB invalidation to affected domains
> +	 * - Global Device-TLB invalidation to affected functions
> +	 */
> +	if (affect_domains) {
> +		for (i = 0; i < info->pasid_table->max_pasid; i++) {
> +			pte = intel_pasid_get_entry(info->dev, i);
> +			if (!pte || !pasid_pte_is_present(pte))
> +				continue;
> +
> +			did = pasid_get_domain_id(pte);
> +			qi_flush_pasid_cache(iommu, did,
> QI_PC_ALL_PASIDS, 0);
This is conflicting with the comments above where PASID cache flush can be
skipped if affect_domain==true, no?

> +			iommu->flush.flush_iotlb(iommu, did, 0, 0,
> DMA_TLB_DSI_FLUSH);
> +		}
> +	}

Thanks,

Jacob

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 1/2] iommu/vt-d: Add helper to flush caches for context change
  2024-07-02  0:23   ` Jacob Pan
@ 2024-07-02  2:54     ` Baolu Lu
  0 siblings, 0 replies; 10+ messages in thread
From: Baolu Lu @ 2024-07-02  2:54 UTC (permalink / raw)
  To: Jacob Pan
  Cc: baolu.lu, Joerg Roedel, Will Deacon, Robin Murphy,
	Jason Gunthorpe, Kevin Tian, iommu, linux-kernel

On 7/2/24 8:23 AM, Jacob Pan wrote:
> On Thu, 27 Jun 2024 10:31:20 +0800, Lu Baolu<baolu.lu@linux.intel.com>
> wrote:
> 
>> +/*
>> + * Cache invalidations after change in a context table entry that was
>> present
>> + * according to the Spec 6.5.3.3 (Guidance to Software for
>> Invalidations). If
>> + * IOMMU is in scalable mode and all PASID table entries of the device
>> were
>> + * non-present, set affect_domains to true. Otherwise, false.
>> + */
> The spec says:
> "Domain-selective PASID-cache invalidation to affected domains (can be
> skipped if all PASID entries were not-present and CM=0)"
> 
> So we should skip PASID cache invalidation if affect_domain is true
> according to this comment.
> 
>> +void intel_context_flush_present(struct device_domain_info *info,
>> +				 struct context_entry *context,
>> +				 bool affect_domains)
>> +{
>> +	struct intel_iommu *iommu = info->iommu;
>> +	u16 did = context_domain_id(context);
>> +	struct pasid_entry *pte;
>> +	int i;
>> +
>> +	assert_spin_locked(&iommu->lock);
>> +
>> +	/*
>> +	 * Device-selective context-cache invalidation. The Domain-ID
>> field
>> +	 * of the Context-cache Invalidate Descriptor is ignored by
>> hardware
>> +	 * when operating in scalable mode. Therefore the @did value
>> doesn't
>> +	 * matter in scalable mode.
>> +	 */
>> +	iommu->flush.flush_context(iommu, did, PCI_DEVID(info->bus,
>> info->devfn),
>> +				   DMA_CCMD_MASK_NOBIT,
>> DMA_CCMD_DEVICE_INVL); +
>> +	/*
>> +	 * For legacy mode:
>> +	 * - Domain-selective IOTLB invalidation
>> +	 * - Global Device-TLB invalidation to all affected functions
>> +	 */
>> +	if (!sm_supported(iommu)) {
>> +		iommu->flush.flush_iotlb(iommu, did, 0, 0,
>> DMA_TLB_DSI_FLUSH);
>> +		__context_flush_dev_iotlb(info);
>> +
>> +		return;
>> +	}
>> +
>> +	/*
>> +	 * For scalable mode:
>> +	 * - Domain-selective PASID-cache invalidation to affected
>> domains
>> +	 * - Domain-selective IOTLB invalidation to affected domains
>> +	 * - Global Device-TLB invalidation to affected functions
>> +	 */
>> +	if (affect_domains) {
>> +		for (i = 0; i < info->pasid_table->max_pasid; i++) {
>> +			pte = intel_pasid_get_entry(info->dev, i);
>> +			if (!pte || !pasid_pte_is_present(pte))
>> +				continue;
>> +
>> +			did = pasid_get_domain_id(pte);
>> +			qi_flush_pasid_cache(iommu, did,
>> QI_PC_ALL_PASIDS, 0);
> This is conflicting with the comments above where PASID cache flush can be
> skipped if affect_domain==true, no?

Yes. I have fixed it in the v3.

Best regards,
baolu

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2024-07-02  2:56 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-06-27  2:31 [PATCH v2 0/2] iommu/vt-d: Refactor PRI enable/disable steps Lu Baolu
2024-06-27  2:31 ` [PATCH v2 1/2] iommu/vt-d: Add helper to flush caches for context change Lu Baolu
2024-06-27  6:08   ` Tian, Kevin
2024-06-27  8:21     ` Baolu Lu
2024-06-28  7:43       ` Tian, Kevin
2024-06-28 11:24         ` Baolu Lu
2024-07-01  6:39           ` Tian, Kevin
2024-07-02  0:23   ` Jacob Pan
2024-07-02  2:54     ` Baolu Lu
2024-06-27  2:31 ` [PATCH v2 2/2] iommu/vt-d: Refactor PCI PRI enabling/disabling callbacks Lu Baolu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox