From: Jason Gunthorpe <jgg@nvidia.com>
To: Lu Baolu <baolu.lu@linux.intel.com>,
	David Woodhouse <dwmw2@infradead.org>,
	iommu@lists.linux.dev, Joerg Roedel <joro@8bytes.org>,
	Robin Murphy <robin.murphy@arm.com>,
	Will Deacon <will@kernel.org>
Cc: Kevin Tian <kevin.tian@intel.com>,
	patches@lists.linux.dev, Tina Zhang <tina.zhang@intel.com>,
	Wei Wang <wei.w.wang@intel.com>
Subject: [PATCH v2 03/10] iommupt: Use the incoherent start/stop functions for PT_FEAT_DMA_INCOHERENT
Date: Tue, 26 Aug 2025 14:26:26 -0300
Message-ID: <3-v2-44d4d9e727e7+18ad8-iommu_pt_vtd_jgg@nvidia.com>
In-Reply-To: <0-v2-44d4d9e727e7+18ad8-iommu_pt_vtd_jgg@nvidia.com>

This is the first step toward supporting an incoherent walker: start and
stop incoherence around the allocation and freeing of the page table
memory.

The iommu_pages API maps these operations to dma_map_single() and
dma_unmap_single(), or to arch cache flushing calls.
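
As a rough conceptual sketch only (not the iommu_pages implementation;
"dev", "table_mem" and "table_size" below are illustrative names), the
start/stop pair on a DMA-API backed system is essentially a streaming
mapping that flushes the CPU's writes before the walker may read the
memory, and the matching unmap before the memory is freed:

	#include <linux/dma-mapping.h>

	/* "Start" incoherence: flush the new table page for the walker. */
	dma_addr_t dma = dma_map_single(dev, table_mem, table_size,
					DMA_TO_DEVICE);
	if (dma_mapping_error(dev, dma))
		return -ENOMEM;

	/* ... table is in use; later CPU writes need their own flushes ... */

	/* "Stop" incoherence before freeing the table page. */
	dma_unmap_single(dev, dma, table_size, DMA_TO_DEVICE);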

Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/iommu/generic_pt/iommu_pt.h    | 81 ++++++++++++++++++++------
 drivers/iommu/generic_pt/kunit_iommu.h |  1 +
 drivers/iommu/generic_pt/pt_defs.h     |  3 +
 include/linux/generic_pt/common.h      |  6 ++
 include/linux/generic_pt/iommu.h       |  7 +++
 5 files changed, 81 insertions(+), 17 deletions(-)

diff --git a/drivers/iommu/generic_pt/iommu_pt.h b/drivers/iommu/generic_pt/iommu_pt.h
index 2a6c1bc2bc9be7..4789fe5361cb3a 100644
--- a/drivers/iommu/generic_pt/iommu_pt.h
+++ b/drivers/iommu/generic_pt/iommu_pt.h
@@ -24,6 +24,10 @@ static void gather_range_pages(struct iommu_iotlb_gather *iotlb_gather,
 {
 	struct pt_common *common = common_from_iommu(iommu_table);
 
+	if (pt_feature(common, PT_FEAT_DMA_INCOHERENT))
+		iommu_pages_stop_incoherent_list(free_list,
+						 iommu_table->iommu_device);
+
 	if (pt_feature(common, PT_FEAT_FLUSH_RANGE_NO_GAPS) &&
 	    iommu_iotlb_gather_is_disjoint(iotlb_gather, iova, len)) {
 		iommu_iotlb_sync(&iommu_table->domain, iotlb_gather);
@@ -327,35 +331,55 @@ static int __collect_tables(struct pt_range *range, void *arg,
 	return 0;
 }
 
-static inline struct pt_table_p *table_alloc_top(struct pt_common *common,
-						 uintptr_t top_of_table,
-						 gfp_t gfp)
+enum alloc_mode { ALLOC_NORMAL, ALLOC_KEEP_INCOHERENT };
+
+/* Allocate a table; the empty table will be ready to be installed. */
+static inline struct pt_table_p *_table_alloc(struct pt_common *common,
+					      size_t lg2sz, gfp_t gfp,
+					      enum alloc_mode mode)
 {
 	struct pt_iommu *iommu_table = iommu_from_common(common);
+	struct pt_table_p *table_mem;
 
+	table_mem = iommu_alloc_pages_node_sz(iommu_table->nid, gfp,
+					      log2_to_int(lg2sz));
+	if (pt_feature(common, PT_FEAT_DMA_INCOHERENT) &&
+	    mode == ALLOC_NORMAL) {
+		int ret = iommu_pages_start_incoherent(
+			table_mem, iommu_table->iommu_device);
+		if (ret) {
+			iommu_free_pages(table_mem);
+			return ERR_PTR(ret);
+		}
+	}
+	return table_mem;
+}
+
+static inline struct pt_table_p *table_alloc_top(struct pt_common *common,
+						 uintptr_t top_of_table,
+						 gfp_t gfp,
+						 enum alloc_mode mode)
+{
 	/*
 	 * Top doesn't need the free list or otherwise, so it technically
 	 * doesn't need to use iommu pages. Use the API anyhow as the top is
 	 * usually not smaller than PAGE_SIZE to keep things simple.
 	 */
-	return iommu_alloc_pages_node_sz(
-		iommu_table->nid, gfp,
-		log2_to_int(pt_top_memsize_lg2(common, top_of_table)));
+	return _table_alloc(common, pt_top_memsize_lg2(common, top_of_table),
+			    gfp, mode);
 }
 
 /* Allocate an interior table */
 static inline struct pt_table_p *table_alloc(const struct pt_state *parent_pts,
-					     gfp_t gfp)
+					     gfp_t gfp, enum alloc_mode mode)
 {
-	struct pt_iommu *iommu_table =
-		iommu_from_common(parent_pts->range->common);
 	struct pt_state child_pts =
 		pt_init(parent_pts->range, parent_pts->level - 1, NULL);
 
-	return iommu_alloc_pages_node_sz(
-		iommu_table->nid, gfp,
-		log2_to_int(pt_num_items_lg2(&child_pts) +
-			    ilog2(PT_ITEM_WORD_SIZE)));
+	return _table_alloc(parent_pts->range->common,
+			    pt_num_items_lg2(&child_pts) +
+				    ilog2(PT_ITEM_WORD_SIZE),
+			    gfp, mode);
 }
 
 static inline int pt_iommu_new_table(struct pt_state *pts,
@@ -368,7 +392,7 @@ static inline int pt_iommu_new_table(struct pt_state *pts,
 	if (unlikely(!pt_can_have_table(pts)))
 		return -ENXIO;
 
-	table_mem = table_alloc(pts, attrs->gfp);
+	table_mem = table_alloc(pts, attrs->gfp, ALLOC_NORMAL);
 	if (IS_ERR(table_mem))
 		return PTR_ERR(table_mem);
 
@@ -606,8 +630,9 @@ static int increase_top(struct pt_iommu *iommu_table, struct pt_range *range,
 		}
 
 		new_level = pts.level;
-		table_mem = table_alloc_top(
-			common, _pt_top_set(NULL, pts.level), map->attrs.gfp);
+		table_mem =
+			table_alloc_top(common, _pt_top_set(NULL, pts.level),
+					map->attrs.gfp, ALLOC_KEEP_INCOHERENT);
 		if (IS_ERR(table_mem))
 			return PTR_ERR(table_mem);
 		iommu_pages_list_add(&free_list, table_mem);
@@ -624,6 +649,16 @@ static int increase_top(struct pt_iommu *iommu_table, struct pt_range *range,
 		new_top_of_table = _pt_top_set(pts.table, pts.level);
 	}
 
+	/*
+	 * Avoid double flushing; flush once after all pt_install_table() calls
+	 */
+	if (pt_feature(common, PT_FEAT_DMA_INCOHERENT)) {
+		ret = iommu_pages_start_incoherent_list(
+			&free_list, iommu_table->iommu_device);
+		if (ret)
+			goto err_free;
+	}
+
 	/*
 	 * top_of_table is write locked by the spinlock, but readers can use
 	 * READ_ONCE() to get the value. Since we encode both the level and the
@@ -656,6 +691,9 @@ static int increase_top(struct pt_iommu *iommu_table, struct pt_range *range,
 	return 0;
 
 err_free:
+	if (pt_feature(common, PT_FEAT_DMA_INCOHERENT))
+		iommu_pages_stop_incoherent_list(&free_list,
+						 iommu_table->iommu_device);
 	iommu_put_pages_list(&free_list);
 	return ret;
 }
@@ -971,6 +1009,9 @@ static void NS(deinit)(struct pt_iommu *iommu_table)
 	 * The driver has to already have fenced the HW access to the page table
 	 * and invalidated any caching referring to this memory.
 	 */
+	if (pt_feature(common, PT_FEAT_DMA_INCOHERENT))
+		iommu_pages_stop_incoherent_list(&collect.free_list,
+						 iommu_table->iommu_device);
 	iommu_put_pages_list(&collect.free_list);
 }
 
@@ -1063,6 +1104,7 @@ static void pt_iommu_zero(struct pt_iommu_table *fmt_table)
 	memset_after(fmt_table, 0, iommu.domain);
 
 	/* The caller can initialize some of these values */
+	iommu_table->iommu_device = cfg.iommu_device;
 	iommu_table->hw_flush_ops = cfg.hw_flush_ops;
 	iommu_table->nid = cfg.nid;
 }
@@ -1107,11 +1149,16 @@ int pt_iommu_init(struct pt_iommu_table *fmt_table,
 	     pt_feature(common, PT_FEAT_DYNAMIC_TOP)))
 		return -EINVAL;
 
+	if (pt_feature(common, PT_FEAT_DMA_INCOHERENT) &&
+	    WARN_ON(!iommu_table->iommu_device))
+		return -EINVAL;
+
 	ret = pt_iommu_init_domain(iommu_table, &iommu_table->domain);
 	if (ret)
 		return ret;
 
-	table_mem = table_alloc_top(common, common->top_of_table, gfp);
+	table_mem = table_alloc_top(common, common->top_of_table, gfp,
+				    ALLOC_NORMAL);
 	if (IS_ERR(table_mem))
 		return PTR_ERR(table_mem);
 	pt_top_set(common, table_mem, pt_top_get_level(common));
diff --git a/drivers/iommu/generic_pt/kunit_iommu.h b/drivers/iommu/generic_pt/kunit_iommu.h
index cca4e72efcaa04..45ecfa8ca5fa6c 100644
--- a/drivers/iommu/generic_pt/kunit_iommu.h
+++ b/drivers/iommu/generic_pt/kunit_iommu.h
@@ -139,6 +139,7 @@ static int pt_kunit_priv_init(struct kunit *test, struct kunit_iommu_priv *priv)
 
 	priv->fmt_table.iommu.nid = NUMA_NO_NODE;
 	priv->fmt_table.iommu.hw_flush_ops = &pt_kunit_flush_ops;
+	priv->fmt_table.iommu.iommu_device = priv->dummy_dev;
 	priv->domain.ops = &kunit_pt_ops;
 	ret = pt_iommu_init(&priv->fmt_table, &priv->cfg, GFP_KERNEL);
 	if (ret) {
diff --git a/drivers/iommu/generic_pt/pt_defs.h b/drivers/iommu/generic_pt/pt_defs.h
index 3673566708495d..869965883e6e51 100644
--- a/drivers/iommu/generic_pt/pt_defs.h
+++ b/drivers/iommu/generic_pt/pt_defs.h
@@ -55,6 +55,9 @@ enum {
 	PT_ORIG_SUPPORTED_FEATURES = PT_SUPPORTED_FEATURES,
 	PT_DEBUG_SUPPORTED_FEATURES =
 		UINT_MAX &
+		~((PT_ORIG_SUPPORTED_FEATURES & BIT(PT_FEAT_DMA_INCOHERENT) ?
+			   0 :
+			   BIT(PT_FEAT_DMA_INCOHERENT))) &
 		~((PT_ORIG_SUPPORTED_FEATURES & BIT(PT_FEAT_SIGN_EXTEND)) ?
 			  BIT(PT_FEAT_DYNAMIC_TOP) | BIT(PT_FEAT_FULL_VA) :
 			  BIT(PT_FEAT_SIGN_EXTEND)),
diff --git a/include/linux/generic_pt/common.h b/include/linux/generic_pt/common.h
index 7729008f17c799..1b97bbfaa4f90a 100644
--- a/include/linux/generic_pt/common.h
+++ b/include/linux/generic_pt/common.h
@@ -84,6 +84,12 @@ enum {
  * position.
  */
 enum pt_features {
+	/**
+	 * @PT_FEAT_DMA_INCOHERENT: Cache flush page table memory before
+	 * assuming the HW can read it. Otherwise an SMP release is sufficient
+	 * for the HW to read it.
+	 */
+	PT_FEAT_DMA_INCOHERENT,
 	/**
 	 * @PT_FEAT_FULL_VA: The table can span the full VA range from 0 to
 	 * PT_VADDR_MAX.
diff --git a/include/linux/generic_pt/iommu.h b/include/linux/generic_pt/iommu.h
index 960281046e62b3..5dc3a960a8989e 100644
--- a/include/linux/generic_pt/iommu.h
+++ b/include/linux/generic_pt/iommu.h
@@ -57,6 +57,13 @@ struct pt_iommu {
 	 * table walkers.
 	 */
 	int nid;
+
+	/**
+	 * @iommu_device: Device pointer used for any DMA cache flushing when
+	 * PT_FEAT_DMA_INCOHERENT is set. This is the iommu device that created
+	 * the page table, which must have DMA ops that perform cache flushing.
+	 */
+	struct device *iommu_device;
 };
 
 /**
-- 
2.43.0


Thread overview: 11+ messages
2025-08-26 17:26 [PATCH v2 00/10] Convert Intel VT-D to use the generic iommu page table Jason Gunthorpe
2025-08-26 17:26 ` [PATCH v2 01/10] iommu/pages: Add support for a incoherent IOMMU page walker Jason Gunthorpe
2025-08-26 17:26 ` [PATCH v2 02/10] iommupt: Add basic support for SW bits in the page table Jason Gunthorpe
2025-08-26 17:26 ` Jason Gunthorpe [this message]
2025-08-26 17:26 ` [PATCH v2 04/10] iommupt: Flush the CPU cache after any writes to " Jason Gunthorpe
2025-08-26 17:26 ` [PATCH v2 05/10] iommupt: Add the Intel VT-D second stage page table format Jason Gunthorpe
2025-08-26 17:26 ` [PATCH v2 06/10] iommupt/x86: Set the dirty bit only for writable PTEs Jason Gunthorpe
2025-08-26 17:26 ` [PATCH v2 07/10] iommupt/x86: Support SW bits and permit PT_FEAT_DMA_INCOHERENT Jason Gunthorpe
2025-08-26 17:26 ` [PATCH v2 08/10] iommu/vt-d: Use the generic iommu page table Jason Gunthorpe
2025-08-26 17:26 ` [PATCH v2 09/10] iommu/vt-d: Follow PT_FEAT_DMA_INCOHERENT into the PASID entry Jason Gunthorpe
2025-08-26 17:26 ` [PATCH v2 10/10] iommupt: Add a kunit test for the SW bits Jason Gunthorpe
