public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Samiullah Khawaja <skhawaja@google.com>
To: David Woodhouse <dwmw2@infradead.org>,
	Lu Baolu <baolu.lu@linux.intel.com>,
	 Joerg Roedel <joro@8bytes.org>, Will Deacon <will@kernel.org>,
	Jason Gunthorpe <jgg@ziepe.ca>
Cc: Samiullah Khawaja <skhawaja@google.com>,
	Robin Murphy <robin.murphy@arm.com>,
	 Kevin Tian <kevin.tian@intel.com>,
	Alex Williamson <alex@shazbot.org>, Shuah Khan <shuah@kernel.org>,
	 iommu@lists.linux.dev, linux-kernel@vger.kernel.org,
	kvm@vger.kernel.org,  Saeed Mahameed <saeedm@nvidia.com>,
	Adithya Jayachandran <ajayachandra@nvidia.com>,
	 Parav Pandit <parav@nvidia.com>,
	Leon Romanovsky <leonro@nvidia.com>, William Tu <witu@nvidia.com>,
	 Pratyush Yadav <pratyush@kernel.org>,
	Pasha Tatashin <pasha.tatashin@soleen.com>,
	 David Matlack <dmatlack@google.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	 Chris Li <chrisl@kernel.org>,
	Pranjal Shrivastava <praan@google.com>,
	Vipin Sharma <vipinsh@google.com>,
	 YiFei Zhu <zhuyifei@google.com>
Subject: [PATCH v2 06/16] iommupt: Implement preserve/unpreserve/restore callbacks
Date: Mon, 27 Apr 2026 17:56:23 +0000	[thread overview]
Message-ID: <20260427175633.1978233-7-skhawaja@google.com> (raw)
In-Reply-To: <20260427175633.1978233-1-skhawaja@google.com>

Implement the iommu domain ops for preservation, unpreservation and
restoration of iommu domains for liveupdate. Use the existing page
walker to preserve the ioptdesc of the top_table and the lower tables.

Preserve top_level, VASZ and PT_FEAT_SIGN_EXTEND so the domain can be
restored in the next kernel. On restore the domain has only the preserved
features enabled and all the other features are zeroed. This is ok since
the restored domain is made immutable and can only be freed. A kunit test
is added to verify that the IOMMU domain free can be done with trimmed
features.

Signed-off-by: Samiullah Khawaja <skhawaja@google.com>
---
 drivers/iommu/generic_pt/iommu_pt.h       | 131 ++++++++++++++++++++++
 drivers/iommu/generic_pt/kunit_iommu_pt.h |  28 +++++
 include/linux/generic_pt/iommu.h          |  19 +++-
 3 files changed, 177 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/generic_pt/iommu_pt.h b/drivers/iommu/generic_pt/iommu_pt.h
index 19b6daf88f2a..7bca827e3a55 100644
--- a/drivers/iommu/generic_pt/iommu_pt.h
+++ b/drivers/iommu/generic_pt/iommu_pt.h
@@ -961,6 +961,133 @@ static int NS(map_range)(struct pt_iommu *iommu_table, dma_addr_t iova,
 	return ret;
 }
 
+#ifdef CONFIG_IOMMU_LIVEUPDATE
+/**
+ * unpreserve() - Unpreserve the page tables of a domain.
+ * @domain: Domain whose top table and lower tables are unpreserved
+ */
+void DOMAIN_NS(unpreserve)(struct iommu_domain *domain, struct iommu_domain_ser *ser)
+{
+	struct pt_iommu *iommu_table =
+		container_of(domain, struct pt_iommu, domain);
+	struct pt_common *common = common_from_iommu(iommu_table);
+	struct pt_range range = pt_all_range(common);
+	struct pt_iommu_collect_args collect = {
+		.free_list = IOMMU_PAGES_LIST_INIT(collect.free_list),
+	};
+
+	iommu_pages_list_add(&collect.free_list, range.top_table);
+	pt_walk_range(&range, __collect_tables, &collect);
+
+	iommu_unpreserve_pages(&collect.free_list);
+}
+EXPORT_SYMBOL_NS_GPL(DOMAIN_NS(unpreserve), "GENERIC_PT_IOMMU");
+
+/**
+ * preserve() - Preserve page tables and record restore state of a domain.
+ * @domain: Domain to preserve
+ *
+ * Returns: -ERRNO on failure, 0 on success.
+ */
+int DOMAIN_NS(preserve)(struct iommu_domain *domain, struct iommu_domain_ser *ser)
+{
+	struct pt_iommu *iommu_table =
+		container_of(domain, struct pt_iommu, domain);
+	struct pt_common *common = common_from_iommu(iommu_table);
+	struct pt_range range = pt_all_range(common);
+	struct pt_iommu_collect_args collect = {
+		.free_list = IOMMU_PAGES_LIST_INIT(collect.free_list),
+	};
+	int ret;
+
+	iommu_pages_list_add(&collect.free_list, range.top_table);
+	pt_walk_range(&range, __collect_tables, &collect);
+
+	ret = iommu_preserve_pages(&collect.free_list);
+	if (ret)
+		return ret;
+
+	ser->top_table_phys = virt_to_phys(range.top_table);
+	ser->top_level = range.top_level;
+
+	/*
+	 * VASZ and SIGN_EXTEND are needed by the next kernel so that its page
+	 * table walks can restore and later free the preserved pages.
+	 */
+	ser->vasz = common->max_vasz_lg2;
+	ser->sign_extend = pt_feature(common, PT_FEAT_SIGN_EXTEND);
+
+	return 0;
+}
+EXPORT_SYMBOL_NS_GPL(DOMAIN_NS(preserve), "GENERIC_PT_IOMMU");
+
+static int __restore_tables(struct pt_range *range, void *arg,
+			    unsigned int level, struct pt_table_p *table)
+{
+	struct pt_state pts = pt_init(range, level, table);
+	int ret;
+
+	for_each_pt_level_entry(&pts) {
+		if (pts.type == PT_ENTRY_TABLE) {
+			iommu_restore_page(virt_to_phys(pts.table_lower));
+
+			/*
+			 * pt_descend() can only fail if pts.table_lower is not
+			 * initialized, so the error check below is dead code.
+			 */
+			ret = pt_descend(&pts, arg, __restore_tables);
+			if (ret)
+				return ret;
+		}
+	}
+
+	return 0;
+}
+
+static const struct pt_iommu_ops NS(ops_immutable);
+
+/**
+ * restore() - Restore page tables and other state of a domain.
+ * @domain: Domain to restore
+ *
+ * Returns: -ERRNO on failure, 0 on success.
+ */
+int DOMAIN_NS(restore)(struct iommu_domain *domain, struct iommu_domain_ser *ser)
+{
+	struct pt_iommu *iommu_table =
+		container_of(domain, struct pt_iommu, domain);
+	struct pt_common *common = common_from_iommu(iommu_table);
+	struct pt_range range;
+
+	common->max_vasz_lg2 = ser->vasz;
+
+	/* Make this domain immutable. */
+	iommu_table->ops = &NS(ops_immutable);
+
+	/*
+	 * Overriding the features is safe here because this domain is now
+	 * immutable and can only be freed.
+	 */
+	common->features = 0;
+	if (ser->sign_extend)
+		common->features |= BIT(PT_FEAT_SIGN_EXTEND);
+
+	range = pt_all_range(common);
+	iommu_restore_page(ser->top_table_phys);
+
+	/* Free the current top table; the restored one replaces it */
+	iommu_free_pages(range.top_table);
+
+	/* Set the restored top table */
+	pt_top_set(common, phys_to_virt(ser->top_table_phys), ser->top_level);
+
+	/* Restore all lower table pages */
+	range = pt_all_range(common);
+	return pt_walk_range(&range, __restore_tables, NULL);
+}
+EXPORT_SYMBOL_NS_GPL(DOMAIN_NS(restore), "GENERIC_PT_IOMMU");
+#endif
+
 struct pt_unmap_args {
 	struct iommu_pages_list free_list;
 	pt_vaddr_t unmapped;
@@ -1138,6 +1265,10 @@ static const struct pt_iommu_ops NS(ops) = {
 	.deinit = NS(deinit),
 };
 
+static const struct pt_iommu_ops NS(ops_immutable) = {
+	.deinit = NS(deinit),
+};
+
 static int pt_init_common(struct pt_common *common)
 {
 	struct pt_range top_range = pt_top_range(common);
diff --git a/drivers/iommu/generic_pt/kunit_iommu_pt.h b/drivers/iommu/generic_pt/kunit_iommu_pt.h
index e8a63c8ea850..af1918d693ed 100644
--- a/drivers/iommu/generic_pt/kunit_iommu_pt.h
+++ b/drivers/iommu/generic_pt/kunit_iommu_pt.h
@@ -426,6 +426,33 @@ static void test_mixed(struct kunit *test)
 	check_iova(test, start, oa, len);
 }
 
+static void test_restore_free(struct kunit *test)
+{
+	struct kunit_iommu_priv *priv = test->priv;
+	struct pt_range top_range = pt_top_range(priv->common);
+	u64 start = 0x3fe400ULL << 12;
+	u64 end = 0x4c0600ULL << 12;
+	pt_vaddr_t len = end - start;
+
+	if (top_range.last_va <= start || sizeof(unsigned long) == 4)
+		kunit_skip(test, "range is too small");
+	if ((priv->safe_pgsize_bitmap & GENMASK(30, 21)) != (BIT(30) | BIT(21)))
+		kunit_skip(test, "incompatible psize");
+
+	/* Map a large mixed range to populate multiple levels of page tables */
+	do_map(test, start, start, len);
+
+	/*
+	 * Simulate a restored state by clearing all features except
+	 * SIGN_EXTEND. This verifies that the generic page table free walker
+	 * can correctly tear down a populated domain when other features are
+	 * zeroed.
+	 */
+	priv->common->features &= BIT(PT_FEAT_SIGN_EXTEND);
+
+	/* No explicit check; the domain is freed when the test exits. */
+}
+
 static struct kunit_case iommu_test_cases[] = {
 	KUNIT_CASE_FMT(test_increase_level),
 	KUNIT_CASE_FMT(test_map_simple),
@@ -434,6 +461,7 @@ static struct kunit_case iommu_test_cases[] = {
 	KUNIT_CASE_FMT(test_random_map),
 	KUNIT_CASE_FMT(test_pgsize_boundary),
 	KUNIT_CASE_FMT(test_mixed),
+	KUNIT_CASE_FMT(test_restore_free),
 	{},
 };
 
diff --git a/include/linux/generic_pt/iommu.h b/include/linux/generic_pt/iommu.h
index dd0edd02a48a..649b3b9eb1a0 100644
--- a/include/linux/generic_pt/iommu.h
+++ b/include/linux/generic_pt/iommu.h
@@ -13,6 +13,7 @@ struct iommu_iotlb_gather;
 struct pt_iommu_ops;
 struct pt_iommu_driver_ops;
 struct iommu_dirty_bitmap;
+struct iommu_domain_ser;
 
 /**
  * DOC: IOMMU Radix Page Table
@@ -251,6 +252,12 @@ struct pt_iommu_cfg {
 #define IOMMU_PROTOTYPES(fmt)                                                  \
 	phys_addr_t pt_iommu_##fmt##_iova_to_phys(struct iommu_domain *domain, \
 						  dma_addr_t iova);            \
+	int pt_iommu_##fmt##_preserve(struct iommu_domain *domain,             \
+				      struct iommu_domain_ser *ser);           \
+	void pt_iommu_##fmt##_unpreserve(struct iommu_domain *domain,          \
+					 struct iommu_domain_ser *ser);        \
+	int pt_iommu_##fmt##_restore(struct iommu_domain *domain,              \
+				     struct iommu_domain_ser *ser);            \
 	int pt_iommu_##fmt##_read_and_clear_dirty(                             \
 		struct iommu_domain *domain, unsigned long iova, size_t size,  \
 		unsigned long flags, struct iommu_dirty_bitmap *dirty);        \
@@ -266,12 +273,22 @@ struct pt_iommu_cfg {
 	};                              \
 	IOMMU_PROTOTYPES(fmt)
 
+#ifdef CONFIG_IOMMU_LIVEUPDATE
+#define IOMMU_PT_LIVEUPDATE_OPS(fmt)			\
+	, .preserve = &pt_iommu_##fmt##_preserve,	\
+	.unpreserve = &pt_iommu_##fmt##_unpreserve,	\
+	.restore = &pt_iommu_##fmt##_restore
+#else
+#define IOMMU_PT_LIVEUPDATE_OPS(fmt)
+#endif
+
 /*
  * A driver uses IOMMU_PT_DOMAIN_OPS to populate the iommu_domain_ops for the
  * iommu_pt
  */
 #define IOMMU_PT_DOMAIN_OPS(fmt)                        \
-	.iova_to_phys = &pt_iommu_##fmt##_iova_to_phys
+	.iova_to_phys = &pt_iommu_##fmt##_iova_to_phys	\
+	IOMMU_PT_LIVEUPDATE_OPS(fmt)
 #define IOMMU_PT_DIRTY_OPS(fmt) \
 	.read_and_clear_dirty = &pt_iommu_##fmt##_read_and_clear_dirty
 
-- 
2.54.0.545.g6539524ca2-goog


  parent reply	other threads:[~2026-04-27 17:56 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-04-27 17:56 [PATCH v2 00/16] iommu: Add live update state preservation Samiullah Khawaja
2026-04-27 17:56 ` [PATCH v2 01/16] liveupdate: luo_file: Add internal APIs for file preservation Samiullah Khawaja
2026-04-27 17:56 ` [PATCH v2 02/16] iommu: Implement IOMMU Live update FLB callbacks Samiullah Khawaja
2026-05-01 21:45   ` David Matlack
2026-04-27 17:56 ` [PATCH v2 03/16] iommu: Implement IOMMU domain preservation Samiullah Khawaja
2026-05-01 22:08   ` David Matlack
2026-05-04 18:33     ` Samiullah Khawaja
2026-04-27 17:56 ` [PATCH v2 04/16] iommu: Implement device and IOMMU HW preservation Samiullah Khawaja
2026-05-01 22:42   ` David Matlack
2026-05-04 19:06     ` Samiullah Khawaja
2026-04-27 17:56 ` [PATCH v2 05/16] iommu/pages: Add APIs to preserve/unpreserve/restore iommu pages Samiullah Khawaja
2026-04-27 17:56 ` Samiullah Khawaja [this message]
2026-04-27 17:56 ` [PATCH v2 07/16] iommu/vt-d: Implement device and iommu preserve/unpreserve ops Samiullah Khawaja
2026-04-27 17:56 ` [PATCH v2 08/16] iommu: Add APIs to get iommu and device preserved state Samiullah Khawaja
2026-04-27 17:56 ` [PATCH v2 09/16] iommu/vt-d: Restore IOMMU state and reclaimed domain ids Samiullah Khawaja
2026-04-27 17:56 ` [PATCH v2 10/16] iommu: Restore and reattach preserved domains to devices Samiullah Khawaja
2026-04-27 17:56 ` [PATCH v2 11/16] iommu/vt-d: preserve PASID table of preserved device Samiullah Khawaja
2026-04-27 17:56 ` [PATCH v2 12/16] iommufd: Implement ioctl to mark HWPT for preservation Samiullah Khawaja
2026-04-27 17:56 ` [PATCH v2 13/16] iommufd: Persist iommu hardware pagetables for live update Samiullah Khawaja
2026-04-27 17:56 ` [PATCH v2 14/16] iommufd: Add APIs to preserve/unpreserve a vfio cdev Samiullah Khawaja
2026-04-27 17:56 ` [PATCH v2 15/16] vfio/pci: Preserve the iommufd state of the " Samiullah Khawaja
2026-04-27 17:56 ` [PATCH v2 16/16] iommufd/selftest: Add test to verify iommufd preservation Samiullah Khawaja

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260427175633.1978233-7-skhawaja@google.com \
    --to=skhawaja@google.com \
    --cc=ajayachandra@nvidia.com \
    --cc=akpm@linux-foundation.org \
    --cc=alex@shazbot.org \
    --cc=baolu.lu@linux.intel.com \
    --cc=chrisl@kernel.org \
    --cc=dmatlack@google.com \
    --cc=dwmw2@infradead.org \
    --cc=iommu@lists.linux.dev \
    --cc=jgg@ziepe.ca \
    --cc=joro@8bytes.org \
    --cc=kevin.tian@intel.com \
    --cc=kvm@vger.kernel.org \
    --cc=leonro@nvidia.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=parav@nvidia.com \
    --cc=pasha.tatashin@soleen.com \
    --cc=praan@google.com \
    --cc=pratyush@kernel.org \
    --cc=robin.murphy@arm.com \
    --cc=saeedm@nvidia.com \
    --cc=shuah@kernel.org \
    --cc=vipinsh@google.com \
    --cc=will@kernel.org \
    --cc=witu@nvidia.com \
    --cc=zhuyifei@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox