public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed
From: Baolu Lu <baolu.lu@linux.intel.com>
To: Samiullah Khawaja <skhawaja@google.com>,
	David Woodhouse <dwmw2@infradead.org>,
	Joerg Roedel <joro@8bytes.org>, Will Deacon <will@kernel.org>,
	Jason Gunthorpe <jgg@ziepe.ca>
Cc: Robin Murphy <robin.murphy@arm.com>,
	Kevin Tian <kevin.tian@intel.com>,
	Alex Williamson <alex@shazbot.org>, Shuah Khan <shuah@kernel.org>,
	iommu@lists.linux.dev, linux-kernel@vger.kernel.org,
	kvm@vger.kernel.org, Saeed Mahameed <saeedm@nvidia.com>,
	Adithya Jayachandran <ajayachandra@nvidia.com>,
	Parav Pandit <parav@nvidia.com>,
	Leon Romanovsky <leonro@nvidia.com>, William Tu <witu@nvidia.com>,
	Pratyush Yadav <pratyush@kernel.org>,
	Pasha Tatashin <pasha.tatashin@soleen.com>,
	David Matlack <dmatlack@google.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	Chris Li <chrisl@kernel.org>,
	Pranjal Shrivastava <praan@google.com>,
	Vipin Sharma <vipinsh@google.com>,
	YiFei Zhu <zhuyifei@google.com>
Subject: Re: [PATCH v2 06/16] iommupt: Implement preserve/unpreserve/restore callbacks
Date: Thu, 7 May 2026 10:55:42 +0800	[thread overview]
Message-ID: <39a010b4-7099-434a-b8de-5d03a75f2a9f@linux.intel.com> (raw)
In-Reply-To: <20260427175633.1978233-7-skhawaja@google.com>

On 4/28/26 01:56, Samiullah Khawaja wrote:
> Implement the iommu domain ops for preservation, unpreservation and
> restoration of iommu domains for liveupdate. Use the existing page
> walker to preserve the ioptdesc of the top_table and the lower tables.
> 
> Preserve top_level, VASZ and FEAT Sign Extended to restore the domain in
> the next kernel. On restore the domain has only the preserved features
> enabled and all the other features are zeroed. This is ok since the
> restored domain is made immutable and can only be freed. A kunit test is
> added to verify that the IOMMU domain free can be done with trimmed
> features.
> 
> Signed-off-by: Samiullah Khawaja <skhawaja@google.com>
> ---
>   drivers/iommu/generic_pt/iommu_pt.h       | 131 ++++++++++++++++++++++
>   drivers/iommu/generic_pt/kunit_iommu_pt.h |  28 +++++
>   include/linux/generic_pt/iommu.h          |  19 +++-
>   3 files changed, 177 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/iommu/generic_pt/iommu_pt.h b/drivers/iommu/generic_pt/iommu_pt.h
> index 19b6daf88f2a..7bca827e3a55 100644
> --- a/drivers/iommu/generic_pt/iommu_pt.h
> +++ b/drivers/iommu/generic_pt/iommu_pt.h
> @@ -961,6 +961,133 @@ static int NS(map_range)(struct pt_iommu *iommu_table, dma_addr_t iova,
>   	return ret;
>   }
>   
> +#ifdef CONFIG_IOMMU_LIVEUPDATE
> +/**
> + * unpreserve() - Unpreserve page tables and other state of a domain.
> + * @domain: Domain to unpreserve
> + */
> +void DOMAIN_NS(unpreserve)(struct iommu_domain *domain, struct iommu_domain_ser *ser)
> +{
> +	struct pt_iommu *iommu_table =
> +		container_of(domain, struct pt_iommu, domain);
> +	struct pt_common *common = common_from_iommu(iommu_table);
> +	struct pt_range range = pt_all_range(common);
> +	struct pt_iommu_collect_args collect = {
> +		.free_list = IOMMU_PAGES_LIST_INIT(collect.free_list),
> +	};
> +
> +	iommu_pages_list_add(&collect.free_list, range.top_table);
> +	pt_walk_range(&range, __collect_tables, &collect);
> +
> +	iommu_unpreserve_pages(&collect.free_list);
> +}
> +EXPORT_SYMBOL_NS_GPL(DOMAIN_NS(unpreserve), "GENERIC_PT_IOMMU");
> +
> +/**
> + * preserve() - Preserve page tables and other state of a domain.
> + * @domain: Domain to preserve
> + *
> + * Returns: -ERRNO on failure, 0 on success.
> + */
> +int DOMAIN_NS(preserve)(struct iommu_domain *domain, struct iommu_domain_ser *ser)
> +{
> +	struct pt_iommu *iommu_table =
> +		container_of(domain, struct pt_iommu, domain);
> +	struct pt_common *common = common_from_iommu(iommu_table);
> +	struct pt_range range = pt_all_range(common);
> +	struct pt_iommu_collect_args collect = {
> +		.free_list = IOMMU_PAGES_LIST_INIT(collect.free_list),
> +	};
> +	int ret;
> +
> +	iommu_pages_list_add(&collect.free_list, range.top_table);
> +	pt_walk_range(&range, __collect_tables, &collect);
> +
> +	ret = iommu_preserve_pages(&collect.free_list);
> +	if (ret)
> +		return ret;
> +
> +	ser->top_table_phys = virt_to_phys(range.top_table);
> +	ser->top_level = range.top_level;
> +
> +	/*
> +	 * VASZ and SIGN_EXTEND will be needed in next kernel for collector page
> +	 * table walk to restore and free pages.
> +	 */
> +	ser->vasz = common->max_vasz_lg2;
> +	ser->sign_extend = pt_feature(common, PT_FEAT_SIGN_EXTEND);
> +
> +	return 0;
> +}
> +EXPORT_SYMBOL_NS_GPL(DOMAIN_NS(preserve), "GENERIC_PT_IOMMU");
> +
> +static int __restore_tables(struct pt_range *range, void *arg,
> +			    unsigned int level, struct pt_table_p *table)
> +{
> +	struct pt_state pts = pt_init(range, level, table);
> +	int ret;
> +
> +	for_each_pt_level_entry(&pts) {
> +		if (pts.type == PT_ENTRY_TABLE) {
> +			iommu_restore_page(virt_to_phys(pts.table_lower));
> +
> +			/*
> +			 * pt_descend can only fail if pts.table_lower is not
> +			 * initialized, so the if statement below is dead code.
> +			 */
> +			ret = pt_descend(&pts, arg, __restore_tables);
> +			if (ret)
> +				return ret;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
> +static const struct pt_iommu_ops NS(ops_immutable);
> +
> +/**
> + * restore() - Restore page tables and other state of a domain.
> + * @domain: Domain to restore
> + *
> + * Returns: -ERRNO on failure, 0 on success.
> + */
> +int DOMAIN_NS(restore)(struct iommu_domain *domain, struct iommu_domain_ser *ser)
> +{
> +	struct pt_iommu *iommu_table =
> +		container_of(domain, struct pt_iommu, domain);
> +	struct pt_common *common = common_from_iommu(iommu_table);
> +	struct pt_range range;
> +
> +	common->max_vasz_lg2 = ser->vasz;
> +
> +	/* Make this domain immutable.*/
> +	iommu_table->ops = &NS(ops_immutable);
> +
> +	/*
> +	 * It is safe to override this here since this domain is immutable and
> +	 * can only be freed.
> +	 */
> +	common->features = 0;
> +	if (ser->sign_extend)
> +		common->features |= BIT(PT_FEAT_SIGN_EXTEND);
> +
> +	range = pt_all_range(common);
> +	iommu_restore_page(ser->top_table_phys);
> +
> +	/* Free new table */
> +	iommu_free_pages(range.top_table);
> +
> +	/* Set the restored top table */
> +	pt_top_set(common, phys_to_virt(ser->top_table_phys), ser->top_level);
> +
> +	/* Restore all pages*/
> +	range = pt_all_range(common);
> +	return pt_walk_range(&range, __restore_tables, NULL);
> +}
> +EXPORT_SYMBOL_NS_GPL(DOMAIN_NS(restore), "GENERIC_PT_IOMMU");
> +#endif
> +
>   struct pt_unmap_args {
>   	struct iommu_pages_list free_list;
>   	pt_vaddr_t unmapped;
> @@ -1138,6 +1265,10 @@ static const struct pt_iommu_ops NS(ops) = {
>   	.deinit = NS(deinit),
>   };
>   
> +static const struct pt_iommu_ops NS(ops_immutable) = {
> +	.deinit = NS(deinit),
> +};
> +
>   static int pt_init_common(struct pt_common *common)
>   {
>   	struct pt_range top_range = pt_top_range(common);
> diff --git a/drivers/iommu/generic_pt/kunit_iommu_pt.h b/drivers/iommu/generic_pt/kunit_iommu_pt.h
> index e8a63c8ea850..af1918d693ed 100644
> --- a/drivers/iommu/generic_pt/kunit_iommu_pt.h
> +++ b/drivers/iommu/generic_pt/kunit_iommu_pt.h
> @@ -426,6 +426,33 @@ static void test_mixed(struct kunit *test)
>   	check_iova(test, start, oa, len);
>   }
>   
> +static void test_restore_free(struct kunit *test)
> +{
> +	struct kunit_iommu_priv *priv = test->priv;
> +	struct pt_range top_range = pt_top_range(priv->common);
> +	u64 start = 0x3fe400ULL << 12;
> +	u64 end = 0x4c0600ULL << 12;
> +	pt_vaddr_t len = end - start;
> +
> +	if (top_range.last_va <= start || sizeof(unsigned long) == 4)
> +		kunit_skip(test, "range is too small");
> +	if ((priv->safe_pgsize_bitmap & GENMASK(30, 21)) != (BIT(30) | BIT(21)))
> +		kunit_skip(test, "incompatible psize");
> +
> +	/* Map a large mixed range to populate multiple levels of page tables */
> +	do_map(test, start, start, len);
> +
> +	/*
> +	 * Simulate a restored state by clearing all features except
> +	 * SIGN_EXTEND. This verifies that the generic page table free walker
> +	 * can correctly tear down a populated domain when other features are
> +	 * zeroed.
> +	 */
> +	priv->common->features &= BIT(PT_FEAT_SIGN_EXTEND);
> +
> +	/* The domain will be freed when the test exits. */
> +}
> +
>   static struct kunit_case iommu_test_cases[] = {
>   	KUNIT_CASE_FMT(test_increase_level),
>   	KUNIT_CASE_FMT(test_map_simple),
> @@ -434,6 +461,7 @@ static struct kunit_case iommu_test_cases[] = {
>   	KUNIT_CASE_FMT(test_random_map),
>   	KUNIT_CASE_FMT(test_pgsize_boundary),
>   	KUNIT_CASE_FMT(test_mixed),
> +	KUNIT_CASE_FMT(test_restore_free),
>   	{},
>   };
>   
> diff --git a/include/linux/generic_pt/iommu.h b/include/linux/generic_pt/iommu.h
> index dd0edd02a48a..649b3b9eb1a0 100644
> --- a/include/linux/generic_pt/iommu.h
> +++ b/include/linux/generic_pt/iommu.h
> @@ -13,6 +13,7 @@ struct iommu_iotlb_gather;
>   struct pt_iommu_ops;
>   struct pt_iommu_driver_ops;
>   struct iommu_dirty_bitmap;
> +struct iommu_domain_ser;
>   
>   /**
>    * DOC: IOMMU Radix Page Table
> @@ -251,6 +252,12 @@ struct pt_iommu_cfg {
>   #define IOMMU_PROTOTYPES(fmt)                                                  \
>   	phys_addr_t pt_iommu_##fmt##_iova_to_phys(struct iommu_domain *domain, \
>   						  dma_addr_t iova);            \
> +	int pt_iommu_##fmt##_preserve(struct iommu_domain *domain,             \
> +				      struct iommu_domain_ser *ser);           \
> +	void pt_iommu_##fmt##_unpreserve(struct iommu_domain *domain,          \
> +					 struct iommu_domain_ser *ser);        \
> +	int pt_iommu_##fmt##_restore(struct iommu_domain *domain,              \
> +				     struct iommu_domain_ser *ser);            \
>   	int pt_iommu_##fmt##_read_and_clear_dirty(                             \
>   		struct iommu_domain *domain, unsigned long iova, size_t size,  \
>   		unsigned long flags, struct iommu_dirty_bitmap *dirty);        \
> @@ -266,12 +273,22 @@ struct pt_iommu_cfg {
>   	};                              \
>   	IOMMU_PROTOTYPES(fmt)
>   
> +#ifdef CONFIG_IOMMU_LIVEUPDATE
> +#define IOMMU_PT_LIVEUPDATE_OPS(fmt)			\
> +	, .preserve = &pt_iommu_##fmt##_preserve,	\
> +	.unpreserve = &pt_iommu_##fmt##_unpreserve,	\
> +	.restore = &pt_iommu_##fmt##_restore

Nit: would it look better if we put it like this?

#define IOMMU_PT_LIVEUPDATE_OPS(fmt)			\
	, .preserve = &pt_iommu_##fmt##_preserve	\
	, .unpreserve = &pt_iommu_##fmt##_unpreserve	\
	, .restore = &pt_iommu_##fmt##_restore

> +#else
> +#define IOMMU_PT_LIVEUPDATE_OPS(fmt)
> +#endif

Thanks,
baolu

  reply	other threads:[~2026-05-07  2:58 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-04-27 17:56 [PATCH v2 00/16] iommu: Add live update state preservation Samiullah Khawaja
2026-04-27 17:56 ` [PATCH v2 01/16] liveupdate: luo_file: Add internal APIs for file preservation Samiullah Khawaja
2026-04-27 17:56 ` [PATCH v2 02/16] iommu: Implement IOMMU Live update FLB callbacks Samiullah Khawaja
2026-05-01 21:45   ` David Matlack
2026-04-27 17:56 ` [PATCH v2 03/16] iommu: Implement IOMMU domain preservation Samiullah Khawaja
2026-05-01 22:08   ` David Matlack
2026-05-04 18:33     ` Samiullah Khawaja
2026-04-27 17:56 ` [PATCH v2 04/16] iommu: Implement device and IOMMU HW preservation Samiullah Khawaja
2026-05-01 22:42   ` David Matlack
2026-05-04 19:06     ` Samiullah Khawaja
2026-05-07  2:07   ` Baolu Lu
2026-04-27 17:56 ` [PATCH v2 05/16] iommu/pages: Add APIs to preserve/unpreserve/restore iommu pages Samiullah Khawaja
2026-04-27 17:56 ` [PATCH v2 06/16] iommupt: Implement preserve/unpreserve/restore callbacks Samiullah Khawaja
2026-05-07  2:55   ` Baolu Lu [this message]
2026-04-27 17:56 ` [PATCH v2 07/16] iommu/vt-d: Implement device and iommu preserve/unpreserve ops Samiullah Khawaja
2026-04-27 17:56 ` [PATCH v2 08/16] iommu: Add APIs to get iommu and device preserved state Samiullah Khawaja
2026-04-27 17:56 ` [PATCH v2 09/16] iommu/vt-d: Restore IOMMU state and reclaimed domain ids Samiullah Khawaja
2026-04-27 17:56 ` [PATCH v2 10/16] iommu: Restore and reattach preserved domains to devices Samiullah Khawaja
2026-04-27 17:56 ` [PATCH v2 11/16] iommu/vt-d: preserve PASID table of preserved device Samiullah Khawaja
2026-04-27 17:56 ` [PATCH v2 12/16] iommufd: Implement ioctl to mark HWPT for preservation Samiullah Khawaja
2026-04-27 17:56 ` [PATCH v2 13/16] iommufd: Persist iommu hardware pagetables for live update Samiullah Khawaja
2026-04-27 17:56 ` [PATCH v2 14/16] iommufd: Add APIs to preserve/unpreserve a vfio cdev Samiullah Khawaja
2026-04-27 17:56 ` [PATCH v2 15/16] vfio/pci: Preserve the iommufd state of the " Samiullah Khawaja
2026-04-27 17:56 ` [PATCH v2 16/16] iommufd/selftest: Add test to verify iommufd preservation Samiullah Khawaja

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=39a010b4-7099-434a-b8de-5d03a75f2a9f@linux.intel.com \
    --to=baolu.lu@linux.intel.com \
    --cc=ajayachandra@nvidia.com \
    --cc=akpm@linux-foundation.org \
    --cc=alex@shazbot.org \
    --cc=chrisl@kernel.org \
    --cc=dmatlack@google.com \
    --cc=dwmw2@infradead.org \
    --cc=iommu@lists.linux.dev \
    --cc=jgg@ziepe.ca \
    --cc=joro@8bytes.org \
    --cc=kevin.tian@intel.com \
    --cc=kvm@vger.kernel.org \
    --cc=leonro@nvidia.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=parav@nvidia.com \
    --cc=pasha.tatashin@soleen.com \
    --cc=praan@google.com \
    --cc=pratyush@kernel.org \
    --cc=robin.murphy@arm.com \
    --cc=saeedm@nvidia.com \
    --cc=shuah@kernel.org \
    --cc=skhawaja@google.com \
    --cc=vipinsh@google.com \
    --cc=will@kernel.org \
    --cc=witu@nvidia.com \
    --cc=zhuyifei@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox