From: Pranjal Shrivastava <praan@google.com>
To: Samiullah Khawaja <skhawaja@google.com>
Cc: David Woodhouse <dwmw2@infradead.org>,
Lu Baolu <baolu.lu@linux.intel.com>,
Joerg Roedel <joro@8bytes.org>, Will Deacon <will@kernel.org>,
Jason Gunthorpe <jgg@ziepe.ca>,
Robin Murphy <robin.murphy@arm.com>,
Kevin Tian <kevin.tian@intel.com>,
Alex Williamson <alex@shazbot.org>, Shuah Khan <shuah@kernel.org>,
iommu@lists.linux.dev, linux-kernel@vger.kernel.org,
kvm@vger.kernel.org, Saeed Mahameed <saeedm@nvidia.com>,
Adithya Jayachandran <ajayachandra@nvidia.com>,
Parav Pandit <parav@nvidia.com>,
Leon Romanovsky <leonro@nvidia.com>, William Tu <witu@nvidia.com>,
Pratyush Yadav <pratyush@kernel.org>,
Pasha Tatashin <pasha.tatashin@soleen.com>,
David Matlack <dmatlack@google.com>,
Andrew Morton <akpm@linux-foundation.org>,
Chris Li <chrisl@kernel.org>, Vipin Sharma <vipinsh@google.com>,
YiFei Zhu <zhuyifei@google.com>
Subject: Re: [PATCH v2 11/16] iommu/vt-d: preserve PASID table of preserved device
Date: Tue, 19 May 2026 22:35:26 +0000 [thread overview]
Message-ID: <agzlrsK1p7hf6DJE@google.com> (raw)
In-Reply-To: <20260427175633.1978233-12-skhawaja@google.com>
On Mon, Apr 27, 2026 at 05:56:28PM +0000, Samiullah Khawaja wrote:
> In scalable mode the PASID table is used to fetch the io page tables.
> Preserve and restore the PASID table of the preserved devices.
>
> Signed-off-by: Samiullah Khawaja <skhawaja@google.com>
> ---
> drivers/iommu/intel/iommu.c | 5 +-
> drivers/iommu/intel/iommu.h | 12 +++
> drivers/iommu/intel/liveupdate.c | 141 +++++++++++++++++++++++++++++++
> drivers/iommu/intel/pasid.c | 7 +-
> drivers/iommu/intel/pasid.h | 9 ++
> include/linux/kho/abi/iommu.h | 13 +++
> 6 files changed, 184 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
> index b90757164cd8..6d42051dcf7c 100644
> --- a/drivers/iommu/intel/iommu.c
> +++ b/drivers/iommu/intel/iommu.c
> @@ -2951,8 +2951,10 @@ static int clear_unpreserve_context_entry_fn(struct device *dev,
> if (!info)
> return 0;
>
> - if (dev_is_pci(dev) && dev_iommu_preserved_state(dev))
> + if (dev_is_pci(dev) && dev_iommu_preserved_state(dev)) {
> + pasid_cleanup_preserved_table(dev);
> return 0;
> + }
>
> domain_context_clear(info);
> return 0;
> @@ -4013,6 +4015,7 @@ const struct iommu_ops intel_iommu_ops = {
> .page_response = intel_iommu_page_response,
> #ifdef CONFIG_IOMMU_LIVEUPDATE
> .preserve_device = intel_iommu_preserve_device,
> + .unpreserve_device = intel_iommu_unpreserve_device,
> .preserve = intel_iommu_preserve,
> .unpreserve = intel_iommu_unpreserve,
> #endif
> diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
> index 8e37acf7de12..62076a1a0b4d 100644
> --- a/drivers/iommu/intel/iommu.h
> +++ b/drivers/iommu/intel/iommu.h
> @@ -1290,12 +1290,15 @@ static inline int iopf_for_domain_replace(struct iommu_domain *new,
> #ifdef CONFIG_IOMMU_LIVEUPDATE
> int intel_iommu_preserve_device(struct device *dev,
> struct iommu_device_ser *device_ser);
> +void intel_iommu_unpreserve_device(struct device *dev,
> + struct iommu_device_ser *device_ser);
> int intel_iommu_preserve(struct iommu_device *iommu,
> struct iommu_hw_ser *iommu_ser);
> void intel_iommu_unpreserve(struct iommu_device *iommu,
> struct iommu_hw_ser *iommu_ser);
> void intel_iommu_liveupdate_restore_root_table(struct intel_iommu *iommu,
> struct iommu_hw_ser *iommu_ser);
> +void pasid_cleanup_preserved_table(struct device *dev);
> #else
> static inline int intel_iommu_preserve_device(struct device *dev,
> struct iommu_device_ser *device_ser)
> @@ -1303,6 +1306,11 @@ static inline int intel_iommu_preserve_device(struct device *dev,
> return -EOPNOTSUPP;
> }
>
> +static inline void intel_iommu_unpreserve_device(struct device *dev,
> + struct iommu_device_ser *device_ser)
> +{
> +}
> +
> static inline int intel_iommu_preserve(struct iommu_device *iommu,
> struct iommu_hw_ser *iommu_ser)
> {
> @@ -1318,6 +1326,10 @@ static inline void intel_iommu_liveupdate_restore_root_table(struct intel_iommu
> struct iommu_hw_ser *iommu_ser)
> {
> }
> +
> +static inline void pasid_cleanup_preserved_table(struct device *dev)
> +{
> +}
> #endif
>
> #ifdef CONFIG_INTEL_IOMMU_SVM
> diff --git a/drivers/iommu/intel/liveupdate.c b/drivers/iommu/intel/liveupdate.c
> index 50a63812533f..404b485e97b9 100644
> --- a/drivers/iommu/intel/liveupdate.c
> +++ b/drivers/iommu/intel/liveupdate.c
> @@ -14,6 +14,7 @@
> #include <linux/pci.h>
>
> #include "iommu.h"
> +#include "pasid.h"
> #include "../iommu-pages.h"
>
> static void unpreserve_iommu_context_table(struct intel_iommu *iommu, int end)
> @@ -140,10 +141,96 @@ void intel_iommu_liveupdate_restore_root_table(struct intel_iommu *iommu,
> iommu_for_each_preserved_device(_restore_used_domain_ids, iommu);
> }
>
> +enum pasid_lu_op {
> + PASID_LU_OP_PRESERVE = 1,
> + PASID_LU_OP_UNPRESERVE,
> + PASID_LU_OP_RESTORE,
> + PASID_LU_OP_FREE,
> +};
> +
> +static int pasid_lu_do_op(void *table, enum pasid_lu_op op)
> +{
> + int ret = 0;
> +
> + switch (op) {
> + case PASID_LU_OP_PRESERVE:
> + ret = iommu_preserve_page(table);
Nit: This makes me consider renaming the helper to
`iommu_preserve_folio`. At first I wondered why we were preserving only
a single page.
> + break;
> + case PASID_LU_OP_UNPRESERVE:
> + iommu_unpreserve_page(table);
> + break;
> + case PASID_LU_OP_RESTORE:
> + iommu_restore_page(virt_to_phys(table));
> + break;
> + case PASID_LU_OP_FREE:
> + iommu_free_pages(table);
> + break;
> + }
> +
> + return ret;
> +}
> +
[snip]
> +
> +void pasid_cleanup_preserved_table(struct device *dev)
> +{
> + struct pasid_table *pasid_table;
> + struct pasid_dir_entry *dir;
> + struct pasid_entry *table;
> + size_t dir_size;
> +
> + pasid_table = intel_pasid_get_table(dev);
> + if (!pasid_table)
> + return;
> +
> + dir = pasid_table->table;
> + table = get_pasid_table_from_pde(&dir[0]);
> + if (!table)
> + return;
> +
> + /* Clear everything except the first entry in table. */
> + memset(&table[1], 0, SZ_4K - sizeof(*table));
Nit: Is the table behind the first directory entry always 4K, or could its size change based on PAGE_SIZE?
> +
> + /* Use the folio order to calculate the size of Pasid Directory */
> + dir_size = (1 << (folio_order(virt_to_folio(dir)) + PAGE_SHIFT));
> +
> + /* Clear everything except the first entry in directory */
> + memset(&dir[1], 0, dir_size - sizeof(struct pasid_dir_entry));
> +
> + clflush_cache_range(&table[0], SZ_4K);
> + clflush_cache_range(&dir[0], dir_size);
> +}
> +
[...]
> +void *intel_pasid_try_restore_table(struct device *dev, u64 max_pasid)
> +{
> + struct iommu_device_ser *ser = dev_iommu_restored_state(dev);
> +
> + if (!ser)
> + return NULL;
> +
> + BUG_ON(pasid_lu_handle_pd(phys_to_virt(ser->intel.pasid_table),
> + PASID_LU_OP_RESTORE));
> + if (WARN_ON_ONCE(ser->intel.max_pasid != max_pasid)) {
I'm wondering if this check could be slightly relaxed to:
if (ser->intel.max_pasid < max_pasid)
so that the preserved max_pasid is treated as a minimum requirement
rather than having to match exactly?
> + pasid_lu_handle_pd(phys_to_virt(ser->intel.pasid_table),
> + PASID_LU_OP_FREE);
> + return NULL;
> + }
> +
> + return phys_to_virt(ser->intel.pasid_table);
> +}
> diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c
> index 89541b74ab8c..5cac8e95f73b 100644
> --- a/drivers/iommu/intel/pasid.c
> +++ b/drivers/iommu/intel/pasid.c
> @@ -60,8 +60,11 @@ int intel_pasid_alloc_table(struct device *dev)
>
> size = max_pasid >> (PASID_PDE_SHIFT - 3);
> order = size ? get_order(size) : 0;
> - dir = iommu_alloc_pages_node_sz(info->iommu->node, GFP_KERNEL,
> - 1 << (order + PAGE_SHIFT));
> +
> + dir = intel_pasid_try_restore_table(dev, 1 << (order + PAGE_SHIFT + 3));
> + if (!dir)
> + dir = iommu_alloc_pages_node_sz(info->iommu->node, GFP_KERNEL,
> + 1 << (order + PAGE_SHIFT));
> if (!dir) {
> kfree(pasid_table);
> return -ENOMEM;
Thanks,
Praan
next prev parent reply other threads:[~2026-05-19 22:35 UTC|newest]
Thread overview: 82+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-04-27 17:56 [PATCH v2 00/16] iommu: Add live update state preservation Samiullah Khawaja
2026-04-27 17:56 ` [PATCH v2 01/16] liveupdate: luo_file: Add internal APIs for file preservation Samiullah Khawaja
2026-05-18 11:40 ` Pranjal Shrivastava
2026-05-18 19:08 ` Samiullah Khawaja
2026-05-29 16:12 ` Ankit Soni
2026-05-29 16:36 ` Samiullah Khawaja
2026-04-27 17:56 ` [PATCH v2 02/16] iommu: Implement IOMMU Live update FLB callbacks Samiullah Khawaja
2026-05-01 21:45 ` David Matlack
2026-05-18 11:52 ` Pranjal Shrivastava
2026-05-18 14:10 ` Pratyush Yadav
2026-05-18 15:08 ` Pranjal Shrivastava
2026-05-23 13:29 ` Jason Gunthorpe
2026-05-18 12:33 ` Pranjal Shrivastava
2026-05-18 17:20 ` Samiullah Khawaja
2026-05-18 17:32 ` Pranjal Shrivastava
2026-05-18 17:06 ` Samiullah Khawaja
2026-04-27 17:56 ` [PATCH v2 03/16] iommu: Implement IOMMU domain preservation Samiullah Khawaja
2026-05-01 22:08 ` David Matlack
2026-05-04 18:33 ` Samiullah Khawaja
2026-05-18 13:13 ` Pranjal Shrivastava
2026-05-18 18:55 ` Samiullah Khawaja
2026-05-18 21:36 ` Pranjal Shrivastava
2026-04-27 17:56 ` [PATCH v2 04/16] iommu: Implement device and IOMMU HW preservation Samiullah Khawaja
2026-05-01 22:42 ` David Matlack
2026-05-04 19:06 ` Samiullah Khawaja
2026-05-07 2:07 ` Baolu Lu
2026-05-07 18:47 ` Samiullah Khawaja
2026-05-18 14:01 ` Pranjal Shrivastava
2026-05-18 18:33 ` Samiullah Khawaja
2026-05-18 13:55 ` Pranjal Shrivastava
2026-05-18 18:44 ` Samiullah Khawaja
2026-06-01 6:19 ` Ankit Soni
2026-04-27 17:56 ` [PATCH v2 05/16] iommu/pages: Add APIs to preserve/unpreserve/restore iommu pages Samiullah Khawaja
2026-05-18 14:23 ` Pranjal Shrivastava
2026-05-18 17:22 ` Samiullah Khawaja
2026-04-27 17:56 ` [PATCH v2 06/16] iommupt: Implement preserve/unpreserve/restore callbacks Samiullah Khawaja
2026-05-07 2:55 ` Baolu Lu
2026-05-07 18:40 ` Samiullah Khawaja
2026-05-19 13:15 ` Pranjal Shrivastava
2026-05-19 17:14 ` Samiullah Khawaja
2026-05-23 13:33 ` Jason Gunthorpe
2026-05-27 17:11 ` Samiullah Khawaja
2026-04-27 17:56 ` [PATCH v2 07/16] iommu/vt-d: Implement device and iommu preserve/unpreserve ops Samiullah Khawaja
2026-05-07 6:25 ` Baolu Lu
2026-05-08 2:36 ` Samiullah Khawaja
2026-05-18 20:32 ` Samiullah Khawaja
2026-05-19 14:40 ` Pranjal Shrivastava
2026-05-19 18:26 ` Samiullah Khawaja
2026-04-27 17:56 ` [PATCH v2 08/16] iommu: Add APIs to get iommu and device preserved state Samiullah Khawaja
2026-05-19 15:52 ` Pranjal Shrivastava
2026-05-20 17:24 ` Samiullah Khawaja
2026-04-27 17:56 ` [PATCH v2 09/16] iommu/vt-d: Restore IOMMU state and reclaimed domain ids Samiullah Khawaja
2026-05-07 9:05 ` Baolu Lu
2026-05-07 17:35 ` Samiullah Khawaja
2026-05-19 21:46 ` Pranjal Shrivastava
2026-05-20 18:02 ` Pranjal Shrivastava
2026-05-20 19:59 ` Samiullah Khawaja
2026-04-27 17:56 ` [PATCH v2 10/16] iommu: Restore and reattach preserved domains to devices Samiullah Khawaja
2026-05-07 13:54 ` Baolu Lu
2026-05-07 16:52 ` Samiullah Khawaja
2026-05-29 16:43 ` Ankit Soni
2026-05-29 17:03 ` Samiullah Khawaja
2026-04-27 17:56 ` [PATCH v2 11/16] iommu/vt-d: preserve PASID table of preserved device Samiullah Khawaja
2026-05-08 6:05 ` Baolu Lu
2026-05-11 18:45 ` Samiullah Khawaja
2026-05-12 11:32 ` Baolu Lu
2026-05-19 22:35 ` Pranjal Shrivastava [this message]
2026-05-20 18:13 ` Samiullah Khawaja
2026-04-27 17:56 ` [PATCH v2 12/16] iommufd: Implement ioctl to mark HWPT for preservation Samiullah Khawaja
2026-05-19 23:05 ` Pranjal Shrivastava
2026-05-20 19:50 ` Samiullah Khawaja
2026-04-27 17:56 ` [PATCH v2 13/16] iommufd: Persist iommu hardware pagetables for live update Samiullah Khawaja
2026-05-20 0:00 ` Pranjal Shrivastava
2026-05-20 19:40 ` Samiullah Khawaja
2026-05-22 16:01 ` Pranjal Shrivastava
2026-05-22 19:29 ` Pranjal Shrivastava
2026-04-27 17:56 ` [PATCH v2 14/16] iommufd: Add APIs to preserve/unpreserve a vfio cdev Samiullah Khawaja
2026-05-20 0:46 ` Pranjal Shrivastava
2026-04-27 17:56 ` [PATCH v2 15/16] vfio/pci: Preserve the iommufd state of the " Samiullah Khawaja
2026-05-20 0:57 ` Pranjal Shrivastava
2026-05-20 19:54 ` Samiullah Khawaja
2026-04-27 17:56 ` [PATCH v2 16/16] iommufd/selftest: Add test to verify iommufd preservation Samiullah Khawaja
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=agzlrsK1p7hf6DJE@google.com \
--to=praan@google.com \
--cc=ajayachandra@nvidia.com \
--cc=akpm@linux-foundation.org \
--cc=alex@shazbot.org \
--cc=baolu.lu@linux.intel.com \
--cc=chrisl@kernel.org \
--cc=dmatlack@google.com \
--cc=dwmw2@infradead.org \
--cc=iommu@lists.linux.dev \
--cc=jgg@ziepe.ca \
--cc=joro@8bytes.org \
--cc=kevin.tian@intel.com \
--cc=kvm@vger.kernel.org \
--cc=leonro@nvidia.com \
--cc=linux-kernel@vger.kernel.org \
--cc=parav@nvidia.com \
--cc=pasha.tatashin@soleen.com \
--cc=pratyush@kernel.org \
--cc=robin.murphy@arm.com \
--cc=saeedm@nvidia.com \
--cc=shuah@kernel.org \
--cc=skhawaja@google.com \
--cc=vipinsh@google.com \
--cc=will@kernel.org \
--cc=witu@nvidia.com \
--cc=zhuyifei@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.