From: Samiullah Khawaja <skhawaja@google.com>
To: David Woodhouse <dwmw2@infradead.org>,
Lu Baolu <baolu.lu@linux.intel.com>,
Joerg Roedel <joro@8bytes.org>, Will Deacon <will@kernel.org>,
Jason Gunthorpe <jgg@ziepe.ca>
Cc: Samiullah Khawaja <skhawaja@google.com>,
Robin Murphy <robin.murphy@arm.com>,
Kevin Tian <kevin.tian@intel.com>,
Alex Williamson <alex@shazbot.org>, Shuah Khan <shuah@kernel.org>,
iommu@lists.linux.dev, linux-kernel@vger.kernel.org,
kvm@vger.kernel.org, Saeed Mahameed <saeedm@nvidia.com>,
Adithya Jayachandran <ajayachandra@nvidia.com>,
Parav Pandit <parav@nvidia.com>,
Leon Romanovsky <leonro@nvidia.com>, William Tu <witu@nvidia.com>,
Pratyush Yadav <pratyush@kernel.org>,
Pasha Tatashin <pasha.tatashin@soleen.com>,
David Matlack <dmatlack@google.com>,
Andrew Morton <akpm@linux-foundation.org>,
Chris Li <chrisl@kernel.org>,
Pranjal Shrivastava <praan@google.com>,
Vipin Sharma <vipinsh@google.com>,
YiFei Zhu <zhuyifei@google.com>
Subject: [PATCH v2 10/16] iommu: Restore and reattach preserved domains to devices
Date: Mon, 27 Apr 2026 17:56:27 +0000 [thread overview]
Message-ID: <20260427175633.1978233-11-skhawaja@google.com> (raw)
In-Reply-To: <20260427175633.1978233-1-skhawaja@google.com>
Restore the preserved domains by restoring their page tables using the
IOMMU domain restore op. Reattach each preserved domain to its device
during default domain setup. While attaching, reuse the domain ID that was
used in the previous kernel. The context entry setup is not needed because
the context entry is preserved across the live update.
Signed-off-by: Samiullah Khawaja <skhawaja@google.com>
---
drivers/iommu/intel/iommu.c | 49 ++++++++++++++------
drivers/iommu/intel/iommu.h | 3 +-
drivers/iommu/intel/nested.c | 2 +-
drivers/iommu/iommu.c | 61 ++++++++++++++++++++++++-
drivers/iommu/liveupdate.c | 78 ++++++++++++++++++++++++++++++++
include/linux/iommu-liveupdate.h | 50 ++++++++++++++++++++
6 files changed, 224 insertions(+), 19 deletions(-)
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 4118a0861f38..b90757164cd8 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -1031,7 +1031,8 @@ static bool first_level_by_default(struct intel_iommu *iommu)
return true;
}
-int domain_attach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu)
+int domain_attach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu,
+ int restore_did)
{
struct iommu_domain_info *info, *curr;
int num, ret = -ENOSPC;
@@ -1051,8 +1052,11 @@ int domain_attach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu)
return 0;
}
- num = ida_alloc_range(&iommu->domain_ida, IDA_START_DID,
- cap_ndoms(iommu->cap) - 1, GFP_KERNEL);
+ if (restore_did >= IDA_START_DID)
+ num = restore_did;
+ else
+ num = ida_alloc_range(&iommu->domain_ida, IDA_START_DID,
+ cap_ndoms(iommu->cap) - 1, GFP_KERNEL);
if (num < 0) {
pr_err("%s: No free domain ids\n", iommu->name);
goto err_unlock;
@@ -1320,10 +1324,14 @@ static int dmar_domain_attach_device(struct dmar_domain *domain,
{
struct device_domain_info *info = dev_iommu_priv_get(dev);
struct intel_iommu *iommu = info->iommu;
+ struct device_ser *device_ser = NULL;
unsigned long flags;
int ret;
- ret = domain_attach_iommu(domain, iommu);
+ device_ser = dev_iommu_restored_state(dev);
+
+ ret = domain_attach_iommu(domain, iommu,
+ dev_iommu_restore_did(dev, &domain->domain));
if (ret)
return ret;
@@ -1336,16 +1344,18 @@ static int dmar_domain_attach_device(struct dmar_domain *domain,
if (dev_is_real_dma_subdevice(dev))
return 0;
- if (!sm_supported(iommu))
- ret = domain_context_mapping(domain, dev);
- else if (intel_domain_is_fs_paging(domain))
- ret = domain_setup_first_level(iommu, domain, dev,
- IOMMU_NO_PASID, NULL);
- else if (intel_domain_is_ss_paging(domain))
- ret = domain_setup_second_level(iommu, domain, dev,
- IOMMU_NO_PASID, NULL);
- else if (WARN_ON(true))
- ret = -EINVAL;
+ if (!device_ser) {
+ if (!sm_supported(iommu))
+ ret = domain_context_mapping(domain, dev);
+ else if (intel_domain_is_fs_paging(domain))
+ ret = domain_setup_first_level(iommu, domain, dev,
+ IOMMU_NO_PASID, NULL);
+ else if (intel_domain_is_ss_paging(domain))
+ ret = domain_setup_second_level(iommu, domain, dev,
+ IOMMU_NO_PASID, NULL);
+ else if (WARN_ON(true))
+ ret = -EINVAL;
+ }
if (ret)
goto out_block_translation;
@@ -3170,6 +3180,15 @@ int paging_domain_compatible(struct iommu_domain *domain, struct device *dev)
struct intel_iommu *iommu = info->iommu;
int ret = -EINVAL;
+#ifdef CONFIG_IOMMU_LIVEUPDATE
+ /*
+ * Restored IOMMU domains are already attached to the device and can
+ * only be freed. So no need to check the compatibility.
+ */
+ if (iommu_domain_restored_state(domain))
+ return 0;
+#endif
+
if (intel_domain_is_fs_paging(dmar_domain))
ret = paging_domain_compatible_first_stage(dmar_domain, iommu);
else if (intel_domain_is_ss_paging(dmar_domain))
@@ -3647,7 +3666,7 @@ domain_add_dev_pasid(struct iommu_domain *domain,
if (!dev_pasid)
return ERR_PTR(-ENOMEM);
- ret = domain_attach_iommu(dmar_domain, iommu);
+ ret = domain_attach_iommu(dmar_domain, iommu, -1);
if (ret)
goto out_free;
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index b0ec0b471a43..8e37acf7de12 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -1182,7 +1182,8 @@ void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
*/
#define QI_OPT_WAIT_DRAIN BIT(0)
-int domain_attach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu);
+int domain_attach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu,
+ int restore_did);
void domain_detach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu);
void device_block_translation(struct device *dev);
int paging_domain_compatible(struct iommu_domain *domain, struct device *dev);
diff --git a/drivers/iommu/intel/nested.c b/drivers/iommu/intel/nested.c
index 2b979bec56ce..6e13f697b463 100644
--- a/drivers/iommu/intel/nested.c
+++ b/drivers/iommu/intel/nested.c
@@ -40,7 +40,7 @@ static int intel_nested_attach_dev(struct iommu_domain *domain,
return ret;
}
- ret = domain_attach_iommu(dmar_domain, iommu);
+ ret = domain_attach_iommu(dmar_domain, iommu, -1);
if (ret) {
dev_err_ratelimited(dev, "Failed to attach domain to iommu\n");
return ret;
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 0561990f46e3..e888700da53d 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -18,6 +18,7 @@
#include <linux/errno.h>
#include <linux/host1x_context_bus.h>
#include <linux/iommu.h>
+#include <linux/iommu-liveupdate.h>
#include <linux/iommufd.h>
#include <linux/idr.h>
#include <linux/err.h>
@@ -505,6 +506,10 @@ static int iommu_init_device(struct device *dev)
goto err_free;
}
+#ifdef CONFIG_IOMMU_LIVEUPDATE
+ dev->iommu->device_ser = iommu_get_device_preserved_data(dev);
+#endif
+
iommu_dev = ops->probe_device(dev);
if (IS_ERR(iommu_dev)) {
ret = PTR_ERR(iommu_dev);
@@ -2204,6 +2209,13 @@ static int __iommu_attach_device(struct iommu_domain *domain,
ret = domain->ops->attach_dev(domain, dev, old);
if (ret)
return ret;
+
+#ifdef CONFIG_IOMMU_LIVEUPDATE
+ /* The associated state can be unset once restored. */
+ if (dev_iommu_restored_state(dev))
+ WRITE_ONCE(dev->iommu->device_ser, NULL);
+#endif
+
dev->iommu->attach_deferred = 0;
trace_attach_device_to_domain(dev);
return 0;
@@ -3159,6 +3171,47 @@ int iommu_fwspec_add_ids(struct device *dev, const u32 *ids, int num_ids)
}
EXPORT_SYMBOL_GPL(iommu_fwspec_add_ids);
+/*
+ * Report the restored live update state of @group, looked up through the
+ * group's first device. Only PCI devices are considered: for any other
+ * device the result is NULL, as it is when the device carries no restored
+ * state.
+ */
+static inline void *__iommu_group_restored_state(struct iommu_group *group)
+{
+	struct device *first = iommu_group_first_dev(group);
+
+	return dev_is_pci(first) ? dev_iommu_restored_state(first) : NULL;
+}
+
+/*
+ * Restore the preserved domain of @group from the restored state of the
+ * group's first device and record the group's owner.
+ *
+ * The caller must hold group->mutex.
+ *
+ * Returns the restored domain, or NULL when the first device is not a PCI
+ * device, has no restored state, or when restoring the domain failed (the
+ * failure also triggers a WARN).
+ */
+static struct iommu_domain *__iommu_group_restore_domain(struct iommu_group *group)
+{
+	struct iommu_device_ser *device_ser;
+	struct iommu_domain *domain;
+	struct device *dev;
+	/*
+	 * Must start out NULL: iommu_restore_domain() does not write the owner
+	 * when it hands back a domain that was already restored earlier.
+	 */
+	void *owner = NULL;
+
+	lockdep_assert_held(&group->mutex);
+	dev = iommu_group_first_dev(group);
+	if (!dev_is_pci(dev))
+		return NULL;
+
+	device_ser = dev_iommu_restored_state(dev);
+	if (!device_ser)
+		return NULL;
+
+	domain = iommu_restore_domain(dev, device_ser, &owner);
+	if (WARN_ON(IS_ERR(domain)))
+		return NULL;
+
+	/*
+	 * When the owner was not reported, fall back to the device's preserved
+	 * state, which is the value iommu_restore_domain() reports for a
+	 * freshly restored domain.
+	 * NOTE(review): confirm this is the intended owner for a group that
+	 * shares an already-restored domain.
+	 */
+	if (!owner)
+		owner = device_ser;
+
+	/*
+	 * Ownership of groups with preserved devices is set during boot. These
+	 * will be reclaimed later by the entity (iommufd) that preserved them.
+	 */
+	WARN_ON(group->owner);
+	group->owner = owner;
+	group->owner_cnt = 1;
+	return domain;
+}
+
/**
* iommu_setup_default_domain - Set the default_domain for the group
* @group: Group to change
@@ -3173,8 +3226,8 @@ static int iommu_setup_default_domain(struct iommu_group *group,
int target_type)
{
struct iommu_domain *old_dom = group->default_domain;
+ struct iommu_domain *dom, *restored_domain;
struct group_device *gdev;
- struct iommu_domain *dom;
bool direct_failed;
int req_type;
int ret;
@@ -3218,6 +3271,10 @@ static int iommu_setup_default_domain(struct iommu_group *group,
/* We must set default_domain early for __iommu_device_set_domain */
group->default_domain = dom;
if (!group->domain) {
+ if (__iommu_group_restored_state(group))
+ restored_domain = __iommu_group_restore_domain(group);
+ else
+ restored_domain = dom;
/*
* Drivers are not allowed to fail the first domain attach.
* The only way to recover from this is to fail attaching the
@@ -3225,7 +3282,7 @@ static int iommu_setup_default_domain(struct iommu_group *group,
* in group->default_domain so it is freed after.
*/
ret = __iommu_group_set_domain_internal(
- group, dom, IOMMU_SET_DOMAIN_MUST_SUCCEED);
+ group, restored_domain, IOMMU_SET_DOMAIN_MUST_SUCCEED);
if (WARN_ON(ret))
goto out_free_old;
} else {
diff --git a/drivers/iommu/liveupdate.c b/drivers/iommu/liveupdate.c
index 60ee29b0c6bd..0888871784ea 100644
--- a/drivers/iommu/liveupdate.c
+++ b/drivers/iommu/liveupdate.c
@@ -234,6 +234,41 @@ int iommu_for_each_preserved_device(iommu_preserved_device_iter_fn fn,
}
EXPORT_SYMBOL(iommu_for_each_preserved_device);
+/*
+ * Tell whether the serialized device entry @match describes @pdev: both the
+ * PCI device id and the PCI domain number have to agree.
+ */
+static inline bool match_device_ser(struct iommu_device_ser *match,
+				    struct pci_dev *pdev)
+{
+	if (match->devid != pci_dev_id(pdev))
+		return false;
+
+	return match->pci_domain_nr == pci_domain_nr(pdev->bus);
+}
+
+/*
+ * Look up the serialized state stored for @dev in the incoming live update
+ * data.
+ *
+ * The serialized device array is walked until an entry matching the PCI
+ * device is found; that entry is flagged as incoming (hdr.incoming = true)
+ * and returned.
+ *
+ * Returns NULL when @dev is not a PCI device, when the incoming FLB object
+ * cannot be obtained, or when no entry matches.
+ *
+ * NOTE(review): the returned pointer is used by callers after the incoming
+ * FLB reference has been put; confirm the serialized array outlives it.
+ */
+struct iommu_device_ser *iommu_get_device_preserved_data(struct device *dev)
+{
+	struct iommu_device_ser *entry = NULL, *found = NULL;
+	struct iommu_device_array_ser *array;
+	struct iommu_flb_obj *flb_obj;
+	struct pci_dev *pdev;
+	int idx;
+
+	if (!dev_is_pci(dev))
+		return NULL;
+
+	if (liveupdate_flb_get_incoming(&iommu_flb, (void **)&flb_obj))
+		return NULL;
+
+	pdev = to_pci_dev(dev);
+	array = phys_to_virt(flb_obj->ser->device_array_phys);
+	iommu_liveupdate_for_each_obj(array, entry, idx) {
+		if (match_device_ser(entry, pdev)) {
+			entry->hdr.incoming = true;
+			found = entry;
+			/* Leave the iteration macro in one step. */
+			goto put;
+		}
+	}
+
+put:
+	liveupdate_flb_put_incoming(&iommu_flb);
+	return found;
+}
+EXPORT_SYMBOL(iommu_get_device_preserved_data);
+
struct iommu_hw_ser *iommu_get_preserved_data(u64 token, enum iommu_type_ser type)
{
struct iommu_hw_ser *iommu_ser = NULL;
@@ -512,3 +547,46 @@ void iommu_unpreserve_device(struct iommu_domain *domain, struct device *dev)
iommu_unpreserve_locked(iommu->iommu_dev, flb_obj);
}
+
+/*
+ * Restore the domain described by @domain_ser for @dev, or hand back the
+ * domain that was already restored from it. The caller holds the lock of the
+ * incoming FLB object.
+ */
+static struct iommu_domain *
+iommu_restore_domain_locked(struct device *dev,
+			    struct iommu_domain_ser *domain_ser)
+{
+	struct iommu_domain *domain;
+	int ret;
+
+	/* A preserved domain is restored once and then reused. */
+	if (domain_ser->restored_domain)
+		return domain_ser->restored_domain;
+
+	domain_ser->hdr.incoming = true;
+	domain = iommu_paging_domain_alloc(dev);
+	if (IS_ERR(domain))
+		return domain;
+
+	ret = domain->ops->restore(domain, domain_ser);
+	if (ret) {
+		iommu_domain_free(domain);
+		return ERR_PTR(ret);
+	}
+
+	domain->preserved_state = domain_ser;
+	domain_ser->restored_domain = domain;
+	return domain;
+}
+
+/**
+ * iommu_restore_domain - Restore the preserved domain of a preserved device
+ * @dev: Device the domain is restored for
+ * @ser: Preserved state of @dev; it references the preserved domain state
+ * @owner: Set to @ser whenever a domain is returned
+ *
+ * The first call for a given preserved domain allocates a paging domain and
+ * restores it through the domain's restore op. Later calls for the same
+ * preserved domain return the domain restored by the first call.
+ *
+ * Return: the restored domain, or an ERR_PTR() when the incoming FLB object
+ * cannot be obtained, the allocation fails, or the restore op fails. @owner
+ * is left untouched on failure.
+ */
+struct iommu_domain *iommu_restore_domain(struct device *dev,
+					  struct iommu_device_ser *ser,
+					  void **owner)
+{
+	struct iommu_domain_ser *domain_ser;
+	struct iommu_flb_obj *flb_obj;
+	struct iommu_domain *domain;
+	int ret;
+
+	domain_ser = phys_to_virt(ser->domain_iommu_ser.domain_phys);
+
+	ret = liveupdate_flb_get_incoming(&iommu_flb, (void **)&flb_obj);
+	if (ret)
+		return ERR_PTR(ret);
+
+	/*
+	 * Explicit lock/unlock instead of guard(): the lock is dropped before
+	 * the put below, so flb_obj->lock is never touched after the incoming
+	 * reference has been released.
+	 */
+	mutex_lock(&flb_obj->lock);
+	domain = iommu_restore_domain_locked(dev, domain_ser);
+	mutex_unlock(&flb_obj->lock);
+
+	liveupdate_flb_put_incoming(&iommu_flb);
+
+	/*
+	 * The device is owned by the preserved state. Report the owner for an
+	 * already-restored domain too, so the caller never reads an unset
+	 * value.
+	 */
+	if (!IS_ERR(domain))
+		*owner = ser;
+
+	return domain;
+}
diff --git a/include/linux/iommu-liveupdate.h b/include/linux/iommu-liveupdate.h
index 0baf6bc2d93f..75d27256c883 100644
--- a/include/linux/iommu-liveupdate.h
+++ b/include/linux/iommu-liveupdate.h
@@ -30,6 +30,20 @@ static inline void *dev_iommu_preserved_state(struct device *dev)
return NULL;
}
+/*
+ * Return the serialized state attached to @dev when it is flagged as
+ * incoming. NULL is returned when @dev has no IOMMU data, no serialized
+ * state, or state that is not flagged as incoming.
+ */
+static inline void *dev_iommu_restored_state(struct device *dev)
+{
+	struct iommu_device_ser *state;
+
+	state = dev->iommu ? dev->iommu->device_ser : NULL;
+	if (!state || !state->hdr.incoming)
+		return NULL;
+
+	return state;
+}
+
static inline void *iommu_domain_restored_state(struct iommu_domain *domain)
{
struct iommu_domain_ser *ser;
@@ -41,8 +55,22 @@ static inline void *iommu_domain_restored_state(struct iommu_domain *domain)
return NULL;
}
+/*
+ * Return the attachment id recorded in the restored state of @dev, provided
+ * that both @dev and @domain carry restored state. Otherwise return -1.
+ */
+static inline int dev_iommu_restore_did(struct device *dev, struct iommu_domain *domain)
+{
+	struct iommu_device_ser *state = dev_iommu_restored_state(dev);
+
+	if (!state)
+		return -1;
+
+	if (!iommu_domain_restored_state(domain))
+		return -1;
+
+	return state->domain_iommu_ser.attachment_id;
+}
+
+struct iommu_domain *iommu_restore_domain(struct device *dev,
+ struct iommu_device_ser *ser,
+ void **owner);
int iommu_for_each_preserved_device(iommu_preserved_device_iter_fn fn,
void *arg);
+struct iommu_device_ser *iommu_get_device_preserved_data(struct device *dev);
struct iommu_hw_ser *iommu_get_preserved_data(u64 token, enum iommu_type_ser type);
int iommu_domain_preserve(struct iommu_domain *domain, struct iommu_domain_ser **ser);
void iommu_domain_unpreserve(struct iommu_domain *domain);
@@ -60,16 +88,38 @@ static inline void *dev_iommu_preserved_state(struct device *dev)
return NULL;
}
+/*
+ * Fallback variant: a device never has restored state here.
+ * NOTE(review): presumably the build without CONFIG_IOMMU_LIVEUPDATE;
+ * confirm against the enclosing preprocessor conditional.
+ */
+static inline void *dev_iommu_restored_state(struct device *dev)
+{
+ return NULL;
+}
+
+/*
+ * Fallback variant: always returns -1, the same value the full variant
+ * returns when there is nothing to restore.
+ * NOTE(review): presumably the build without CONFIG_IOMMU_LIVEUPDATE;
+ * confirm against the enclosing preprocessor conditional.
+ */
+static inline int dev_iommu_restore_did(struct device *dev, struct iommu_domain *domain)
+{
+ return -1;
+}
+
static inline void *iommu_domain_restored_state(struct iommu_domain *domain)
{
return NULL;
}
+/*
+ * Fallback variant: no domain is restored and @owner is not written.
+ * Note that the result is NULL, not an ERR_PTR().
+ * NOTE(review): presumably the build without CONFIG_IOMMU_LIVEUPDATE;
+ * confirm against the enclosing preprocessor conditional.
+ */
+static inline struct iommu_domain *iommu_restore_domain(struct device *dev,
+ struct iommu_device_ser *ser,
+ void **owner)
+{
+ return NULL;
+}
+
static inline int iommu_for_each_preserved_device(iommu_preserved_device_iter_fn fn, void *arg)
{
return -EOPNOTSUPP;
}
+/*
+ * Fallback variant: no preserved data exists for any device.
+ * NOTE(review): presumably the build without CONFIG_IOMMU_LIVEUPDATE;
+ * confirm against the enclosing preprocessor conditional.
+ */
+static inline struct iommu_device_ser *iommu_get_device_preserved_data(struct device *dev)
+{
+ return NULL;
+}
+
static inline struct iommu_hw_ser *iommu_get_preserved_data(u64 token, enum iommu_type_ser type)
{
return NULL;
--
2.54.0.545.g6539524ca2-goog
next prev parent reply other threads:[~2026-04-27 17:56 UTC|newest]
Thread overview: 22+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-04-27 17:56 [PATCH v2 00/16] iommu: Add live update state preservation Samiullah Khawaja
2026-04-27 17:56 ` [PATCH v2 01/16] liveupdate: luo_file: Add internal APIs for file preservation Samiullah Khawaja
2026-04-27 17:56 ` [PATCH v2 02/16] iommu: Implement IOMMU Live update FLB callbacks Samiullah Khawaja
2026-05-01 21:45 ` David Matlack
2026-04-27 17:56 ` [PATCH v2 03/16] iommu: Implement IOMMU domain preservation Samiullah Khawaja
2026-05-01 22:08 ` David Matlack
2026-05-04 18:33 ` Samiullah Khawaja
2026-04-27 17:56 ` [PATCH v2 04/16] iommu: Implement device and IOMMU HW preservation Samiullah Khawaja
2026-05-01 22:42 ` David Matlack
2026-05-04 19:06 ` Samiullah Khawaja
2026-04-27 17:56 ` [PATCH v2 05/16] iommu/pages: Add APIs to preserve/unpreserve/restore iommu pages Samiullah Khawaja
2026-04-27 17:56 ` [PATCH v2 06/16] iommupt: Implement preserve/unpreserve/restore callbacks Samiullah Khawaja
2026-04-27 17:56 ` [PATCH v2 07/16] iommu/vt-d: Implement device and iommu preserve/unpreserve ops Samiullah Khawaja
2026-04-27 17:56 ` [PATCH v2 08/16] iommu: Add APIs to get iommu and device preserved state Samiullah Khawaja
2026-04-27 17:56 ` [PATCH v2 09/16] iommu/vt-d: Restore IOMMU state and reclaimed domain ids Samiullah Khawaja
2026-04-27 17:56 ` Samiullah Khawaja [this message]
2026-04-27 17:56 ` [PATCH v2 11/16] iommu/vt-d: preserve PASID table of preserved device Samiullah Khawaja
2026-04-27 17:56 ` [PATCH v2 12/16] iommufd: Implement ioctl to mark HWPT for preservation Samiullah Khawaja
2026-04-27 17:56 ` [PATCH v2 13/16] iommufd: Persist iommu hardware pagetables for live update Samiullah Khawaja
2026-04-27 17:56 ` [PATCH v2 14/16] iommufd: Add APIs to preserve/unpreserve a vfio cdev Samiullah Khawaja
2026-04-27 17:56 ` [PATCH v2 15/16] vfio/pci: Preserve the iommufd state of the " Samiullah Khawaja
2026-04-27 17:56 ` [PATCH v2 16/16] iommufd/selftest: Add test to verify iommufd preservation Samiullah Khawaja
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260427175633.1978233-11-skhawaja@google.com \
--to=skhawaja@google.com \
--cc=ajayachandra@nvidia.com \
--cc=akpm@linux-foundation.org \
--cc=alex@shazbot.org \
--cc=baolu.lu@linux.intel.com \
--cc=chrisl@kernel.org \
--cc=dmatlack@google.com \
--cc=dwmw2@infradead.org \
--cc=iommu@lists.linux.dev \
--cc=jgg@ziepe.ca \
--cc=joro@8bytes.org \
--cc=kevin.tian@intel.com \
--cc=kvm@vger.kernel.org \
--cc=leonro@nvidia.com \
--cc=linux-kernel@vger.kernel.org \
--cc=parav@nvidia.com \
--cc=pasha.tatashin@soleen.com \
--cc=praan@google.com \
--cc=pratyush@kernel.org \
--cc=robin.murphy@arm.com \
--cc=saeedm@nvidia.com \
--cc=shuah@kernel.org \
--cc=vipinsh@google.com \
--cc=will@kernel.org \
--cc=witu@nvidia.com \
--cc=zhuyifei@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox