From: Baolu Lu <baolu.lu@linux.intel.com>
To: Samiullah Khawaja <skhawaja@google.com>,
David Woodhouse <dwmw2@infradead.org>,
Joerg Roedel <joro@8bytes.org>, Will Deacon <will@kernel.org>,
Jason Gunthorpe <jgg@ziepe.ca>
Cc: Robin Murphy <robin.murphy@arm.com>,
Kevin Tian <kevin.tian@intel.com>,
Alex Williamson <alex@shazbot.org>, Shuah Khan <shuah@kernel.org>,
iommu@lists.linux.dev, linux-kernel@vger.kernel.org,
kvm@vger.kernel.org, Pratyush Yadav <pratyush@kernel.org>,
Pasha Tatashin <pasha.tatashin@soleen.com>,
David Matlack <dmatlack@google.com>,
Andrew Morton <akpm@linux-foundation.org>,
Pranjal Shrivastava <praan@google.com>,
Vipin Sharma <vipinsh@google.com>
Subject: Re: [PATCH v3 07/18] iommu/vt-d: Implement device and iommu preserve/unpreserve ops
Date: Mon, 22 Jun 2026 09:50:36 +0800 [thread overview]
Message-ID: <f3e2fef9-4f8c-4032-8d7b-007f47b3fb61@linux.intel.com> (raw)
In-Reply-To: <20260614233728.2212104-8-skhawaja@google.com>
On 6/15/26 07:37, Samiullah Khawaja wrote:
> Add implementation of the device and iommu presevation in a separate
> file. Also set the device and iommu preserve/unpreserve ops in the
> struct iommu_ops.
>
> Signed-off-by: Samiullah Khawaja <skhawaja@google.com>
> ---
> MAINTAINERS | 8 ++
> drivers/iommu/intel/Makefile | 1 +
> drivers/iommu/intel/iommu.c | 8 +-
> drivers/iommu/intel/iommu.h | 28 +++++
> drivers/iommu/intel/liveupdate.c | 175 +++++++++++++++++++++++++++++++
> include/linux/kho/abi/iommu.h | 21 ++++
> 6 files changed, 239 insertions(+), 2 deletions(-)
> create mode 100644 drivers/iommu/intel/liveupdate.c
>
> diff --git a/MAINTAINERS b/MAINTAINERS
> index cef90bb50781..4dfb56962426 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -13002,6 +13002,14 @@ S: Supported
> T: git git://git.kernel.org/pub/scm/linux/kernel/git/iommu/linux.git
> F: drivers/iommu/intel/
>
> +INTEL IOMMU LIVEUPDATE (VT-d)
> +M: Samiullah Khawaja <skhawaja@google.com>
> +M: Lu Baolu <baolu.lu@linux.intel.com>
> +L: iommu@lists.linux.dev
> +S: Maintained
> +T: git git://git.kernel.org/pub/scm/linux/kernel/git/iommu/linux.git
> +F: drivers/iommu/intel/liveupdate.c
> +
> INTEL IPU3 CSI-2 CIO2 DRIVER
> M: Yong Zhi <yong.zhi@intel.com>
> M: Sakari Ailus <sakari.ailus@linux.intel.com>
> diff --git a/drivers/iommu/intel/Makefile b/drivers/iommu/intel/Makefile
> index ada651c4a01b..d38fc101bc35 100644
> --- a/drivers/iommu/intel/Makefile
> +++ b/drivers/iommu/intel/Makefile
> @@ -6,3 +6,4 @@ obj-$(CONFIG_INTEL_IOMMU_DEBUGFS) += debugfs.o
> obj-$(CONFIG_INTEL_IOMMU_SVM) += svm.o
> obj-$(CONFIG_IRQ_REMAP) += irq_remapping.o
> obj-$(CONFIG_INTEL_IOMMU_PERF_EVENTS) += perfmon.o
> +obj-$(CONFIG_IOMMU_LIVEUPDATE) += liveupdate.o
> diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
> index c3d18cd77d2f..715b538e7efe 100644
> --- a/drivers/iommu/intel/iommu.c
> +++ b/drivers/iommu/intel/iommu.c
> @@ -16,6 +16,7 @@
> #include <linux/crash_dump.h>
> #include <linux/dma-direct.h>
> #include <linux/dmi.h>
> +#include <linux/iommu-liveupdate.h>
> #include <linux/memory.h>
> #include <linux/pci.h>
> #include <linux/pci-ats.h>
> @@ -60,8 +61,6 @@ static int force_on = 0;
> static int intel_iommu_tboot_noforce;
> static int no_platform_optin;
>
> -#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
> -
> /*
> * Take a root_entry and return the Lower Context Table Pointer (LCTP)
> * if marked present.
> @@ -3926,6 +3925,11 @@ const struct iommu_ops intel_iommu_ops = {
> .is_attach_deferred = intel_iommu_is_attach_deferred,
> .def_domain_type = device_def_domain_type,
> .page_response = intel_iommu_page_response,
> +#ifdef CONFIG_IOMMU_LIVEUPDATE
> + .preserve_device = intel_iommu_preserve_device,
> + .preserve = intel_iommu_preserve,
> + .unpreserve = intel_iommu_unpreserve,
> +#endif
Any reason why an unpreserve_device callback is missing here?
> };
>
> static void quirk_iommu_igfx(struct pci_dev *dev)
> diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
> index ef145560aa98..5e0bc17e76bf 100644
> --- a/drivers/iommu/intel/iommu.h
> +++ b/drivers/iommu/intel/iommu.h
> @@ -552,6 +552,8 @@ struct root_entry {
> u64 hi;
> };
>
> +#define ROOT_ENTRY_NR (VTD_PAGE_SIZE / sizeof(struct root_entry))
> +
> /*
> * low 64 bits:
> * 0: present
> @@ -1284,6 +1286,32 @@ static inline int iopf_for_domain_replace(struct iommu_domain *new,
> return 0;
> }
>
> +#ifdef CONFIG_IOMMU_LIVEUPDATE
> +int intel_iommu_preserve_device(struct device *dev,
> + struct iommu_device_ser *device_ser);
> +int intel_iommu_preserve(struct iommu_device *iommu,
> + struct iommu_hw_ser *iommu_ser);
> +void intel_iommu_unpreserve(struct iommu_device *iommu,
> + struct iommu_hw_ser *iommu_ser);
> +#else
> +static inline int intel_iommu_preserve_device(struct device *dev,
> + struct iommu_device_ser *device_ser)
> +{
> + return -EOPNOTSUPP;
> +}
> +
> +static inline int intel_iommu_preserve(struct iommu_device *iommu,
> + struct iommu_hw_ser *iommu_ser)
> +{
> + return -EOPNOTSUPP;
> +}
> +
> +static inline void intel_iommu_unpreserve(struct iommu_device *iommu,
> + struct iommu_hw_ser *iommu_ser)
> +{
> +}
> +#endif
> +
> #ifdef CONFIG_INTEL_IOMMU_SVM
> void intel_svm_check(struct intel_iommu *iommu);
> struct iommu_domain *intel_svm_domain_alloc(struct device *dev,
> diff --git a/drivers/iommu/intel/liveupdate.c b/drivers/iommu/intel/liveupdate.c
> new file mode 100644
> index 000000000000..b5613e4ad43a
> --- /dev/null
> +++ b/drivers/iommu/intel/liveupdate.c
> @@ -0,0 +1,175 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +
> +/*
> + * Copyright (C) 2026, Google LLC
> + * Author: Samiullah Khawaja <skhawaja@google.com>
> + */
> +
> +#define pr_fmt(fmt) "DMAR: liveupdate: " fmt
> +
> +#include <linux/kexec_handover.h>
> +#include <linux/liveupdate.h>
> +#include <linux/iommu-liveupdate.h>
> +#include <linux/module.h>
> +#include <linux/pci.h>
> +
> +#include "iommu.h"
> +#include "../iommu-pages.h"
> +
> +/* 2 tables per bus in scalable mode with upper table at odd bit */
> +#define CONTEXT_TABLE_PRESERVED_BIT(bus, devfn) ((bus << 1) + (devfn >> 7))
> +static bool is_context_table_preserved(struct intel_iommu *iommu,
> + struct iommu_hw_ser *ser,
> + u8 bus, u8 devfn)
> +{
> + return test_bit(CONTEXT_TABLE_PRESERVED_BIT(bus, devfn),
> + (unsigned long *)&ser->intel.context_tables_bitmap[0]);
> +}
> +
> +static void unpreserve_context_table(struct intel_iommu *iommu,
> + struct iommu_hw_ser *ser,
> + u8 bus, u8 devfn)
> +{
> + struct context_entry *context;
> +
> + spin_lock(&iommu->lock);
> + context = iommu_context_addr(iommu, bus, devfn, 0);
> + spin_unlock(&iommu->lock);
The spinlock is dropped immediately after reading the address pointer.
If this is guaranteed to be safe, please add a comment to explain why a
UAF or race is avoided here. Otherwise, the locking scope needs to be
extended to protect both the pointer lookup and its use.
> + if (context && is_context_table_preserved(iommu, ser, bus, devfn)) {
> + iommu_unpreserve_pages(context);
> + clear_bit(CONTEXT_TABLE_PRESERVED_BIT(bus, devfn),
> + (unsigned long *)&ser->intel.context_tables_bitmap[0]);
> + }
> +}
> +
> +static int preserve_context_table(struct intel_iommu *iommu,
> + struct iommu_hw_ser *ser,
> + u8 bus, u8 devfn)
> +{
> + struct context_entry *context;
> + int ret;
> +
> + spin_lock(&iommu->lock);
> + context = iommu_context_addr(iommu, bus, devfn, 0);
> + spin_unlock(&iommu->lock);
Ditto.
> + if (context && !is_context_table_preserved(iommu, ser, bus, devfn)) {
> + ret = iommu_preserve_pages(context);
> + if (ret)
> + return ret;
> +
> + set_bit(CONTEXT_TABLE_PRESERVED_BIT(bus, devfn),
> + (unsigned long *)&ser->intel.context_tables_bitmap[0]);
> + }
> +
> + return 0;
> +}
> +
> +static void unpreserve_iommu_context_tables(struct intel_iommu *iommu,
> + struct iommu_hw_ser *ser)
> +{
> + int i;
> +
> + for (i = 0; i < ROOT_ENTRY_NR; i++) {
> + unpreserve_context_table(iommu, ser, i, 0);
> +
> + if (!sm_supported(iommu))
> + continue;
> +
> + unpreserve_context_table(iommu, ser, i, 0x80);
> + }
> +}
> +
> +static int preserve_iommu_context_tables(struct device_domain_info *info)
> +{
> + struct iommu_hw_ser *iommu_ser;
> + struct intel_iommu *iommu;
> + int ret;
> + int i;
> +
> + /* IOMMU for this device should already preserved.*/
> + iommu = info->iommu;
> + iommu_ser = iommu_preserved_state(&iommu->iommu);
> + if (!iommu_ser)
> + return -EINVAL;
> +
> + /*
> + * We could do preservation of context tables only for the bus of this
> + * device, but these devices can have PCI aliases, so context tables for
> + * those will also require preservation. Also unpreserve would require
> + * some kind of refcounting where the context table will only be
> + * unpreserved when the last device associated with it is unpreserved.
> + *
> + * This introduces unnecessary complication with minimum benefits as the
> + * unpreserved context tables will probably be recreated by the next
> + * kernel as these are all active devices. We follow simpler approach by
> + * just preserving the currently active context tables.
> + */
> + for (i = 0; i < ROOT_ENTRY_NR; i++) {
> + ret = preserve_context_table(iommu, iommu_ser, i, 0);
> + if (ret)
> + return ret;
> +
> + if (!sm_supported(iommu))
> + continue;
> +
> + ret = preserve_context_table(iommu, iommu_ser, i, 0x80);
> + if (ret)
> + return ret;
> + }
> +
> + return 0;
> +}
> +
> +int intel_iommu_preserve_device(struct device *dev,
> + struct iommu_device_ser *device_ser)
> +{
> + struct device_domain_info *info = dev_iommu_priv_get(dev);
> + int ret;
> +
> + if (!dev_is_pci(dev)) {
> + dev_err(dev, "Cannot preserve non-PCI device\n");
> + return -EOPNOTSUPP;
> + }
> +
> + if (!info || !info->domain)
> + return -EINVAL;
> +
> + ret = preserve_iommu_context_tables(info);
> + if (ret)
> + return ret;
> +
> + device_ser->domain_iommu_ser.attachment_id = domain_id_iommu(info->domain,
> + info->iommu);
> + return 0;
> +}
> +
> +int intel_iommu_preserve(struct iommu_device *iommu_dev,
> + struct iommu_hw_ser *ser)
> +{
> + struct intel_iommu *iommu;
> + int ret;
> +
> + iommu = container_of(iommu_dev, struct intel_iommu, iommu);
> +
> + ret = iommu_preserve_pages(iommu->root_entry);
> + if (ret)
> + return ret;
> +
> + ser->intel.phys_addr = iommu->reg_phys;
> + ser->intel.root_table = __pa(iommu->root_entry);
> + ser->type = IOMMU_INTEL;
> + ser->token = ser->intel.phys_addr;
> +
> + return 0;
> +}
> +
> +void intel_iommu_unpreserve(struct iommu_device *iommu_dev,
> + struct iommu_hw_ser *ser)
> +{
> + struct intel_iommu *iommu;
> +
> + iommu = container_of(iommu_dev, struct intel_iommu, iommu);
> +
> + unpreserve_iommu_context_tables(iommu, ser);
> + iommu_unpreserve_pages(iommu->root_entry);
> +}
> diff --git a/include/linux/kho/abi/iommu.h b/include/linux/kho/abi/iommu.h
> index d06f251a2df4..ad760c497e13 100644
> --- a/include/linux/kho/abi/iommu.h
> +++ b/include/linux/kho/abi/iommu.h
> @@ -80,6 +80,7 @@
> */
> enum iommu_type_ser {
> IOMMU_INVALID,
> + IOMMU_INTEL,
> };
>
> #define IOMMU_SER_FLAG_DELETED (1 << 0)
> @@ -140,16 +141,36 @@ struct iommu_device_ser {
> struct iommu_dev_map_ser domain_iommu_ser;
> } __packed;
>
> +/**
> + * struct iommu_intel_ser - Serialized state of an Intel IOMMU instance
> + * @restored: Whether IOMMU state is restored
> + * @phys_addr: Physical address of the IOMMU register base
> + * @root_table: Physical address of the root entry table
> + * @context_tables_bitmap: Bitmap representing the context tables that are
> + * preserved.
> + */
> +struct iommu_intel_ser {
> + u8 restored;
> + u8 padding[7];
> + u64 phys_addr;
> + u64 root_table;
> + u64 context_tables_bitmap[8]; /* Tracks upto 512 context tables */
To avoid open-coded magic numbers, how about something like,
#define VTD_PRESERVED_BITMAP_LONGS DIV_ROUND_UP(512, BITS_PER_LONG_LONG)
u64 context_tables_bitmap[VTD_PRESERVED_BITMAP_LONGS];
?
> +};
> +
> /**
> * struct iommu_hw_ser - Serialized state of an IOMMU instance
> * @hdr: Common object header
> * @token: Unique token for the IOMMU
> * @type: IOMMU type serialized state belongs to
> + * @intel: Intel specific serialization data
> */
> struct iommu_hw_ser {
> struct iommu_hdr_ser hdr;
> u64 token;
> u64 type;
> + union {
> + struct iommu_intel_ser intel;
> + };
> } __packed;
>
> /**
Thanks,
baolu
next prev parent reply other threads:[~2026-06-22 1:51 UTC|newest]
Thread overview: 33+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-06-14 23:37 [PATCH v3 00/18] iommu: Add live update state preservation Samiullah Khawaja
2026-06-14 23:37 ` [PATCH v3 01/18] memfd: export memfd_get_seals() Samiullah Khawaja
2026-06-15 5:14 ` Ankit Soni
2026-06-15 11:45 ` Pratyush Yadav
2026-06-14 23:37 ` [PATCH v3 02/18] iommu: Implement IOMMU Live update FLB callbacks Samiullah Khawaja
2026-06-14 23:37 ` [PATCH v3 03/18] iommu/pages: Add APIs to preserve/unpreserve/restore iommu pages Samiullah Khawaja
2026-06-14 23:37 ` [PATCH v3 04/18] iommupt: Implement preserve/unpreserve/restore callbacks Samiullah Khawaja
2026-06-14 23:37 ` [PATCH v3 05/18] iommu: Implement IOMMU domain preservation Samiullah Khawaja
2026-06-14 23:37 ` [PATCH v3 06/18] iommu: Implement device and IOMMU HW preservation Samiullah Khawaja
2026-06-14 23:37 ` [PATCH v3 07/18] iommu/vt-d: Implement device and iommu preserve/unpreserve ops Samiullah Khawaja
2026-06-22 1:50 ` Baolu Lu [this message]
2026-06-22 19:19 ` Samiullah Khawaja
2026-06-14 23:37 ` [PATCH v3 08/18] iommu/vt-d: clear unpreserved context entries during shutdown Samiullah Khawaja
2026-06-22 2:47 ` Baolu Lu
2026-06-22 22:56 ` Samiullah Khawaja
2026-06-14 23:37 ` [PATCH v3 09/18] iommu: Add APIs to get iommu and device preserved state Samiullah Khawaja
2026-06-22 3:10 ` Baolu Lu
2026-06-22 23:27 ` Samiullah Khawaja
2026-06-14 23:37 ` [PATCH v3 10/18] iommu/vt-d: Restore IOMMU state and reclaimed domain ids Samiullah Khawaja
2026-06-22 5:14 ` Baolu Lu
2026-06-22 23:30 ` Samiullah Khawaja
2026-06-14 23:37 ` [PATCH v3 11/18] iommu: Restore and reattach preserved domains to devices Samiullah Khawaja
2026-06-14 23:37 ` [PATCH v3 12/18] iommu/vt-d: Handle reattach of the restored domain Samiullah Khawaja
2026-06-22 5:44 ` Baolu Lu
2026-06-23 0:26 ` Samiullah Khawaja
2026-06-14 23:37 ` [PATCH v3 13/18] iommu/vt-d: preserve PASID table of preserved device Samiullah Khawaja
2026-06-22 6:01 ` Baolu Lu
2026-06-23 0:36 ` Samiullah Khawaja
2026-06-14 23:37 ` [PATCH v3 14/18] iommufd: Implement ioctl to mark HWPT for preservation Samiullah Khawaja
2026-06-14 23:37 ` [PATCH v3 15/18] iommufd: Persist iommu hardware pagetables for live update Samiullah Khawaja
2026-06-14 23:37 ` [PATCH v3 16/18] iommufd: Add APIs to preserve/unpreserve a vfio cdev Samiullah Khawaja
2026-06-14 23:37 ` [PATCH v3 17/18] vfio/pci: Preserve the iommufd state of the " Samiullah Khawaja
2026-06-14 23:37 ` [PATCH v3 18/18] iommufd/selftest: Add test to verify iommufd preservation Samiullah Khawaja
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=f3e2fef9-4f8c-4032-8d7b-007f47b3fb61@linux.intel.com \
--to=baolu.lu@linux.intel.com \
--cc=akpm@linux-foundation.org \
--cc=alex@shazbot.org \
--cc=dmatlack@google.com \
--cc=dwmw2@infradead.org \
--cc=iommu@lists.linux.dev \
--cc=jgg@ziepe.ca \
--cc=joro@8bytes.org \
--cc=kevin.tian@intel.com \
--cc=kvm@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=pasha.tatashin@soleen.com \
--cc=praan@google.com \
--cc=pratyush@kernel.org \
--cc=robin.murphy@arm.com \
--cc=shuah@kernel.org \
--cc=skhawaja@google.com \
--cc=vipinsh@google.com \
--cc=will@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox