From: James Gowans <jgowans@amazon.com>
To: <linux-kernel@vger.kernel.org>
Cc: "Jason Gunthorpe" <jgg@ziepe.ca>,
"Kevin Tian" <kevin.tian@intel.com>,
"Joerg Roedel" <joro@8bytes.org>,
"Krzysztof Wilczyński" <kw@linux.com>,
"Will Deacon" <will@kernel.org>,
"Robin Murphy" <robin.murphy@arm.com>,
"Mike Rapoport" <rppt@kernel.org>,
"Madhavan T. Venkataraman" <madvenka@linux.microsoft.com>,
iommu@lists.linux.dev, "Sean Christopherson" <seanjc@google.com>,
"Paolo Bonzini" <pbonzini@redhat.com>,
kvm@vger.kernel.org, "David Woodhouse" <dwmw2@infradead.org>,
"Lu Baolu" <baolu.lu@linux.intel.com>,
"Alexander Graf" <graf@amazon.de>,
anthony.yznaga@oracle.com, steven.sistare@oracle.com,
nh-open-source@amazon.com, "Saenz Julienne,
Nicolas" <nsaenz@amazon.es>
Subject: [RFC PATCH 09/13] intel-iommu: Serialise dmar_domain on KHO activate
Date: Mon, 16 Sep 2024 13:30:58 +0200 [thread overview]
Message-ID: <20240916113102.710522-10-jgowans@amazon.com> (raw)
In-Reply-To: <20240916113102.710522-1-jgowans@amazon.com>
Add logic to iterate through persistent domains, add the page table
pages to KHO persistent memory pages. Also serialise some metadata about
the domains and attached PCI devices.
By adding the page table pages to the `mem` attribute on the KHO object
these pages will be carved out of system memory early in boot by KHO,
guaranteeing that they will not be used for any other purpose by the new
kernel. This persists the page tables across kexec.
---
drivers/iommu/intel/iommu.c | 9 ----
drivers/iommu/intel/iommu.h | 10 ++++
drivers/iommu/intel/serialise.c | 92 ++++++++++++++++++++++++++++++++-
3 files changed, 101 insertions(+), 10 deletions(-)
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 7e77b787148a..0a2118a3b7c4 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -46,15 +46,6 @@
#define DEFAULT_DOMAIN_ADDRESS_WIDTH 57
-#define __DOMAIN_MAX_PFN(gaw) ((((uint64_t)1) << ((gaw) - VTD_PAGE_SHIFT)) - 1)
-#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << (gaw)) - 1)
-
-/* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
- to match. That way, we can use 'unsigned long' for PFNs with impunity. */
-#define DOMAIN_MAX_PFN(gaw) ((unsigned long) min_t(uint64_t, \
- __DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
-#define DOMAIN_MAX_ADDR(gaw) (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
-
static void __init check_tylersburg_isoch(void);
static int rwbf_quirk;
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index 7866342f0909..cd932a97a9bc 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -38,6 +38,16 @@
#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
+#define __DOMAIN_MAX_PFN(gaw) ((((uint64_t)1) << ((gaw) - VTD_PAGE_SHIFT)) - 1)
+#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << (gaw)) - 1)
+
+/* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
+ to match. That way, we can use 'unsigned long' for PFNs with impunity. */
+#define DOMAIN_MAX_PFN(gaw) ((unsigned long) min_t(uint64_t, \
+ __DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
+#define DOMAIN_MAX_ADDR(gaw) (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
+
+
#define VTD_STRIDE_SHIFT (9)
#define VTD_STRIDE_MASK (((u64)-1) << VTD_STRIDE_SHIFT)
diff --git a/drivers/iommu/intel/serialise.c b/drivers/iommu/intel/serialise.c
index 08a548b33703..bc755e51732b 100644
--- a/drivers/iommu/intel/serialise.c
+++ b/drivers/iommu/intel/serialise.c
@@ -2,9 +2,99 @@
#include "iommu.h"
+/*
+ * Serialised format:
+ * /intel-iommu
+ * compatible = str
+ * domains = {
+ * persistent-id = {
+ * mem = [ ... ] // page table pages
+ * agaw = i32
+ * pgd = u64
+ * devices = {
+ * id = {
+ * u8 bus;
+ * u8 devfn
+ * },
+ * ...
+ * }
+ * }
+ * }
+ */
+
+/*
+ * Walk one page-table page at @level and append the physical address of
+ * every child page-table page it references to the kho_mem array,
+ * advancing the caller's pointer.  @pte points at the first entry of the
+ * page being walked (for the top-level call, domain->pgd).
+ *
+ * Derived from dma_pte_list_pagetables().
+ */
+static void save_pte_pages(struct dmar_domain *domain, int level,
+			   struct dma_pte *pte, struct kho_mem **kho_mem)
+{
+	/* Level-1 entries reference data pages, not page-table pages. */
+	if (level < 2)
+		return;
+
+	do {
+		/*
+		 * Superpage entries also reference data, so only record
+		 * and descend through regular table entries.
+		 */
+		if (dma_pte_present(pte) && !dma_pte_superpage(pte)) {
+			struct page *pg;
+
+			pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
+			(*kho_mem)->addr = dma_pte_addr(pte);
+			(*kho_mem)->len = PAGE_SIZE;
+			(*kho_mem)++;
+			save_pte_pages(domain, level - 1, page_address(pg),
+				       kho_mem);
+		}
+		pte++;
+	} while (!first_pte_in_page(pte));
+}
+
static int serialise_domain(void *fdt, struct iommu_domain *domain)
{
- return 0;
+ struct dmar_domain *dmar_domain = to_dmar_domain(domain);
+ /*
+ * kho_mems_start points to the original allocated array; kho_mems
+ * is incremented by the callee. Keep both to know how many were added.
+ */
+ struct kho_mem *kho_mems, *kho_mems_start;
+ struct device_domain_info *info;
+ int err = 0;
+ char name[24];
+ int device_idx = 0;
+ phys_addr_t pgd;
+
+ /*
+ * Assume just one page worth of kho_mem objects is enough.
+ * Better would be to keep track of number of allocated pages in the domain.
+ * */
+ kho_mems_start = kho_mems = kzalloc(PAGE_SIZE, GFP_KERNEL);
+
+ save_pte_pages(dmar_domain, agaw_to_level(dmar_domain->agaw),
+ dmar_domain->pgd, &kho_mems);
+
+ snprintf(name, sizeof(name), "%lu", domain->persistent_id);
+ err |= fdt_begin_node(fdt, name);
+ err |= fdt_property(fdt, "mem", kho_mems_start,
+ sizeof(struct kho_mem) * (kho_mems - kho_mems_start));
+ err |= fdt_property(fdt, "persistent_id", &domain->persistent_id,
+ sizeof(domain->persistent_id));
+ pgd = virt_to_phys(dmar_domain->pgd);
+ err |= fdt_property(fdt, "pgd", &pgd, sizeof(pgd));
+ err |= fdt_property(fdt, "agaw", &dmar_domain->agaw,
+ sizeof(dmar_domain->agaw));
+
+ err |= fdt_begin_node(fdt, "devices");
+ list_for_each_entry(info, &dmar_domain->devices, link) {
+ snprintf(name, sizeof(name), "%i", device_idx++);
+ err |= fdt_begin_node(fdt, name);
+ err |= fdt_property(fdt, "bus", &info->bus, sizeof(info->bus));
+ err |= fdt_property(fdt, "devfn", &info->devfn, sizeof(info->devfn));
+ err |= fdt_end_node(fdt); /* device_idx */
+ }
+ err |= fdt_end_node(fdt); /* devices */
+ err |= fdt_end_node(fdt); /* domain->persistent_id */
+
+ return err;
}
int intel_iommu_serialise_kho(struct notifier_block *self, unsigned long cmd,
--
2.34.1
next prev parent reply other threads:[~2024-09-16 11:34 UTC|newest]
Thread overview: 33+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-09-16 11:30 [RFC PATCH 00/13] Support iommu(fd) persistence for live update James Gowans
2024-09-16 11:30 ` [RFC PATCH 01/13] iommufd: Support marking and tracking persistent iommufds James Gowans
2024-09-16 11:30 ` [RFC PATCH 02/13] iommufd: Add plumbing for KHO (de)serialise James Gowans
2024-09-16 11:30 ` [RFC PATCH 03/13] iommu/intel: zap context table entries on kexec James Gowans
2024-10-03 13:27 ` Jason Gunthorpe
2024-09-16 11:30 ` [RFC PATCH 04/13] iommu: Support marking domains as persistent on alloc James Gowans
2024-09-16 11:30 ` [RFC PATCH 05/13] iommufd: Serialise persisted iommufds and ioas James Gowans
2024-10-02 18:55 ` Jason Gunthorpe
2024-10-07 8:39 ` Gowans, James
2024-10-07 8:47 ` David Woodhouse
2024-10-07 8:57 ` Gowans, James
2024-10-07 15:01 ` Jason Gunthorpe
2024-10-09 11:44 ` Gowans, James
2024-10-09 12:28 ` Jason Gunthorpe
2024-10-10 15:12 ` Gowans, James
2024-10-10 15:32 ` Jason Gunthorpe
2024-10-07 15:11 ` Jason Gunthorpe
2024-10-07 15:16 ` Jason Gunthorpe
2024-10-16 22:20 ` Jacob Pan
2024-10-28 16:03 ` Jacob Pan
2024-11-02 10:22 ` Gowans, James
2024-11-04 13:00 ` Jason Gunthorpe
2024-11-06 19:18 ` Jacob Pan
2024-09-16 11:30 ` [RFC PATCH 06/13] iommufd: Expose persistent iommufd IDs in sysfs James Gowans
2024-09-16 11:30 ` [RFC PATCH 07/13] iommufd: Re-hydrate a usable iommufd ctx from sysfs James Gowans
2024-09-16 11:30 ` [RFC PATCH 08/13] intel-iommu: Add serialise and deserialise boilerplate James Gowans
2024-09-16 11:30 ` James Gowans [this message]
2024-09-16 11:30 ` [RFC PATCH 10/13] intel-iommu: Re-hydrate persistent domains after kexec James Gowans
2024-09-16 11:31 ` [RFC PATCH 11/13] iommu: Add callback to restore persisted iommu_domain James Gowans
2024-10-03 13:33 ` Jason Gunthorpe
2024-09-16 11:31 ` [RFC PATCH 12/13] iommufd, guestmemfs: Ensure persistent file used for persistent DMA James Gowans
2024-10-03 13:36 ` Jason Gunthorpe
2024-09-16 11:31 ` [RFC PATCH 13/13] iommufd, guestmemfs: Pin files when mapped " James Gowans
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240916113102.710522-10-jgowans@amazon.com \
--to=jgowans@amazon.com \
--cc=anthony.yznaga@oracle.com \
--cc=baolu.lu@linux.intel.com \
--cc=dwmw2@infradead.org \
--cc=graf@amazon.de \
--cc=iommu@lists.linux.dev \
--cc=jgg@ziepe.ca \
--cc=joro@8bytes.org \
--cc=kevin.tian@intel.com \
--cc=kvm@vger.kernel.org \
--cc=kw@linux.com \
--cc=linux-kernel@vger.kernel.org \
--cc=madvenka@linux.microsoft.com \
--cc=nh-open-source@amazon.com \
--cc=nsaenz@amazon.es \
--cc=pbonzini@redhat.com \
--cc=robin.murphy@arm.com \
--cc=rppt@kernel.org \
--cc=seanjc@google.com \
--cc=steven.sistare@oracle.com \
--cc=will@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox