All of lore.kernel.org
 help / color / mirror / Atom feed
From: Mostafa Saleh <smostafa@google.com>
To: linux-arm-kernel@lists.infradead.org,
	linux-kernel@vger.kernel.org,  kvmarm@lists.linux.dev,
	iommu@lists.linux.dev
Cc: catalin.marinas@arm.com, will@kernel.org, maz@kernel.org,
	 oliver.upton@linux.dev, joey.gouly@arm.com,
	suzuki.poulose@arm.com,  yuzenghui@huawei.com, joro@8bytes.org,
	jean-philippe@linaro.org, jgg@ziepe.ca,  mark.rutland@arm.com,
	qperret@google.com, tabba@google.com,  vdonnefort@google.com,
	sebastianene@google.com, keirf@google.com,
	 Mostafa Saleh <smostafa@google.com>
Subject: [PATCH v6 08/25] KVM: arm64: iommu: Shadow host stage-2 page table
Date: Fri,  1 May 2026 11:19:10 +0000	[thread overview]
Message-ID: <20260501111928.259252-9-smostafa@google.com> (raw)
In-Reply-To: <20260501111928.259252-1-smostafa@google.com>

Create a page-table for the IOMMU that shadows the host CPU stage-2
to establish DMA isolation.

An initial snapshot is created after the driver init; then, on every
permission change, a callback is invoked for the IOMMU driver to
update its page table.

There are 3 different ways to add the callback:
1) In the high-level memory transitions (__pkvm_host_donate_hyp(),
  __pkvm_host_donate_guest(), ...)

2) In Lower level functions covering all transitions
  - host_stage2_set_owner_metadata_locked() which covers:
   - __pkvm_host_donate_hyp()
   - __pkvm_host_donate_guest()
   - __pkvm_guest_unshare_host()
  - host_stage2_set_owner_locked() only for ID_HOST which covers:
   - __pkvm_hyp_donate_host()
   - __pkvm_host_force_reclaim_page_guest()
   - __pkvm_host_reclaim_page_guest()
   - __pkvm_guest_share_host()

3) In the lowest level function __host_update_page_state(), which
   requires only one callback. However, in that case the page state
   alone is not enough, as we might also need to know the old state.

Option #3 was implemented here.

For some cases, an SMMUv3 may be able to share the same page-table
used with the host CPU stage-2 directly.

However, sharing is too restrictive: it requires changes to the core
hypervisor page-table code, and it would also require the hypervisor to
handle IOMMU page-faults. This can be added later as an optimization for
SMMUv3.

Signed-off-by: Mostafa Saleh <smostafa@google.com>
---
 arch/arm64/kvm/hyp/include/nvhe/iommu.h       |   4 +
 arch/arm64/kvm/hyp/include/nvhe/mem_protect.h |   2 +
 arch/arm64/kvm/hyp/nvhe/iommu/iommu.c         | 108 +++++++++++++++++-
 arch/arm64/kvm/hyp/nvhe/mem_protect.c         |  35 ++++++
 4 files changed, 146 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/kvm/hyp/include/nvhe/iommu.h b/arch/arm64/kvm/hyp/include/nvhe/iommu.h
index 1ac70cc28a9e..6277d845cdcf 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/iommu.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/iommu.h
@@ -3,11 +3,15 @@
 #define __ARM64_KVM_NVHE_IOMMU_H__
 
 #include <asm/kvm_host.h>
+#include <asm/kvm_pgtable.h>
 
 struct kvm_iommu_ops {
 	int (*init)(void);
+	int (*host_stage2_idmap)(phys_addr_t start, phys_addr_t end, int prot);
 };
 
 int kvm_iommu_init(void);
 
+int kvm_iommu_host_stage2_idmap(phys_addr_t start, phys_addr_t end,
+				enum kvm_pgtable_prot prot);
 #endif /* __ARM64_KVM_NVHE_IOMMU_H__ */
diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
index ff440204d2c7..f7faecc3b70a 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
@@ -54,6 +54,8 @@ int __pkvm_host_test_clear_young_guest(u64 gfn, u64 nr_pages, bool mkold, struct
 int __pkvm_host_mkyoung_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu);
 
 bool addr_is_memory(phys_addr_t phys);
+u64 find_mem_range_from(u64 start, bool *is_memory);
+
 int host_stage2_idmap_locked(phys_addr_t addr, u64 size, enum kvm_pgtable_prot prot);
 int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id);
 int kvm_host_prepare_stage2(void *pgt_pool_base);
diff --git a/arch/arm64/kvm/hyp/nvhe/iommu/iommu.c b/arch/arm64/kvm/hyp/nvhe/iommu/iommu.c
index 406c8fb9b3b9..1db52bd87c38 100644
--- a/arch/arm64/kvm/hyp/nvhe/iommu/iommu.c
+++ b/arch/arm64/kvm/hyp/nvhe/iommu/iommu.c
@@ -4,17 +4,119 @@
  *
  * Copyright (C) 2022 Linaro Ltd.
  */
+#include <linux/iommu.h>
+
 #include <nvhe/iommu.h>
+#include <nvhe/mem_protect.h>
+#include <nvhe/spinlock.h>
 
 /* Only one set of ops supported */
 struct kvm_iommu_ops *kvm_iommu_ops;
 
+/* Protected by host_mmu.lock */
+static bool kvm_idmap_initialized;
+
+static inline int pkvm_to_iommu_prot(enum kvm_pgtable_prot prot)
+{
+	int iommu_prot = 0;
+
+	if (prot & KVM_PGTABLE_PROT_R)
+		iommu_prot |= IOMMU_READ;
+	if (prot & KVM_PGTABLE_PROT_W)
+		iommu_prot |= IOMMU_WRITE;
+
+	/* We don't understand that, might be dangerous. */
+	WARN_ON(prot & ~PKVM_HOST_MEM_PROT);
+	return iommu_prot;
+}
+
+static int __snapshot_host_stage2(const struct kvm_pgtable_visit_ctx *ctx,
+				  enum kvm_pgtable_walk_flags visit)
+{
+	u64 start = ctx->addr;
+	u64 end = start + kvm_granule_size(ctx->level);
+	kvm_pte_t pte = *ctx->ptep;
+	bool is_memory;
+	u64 region_end;
+	int prot;
+	int ret;
+
+	/*
+	 * Keep annotated PTEs unmapped, and map everything else, even lazily
+	 * mapped MMIO with pte == 0, as the IOMMU can't handle page faults.
+	 * That maps the whole address space, which can be large, but doesn't
+	 * use much memory as it is mostly large blocks (1GB with 4KB pages).
+	 */
+	if (pte && !kvm_pte_valid(pte))
+		return 0;
+
+	if (kvm_pte_valid(pte)) {
+		prot = pkvm_to_iommu_prot(kvm_pgtable_stage2_pte_prot(pte));
+		/* If the range is mapped in a single PTE, it must be the same type.*/
+		if (!addr_is_memory(start))
+			prot |= IOMMU_MMIO;
+
+		return kvm_iommu_ops->host_stage2_idmap(start, end, prot);
+	}
+
+	/* In case of invalid PTE, we need to figure out which part of it is MMIO */
+	do {
+		prot = IOMMU_READ | IOMMU_WRITE;
+		region_end = find_mem_range_from(start, &is_memory);
+		region_end = min(end, region_end);
+		if (!is_memory)
+			prot |= IOMMU_MMIO;
+
+		ret = kvm_iommu_ops->host_stage2_idmap(start, region_end, prot);
+		if (ret)
+			return ret;
+
+		start = region_end;
+	} while (start < end);
+
+	return 0;
+}
+
+static int kvm_iommu_snapshot_host_stage2(void)
+{
+	int ret;
+	struct kvm_pgtable_walker walker = {
+		.cb	= __snapshot_host_stage2,
+		.flags	= KVM_PGTABLE_WALK_LEAF,
+	};
+	struct kvm_pgtable *pgt = &host_mmu.pgt;
+
+	hyp_spin_lock(&host_mmu.lock);
+	ret = kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker);
+	/* Start receiving calls to host_stage2_idmap. */
+	kvm_idmap_initialized = !ret;
+	hyp_spin_unlock(&host_mmu.lock);
+
+	return ret;
+}
 
 int kvm_iommu_init(void)
 {
-	/* Keep DMA isolation optional. */
-	if (!kvm_iommu_ops || !kvm_iommu_ops->init)
+	int ret;
+
+	if (!kvm_iommu_ops || !kvm_iommu_ops->init ||
+	    !kvm_iommu_ops->host_stage2_idmap)
+		return 0;
+
+	ret = kvm_iommu_ops->init();
+	if (ret)
+		return ret;
+
+	return kvm_iommu_snapshot_host_stage2();
+}
+
+int kvm_iommu_host_stage2_idmap(phys_addr_t start, phys_addr_t end,
+				enum kvm_pgtable_prot prot)
+{
+	hyp_assert_lock_held(&host_mmu.lock);
+
+	if (!kvm_idmap_initialized)
 		return 0;
 
-	return kvm_iommu_ops->init();
+	return kvm_iommu_ops->host_stage2_idmap(start, end, pkvm_to_iommu_prot(prot));
 }
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index 2fb20a63a417..b54cb72ed88c 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -15,6 +15,7 @@
 #include <hyp/fault.h>
 
 #include <nvhe/gfp.h>
+#include <nvhe/iommu.h>
 #include <nvhe/memory.h>
 #include <nvhe/mem_protect.h>
 #include <nvhe/mm.h>
@@ -481,6 +482,14 @@ static int check_range_allowed_memory(u64 start, u64 end)
 	return 0;
 }
 
+u64 find_mem_range_from(u64 start, bool *is_memory)
+{
+	struct kvm_mem_range r;
+
+	*is_memory = !!find_mem_range(start, &r);
+	return r.end;
+}
+
 static bool range_is_memory(u64 start, u64 end)
 {
 	struct kvm_mem_range r;
@@ -577,8 +586,34 @@ int host_stage2_idmap_locked(phys_addr_t addr, u64 size,
 
 static void __host_update_page_state(phys_addr_t addr, u64 size, enum pkvm_page_state state)
 {
+	enum pkvm_page_state old = get_host_state(hyp_phys_to_page(addr));
+	enum kvm_pgtable_prot prot = 0;
+
 	for_each_hyp_page(page, addr, size)
 		set_host_state(page, state);
+
+	/*
+	 * Any transition to PKVM_NOPAGE unmaps the page from the host.
+	 * Any transition to PKVM_PAGE_SHARED_BORROWED maps the page in the host.
+	 * Any transition to PKVM_PAGE_SHARED_OWNED is ignored as the page is already mapped.
+	 * Transitions to PKVM_PAGE_OWNED from anything but PKVM_NOPAGE are ignored.
+	 * Transitions to PKVM_PAGE_OWNED from PKVM_NOPAGE will map the page.
+	 */
+	if ((state == PKVM_PAGE_SHARED_OWNED) ||
+		((state == PKVM_PAGE_OWNED) && (old != PKVM_NOPAGE)))
+		return;
+
+	if ((state == PKVM_PAGE_SHARED_BORROWED) ||
+		(state == PKVM_PAGE_OWNED))
+		prot = PKVM_HOST_MEM_PROT;
+
+	/*
+	 * Only update the IOMMU from here, as MMIO can't transition after
+	 * de-privilege; that will need to change when device assignment
+	 * is supported.
+	 * WARN on failure, as we can't unroll at this point.
+	 */
+	WARN_ON(kvm_iommu_host_stage2_idmap(addr, addr + size, prot));
 }
 
 #define KVM_HOST_DONATION_PTE_OWNER_MASK	GENMASK(3, 1)
-- 
2.54.0.545.g6539524ca2-goog


  parent reply	other threads:[~2026-05-01 11:20 UTC|newest]

Thread overview: 61+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-05-01 11:19 [PATCH v6 00/25] KVM: arm64: SMMUv3 driver for pKVM (trap and emulate) Mostafa Saleh
2026-05-01 11:19 ` [PATCH v6 01/25] KVM: arm64: Generalize trace clock Mostafa Saleh
2026-05-01 11:19 ` [PATCH v6 02/25] KVM: arm64: Donate MMIO to the hypervisor Mostafa Saleh
2026-05-01 11:19 ` [PATCH v6 03/25] iommu/arm-smmu-v3: Split code with hyp Mostafa Saleh
2026-05-01 12:44   ` Jason Gunthorpe
2026-05-04 12:13     ` Mostafa Saleh
2026-05-01 11:19 ` [PATCH v6 04/25] iommu/arm-smmu-v3: Move TLB range invalidation into common code Mostafa Saleh
2026-05-01 12:41   ` Jason Gunthorpe
2026-05-04 12:15     ` Mostafa Saleh
2026-05-05 16:17       ` Jason Gunthorpe
2026-05-05 16:43         ` Mostafa Saleh
2026-05-06  9:53           ` Jason Gunthorpe
2026-05-07  9:40             ` Mostafa Saleh
2026-05-09 23:29               ` Jason Gunthorpe
2026-05-11 11:45                 ` Mostafa Saleh
2026-05-11 14:24                   ` Jason Gunthorpe
2026-05-01 11:19 ` [PATCH v6 05/25] iommu/arm-smmu-v3: Move IDR parsing to common functions Mostafa Saleh
2026-05-01 12:47   ` Jason Gunthorpe
2026-05-04 12:16     ` Mostafa Saleh
2026-05-05 16:27       ` Jason Gunthorpe
2026-05-05 16:48         ` Mostafa Saleh
2026-05-06  9:56           ` Jason Gunthorpe
2026-05-07 10:13             ` Mostafa Saleh
2026-05-09 23:34               ` Jason Gunthorpe
2026-05-11 11:53                 ` Mostafa Saleh
2026-05-11 14:30                   ` Jason Gunthorpe
2026-05-01 11:19 ` [PATCH v6 06/25] iommu/io-pgtable-arm: Rework to use the iommu-pages API Mostafa Saleh
2026-05-01 12:24   ` Jason Gunthorpe
2026-05-04 12:19     ` Mostafa Saleh
2026-05-09 23:21       ` Jason Gunthorpe
2026-05-11 11:16         ` Mostafa Saleh
2026-05-11 14:18           ` Jason Gunthorpe
2026-05-13 21:54             ` Mostafa Saleh
2026-05-01 11:19 ` [PATCH v6 07/25] KVM: arm64: iommu: Introduce IOMMU driver infrastructure Mostafa Saleh
2026-05-01 11:19 ` Mostafa Saleh [this message]
2026-05-01 13:00   ` [PATCH v6 08/25] KVM: arm64: iommu: Shadow host stage-2 page table Jason Gunthorpe
2026-05-04 12:28     ` Mostafa Saleh
2026-05-09 23:27       ` Jason Gunthorpe
2026-05-11 11:24         ` Mostafa Saleh
2026-05-11 14:22           ` Jason Gunthorpe
2026-05-12 10:42             ` Mostafa Saleh
2026-05-12 12:36               ` Jason Gunthorpe
2026-05-01 11:19 ` [PATCH v6 09/25] KVM: arm64: iommu: Add memory pool Mostafa Saleh
2026-05-01 11:19 ` [PATCH v6 10/25] KVM: arm64: iommu: Support DABT for IOMMU Mostafa Saleh
2026-05-01 11:19 ` [PATCH v6 11/25] iommu/arm-smmu-v3-kvm: Add SMMUv3 driver Mostafa Saleh
2026-05-01 11:19 ` [PATCH v6 12/25] iommu/arm-smmu-v3-kvm: Add the kernel driver Mostafa Saleh
2026-05-01 11:19 ` [PATCH v6 13/25] iommu/arm-smmu-v3-kvm: Probe SMMU HW Mostafa Saleh
2026-05-01 12:51   ` Jason Gunthorpe
2026-05-04 12:30     ` Mostafa Saleh
2026-05-01 11:19 ` [PATCH v6 14/25] iommu/arm-smmu-v3-kvm: Add MMIO emulation Mostafa Saleh
2026-05-01 11:19 ` [PATCH v6 15/25] iommu/arm-smmu-v3-kvm: Shadow the command queue Mostafa Saleh
2026-05-01 11:19 ` [PATCH v6 16/25] iommu/arm-smmu-v3-kvm: Add CMDQ functions Mostafa Saleh
2026-05-01 11:19 ` [PATCH v6 17/25] iommu/arm-smmu-v3-kvm: Emulate CMDQ for host Mostafa Saleh
2026-05-01 11:19 ` [PATCH v6 18/25] iommu/arm-smmu-v3-kvm: Shadow stream table Mostafa Saleh
2026-05-01 11:19 ` [PATCH v6 19/25] iommu/arm-smmu-v3-kvm: Shadow STEs Mostafa Saleh
2026-05-01 11:19 ` [PATCH v6 20/25] iommu/arm-smmu-v3-kvm: Share other queues Mostafa Saleh
2026-05-01 11:19 ` [PATCH v6 21/25] iommu/arm-smmu-v3-kvm: Emulate GBPA Mostafa Saleh
2026-05-01 11:19 ` [PATCH v6 22/25] iommu/io-pgtable-arm: Support io-pgtable-arm in the hypervisor Mostafa Saleh
2026-05-01 11:19 ` [PATCH v6 23/25] iommu/arm-smmu-v3-kvm: Shadow the CPU stage-2 page table Mostafa Saleh
2026-05-01 11:19 ` [PATCH v6 24/25] iommu/arm-smmu-v3-kvm: Enable nesting Mostafa Saleh
2026-05-01 11:19 ` [PATCH v6 25/25] KVM: arm64: Add documentation for pKVM DMA isolation Mostafa Saleh

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260501111928.259252-9-smostafa@google.com \
    --to=smostafa@google.com \
    --cc=catalin.marinas@arm.com \
    --cc=iommu@lists.linux.dev \
    --cc=jean-philippe@linaro.org \
    --cc=jgg@ziepe.ca \
    --cc=joey.gouly@arm.com \
    --cc=joro@8bytes.org \
    --cc=keirf@google.com \
    --cc=kvmarm@lists.linux.dev \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mark.rutland@arm.com \
    --cc=maz@kernel.org \
    --cc=oliver.upton@linux.dev \
    --cc=qperret@google.com \
    --cc=sebastianene@google.com \
    --cc=suzuki.poulose@arm.com \
    --cc=tabba@google.com \
    --cc=vdonnefort@google.com \
    --cc=will@kernel.org \
    --cc=yuzenghui@huawei.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.