public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Mostafa Saleh <smostafa@google.com>
To: linux-arm-kernel@lists.infradead.org,
	linux-kernel@vger.kernel.org,  kvmarm@lists.linux.dev,
	iommu@lists.linux.dev
Cc: catalin.marinas@arm.com, will@kernel.org, maz@kernel.org,
	 oliver.upton@linux.dev, joey.gouly@arm.com,
	suzuki.poulose@arm.com,  yuzenghui@huawei.com, joro@8bytes.org,
	jean-philippe@linaro.org, jgg@ziepe.ca,  mark.rutland@arm.com,
	qperret@google.com, tabba@google.com,  vdonnefort@google.com,
	sebastianene@google.com, keirf@google.com,
	 Mostafa Saleh <smostafa@google.com>
Subject: [PATCH v6 15/25] iommu/arm-smmu-v3-kvm: Shadow the command queue
Date: Fri,  1 May 2026 11:19:17 +0000	[thread overview]
Message-ID: <20260501111928.259252-16-smostafa@google.com> (raw)
In-Reply-To: <20260501111928.259252-1-smostafa@google.com>

At boot, allocate a command queue per SMMU, which the hypervisor uses
as a shadow.

The command queue size is 64K, which is more than enough: the
hypervisor consumes all the entries on each command queue PROD
write, which means it can handle up to 4096 commands at a time.

The host command queue also needs to be pinned in a shared state, so
that it can't be donated to VMs and the host can't trick the hypervisor
into accessing protected VM memory. This is done each time the command
queue is enabled, and undone each time it is disabled.
The hypervisor won't access the host command queue while it is disabled
by the host.

Signed-off-by: Mostafa Saleh <smostafa@google.com>
---
 .../iommu/arm/arm-smmu-v3/arm-smmu-v3-kvm.c   |  25 ++++
 .../iommu/arm/arm-smmu-v3/pkvm/arm-smmu-v3.c  | 122 +++++++++++++++++-
 .../iommu/arm/arm-smmu-v3/pkvm/arm_smmu_v3.h  |   8 ++
 3 files changed, 154 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-kvm.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-kvm.c
index 9765d3d636d7..fccbc34de087 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-kvm.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-kvm.c
@@ -15,6 +15,8 @@
 #include "arm-smmu-v3.h"
 #include "pkvm/arm_smmu_v3.h"
 
+#define SMMU_KVM_CMDQ_ORDER				4
+
 extern struct kvm_iommu_ops kvm_nvhe_sym(smmu_ops);
 
 static size_t				kvm_arm_smmu_count;
@@ -24,6 +26,15 @@ static size_t				kvm_arm_smmu_cur;
 static void kvm_arm_smmu_array_free(void)
 {
 	int order;
+	int i;
+
+	for (i = 0 ; i < kvm_arm_smmu_cur ; ++i) {
+		struct hyp_arm_smmu_v3_device *smmu = &kvm_arm_smmu_array[i];
+
+		if (smmu->cmdq.base_dma)
+			free_pages((unsigned long)phys_to_virt(smmu->cmdq.base_dma),
+				   SMMU_KVM_CMDQ_ORDER);
+	}
 
 	order = get_order(kvm_arm_smmu_count * sizeof(*kvm_arm_smmu_array));
 	free_pages((unsigned long)kvm_arm_smmu_array, order);
@@ -70,6 +81,7 @@ static int smmuv3_nesting_probe(struct platform_device *pdev)
 	struct hyp_arm_smmu_v3_device *smmu = &kvm_arm_smmu_array[kvm_arm_smmu_cur];
 	struct device *dev = &pdev->dev;
 	struct resource *res;
+	void *cmdq_base;
 
 	/* Only device tree, ACPI not supported. */
 	if (!dev->of_node)
@@ -95,6 +107,19 @@ static int smmuv3_nesting_probe(struct platform_device *pdev)
 	if (of_dma_is_coherent(dev->of_node))
 		smmu->features |= ARM_SMMU_FEAT_COHERENCY;
 
+	/*
+	 * Allocate the shadow command queue; it doesn't have to be the same
+	 * size as the host's.
+	 * Only populate base_dma and llq.max_n_shift; the hypervisor will
+	 * initialize the rest.
+	 */
+	cmdq_base = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, SMMU_KVM_CMDQ_ORDER);
+	if (!cmdq_base)
+		return -ENOMEM;
+
+	smmu->cmdq.base_dma = virt_to_phys(cmdq_base);
+	smmu->cmdq.llq.max_n_shift = SMMU_KVM_CMDQ_ORDER + PAGE_SHIFT - CMDQ_ENT_SZ_SHIFT;
+
 	kvm_arm_smmu_cur++;
 	return 0;
 }
diff --git a/drivers/iommu/arm/arm-smmu-v3/pkvm/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/pkvm/arm-smmu-v3.c
index cce5a51b4656..3b77796dafc7 100644
--- a/drivers/iommu/arm/arm-smmu-v3/pkvm/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/pkvm/arm-smmu-v3.c
@@ -11,7 +11,6 @@
 #include <nvhe/trap_handler.h>
 
 #include "arm_smmu_v3.h"
-#include "../arm-smmu-v3.h"
 
 size_t __ro_after_init kvm_hyp_arm_smmu_v3_count;
 struct hyp_arm_smmu_v3_device *kvm_hyp_arm_smmu_v3_smmus;
@@ -21,10 +20,68 @@ struct hyp_arm_smmu_v3_device *kvm_hyp_arm_smmu_v3_smmus;
 	     (smmu) != &kvm_hyp_arm_smmu_v3_smmus[kvm_hyp_arm_smmu_v3_count]; \
 	     (smmu)++)
 
+#define cmdq_size(cmdq)	((1 << ((cmdq)->llq.max_n_shift)) * CMDQ_ENT_DWORDS * 8)
+
+static bool is_cmdq_enabled(struct hyp_arm_smmu_v3_device *smmu)
+{
+	return FIELD_GET(CR0_CMDQEN, smmu->cr0);
+}
+
+/*
+ * The hypervisor accesses the host copies of the CMDQ and STEs; share them to:
+ * - Prevent the host from passing protected VM memory.
+ * - Have them mapped in the hyp page table.
+ */
+static int smmu_share_pages(phys_addr_t addr, size_t size)
+{
+	size_t nr_pages = PAGE_ALIGN(size + (addr & ~PAGE_MASK)) >> PAGE_SHIFT;
+	phys_addr_t base = addr & PAGE_MASK;
+	int i, ret;
+
+	for (i = 0 ; i < nr_pages ; ++i) {
+		if (__pkvm_host_share_hyp((base + i * PAGE_SIZE) >> PAGE_SHIFT)) {
+			while (i--)
+				__pkvm_host_unshare_hyp((base + i * PAGE_SIZE) >> PAGE_SHIFT);
+			return -EPERM;
+		}
+	}
+
+	ret = hyp_pin_shared_mem(hyp_phys_to_virt(base),
+				 hyp_phys_to_virt(base + nr_pages * PAGE_SIZE));
+	if (ret) {
+		for (i = 0 ; i < nr_pages ; ++i)
+			__pkvm_host_unshare_hyp((base + i * PAGE_SIZE) >> PAGE_SHIFT);
+	}
+
+	return ret;
+}
+
+static int smmu_unshare_pages(phys_addr_t addr, size_t size)
+{
+	size_t nr_pages = PAGE_ALIGN(size + (addr & ~PAGE_MASK)) >> PAGE_SHIFT;
+	phys_addr_t base = addr & PAGE_MASK;
+	int i, ret;
+
+	hyp_unpin_shared_mem(hyp_phys_to_virt(base),
+			     hyp_phys_to_virt(base + nr_pages * PAGE_SIZE));
+
+	for (i = 0 ; i < nr_pages ; ++i) {
+		ret = __pkvm_host_unshare_hyp((base + i * PAGE_SIZE) >> PAGE_SHIFT);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
 /* Put the device in a state that can be probed by the host driver. */
 static void smmu_deinit_device(struct hyp_arm_smmu_v3_device *smmu)
 {
 	WARN_ON(__pkvm_hyp_donate_host_mmio(smmu->mmio_addr, smmu->mmio_size));
+
+	if (smmu->cmdq.base)
+		WARN_ON(__pkvm_hyp_donate_host(smmu->cmdq.base_dma >> PAGE_SHIFT,
+					       cmdq_size(&smmu->cmdq) >> PAGE_SHIFT));
 	smmu->base = NULL;
 }
 
@@ -99,6 +156,31 @@ static int smmu_probe(struct hyp_arm_smmu_v3_device *smmu)
 	return 0;
 }
 
+/*
+ * The kernel part of the driver will allocate the shadow cmdq,
+ * and zero it. This function only donates it.
+ */
+static int smmu_init_cmdq(struct hyp_arm_smmu_v3_device *smmu)
+{
+	size_t cmdq_nr_pages = cmdq_size(&smmu->cmdq) >> PAGE_SHIFT;
+	int ret;
+
+	ret = __pkvm_host_donate_hyp(smmu->cmdq.base_dma >> PAGE_SHIFT, cmdq_nr_pages);
+	if (ret)
+		return ret;
+
+	smmu->cmdq.base = hyp_phys_to_virt(smmu->cmdq.base_dma);
+	smmu->cmdq.prod_reg = smmu->base + ARM_SMMU_CMDQ_PROD;
+	smmu->cmdq.cons_reg = smmu->base + ARM_SMMU_CMDQ_CONS;
+	smmu->cmdq.q_base = smmu->cmdq.base_dma |
+			    FIELD_PREP(Q_BASE_LOG2SIZE, smmu->cmdq.llq.max_n_shift);
+	smmu->cmdq.ent_dwords = CMDQ_ENT_DWORDS;
+	writel_relaxed(0, smmu->cmdq.prod_reg);
+	writel_relaxed(0, smmu->cmdq.cons_reg);
+	writeq_relaxed(smmu->cmdq.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
+	return 0;
+}
+
 static int smmu_init_device(struct hyp_arm_smmu_v3_device *smmu)
 {
 	unsigned long haddr;
@@ -117,7 +199,12 @@ static int smmu_init_device(struct hyp_arm_smmu_v3_device *smmu)
 	if (ret)
 		goto out_ret;
 
+	ret = smmu_init_cmdq(smmu);
+	if (ret)
+		goto out_ret;
+
 	return 0;
+
 out_ret:
 	smmu_deinit_device(smmu);
 	return ret;
@@ -157,6 +244,22 @@ static int smmu_init(void)
 	return ret;
 }
 
+static void smmu_emulate_cmdq_enable(struct hyp_arm_smmu_v3_device *smmu)
+{
+	u32 shift = smmu->cmdq_host.q_base & Q_BASE_LOG2SIZE;
+
+	smmu->cmdq_host.llq.max_n_shift = min(shift, 19);
+	smmu->cmdq_host.base_dma = smmu->cmdq_host.q_base & Q_BASE_ADDR_MASK;
+	WARN_ON(smmu_share_pages(smmu->cmdq_host.base_dma,
+				 cmdq_size(&smmu->cmdq_host)));
+}
+
+static void smmu_emulate_cmdq_disable(struct hyp_arm_smmu_v3_device *smmu)
+{
+	WARN_ON(smmu_unshare_pages(smmu->cmdq_host.base_dma,
+				   cmdq_size(&smmu->cmdq_host)));
+}
+
 static bool smmu_dabt_device(struct hyp_arm_smmu_v3_device *smmu,
 			     struct user_pt_regs *regs,
 			     u64 esr, u32 off)
@@ -180,6 +283,14 @@ static bool smmu_dabt_device(struct hyp_arm_smmu_v3_device *smmu,
 		break;
 	/* Passthrough the register access for bisectiblity, handled later */
 	case ARM_SMMU_CMDQ_BASE:
+		if (is_write) {
+			/* Not allowed by the architecture */
+			if (WARN_ON(is_cmdq_enabled(smmu)))
+				break;
+			smmu->cmdq_host.q_base = val;
+		}
+		mask = read_write;
+		break;
 	case ARM_SMMU_CMDQ_PROD:
 	case ARM_SMMU_CMDQ_CONS:
 	case ARM_SMMU_STRTAB_BASE:
@@ -190,6 +301,15 @@ static bool smmu_dabt_device(struct hyp_arm_smmu_v3_device *smmu,
 	case ARM_SMMU_CR0:
 		if (len != sizeof(u32))
 			break;
+		if (is_write) {
+			bool last_cmdq_en = is_cmdq_enabled(smmu);
+
+			smmu->cr0 = val;
+			if (!last_cmdq_en && is_cmdq_enabled(smmu))
+				smmu_emulate_cmdq_enable(smmu);
+			else if (last_cmdq_en && !is_cmdq_enabled(smmu))
+				smmu_emulate_cmdq_disable(smmu);
+		}
 		mask = read_write;
 		break;
 	case ARM_SMMU_CR1: {
diff --git a/drivers/iommu/arm/arm-smmu-v3/pkvm/arm_smmu_v3.h b/drivers/iommu/arm/arm-smmu-v3/pkvm/arm_smmu_v3.h
index 263b0fef262d..cc1ad4c19845 100644
--- a/drivers/iommu/arm/arm-smmu-v3/pkvm/arm_smmu_v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/pkvm/arm_smmu_v3.h
@@ -8,6 +8,8 @@
 #include <nvhe/spinlock.h>
 #endif
 
+#include "../arm-smmu-v3.h"
+
 /*
  * Parameters from the trusted host:
  * @mmio_addr		base address of the SMMU registers
@@ -20,6 +22,9 @@
  * @pgsize_bitmap	Supported page sizes
  * @sid_bits		Max number of SID bits supported
  * @lock		Lock to protect SMMU
+ * @cmdq		CMDQ as observed by HW
+ * @cmdq_host		Host view of the CMDQ, only q_base, base_dma and llq used.
+ * @cr0			Last value of CR0
  */
 struct hyp_arm_smmu_v3_device {
 	phys_addr_t		mmio_addr;
@@ -34,6 +39,9 @@ struct hyp_arm_smmu_v3_device {
 #else
 	u32			lock;
 #endif
+	struct arm_smmu_queue	cmdq;
+	struct arm_smmu_queue	cmdq_host;
+	u32			cr0;
 };
 
 extern size_t kvm_nvhe_sym(kvm_hyp_arm_smmu_v3_count);
-- 
2.54.0.545.g6539524ca2-goog


  parent reply	other threads:[~2026-05-01 11:20 UTC|newest]

Thread overview: 38+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-05-01 11:19 [PATCH v6 00/25] KVM: arm64: SMMUv3 driver for pKVM (trap and emulate) Mostafa Saleh
2026-05-01 11:19 ` [PATCH v6 01/25] KVM: arm64: Generalize trace clock Mostafa Saleh
2026-05-01 11:19 ` [PATCH v6 02/25] KVM: arm64: Donate MMIO to the hypervisor Mostafa Saleh
2026-05-01 11:19 ` [PATCH v6 03/25] iommu/arm-smmu-v3: Split code with hyp Mostafa Saleh
2026-05-01 12:44   ` Jason Gunthorpe
2026-05-04 12:13     ` Mostafa Saleh
2026-05-01 11:19 ` [PATCH v6 04/25] iommu/arm-smmu-v3: Move TLB range invalidation into common code Mostafa Saleh
2026-05-01 12:41   ` Jason Gunthorpe
2026-05-04 12:15     ` Mostafa Saleh
2026-05-01 11:19 ` [PATCH v6 05/25] iommu/arm-smmu-v3: Move IDR parsing to common functions Mostafa Saleh
2026-05-01 12:47   ` Jason Gunthorpe
2026-05-04 12:16     ` Mostafa Saleh
2026-05-01 11:19 ` [PATCH v6 06/25] iommu/io-pgtable-arm: Rework to use the iommu-pages API Mostafa Saleh
2026-05-01 12:24   ` Jason Gunthorpe
2026-05-04 12:19     ` Mostafa Saleh
2026-05-01 11:19 ` [PATCH v6 07/25] KVM: arm64: iommu: Introduce IOMMU driver infrastructure Mostafa Saleh
2026-05-01 11:19 ` [PATCH v6 08/25] KVM: arm64: iommu: Shadow host stage-2 page table Mostafa Saleh
2026-05-01 13:00   ` Jason Gunthorpe
2026-05-04 12:28     ` Mostafa Saleh
2026-05-01 11:19 ` [PATCH v6 09/25] KVM: arm64: iommu: Add memory pool Mostafa Saleh
2026-05-01 11:19 ` [PATCH v6 10/25] KVM: arm64: iommu: Support DABT for IOMMU Mostafa Saleh
2026-05-01 11:19 ` [PATCH v6 11/25] iommu/arm-smmu-v3-kvm: Add SMMUv3 driver Mostafa Saleh
2026-05-01 11:19 ` [PATCH v6 12/25] iommu/arm-smmu-v3-kvm: Add the kernel driver Mostafa Saleh
2026-05-01 11:19 ` [PATCH v6 13/25] iommu/arm-smmu-v3-kvm: Probe SMMU HW Mostafa Saleh
2026-05-01 12:51   ` Jason Gunthorpe
2026-05-04 12:30     ` Mostafa Saleh
2026-05-01 11:19 ` [PATCH v6 14/25] iommu/arm-smmu-v3-kvm: Add MMIO emulation Mostafa Saleh
2026-05-01 11:19 ` Mostafa Saleh [this message]
2026-05-01 11:19 ` [PATCH v6 16/25] iommu/arm-smmu-v3-kvm: Add CMDQ functions Mostafa Saleh
2026-05-01 11:19 ` [PATCH v6 17/25] iommu/arm-smmu-v3-kvm: Emulate CMDQ for host Mostafa Saleh
2026-05-01 11:19 ` [PATCH v6 18/25] iommu/arm-smmu-v3-kvm: Shadow stream table Mostafa Saleh
2026-05-01 11:19 ` [PATCH v6 19/25] iommu/arm-smmu-v3-kvm: Shadow STEs Mostafa Saleh
2026-05-01 11:19 ` [PATCH v6 20/25] iommu/arm-smmu-v3-kvm: Share other queues Mostafa Saleh
2026-05-01 11:19 ` [PATCH v6 21/25] iommu/arm-smmu-v3-kvm: Emulate GBPA Mostafa Saleh
2026-05-01 11:19 ` [PATCH v6 22/25] iommu/io-pgtable-arm: Support io-pgtable-arm in the hypervisor Mostafa Saleh
2026-05-01 11:19 ` [PATCH v6 23/25] iommu/arm-smmu-v3-kvm: Shadow the CPU stage-2 page table Mostafa Saleh
2026-05-01 11:19 ` [PATCH v6 24/25] iommu/arm-smmu-v3-kvm: Enable nesting Mostafa Saleh
2026-05-01 11:19 ` [PATCH v6 25/25] KVM: arm64: Add documentation for pKVM DMA isolation Mostafa Saleh

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260501111928.259252-16-smostafa@google.com \
    --to=smostafa@google.com \
    --cc=catalin.marinas@arm.com \
    --cc=iommu@lists.linux.dev \
    --cc=jean-philippe@linaro.org \
    --cc=jgg@ziepe.ca \
    --cc=joey.gouly@arm.com \
    --cc=joro@8bytes.org \
    --cc=keirf@google.com \
    --cc=kvmarm@lists.linux.dev \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mark.rutland@arm.com \
    --cc=maz@kernel.org \
    --cc=oliver.upton@linux.dev \
    --cc=qperret@google.com \
    --cc=sebastianene@google.com \
    --cc=suzuki.poulose@arm.com \
    --cc=tabba@google.com \
    --cc=vdonnefort@google.com \
    --cc=will@kernel.org \
    --cc=yuzenghui@huawei.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox