Linux IOMMU Development
 help / color / mirror / Atom feed
From: Matthew Rosato <mjrosato@linux.ibm.com>
To: linux-s390@vger.kernel.org
Cc: kvm@vger.kernel.org, david@redhat.com, thuth@redhat.com,
	linux-kernel@vger.kernel.org, vneethv@linux.ibm.com,
	agordeev@linux.ibm.com, imbrenda@linux.ibm.com, will@kernel.org,
	frankja@linux.ibm.com, corbet@lwn.net, linux-doc@vger.kernel.org,
	pasic@linux.ibm.com, jgg@nvidia.com,
	gerald.schaefer@linux.ibm.com, borntraeger@linux.ibm.com,
	farman@linux.ibm.com, gor@linux.ibm.com, schnelle@linux.ibm.com,
	hca@linux.ibm.com, alex.williamson@redhat.com,
	freude@linux.ibm.com, pmorel@linux.ibm.com, cohuck@redhat.com,
	oberpar@linux.ibm.com, iommu@lists.linux-foundation.org,
	svens@linux.ibm.com, pbonzini@redhat.com
Subject: [PATCH v4 18/32] iommu/s390: add support for IOMMU_DOMAIN_KVM
Date: Mon, 14 Mar 2022 15:44:37 -0400	[thread overview]
Message-ID: <20220314194451.58266-19-mjrosato@linux.ibm.com> (raw)
In-Reply-To: <20220314194451.58266-1-mjrosato@linux.ibm.com>

Add an alternate domain ops for type IOMMU_DOMAIN_KVM.  This type is
intended for use when KVM is managing the IOMMU domain on behalf of a
VM.  Mapping can only be performed once a KVM is registered with the
domain as well as a guest IOTA (address translation anchor).

The map operation is expected to be received in response to an
intercept (SIE interception code 04) of a guest RPCIT instruction, and will perform a
synchronization operation between the host DMA and guest DMA tables
over the range specified.

Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com>
---
 arch/s390/include/asm/kvm_pci.h |   6 +
 arch/s390/include/asm/pci_dma.h |   3 +
 drivers/iommu/Kconfig           |   8 +
 drivers/iommu/Makefile          |   1 +
 drivers/iommu/s390-iommu.c      |  49 ++--
 drivers/iommu/s390-iommu.h      |  53 ++++
 drivers/iommu/s390-kvm-iommu.c  | 469 ++++++++++++++++++++++++++++++++
 7 files changed, 562 insertions(+), 27 deletions(-)
 create mode 100644 drivers/iommu/s390-iommu.h
 create mode 100644 drivers/iommu/s390-kvm-iommu.c

diff --git a/arch/s390/include/asm/kvm_pci.h b/arch/s390/include/asm/kvm_pci.h
index ae8669105f72..ebc0da5d9ac1 100644
--- a/arch/s390/include/asm/kvm_pci.h
+++ b/arch/s390/include/asm/kvm_pci.h
@@ -11,6 +11,7 @@
 #define ASM_KVM_PCI_H
 
 #include <linux/types.h>
+#include <linux/iommu.h>
 #include <linux/kvm_types.h>
 #include <linux/kvm_host.h>
 #include <linux/kvm.h>
@@ -19,9 +20,14 @@
 struct kvm_zdev {
 	struct zpci_dev *zdev;
 	struct kvm *kvm;
+	struct iommu_domain *dom; /* Used to invoke IOMMU API for RPCIT */
 };
 
 int kvm_s390_pci_dev_open(struct zpci_dev *zdev);
 void kvm_s390_pci_dev_release(struct zpci_dev *zdev);
 
+int zpci_iommu_attach_kvm(struct zpci_dev *zdev, struct kvm *kvm);
+int zpci_iommu_kvm_assign_iota(struct zpci_dev *zdev, u64 iota);
+int zpci_iommu_kvm_remove_iota(struct zpci_dev *zdev);
+
 #endif /* ASM_KVM_PCI_H */
diff --git a/arch/s390/include/asm/pci_dma.h b/arch/s390/include/asm/pci_dma.h
index 91e63426bdc5..38004e0a4383 100644
--- a/arch/s390/include/asm/pci_dma.h
+++ b/arch/s390/include/asm/pci_dma.h
@@ -50,6 +50,9 @@ enum zpci_ioat_dtype {
 #define ZPCI_TABLE_ALIGN		ZPCI_TABLE_SIZE
 #define ZPCI_TABLE_ENTRY_SIZE		(sizeof(unsigned long))
 #define ZPCI_TABLE_ENTRIES		(ZPCI_TABLE_SIZE / ZPCI_TABLE_ENTRY_SIZE)
+#define ZPCI_TABLE_PAGES		(ZPCI_TABLE_SIZE >> PAGE_SHIFT)
+#define ZPCI_TABLE_ENTRIES_PAGES	(ZPCI_TABLE_ENTRIES * ZPCI_TABLE_PAGES)
+#define ZPCI_TABLE_ENTRIES_PER_PAGE	(ZPCI_TABLE_ENTRIES / ZPCI_TABLE_PAGES)
 
 #define ZPCI_TABLE_BITS			11
 #define ZPCI_PT_BITS			8
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index 3eb68fa1b8cc..9637f73925ec 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -411,6 +411,14 @@ config S390_AP_IOMMU
 	  Enables bits of IOMMU API required by VFIO. The iommu_ops
 	  is not implemented as it is not necessary for VFIO.
 
+config S390_KVM_IOMMU
+	bool "S390 KVM IOMMU Support"
+	depends on S390_IOMMU && KVM || COMPILE_TEST
+	select IOMMU_API
+	help
+	  Extends the S390 IOMMU API to support a domain owned and managed by
+	  KVM. This allows KVM to manage nested mappings vs userspace.
+
 config MTK_IOMMU
 	tristate "MediaTek IOMMU Support"
 	depends on ARCH_MEDIATEK || COMPILE_TEST
diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
index bc7f730edbb0..5476e978d7f5 100644
--- a/drivers/iommu/Makefile
+++ b/drivers/iommu/Makefile
@@ -25,6 +25,7 @@ obj-$(CONFIG_TEGRA_IOMMU_SMMU) += tegra-smmu.o
 obj-$(CONFIG_EXYNOS_IOMMU) += exynos-iommu.o
 obj-$(CONFIG_FSL_PAMU) += fsl_pamu.o fsl_pamu_domain.o
 obj-$(CONFIG_S390_IOMMU) += s390-iommu.o
+obj-$(CONFIG_S390_KVM_IOMMU) += s390-kvm-iommu.o
 obj-$(CONFIG_HYPERV_IOMMU) += hyperv-iommu.o
 obj-$(CONFIG_VIRTIO_IOMMU) += virtio-iommu.o
 obj-$(CONFIG_IOMMU_SVA_LIB) += iommu-sva-lib.o io-pgfault.o
diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index 73a85c599dc2..0ead37f6e232 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -11,6 +11,7 @@
 #include <linux/iommu-helper.h>
 #include <linux/sizes.h>
 #include <asm/pci_dma.h>
+#include "s390-iommu.h"
 
 /*
  * Physically contiguous memory regions can be mapped with 4 KiB alignment,
@@ -21,24 +22,6 @@
 
 static const struct iommu_ops s390_iommu_ops;
 
-struct s390_domain {
-	struct iommu_domain	domain;
-	struct list_head	devices;
-	unsigned long		*dma_table;
-	spinlock_t		dma_table_lock;
-	spinlock_t		list_lock;
-};
-
-struct s390_domain_device {
-	struct list_head	list;
-	struct zpci_dev		*zdev;
-};
-
-static struct s390_domain *to_s390_domain(struct iommu_domain *dom)
-{
-	return container_of(dom, struct s390_domain, domain);
-}
-
 static bool s390_iommu_capable(enum iommu_cap cap)
 {
 	switch (cap) {
@@ -55,7 +38,12 @@ static struct iommu_domain *s390_domain_alloc(unsigned domain_type)
 {
 	struct s390_domain *s390_domain;
 
-	if (domain_type != IOMMU_DOMAIN_UNMANAGED)
+	if (domain_type != IOMMU_DOMAIN_UNMANAGED &&
+	    domain_type != IOMMU_DOMAIN_KVM)
+		return NULL;
+
+	if (domain_type == IOMMU_DOMAIN_KVM &&
+	    !IS_ENABLED(CONFIG_S390_KVM_IOMMU))
 		return NULL;
 
 	s390_domain = kzalloc(sizeof(*s390_domain), GFP_KERNEL);
@@ -68,23 +56,30 @@ static struct iommu_domain *s390_domain_alloc(unsigned domain_type)
 		return NULL;
 	}
 
+	/* If KVM-managed, swap in alternate ops now */
+	if (IS_ENABLED(CONFIG_S390_KVM_IOMMU) &&
+	    domain_type == IOMMU_DOMAIN_KVM)
+		s390_domain->domain.ops = &s390_kvm_domain_ops;
+
 	spin_lock_init(&s390_domain->dma_table_lock);
 	spin_lock_init(&s390_domain->list_lock);
+	mutex_init(&s390_domain->kvm_dom.ioat_lock);
 	INIT_LIST_HEAD(&s390_domain->devices);
 
 	return &s390_domain->domain;
 }
 
-static void s390_domain_free(struct iommu_domain *domain)
+void s390_domain_free(struct iommu_domain *domain)
 {
 	struct s390_domain *s390_domain = to_s390_domain(domain);
 
 	dma_cleanup_tables(s390_domain->dma_table);
+	mutex_destroy(&s390_domain->kvm_dom.ioat_lock);
 	kfree(s390_domain);
 }
 
-static int s390_iommu_attach_device(struct iommu_domain *domain,
-				    struct device *dev)
+int s390_iommu_attach_device(struct iommu_domain *domain,
+			     struct device *dev)
 {
 	struct s390_domain *s390_domain = to_s390_domain(domain);
 	struct zpci_dev *zdev = to_zpci_dev(dev);
@@ -143,8 +138,8 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
 	return rc;
 }
 
-static void s390_iommu_detach_device(struct iommu_domain *domain,
-				     struct device *dev)
+void s390_iommu_detach_device(struct iommu_domain *domain,
+			      struct device *dev)
 {
 	struct s390_domain *s390_domain = to_s390_domain(domain);
 	struct zpci_dev *zdev = to_zpci_dev(dev);
@@ -200,7 +195,7 @@ static void s390_iommu_release_device(struct device *dev)
 	if (zdev && zdev->s390_domain) {
 		domain = iommu_get_domain_for_dev(dev);
 		if (domain)
-			s390_iommu_detach_device(domain, dev);
+			domain->ops->detach_dev(domain, dev);
 	}
 }
 
@@ -282,8 +277,8 @@ static int s390_iommu_map(struct iommu_domain *domain, unsigned long iova,
 	return rc;
 }
 
-static phys_addr_t s390_iommu_iova_to_phys(struct iommu_domain *domain,
-					   dma_addr_t iova)
+phys_addr_t s390_iommu_iova_to_phys(struct iommu_domain *domain,
+				    dma_addr_t iova)
 {
 	struct s390_domain *s390_domain = to_s390_domain(domain);
 	unsigned long *sto, *pto, *rto, flags;
diff --git a/drivers/iommu/s390-iommu.h b/drivers/iommu/s390-iommu.h
new file mode 100644
index 000000000000..21c8243a36b1
--- /dev/null
+++ b/drivers/iommu/s390-iommu.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * IOMMU API for s390 PCI devices
+ *
+ * Copyright IBM Corp. 2022
+ * Author(s): Matthew Rosato <mjrosato@linux.ibm.com>
+ */
+
+#ifndef _S390_IOMMU_H
+#define _S390_IOMMU_H
+
+#include <linux/iommu.h>
+#include <linux/kvm_host.h>
+
+extern const struct iommu_domain_ops s390_kvm_domain_ops;
+
+struct s390_kvm_domain {
+	struct kvm		*kvm;
+	unsigned long		*head[ZPCI_TABLE_PAGES];
+	unsigned long		**seg;
+	unsigned long		***pt;
+	struct page *(*pin)(struct kvm *kvm, gfn_t gfn);
+	void (*unpin)(kvm_pfn_t pfn);
+	struct mutex		ioat_lock;
+	bool			map_enabled;
+};
+
+struct s390_domain {
+	struct iommu_domain	domain;
+	struct list_head	devices;
+	unsigned long		*dma_table;
+	spinlock_t		dma_table_lock;
+	spinlock_t		list_lock;
+	struct s390_kvm_domain	kvm_dom;
+};
+
+struct s390_domain_device {
+	struct list_head	list;
+	struct zpci_dev		*zdev;
+};
+
+static inline struct s390_domain *to_s390_domain(struct iommu_domain *dom)
+{
+	return container_of(dom, struct s390_domain, domain);
+}
+
+void s390_domain_free(struct iommu_domain *domain);
+int s390_iommu_attach_device(struct iommu_domain *domain, struct device *dev);
+void s390_iommu_detach_device(struct iommu_domain *domain, struct device *dev);
+phys_addr_t s390_iommu_iova_to_phys(struct iommu_domain *domain,
+				    dma_addr_t iova);
+
+#endif /* _S390_IOMMU_H */
diff --git a/drivers/iommu/s390-kvm-iommu.c b/drivers/iommu/s390-kvm-iommu.c
new file mode 100644
index 000000000000..d24e6904d5f8
--- /dev/null
+++ b/drivers/iommu/s390-kvm-iommu.c
@@ -0,0 +1,469 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * IOMMU API domain ops for s390 PCI devices using KVM passthrough
+ *
+ * Copyright IBM Corp. 2022
+ * Author(s): Matthew Rosato <mjrosato@linux.ibm.com>
+ */
+
+#include <linux/pci.h>
+#include <linux/iommu.h>
+#include <linux/iommu-helper.h>
+#include <linux/sizes.h>
+#include <linux/kvm_host.h>
+#include <asm/kvm_pci.h>
+#include <asm/pci_dma.h>
+#include "s390-iommu.h"
+
+const struct iommu_domain_ops s390_kvm_domain_ops;
+
+static int dma_shadow_cpu_trans(struct s390_kvm_domain *kvm_dom,
+				unsigned long *entry, unsigned long *gentry)
+{
+	phys_addr_t gaddr = 0;
+	unsigned long idx;
+	struct page *page;
+	kvm_pfn_t pfn;
+	gpa_t addr;
+	int rc = 0;
+
+	if (pt_entry_isvalid(*gentry)) {
+		/* pin and validate */
+		addr = *gentry & ZPCI_PTE_ADDR_MASK;
+		idx = srcu_read_lock(&kvm_dom->kvm->srcu);
+		page = kvm_dom->pin(kvm_dom->kvm, gpa_to_gfn(addr));
+		srcu_read_unlock(&kvm_dom->kvm->srcu, idx);
+		if (is_error_page(page))
+			return -EIO;
+		gaddr = page_to_phys(page) + (addr & ~PAGE_MASK);
+	}
+
+	if (pt_entry_isvalid(*entry)) {
+		/* Either we are invalidating, replacing or no-op */
+		if (gaddr != 0) {
+			if ((*entry & ZPCI_PTE_ADDR_MASK) == gaddr) {
+				/* Duplicate */
+				kvm_dom->unpin(*entry >> PAGE_SHIFT);
+			} else {
+				/* Replace */
+				pfn = (*entry >> PAGE_SHIFT);
+				invalidate_pt_entry(entry);
+				set_pt_pfaa(entry, gaddr);
+				validate_pt_entry(entry);
+				kvm_dom->unpin(pfn);
+				rc = 1;
+			}
+		} else {
+			/* Invalidate */
+			pfn = (*entry >> PAGE_SHIFT);
+			invalidate_pt_entry(entry);
+			kvm_dom->unpin(pfn);
+			rc = 1;
+		}
+	} else if (gaddr != 0) {
+		/* New Entry */
+		set_pt_pfaa(entry, gaddr);
+		validate_pt_entry(entry);
+	}
+
+	return rc;
+}
+
+static unsigned long *dma_walk_guest_cpu_trans(struct s390_kvm_domain *kvm_dom,
+					       dma_addr_t dma_addr)
+{
+	unsigned long *rto, *sto, *pto;
+	unsigned int rtx, rts, sx, px, idx;
+	struct page *page;
+	gpa_t addr;
+	int i;
+
+	/* Pin guest segment table if needed */
+	rtx = calc_rtx(dma_addr);
+	rto = kvm_dom->head[(rtx / ZPCI_TABLE_ENTRIES_PER_PAGE)];
+	rts = rtx * ZPCI_TABLE_PAGES;
+	if (!kvm_dom->seg[rts]) {
+		if (!reg_entry_isvalid(rto[rtx % ZPCI_TABLE_ENTRIES_PER_PAGE]))
+			return NULL;
+		sto = get_rt_sto(rto[rtx % ZPCI_TABLE_ENTRIES_PER_PAGE]);
+		addr = ((u64)sto & ZPCI_RTE_ADDR_MASK);
+		idx = srcu_read_lock(&kvm_dom->kvm->srcu);
+		for (i = 0; i < ZPCI_TABLE_PAGES; i++) {
+			page = kvm_dom->pin(kvm_dom->kvm, gpa_to_gfn(addr));
+			if (is_error_page(page)) {
+				srcu_read_unlock(&kvm_dom->kvm->srcu, idx);
+				return NULL;
+			}
+			kvm_dom->seg[rts + i] = (page_to_virt(page) +
+						 (addr & ~PAGE_MASK));
+			addr += PAGE_SIZE;
+		}
+		srcu_read_unlock(&kvm_dom->kvm->srcu, idx);
+	}
+
+	/* Allocate pin pointers for another segment table if needed */
+	if (!kvm_dom->pt[rtx]) {
+		kvm_dom->pt[rtx] = kcalloc(ZPCI_TABLE_ENTRIES,
+					   (sizeof(unsigned long *)),
+					   GFP_KERNEL);
+		if (!kvm_dom->pt[rtx])
+			return NULL;
+	}
+	/* Pin guest page table if needed */
+	sx = calc_sx(dma_addr);
+	sto = kvm_dom->seg[(rts + (sx / ZPCI_TABLE_ENTRIES_PER_PAGE))];
+	if (!kvm_dom->pt[rtx][sx]) {
+		if (!reg_entry_isvalid(sto[sx % ZPCI_TABLE_ENTRIES_PER_PAGE]))
+			return NULL;
+		pto = get_st_pto(sto[sx % ZPCI_TABLE_ENTRIES_PER_PAGE]);
+		if (!pto)
+			return NULL;
+		addr = ((u64)pto & ZPCI_STE_ADDR_MASK);
+		idx = srcu_read_lock(&kvm_dom->kvm->srcu);
+		page = kvm_dom->pin(kvm_dom->kvm, gpa_to_gfn(addr));
+		srcu_read_unlock(&kvm_dom->kvm->srcu, idx);
+		if (is_error_page(page))
+			return NULL;
+		kvm_dom->pt[rtx][sx] = page_to_virt(page) + (addr & ~PAGE_MASK);
+	}
+	pto = kvm_dom->pt[rtx][sx];
+
+	/* Return guest PTE */
+	px = calc_px(dma_addr);
+	return &pto[px];
+}
+
+static int dma_table_shadow(struct s390_domain *s390_domain,
+			    dma_addr_t dma_addr, size_t nr_pages,
+			    size_t *mapped_pages)
+{
+	struct s390_kvm_domain *kvm_dom = &s390_domain->kvm_dom;
+	unsigned long *entry, *gentry;
+	int rc = 0, rc2;
+
+	for (*mapped_pages = 0; *mapped_pages < nr_pages; (*mapped_pages)++) {
+		gentry = dma_walk_guest_cpu_trans(kvm_dom, dma_addr);
+		if (!gentry)
+			continue;
+		entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr);
+
+		if (!entry)
+			return -ENOMEM;
+
+		rc2 = dma_shadow_cpu_trans(kvm_dom, entry, gentry);
+		if (rc2 < 0)
+			return -EIO;
+
+		dma_addr += PAGE_SIZE;
+		rc += rc2;
+	}
+
+	return rc;
+}
+
+static int s390_kvm_iommu_update_trans(struct s390_domain *s390_domain,
+				       dma_addr_t dma_addr, size_t nr_pages,
+				       size_t *mapped)
+{
+	struct s390_domain_device *domain_device;
+	unsigned long irq_flags;
+	size_t mapped_pages;
+	int rc = 0;
+	u8 status;
+
+	mutex_lock(&s390_domain->kvm_dom.ioat_lock);
+	rc = dma_table_shadow(s390_domain, dma_addr, nr_pages, &mapped_pages);
+
+	/* If error or no new mappings, leave immediately without refresh */
+	if (rc <= 0)
+		goto exit;
+
+	spin_lock_irqsave(&s390_domain->list_lock, irq_flags);
+	list_for_each_entry(domain_device, &s390_domain->devices, list) {
+		rc = zpci_refresh_trans((u64) domain_device->zdev->fh << 32,
+					dma_addr, nr_pages * PAGE_SIZE,
+					&status);
+		if (rc) {
+			if (status == 0)
+				rc = -EINVAL;
+			else
+				rc = -EIO;
+		}
+	}
+	spin_unlock_irqrestore(&s390_domain->list_lock, irq_flags);
+
+exit:
+	if (mapped)
+		*mapped = mapped_pages << PAGE_SHIFT;
+
+	mutex_unlock(&s390_domain->kvm_dom.ioat_lock);
+	return rc;
+}
+
+static int s390_kvm_iommu_map(struct iommu_domain *domain, unsigned long iova,
+			      phys_addr_t paddr, size_t size, int prot,
+			      gfp_t gfp)
+{
+	struct s390_domain *s390_domain = to_s390_domain(domain);
+	size_t nr_pages;
+
+	int rc = 0;
+
+	if (!(prot & (IOMMU_READ | IOMMU_WRITE)))
+		return -EINVAL;
+
+	/* Can only perform mapping when a guest IOTA is registered */
+	if (!s390_domain->kvm_dom.map_enabled)
+		return -EINVAL;
+
+	nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
+	if (!nr_pages)
+		return -EINVAL;
+
+	rc = s390_kvm_iommu_update_trans(s390_domain, iova, nr_pages, NULL);
+
+	return rc;
+}
+
+static int s390_kvm_iommu_map_pages(struct iommu_domain *domain,
+				    unsigned long iova, phys_addr_t paddr,
+				    size_t pgsize, size_t pgcount, int prot,
+				    gfp_t gfp, size_t *mapped)
+{
+	struct s390_domain *s390_domain = to_s390_domain(domain);
+	size_t nr_pages;
+
+	int rc = 0;
+
+	if (!(prot & (IOMMU_READ | IOMMU_WRITE)))
+		return -EINVAL;
+
+	/* Can only perform mapping when a guest IOTA is registered */
+	if (!s390_domain->kvm_dom.map_enabled)
+		return -EINVAL;
+
+	nr_pages = pgcount * (pgsize / PAGE_SIZE);
+	if (!nr_pages)
+		return -EINVAL;
+
+	rc = s390_kvm_iommu_update_trans(s390_domain, iova, nr_pages, mapped);
+
+	return rc;
+}
+
+static void free_pt_entry(struct s390_kvm_domain *kvm_dom, int st, int pt)
+{
+	if (!kvm_dom->pt[st][pt])
+		return;
+
+	kvm_dom->unpin((u64)kvm_dom->pt[st][pt]);
+}
+
+static void free_seg_entry(struct s390_kvm_domain *kvm_dom, int entry)
+{
+	int i, st, count = 0;
+
+	for (i = 0; i < ZPCI_TABLE_PAGES; i++) {
+		if (kvm_dom->seg[entry + i]) {
+			kvm_dom->unpin((u64)kvm_dom->seg[entry + i]);
+			count++;
+		}
+	}
+
+	if (count == 0)
+		return;
+
+	st = entry / ZPCI_TABLE_PAGES;
+	for (i = 0; i < ZPCI_TABLE_ENTRIES; i++)
+		free_pt_entry(kvm_dom, st, i);
+	kfree(kvm_dom->pt[st]);
+}
+
+static int s390_kvm_clear_ioat_tables(struct s390_domain *s390_domain)
+{
+	struct s390_kvm_domain *kvm_dom = &s390_domain->kvm_dom;
+	unsigned long *entry;
+	dma_addr_t dma_addr;
+	kvm_pfn_t pfn;
+	int i;
+
+	if (!kvm_dom->kvm || !kvm_dom->map_enabled)
+		return -EINVAL;
+
+	mutex_lock(&s390_domain->kvm_dom.ioat_lock);
+
+	/* Invalidate and unpin remaining guest pages */
+	for (dma_addr = s390_domain->domain.geometry.aperture_start;
+	     dma_addr < s390_domain->domain.geometry.aperture_end;
+	     dma_addr += PAGE_SIZE) {
+		entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr);
+		if (entry && pt_entry_isvalid(*entry)) {
+			pfn = (*entry >> PAGE_SHIFT);
+			invalidate_pt_entry(entry);
+			kvm_dom->unpin(pfn);
+		}
+	}
+
+	/* Unpin all shadow tables */
+	for (i = 0; i < ZPCI_TABLE_PAGES; i++) {
+		kvm_dom->unpin((u64)kvm_dom->head[i] >> PAGE_SHIFT);
+		kvm_dom->head[i] = 0;
+	}
+
+	for (i = 0; i < ZPCI_TABLE_ENTRIES_PAGES; i += ZPCI_TABLE_PAGES)
+		free_seg_entry(kvm_dom, i);
+
+	kfree(kvm_dom->seg);
+	kfree(kvm_dom->pt);
+
+	mutex_unlock(&s390_domain->kvm_dom.ioat_lock);
+
+	kvm_dom->map_enabled = false;
+
+	return 0;
+}
+
+static void s390_kvm_domain_free(struct iommu_domain *domain)
+{
+	struct s390_domain *s390_domain = to_s390_domain(domain);
+
+	s390_kvm_clear_ioat_tables(s390_domain);
+
+	if (s390_domain->kvm_dom.kvm) {
+		symbol_put(gfn_to_page);
+		symbol_put(kvm_release_pfn_dirty);
+	}
+
+	s390_domain_free(domain);
+}
+
+int zpci_iommu_attach_kvm(struct zpci_dev *zdev, struct kvm *kvm)
+{
+	struct s390_domain *s390_domain = zdev->s390_domain;
+	struct iommu_domain *domain = &s390_domain->domain;
+	struct s390_domain_device *domain_device;
+	unsigned long flags;
+	int rc = 0;
+
+	if (domain->type != IOMMU_DOMAIN_KVM)
+		return -EINVAL;
+
+	if (s390_domain->kvm_dom.kvm != 0)
+		return -EINVAL;
+
+	spin_lock_irqsave(&s390_domain->list_lock, flags);
+	list_for_each_entry(domain_device, &s390_domain->devices, list) {
+		if (domain_device->zdev->kzdev->kvm != kvm) {
+			rc = -EINVAL;
+			break;
+		}
+		domain_device->zdev->kzdev->dom = domain;
+	}
+	spin_unlock_irqrestore(&s390_domain->list_lock, flags);
+
+	if (rc)
+		return rc;
+
+	s390_domain->kvm_dom.pin = symbol_get(gfn_to_page);
+	if (!s390_domain->kvm_dom.pin)
+		return -EINVAL;
+
+	s390_domain->kvm_dom.unpin = symbol_get(kvm_release_pfn_dirty);
+	if (!s390_domain->kvm_dom.unpin) {
+		symbol_put(gfn_to_page);
+		return -EINVAL;
+	}
+
+	s390_domain->kvm_dom.kvm = kvm;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(zpci_iommu_attach_kvm);
+
+int zpci_iommu_kvm_assign_iota(struct zpci_dev *zdev, u64 iota)
+{
+	struct s390_domain *s390_domain = zdev->s390_domain;
+	struct s390_kvm_domain *kvm_dom = &s390_domain->kvm_dom;
+	gpa_t gpa = (gpa_t)(iota & ZPCI_RTE_ADDR_MASK);
+	struct page *page;
+	struct kvm *kvm;
+	unsigned int idx;
+	void *iaddr;
+	int i, rc;
+
+	/* Ensure KVM associated and IOTA not already registered */
+	if (!kvm_dom->kvm || kvm_dom->map_enabled)
+		return -EINVAL;
+
+	/* Ensure supported type specified */
+	if ((iota & ZPCI_IOTA_RTTO_FLAG) != ZPCI_IOTA_RTTO_FLAG)
+		return -EINVAL;
+
+	kvm = kvm_dom->kvm;
+	mutex_lock(&s390_domain->kvm_dom.ioat_lock);
+	idx = srcu_read_lock(&kvm->srcu);
+	for (i = 0; i < ZPCI_TABLE_PAGES; i++) {
+		page = kvm_dom->pin(kvm, gpa_to_gfn(gpa));
+		if (is_error_page(page)) {
+			srcu_read_unlock(&kvm->srcu, idx);
+			rc = -EIO;
+			goto unpin;
+		}
+		iaddr = page_to_virt(page) + (gpa & ~PAGE_MASK);
+		kvm_dom->head[i] = (unsigned long *)iaddr;
+		gpa += PAGE_SIZE;
+	}
+	srcu_read_unlock(&kvm->srcu, idx);
+
+	kvm_dom->seg = kcalloc(ZPCI_TABLE_ENTRIES_PAGES,
+			       sizeof(unsigned long *), GFP_KERNEL);
+	if (!kvm_dom->seg)
+		goto unpin;
+	kvm_dom->pt = kcalloc(ZPCI_TABLE_ENTRIES, sizeof(unsigned long **),
+			      GFP_KERNEL);
+	if (!kvm_dom->pt)
+		goto free_seg;
+
+	mutex_unlock(&s390_domain->kvm_dom.ioat_lock);
+	kvm_dom->map_enabled = true;
+	return 0;
+
+free_seg:
+	kfree(kvm_dom->seg);
+	rc = -ENOMEM;
+unpin:
+	for (i = 0; i < ZPCI_TABLE_PAGES; i++) {
+		kvm_dom->unpin((u64)kvm_dom->head[i] >> PAGE_SHIFT);
+		kvm_dom->head[i] = 0;
+	}
+	mutex_unlock(&s390_domain->kvm_dom.ioat_lock);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(zpci_iommu_kvm_assign_iota);
+
+int zpci_iommu_kvm_remove_iota(struct zpci_dev *zdev)
+{
+	struct s390_domain *s390_domain = zdev->s390_domain;
+
+	return s390_kvm_clear_ioat_tables(s390_domain);
+}
+EXPORT_SYMBOL_GPL(zpci_iommu_kvm_remove_iota);
+
+const struct iommu_domain_ops s390_kvm_domain_ops = {
+	.attach_dev	= s390_iommu_attach_device,
+	.detach_dev	= s390_iommu_detach_device,
+	/*
+	 * All iommu mapping and unmapping operations are handled via the map
+	 * ops.  A map over a given range will synchronize the host and guest
+	 * DMA tables, performing the necessary mappings / unmappings to
+	 * synchronize the table states.
+	 * Partial mapping failures do not require a rewind, the guest will
+	 * receive an indication that will trigger a global refresh of the
+	 * tables.
+	 */
+	.map		= s390_kvm_iommu_map,
+	.map_pages	= s390_kvm_iommu_map_pages,
+	.unmap		= NULL,
+	.unmap_pages	= NULL,
+	.iova_to_phys	= s390_iommu_iova_to_phys,
+	.free		= s390_kvm_domain_free,
+};
-- 
2.27.0

_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

  parent reply	other threads:[~2022-03-14 19:48 UTC|newest]

Thread overview: 66+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-03-14 19:44 [PATCH v4 00/32] KVM: s390: enable zPCI for interpretive execution Matthew Rosato
2022-03-14 19:44 ` [PATCH v4 01/32] s390/sclp: detect the zPCI load/store interpretation facility Matthew Rosato
2022-03-14 19:44 ` [PATCH v4 02/32] s390/sclp: detect the AISII facility Matthew Rosato
2022-03-14 19:44 ` [PATCH v4 03/32] s390/sclp: detect the AENI facility Matthew Rosato
2022-03-14 19:44 ` [PATCH v4 04/32] s390/sclp: detect the AISI facility Matthew Rosato
2022-03-14 19:44 ` [PATCH v4 05/32] s390/airq: pass more TPI info to airq handlers Matthew Rosato
2022-03-14 19:44 ` [PATCH v4 06/32] s390/airq: allow for airq structure that uses an input vector Matthew Rosato
2022-03-14 19:44 ` [PATCH v4 07/32] s390/pci: externalize the SIC operation controls and routine Matthew Rosato
2022-03-14 19:44 ` [PATCH v4 08/32] s390/pci: stash associated GISA designation Matthew Rosato
2022-03-14 19:44 ` [PATCH v4 09/32] s390/pci: export some routines related to RPCIT processing Matthew Rosato
2022-03-14 19:44 ` [PATCH v4 10/32] s390/pci: stash dtsm and maxstbl Matthew Rosato
2022-03-14 19:44 ` [PATCH v4 11/32] s390/pci: add helper function to find device by handle Matthew Rosato
2022-03-14 19:44 ` [PATCH v4 12/32] s390/pci: get SHM information from list pci Matthew Rosato
2022-03-14 19:44 ` [PATCH v4 13/32] s390/pci: return status from zpci_refresh_trans Matthew Rosato
2022-03-14 19:44 ` [PATCH v4 14/32] iommu: introduce iommu_domain_alloc_type and the KVM type Matthew Rosato
2022-03-14 21:36   ` Jason Gunthorpe via iommu
2022-03-15 10:49   ` Robin Murphy
2022-03-17  5:47     ` Tian, Kevin
2022-03-17 13:52       ` Jason Gunthorpe via iommu
2022-03-18  2:23         ` Tian, Kevin
2022-03-18 14:13           ` Jason Gunthorpe via iommu
2022-03-19  7:51             ` Tian, Kevin
2022-03-21 14:07               ` Jason Gunthorpe via iommu
2022-03-22  7:30                 ` Tian, Kevin
2022-03-14 19:44 ` [PATCH v4 15/32] vfio: introduce KVM-owned IOMMU type Matthew Rosato
2022-03-14 21:38   ` Jason Gunthorpe via iommu
2022-03-15 13:49     ` Matthew Rosato
2022-03-15 14:38       ` Jason Gunthorpe via iommu
2022-03-15 16:29         ` Matthew Rosato
2022-03-15 17:25           ` Jason Gunthorpe via iommu
2022-03-17 18:51             ` Matthew Rosato
2022-03-14 22:50   ` Alex Williamson
2022-03-14 23:18     ` Jason Gunthorpe via iommu
2022-03-15  7:57       ` Tian, Kevin
2022-03-15 14:17         ` Matthew Rosato
2022-03-15 17:01           ` Matthew Rosato
2022-03-15 13:36       ` Matthew Rosato
2022-03-15 14:55         ` Jason Gunthorpe via iommu
2022-03-15 16:04           ` Matthew Rosato
2022-03-15 17:18             ` Jason Gunthorpe via iommu
2022-03-18  7:01           ` Tian, Kevin
2022-03-18 13:46             ` Jason Gunthorpe via iommu
2022-03-19  7:47               ` Tian, Kevin
2022-03-14 19:44 ` [PATCH v4 16/32] vfio-pci/zdev: add function handle to clp base capability Matthew Rosato
2022-03-14 19:44 ` [PATCH v4 17/32] KVM: s390: pci: add basic kvm_zdev structure Matthew Rosato
2022-03-14 19:44 ` Matthew Rosato [this message]
2022-03-14 19:44 ` [PATCH v4 19/32] KVM: s390: pci: do initial setup for AEN interpretation Matthew Rosato
2022-03-14 19:44 ` [PATCH v4 20/32] KVM: s390: pci: enable host forwarding of Adapter Event Notifications Matthew Rosato
2022-03-14 19:44 ` [PATCH v4 21/32] KVM: s390: mechanism to enable guest zPCI Interpretation Matthew Rosato
2022-03-14 19:44 ` [PATCH v4 22/32] KVM: s390: pci: routines for (dis)associating zPCI devices with a KVM Matthew Rosato
2022-03-14 21:46   ` Jason Gunthorpe via iommu
2022-03-15 16:39     ` Matthew Rosato
2022-03-14 19:44 ` [PATCH v4 23/32] KVM: s390: pci: provide routines for enabling/disabling interpretation Matthew Rosato
2022-03-14 19:44 ` [PATCH v4 24/32] KVM: s390: pci: provide routines for enabling/disabling interrupt forwarding Matthew Rosato
2022-03-14 19:44 ` [PATCH v4 25/32] KVM: s390: pci: provide routines for enabling/disabling IOAT assist Matthew Rosato
2022-03-14 19:44 ` [PATCH v4 26/32] KVM: s390: pci: handle refresh of PCI translations Matthew Rosato
2022-03-14 19:44 ` [PATCH v4 27/32] KVM: s390: intercept the rpcit instruction Matthew Rosato
2022-03-14 19:44 ` [PATCH v4 28/32] KVM: s390: add KVM_S390_ZPCI_OP to manage guest zPCI devices Matthew Rosato
2022-03-14 19:44 ` [PATCH v4 29/32] vfio-pci/zdev: add DTSM to clp group capability Matthew Rosato
2022-03-14 21:49   ` Jason Gunthorpe via iommu
2022-03-15 14:39     ` Matthew Rosato
2022-03-15 14:56       ` Jason Gunthorpe via iommu
2022-03-14 19:44 ` [PATCH v4 30/32] KVM: s390: introduce CPU feature for zPCI Interpretation Matthew Rosato
2022-03-14 19:44 ` [PATCH v4 31/32] MAINTAINERS: additional files related kvm s390 pci passthrough Matthew Rosato
2022-03-14 19:44 ` [PATCH v4 32/32] MAINTAINERS: update s390 IOMMU entry Matthew Rosato
2022-03-14 19:52 ` [PATCH v4 00/32] KVM: s390: enable zPCI for interpretive execution Matthew Rosato

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220314194451.58266-19-mjrosato@linux.ibm.com \
    --to=mjrosato@linux.ibm.com \
    --cc=agordeev@linux.ibm.com \
    --cc=alex.williamson@redhat.com \
    --cc=borntraeger@linux.ibm.com \
    --cc=cohuck@redhat.com \
    --cc=corbet@lwn.net \
    --cc=david@redhat.com \
    --cc=farman@linux.ibm.com \
    --cc=frankja@linux.ibm.com \
    --cc=freude@linux.ibm.com \
    --cc=gerald.schaefer@linux.ibm.com \
    --cc=gor@linux.ibm.com \
    --cc=hca@linux.ibm.com \
    --cc=imbrenda@linux.ibm.com \
    --cc=iommu@lists.linux-foundation.org \
    --cc=jgg@nvidia.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-s390@vger.kernel.org \
    --cc=oberpar@linux.ibm.com \
    --cc=pasic@linux.ibm.com \
    --cc=pbonzini@redhat.com \
    --cc=pmorel@linux.ibm.com \
    --cc=schnelle@linux.ibm.com \
    --cc=svens@linux.ibm.com \
    --cc=thuth@redhat.com \
    --cc=vneethv@linux.ibm.com \
    --cc=will@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox