public inbox for linux-s390@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 0/6] iommu/s390: support additional table regions
@ 2025-01-24 20:18 Matthew Rosato
  2025-01-24 20:18 ` [PATCH v2 1/6] iommu/s390: add initial fields to track table size Matthew Rosato
                   ` (5 more replies)
  0 siblings, 6 replies; 8+ messages in thread
From: Matthew Rosato @ 2025-01-24 20:18 UTC (permalink / raw)
  To: joro, will, robin.murphy, gerald.schaefer, schnelle
  Cc: hca, gor, agordeev, svens, borntraeger, clegoate, iommu,
	linux-kernel, linux-s390

The series extends the maximum table size allowed by s390-iommu by
increasing the number of table regions supported.  It also adds logic to
construct the table using the minimum number of regions based upon aperture
calculation.

Changes for v2:
- rebase onto 6.13
- remove 'iommu/s390: add basic routines for region 1st and 2nd tables'
  and put routines in first patch that uses each.  No functional change.


Matthew Rosato (6):
  iommu/s390: add initial fields to track table size
  s390/pci: set appropriate IOTA region type
  iommu/s390: support cleanup of additional table regions
  iommu/s390: support iova_to_phys for additional table regions
  iommu/s390: support map/unmap for additional table regions
  iommu/s390: allow larger region tables

 arch/s390/include/asm/pci.h     |   2 +
 arch/s390/include/asm/pci_dma.h |   3 +
 arch/s390/pci/pci.c             |  17 +-
 drivers/iommu/s390-iommu.c      | 289 +++++++++++++++++++++++++++++---
 4 files changed, 285 insertions(+), 26 deletions(-)

-- 
2.48.1


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH v2 1/6] iommu/s390: add initial fields to track table size
  2025-01-24 20:18 [PATCH v2 0/6] iommu/s390: support additional table regions Matthew Rosato
@ 2025-01-24 20:18 ` Matthew Rosato
  2025-02-05 15:08   ` Niklas Schnelle
  2025-01-24 20:18 ` [PATCH v2 2/6] s390/pci: set appropriate IOTA region type Matthew Rosato
                   ` (4 subsequent siblings)
  5 siblings, 1 reply; 8+ messages in thread
From: Matthew Rosato @ 2025-01-24 20:18 UTC (permalink / raw)
  To: joro, will, robin.murphy, gerald.schaefer, schnelle
  Cc: hca, gor, agordeev, svens, borntraeger, clegoate, iommu,
	linux-kernel, linux-s390

In preparation for allowing for additional table levels, add the
necessary information to the zdev and s390 domain to track table
type.  For now, these values will always be set to signify a
region third table which is what s390-iommu always uses today.

Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com>
---
 arch/s390/include/asm/pci.h |  2 ++
 drivers/iommu/s390-iommu.c  | 17 +++++++++++------
 2 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index 474e1f8d1d3c..8f8ebaeec60a 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -189,6 +189,8 @@ struct zpci_dev {
 	struct kvm_zdev *kzdev;
 	struct mutex kzdev_lock;
 	spinlock_t dom_lock;		/* protect s390_domain change */
+	u64 max_table_size;
+	u8 origin_type;
 };
 
 static inline bool zdev_enabled(struct zpci_dev *zdev)
diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index fbdeded3d48b..f6c27b6ab4c6 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -31,6 +31,7 @@ struct s390_domain {
 	unsigned long		*dma_table;
 	spinlock_t		list_lock;
 	struct rcu_head		rcu;
+	u8			origin_type;
 };
 
 static struct iommu_domain blocking_domain;
@@ -331,6 +332,7 @@ static bool s390_iommu_capable(struct device *dev, enum iommu_cap cap)
 
 static struct iommu_domain *s390_domain_alloc_paging(struct device *dev)
 {
+	struct zpci_dev *zdev = to_zpci_dev(dev);
 	struct s390_domain *s390_domain;
 
 	s390_domain = kzalloc(sizeof(*s390_domain), GFP_KERNEL);
@@ -344,7 +346,8 @@ static struct iommu_domain *s390_domain_alloc_paging(struct device *dev)
 	}
 	s390_domain->domain.geometry.force_aperture = true;
 	s390_domain->domain.geometry.aperture_start = 0;
-	s390_domain->domain.geometry.aperture_end = ZPCI_TABLE_SIZE_RT - 1;
+	s390_domain->domain.geometry.aperture_end = zdev->max_table_size;
+	s390_domain->origin_type = zdev->origin_type;
 
 	spin_lock_init(&s390_domain->list_lock);
 	INIT_LIST_HEAD_RCU(&s390_domain->devices);
@@ -450,9 +453,9 @@ static void s390_iommu_get_resv_regions(struct device *dev,
 		list_add_tail(&region->list, list);
 	}
 
-	if (zdev->end_dma < ZPCI_TABLE_SIZE_RT - 1) {
+	if (zdev->end_dma < zdev->max_table_size) {
 		region = iommu_alloc_resv_region(zdev->end_dma + 1,
-						 ZPCI_TABLE_SIZE_RT - zdev->end_dma - 1,
+						 zdev->max_table_size - zdev->end_dma,
 						 0, IOMMU_RESV_RESERVED, GFP_KERNEL);
 		if (!region)
 			return;
@@ -470,11 +473,11 @@ static struct iommu_device *s390_iommu_probe_device(struct device *dev)
 	zdev = to_zpci_dev(dev);
 
 	if (zdev->start_dma > zdev->end_dma ||
-	    zdev->start_dma > ZPCI_TABLE_SIZE_RT - 1)
+	    zdev->start_dma > zdev->max_table_size)
 		return ERR_PTR(-EINVAL);
 
-	if (zdev->end_dma > ZPCI_TABLE_SIZE_RT - 1)
-		zdev->end_dma = ZPCI_TABLE_SIZE_RT - 1;
+	if (zdev->end_dma > zdev->max_table_size)
+		zdev->end_dma = zdev->max_table_size;
 
 	if (zdev->tlb_refresh)
 		dev->iommu->shadow_on_flush = 1;
@@ -732,6 +735,8 @@ int zpci_init_iommu(struct zpci_dev *zdev)
 			     ZPCI_TABLE_SIZE_RT - zdev->start_dma,
 			     zdev->end_dma - zdev->start_dma + 1);
 	zdev->end_dma = zdev->start_dma + aperture_size - 1;
+	zdev->origin_type = ZPCI_TABLE_TYPE_RTX;
+	zdev->max_table_size = ZPCI_TABLE_SIZE_RT - 1;
 
 	return 0;
 
-- 
2.48.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH v2 2/6] s390/pci: set appropriate IOTA region type
  2025-01-24 20:18 [PATCH v2 0/6] iommu/s390: support additional table regions Matthew Rosato
  2025-01-24 20:18 ` [PATCH v2 1/6] iommu/s390: add initial fields to track table size Matthew Rosato
@ 2025-01-24 20:18 ` Matthew Rosato
  2025-01-24 20:18 ` [PATCH v2 3/6] iommu/s390: support cleanup of additional table regions Matthew Rosato
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 8+ messages in thread
From: Matthew Rosato @ 2025-01-24 20:18 UTC (permalink / raw)
  To: joro, will, robin.murphy, gerald.schaefer, schnelle
  Cc: hca, gor, agordeev, svens, borntraeger, clegoate, iommu,
	linux-kernel, linux-s390

When registering the I/O Translation Anchor, use the current table type
stored in the zdev to set the appropriate region type indication.

Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com>
---
 arch/s390/pci/pci.c | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 88f72745fa59..9f7adf913729 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -116,6 +116,21 @@ int pci_proc_domain(struct pci_bus *bus)
 }
 EXPORT_SYMBOL_GPL(pci_proc_domain);
 
+static u64 zpci_get_iota_region_flag(struct zpci_dev *zdev)
+{
+	switch (zdev->origin_type) {
+	case ZPCI_TABLE_TYPE_RTX:
+		return ZPCI_IOTA_RTTO_FLAG;
+	case ZPCI_TABLE_TYPE_RSX:
+		return ZPCI_IOTA_RSTO_FLAG;
+	case ZPCI_TABLE_TYPE_RFX:
+		return ZPCI_IOTA_RFTO_FLAG;
+	default:
+		WARN_ONCE(1, "Invalid IOMMU table (%x)\n", zdev->origin_type);
+		return 0;
+	}
+}
+
 /* Modify PCI: Register I/O address translation parameters */
 int zpci_register_ioat(struct zpci_dev *zdev, u8 dmaas,
 		       u64 base, u64 limit, u64 iota, u8 *status)
@@ -131,7 +146,7 @@ int zpci_register_ioat(struct zpci_dev *zdev, u8 dmaas,
 		fib.pal = limit + (1 << 12);
 	else
 		fib.pal = limit;
-	fib.iota = iota | ZPCI_IOTA_RTTO_FLAG;
+	fib.iota = iota | zpci_get_iota_region_flag(zdev);
 	fib.gd = zdev->gisa;
 	cc = zpci_mod_fc(req, &fib, status);
 	if (cc)
-- 
2.48.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH v2 3/6] iommu/s390: support cleanup of additional table regions
  2025-01-24 20:18 [PATCH v2 0/6] iommu/s390: support additional table regions Matthew Rosato
  2025-01-24 20:18 ` [PATCH v2 1/6] iommu/s390: add initial fields to track table size Matthew Rosato
  2025-01-24 20:18 ` [PATCH v2 2/6] s390/pci: set appropriate IOTA region type Matthew Rosato
@ 2025-01-24 20:18 ` Matthew Rosato
  2025-01-24 20:18 ` [PATCH v2 4/6] iommu/s390: support iova_to_phys for " Matthew Rosato
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 8+ messages in thread
From: Matthew Rosato @ 2025-01-24 20:18 UTC (permalink / raw)
  To: joro, will, robin.murphy, gerald.schaefer, schnelle
  Cc: hca, gor, agordeev, svens, borntraeger, clegoate, iommu,
	linux-kernel, linux-s390

Extend the existing dma_cleanup_tables to also handle region second and
region first tables.

Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com>
---
 drivers/iommu/s390-iommu.c | 71 ++++++++++++++++++++++++++++++++++----
 1 file changed, 64 insertions(+), 7 deletions(-)

diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index f6c27b6ab4c6..73e0bb251bc1 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -121,6 +121,22 @@ static inline int pt_entry_isvalid(unsigned long entry)
 	return (entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID;
 }
 
+static inline unsigned long *get_rf_rso(unsigned long entry)
+{
+	if ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_RFX)
+		return phys_to_virt(entry & ZPCI_RTE_ADDR_MASK);
+	else
+		return NULL;
+}
+
+static inline unsigned long *get_rs_rto(unsigned long entry)
+{
+	if ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_RSX)
+		return phys_to_virt(entry & ZPCI_RTE_ADDR_MASK);
+	else
+		return NULL;
+}
+
 static inline unsigned long *get_rt_sto(unsigned long entry)
 {
 	if ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_RTX)
@@ -192,18 +208,59 @@ static void dma_free_seg_table(unsigned long entry)
 	dma_free_cpu_table(sto);
 }
 
-static void dma_cleanup_tables(unsigned long *table)
+static void dma_free_rt_table(unsigned long entry)
 {
+	unsigned long *rto = get_rs_rto(entry);
 	int rtx;
 
-	if (!table)
+	for (rtx = 0; rtx < ZPCI_TABLE_ENTRIES; rtx++)
+		if (reg_entry_isvalid(rto[rtx]))
+			dma_free_seg_table(rto[rtx]);
+
+	dma_free_cpu_table(rto);
+}
+
+static void dma_free_rs_table(unsigned long entry)
+{
+	unsigned long *rso = get_rf_rso(entry);
+	int rsx;
+
+	for (rsx = 0; rsx < ZPCI_TABLE_ENTRIES; rsx++)
+		if (reg_entry_isvalid(rso[rsx]))
+			dma_free_rt_table(rso[rsx]);
+
+	dma_free_cpu_table(rso);
+}
+
+static void dma_cleanup_tables(struct s390_domain *domain)
+{
+	int rtx, rsx, rfx;
+
+	if (!domain->dma_table)
 		return;
 
-	for (rtx = 0; rtx < ZPCI_TABLE_ENTRIES; rtx++)
-		if (reg_entry_isvalid(table[rtx]))
-			dma_free_seg_table(table[rtx]);
+	switch (domain->origin_type) {
+	case ZPCI_TABLE_TYPE_RFX:
+		for (rfx = 0; rfx < ZPCI_TABLE_ENTRIES; rfx++)
+			if (reg_entry_isvalid(domain->dma_table[rfx]))
+				dma_free_rs_table(domain->dma_table[rfx]);
+		break;
+	case ZPCI_TABLE_TYPE_RSX:
+		for (rsx = 0; rsx < ZPCI_TABLE_ENTRIES; rsx++)
+			if (reg_entry_isvalid(domain->dma_table[rsx]))
+				dma_free_rt_table(domain->dma_table[rsx]);
+		break;
+	case ZPCI_TABLE_TYPE_RTX:
+		for (rtx = 0; rtx < ZPCI_TABLE_ENTRIES; rtx++)
+			if (reg_entry_isvalid(domain->dma_table[rtx]))
+				dma_free_seg_table(domain->dma_table[rtx]);
+		break;
+	default:
+		WARN_ONCE(1, "Invalid IOMMU table (%x)\n", domain->origin_type);
+		return;
+	}
 
-	dma_free_cpu_table(table);
+	dma_free_cpu_table(domain->dma_table);
 }
 
 static unsigned long *dma_alloc_page_table(gfp_t gfp)
@@ -359,7 +416,7 @@ static void s390_iommu_rcu_free_domain(struct rcu_head *head)
 {
 	struct s390_domain *s390_domain = container_of(head, struct s390_domain, rcu);
 
-	dma_cleanup_tables(s390_domain->dma_table);
+	dma_cleanup_tables(s390_domain);
 	kfree(s390_domain);
 }
 
-- 
2.48.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH v2 4/6] iommu/s390: support iova_to_phys for additional table regions
  2025-01-24 20:18 [PATCH v2 0/6] iommu/s390: support additional table regions Matthew Rosato
                   ` (2 preceding siblings ...)
  2025-01-24 20:18 ` [PATCH v2 3/6] iommu/s390: support cleanup of additional table regions Matthew Rosato
@ 2025-01-24 20:18 ` Matthew Rosato
  2025-01-24 20:18 ` [PATCH v2 5/6] iommu/s390: support map/unmap " Matthew Rosato
  2025-01-24 20:18 ` [PATCH v2 6/6] iommu/s390: allow larger region tables Matthew Rosato
  5 siblings, 0 replies; 8+ messages in thread
From: Matthew Rosato @ 2025-01-24 20:18 UTC (permalink / raw)
  To: joro, will, robin.murphy, gerald.schaefer, schnelle
  Cc: hca, gor, agordeev, svens, borntraeger, clegoate, iommu,
	linux-kernel, linux-s390

The origin_type of the dma_table is used to determine how many table
levels must be traversed for the translation.

Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com>
---
 arch/s390/include/asm/pci_dma.h |  2 ++
 drivers/iommu/s390-iommu.c      | 52 ++++++++++++++++++++++++++++++++-
 2 files changed, 53 insertions(+), 1 deletion(-)

diff --git a/arch/s390/include/asm/pci_dma.h b/arch/s390/include/asm/pci_dma.h
index 42d7cc4262ca..8d8962e4fd58 100644
--- a/arch/s390/include/asm/pci_dma.h
+++ b/arch/s390/include/asm/pci_dma.h
@@ -55,6 +55,8 @@ enum zpci_ioat_dtype {
 #define ZPCI_PT_BITS			8
 #define ZPCI_ST_SHIFT			(ZPCI_PT_BITS + PAGE_SHIFT)
 #define ZPCI_RT_SHIFT			(ZPCI_ST_SHIFT + ZPCI_TABLE_BITS)
+#define ZPCI_RS_SHIFT			(ZPCI_RT_SHIFT + ZPCI_TABLE_BITS)
+#define ZPCI_RF_SHIFT			(ZPCI_RS_SHIFT + ZPCI_TABLE_BITS)
 
 #define ZPCI_RTE_FLAG_MASK		0x3fffUL
 #define ZPCI_RTE_ADDR_MASK		(~ZPCI_RTE_FLAG_MASK)
diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index 73e0bb251bc1..88adc4efc53f 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -36,6 +36,16 @@ struct s390_domain {
 
 static struct iommu_domain blocking_domain;
 
+static inline unsigned int calc_rfx(dma_addr_t ptr)
+{
+	return ((unsigned long)ptr >> ZPCI_RF_SHIFT) & ZPCI_INDEX_MASK;
+}
+
+static inline unsigned int calc_rsx(dma_addr_t ptr)
+{
+	return ((unsigned long)ptr >> ZPCI_RS_SHIFT) & ZPCI_INDEX_MASK;
+}
+
 static inline unsigned int calc_rtx(dma_addr_t ptr)
 {
 	return ((unsigned long)ptr >> ZPCI_RT_SHIFT) & ZPCI_INDEX_MASK;
@@ -704,6 +714,43 @@ static int s390_iommu_map_pages(struct iommu_domain *domain,
 	return rc;
 }
 
+static unsigned long *get_rto_from_iova(struct s390_domain *domain,
+					dma_addr_t iova)
+{
+	unsigned long *rfo, *rso, *rto;
+	unsigned long rfe, rse;
+	unsigned int rfx, rsx;
+
+	switch (domain->origin_type) {
+	case ZPCI_TABLE_TYPE_RFX:
+		rfo = domain->dma_table;
+		goto itp_rf;
+	case ZPCI_TABLE_TYPE_RSX:
+		rso = domain->dma_table;
+		goto itp_rs;
+	case ZPCI_TABLE_TYPE_RTX:
+		return domain->dma_table;
+	default:
+		return NULL;
+	}
+
+itp_rf:
+	rfx = calc_rfx(iova);
+	rfe = READ_ONCE(rfo[rfx]);
+	if (!reg_entry_isvalid(rfe))
+		return NULL;
+	rso = get_rf_rso(rfe);
+
+itp_rs:
+	rsx = calc_rsx(iova);
+	rse = READ_ONCE(rso[rsx]);
+	if (!reg_entry_isvalid(rse))
+		return NULL;
+	rto = get_rs_rto(rse);
+
+	return rto;
+}
+
 static phys_addr_t s390_iommu_iova_to_phys(struct iommu_domain *domain,
 					   dma_addr_t iova)
 {
@@ -717,10 +764,13 @@ static phys_addr_t s390_iommu_iova_to_phys(struct iommu_domain *domain,
 	    iova > domain->geometry.aperture_end)
 		return 0;
 
+	rto = get_rto_from_iova(s390_domain, iova);
+	if (!rto)
+		return 0;
+
 	rtx = calc_rtx(iova);
 	sx = calc_sx(iova);
 	px = calc_px(iova);
-	rto = s390_domain->dma_table;
 
 	rte = READ_ONCE(rto[rtx]);
 	if (reg_entry_isvalid(rte)) {
-- 
2.48.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH v2 5/6] iommu/s390: support map/unmap for additional table regions
  2025-01-24 20:18 [PATCH v2 0/6] iommu/s390: support additional table regions Matthew Rosato
                   ` (3 preceding siblings ...)
  2025-01-24 20:18 ` [PATCH v2 4/6] iommu/s390: support iova_to_phys for " Matthew Rosato
@ 2025-01-24 20:18 ` Matthew Rosato
  2025-01-24 20:18 ` [PATCH v2 6/6] iommu/s390: allow larger region tables Matthew Rosato
  5 siblings, 0 replies; 8+ messages in thread
From: Matthew Rosato @ 2025-01-24 20:18 UTC (permalink / raw)
  To: joro, will, robin.murphy, gerald.schaefer, schnelle
  Cc: hca, gor, agordeev, svens, borntraeger, clegoate, iommu,
	linux-kernel, linux-s390

Map and unmap ops use the shared dma_walk_cpu_trans routine.  Update
this routine to use the origin_type of the dma_table to determine how
many table levels must be walked.

Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com>
---
 drivers/iommu/s390-iommu.c | 131 ++++++++++++++++++++++++++++++++++---
 1 file changed, 123 insertions(+), 8 deletions(-)

diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index 88adc4efc53f..4924fe9faccb 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -67,6 +67,20 @@ static inline void set_pt_pfaa(unsigned long *entry, phys_addr_t pfaa)
 	*entry |= (pfaa & ZPCI_PTE_ADDR_MASK);
 }
 
+static inline void set_rf_rso(unsigned long *entry, phys_addr_t rso)
+{
+	*entry &= ZPCI_RTE_FLAG_MASK;
+	*entry |= (rso & ZPCI_RTE_ADDR_MASK);
+	*entry |= ZPCI_TABLE_TYPE_RFX;
+}
+
+static inline void set_rs_rto(unsigned long *entry, phys_addr_t rto)
+{
+	*entry &= ZPCI_RTE_FLAG_MASK;
+	*entry |= (rto & ZPCI_RTE_ADDR_MASK);
+	*entry |= ZPCI_TABLE_TYPE_RSX;
+}
+
 static inline void set_rt_sto(unsigned long *entry, phys_addr_t sto)
 {
 	*entry &= ZPCI_RTE_FLAG_MASK;
@@ -81,6 +95,22 @@ static inline void set_st_pto(unsigned long *entry, phys_addr_t pto)
 	*entry |= ZPCI_TABLE_TYPE_SX;
 }
 
+static inline void validate_rf_entry(unsigned long *entry)
+{
+	*entry &= ~ZPCI_TABLE_VALID_MASK;
+	*entry &= ~ZPCI_TABLE_OFFSET_MASK;
+	*entry |= ZPCI_TABLE_VALID;
+	*entry |= ZPCI_TABLE_LEN_RFX;
+}
+
+static inline void validate_rs_entry(unsigned long *entry)
+{
+	*entry &= ~ZPCI_TABLE_VALID_MASK;
+	*entry &= ~ZPCI_TABLE_OFFSET_MASK;
+	*entry |= ZPCI_TABLE_VALID;
+	*entry |= ZPCI_TABLE_LEN_RSX;
+}
+
 static inline void validate_rt_entry(unsigned long *entry)
 {
 	*entry &= ~ZPCI_TABLE_VALID_MASK;
@@ -286,6 +316,60 @@ static unsigned long *dma_alloc_page_table(gfp_t gfp)
 	return table;
 }
 
+static unsigned long *dma_get_rs_table_origin(unsigned long *rfep, gfp_t gfp)
+{
+	unsigned long old_rfe, rfe;
+	unsigned long *rso;
+
+	rfe = READ_ONCE(*rfep);
+	if (reg_entry_isvalid(rfe)) {
+		rso = get_rf_rso(rfe);
+	} else {
+		rso = dma_alloc_cpu_table(gfp);
+		if (!rso)
+			return NULL;
+
+		set_rf_rso(&rfe, virt_to_phys(rso));
+		validate_rf_entry(&rfe);
+		entry_clr_protected(&rfe);
+
+		old_rfe = cmpxchg(rfep, ZPCI_TABLE_INVALID, rfe);
+		if (old_rfe != ZPCI_TABLE_INVALID) {
+			/* Somone else was faster, use theirs */
+			dma_free_cpu_table(rso);
+			rso = get_rf_rso(old_rfe);
+		}
+	}
+	return rso;
+}
+
+static unsigned long *dma_get_rt_table_origin(unsigned long *rsep, gfp_t gfp)
+{
+	unsigned long old_rse, rse;
+	unsigned long *rto;
+
+	rse = READ_ONCE(*rsep);
+	if (reg_entry_isvalid(rse)) {
+		rto = get_rs_rto(rse);
+	} else {
+		rto = dma_alloc_cpu_table(gfp);
+		if (!rto)
+			return NULL;
+
+		set_rs_rto(&rse, virt_to_phys(rto));
+		validate_rs_entry(&rse);
+		entry_clr_protected(&rse);
+
+		old_rse = cmpxchg(rsep, ZPCI_TABLE_INVALID, rse);
+		if (old_rse != ZPCI_TABLE_INVALID) {
+			/* Somone else was faster, use theirs */
+			dma_free_cpu_table(rto);
+			rto = get_rs_rto(old_rse);
+		}
+	}
+	return rto;
+}
+
 static unsigned long *dma_get_seg_table_origin(unsigned long *rtep, gfp_t gfp)
 {
 	unsigned long old_rte, rte;
@@ -339,11 +423,45 @@ static unsigned long *dma_get_page_table_origin(unsigned long *step, gfp_t gfp)
 	return pto;
 }
 
-static unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr, gfp_t gfp)
+static unsigned long *dma_walk_region_tables(struct s390_domain *domain,
+					     dma_addr_t dma_addr, gfp_t gfp)
+{
+	unsigned long *rfo, *rso;
+	unsigned int rfx, rsx;
+
+	switch (domain->origin_type) {
+	case ZPCI_TABLE_TYPE_RFX:
+		rfo = domain->dma_table;
+		goto walk_rf;
+	case ZPCI_TABLE_TYPE_RSX:
+		rso = domain->dma_table;
+		goto walk_rs;
+	case ZPCI_TABLE_TYPE_RTX:
+		return domain->dma_table;
+	default:
+		return NULL;
+	}
+
+walk_rf:
+	rfx = calc_rfx(dma_addr);
+	rso = dma_get_rs_table_origin(&rfo[rfx], gfp);
+	if (!rso)
+		return NULL;
+walk_rs:
+	rsx = calc_rsx(dma_addr);
+	return dma_get_rt_table_origin(&rso[rsx], gfp);
+}
+
+static unsigned long *dma_walk_cpu_trans(struct s390_domain *domain,
+					 dma_addr_t dma_addr, gfp_t gfp)
 {
-	unsigned long *sto, *pto;
+	unsigned long *rto, *sto, *pto;
 	unsigned int rtx, sx, px;
 
+	rto = dma_walk_region_tables(domain, dma_addr, gfp);
+	if (!rto)
+		return NULL;
+
 	rtx = calc_rtx(dma_addr);
 	sto = dma_get_seg_table_origin(&rto[rtx], gfp);
 	if (!sto)
@@ -635,8 +753,7 @@ static int s390_iommu_validate_trans(struct s390_domain *s390_domain,
 	int rc;
 
 	for (i = 0; i < nr_pages; i++) {
-		entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr,
-					   gfp);
+		entry = dma_walk_cpu_trans(s390_domain, dma_addr, gfp);
 		if (unlikely(!entry)) {
 			rc = -ENOMEM;
 			goto undo_cpu_trans;
@@ -651,8 +768,7 @@ static int s390_iommu_validate_trans(struct s390_domain *s390_domain,
 undo_cpu_trans:
 	while (i-- > 0) {
 		dma_addr -= PAGE_SIZE;
-		entry = dma_walk_cpu_trans(s390_domain->dma_table,
-					   dma_addr, gfp);
+		entry = dma_walk_cpu_trans(s390_domain, dma_addr, gfp);
 		if (!entry)
 			break;
 		dma_update_cpu_trans(entry, 0, ZPCI_PTE_INVALID);
@@ -669,8 +785,7 @@ static int s390_iommu_invalidate_trans(struct s390_domain *s390_domain,
 	int rc = 0;
 
 	for (i = 0; i < nr_pages; i++) {
-		entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr,
-					   GFP_ATOMIC);
+		entry = dma_walk_cpu_trans(s390_domain, dma_addr, GFP_ATOMIC);
 		if (unlikely(!entry)) {
 			rc = -EINVAL;
 			break;
-- 
2.48.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH v2 6/6] iommu/s390: allow larger region tables
  2025-01-24 20:18 [PATCH v2 0/6] iommu/s390: support additional table regions Matthew Rosato
                   ` (4 preceding siblings ...)
  2025-01-24 20:18 ` [PATCH v2 5/6] iommu/s390: support map/unmap " Matthew Rosato
@ 2025-01-24 20:18 ` Matthew Rosato
  5 siblings, 0 replies; 8+ messages in thread
From: Matthew Rosato @ 2025-01-24 20:18 UTC (permalink / raw)
  To: joro, will, robin.murphy, gerald.schaefer, schnelle
  Cc: hca, gor, agordeev, svens, borntraeger, clegoate, iommu,
	linux-kernel, linux-s390

Extend the aperture calculation to consider sizes beyond the maximum
size of a region third table.  Attempt to always use the smallest
table size possible to avoid unnecessary extra steps during translation.

Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com>
---
 arch/s390/include/asm/pci_dma.h |  1 +
 drivers/iommu/s390-iommu.c      | 22 +++++++++++++++++-----
 2 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/arch/s390/include/asm/pci_dma.h b/arch/s390/include/asm/pci_dma.h
index 8d8962e4fd58..d12e17201661 100644
--- a/arch/s390/include/asm/pci_dma.h
+++ b/arch/s390/include/asm/pci_dma.h
@@ -25,6 +25,7 @@ enum zpci_ioat_dtype {
 #define ZPCI_KEY			(PAGE_DEFAULT_KEY << 5)
 
 #define ZPCI_TABLE_SIZE_RT	(1UL << 42)
+#define ZPCI_TABLE_SIZE_RS	(1UL << 53)
 
 #define ZPCI_IOTA_STO_FLAG	(ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_ST)
 #define ZPCI_IOTA_RTTO_FLAG	(ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_RT)
diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index 4924fe9faccb..c6b2bca881c3 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -953,12 +953,24 @@ int zpci_init_iommu(struct zpci_dev *zdev)
 		goto out_sysfs;
 
 	zdev->start_dma = PAGE_ALIGN(zdev->start_dma);
-	aperture_size = min3(s390_iommu_aperture,
-			     ZPCI_TABLE_SIZE_RT - zdev->start_dma,
-			     zdev->end_dma - zdev->start_dma + 1);
+	aperture_size = min(s390_iommu_aperture,
+			    zdev->end_dma - zdev->start_dma + 1);
+	if (aperture_size <= (ZPCI_TABLE_SIZE_RT - zdev->start_dma)) {
+		zdev->origin_type = ZPCI_TABLE_TYPE_RTX;
+		zdev->max_table_size = ZPCI_TABLE_SIZE_RT - 1;
+	} else if (aperture_size <= (ZPCI_TABLE_SIZE_RS - zdev->start_dma) &&
+		  (zdev->dtsm & ZPCI_IOTA_DT_RS)) {
+		zdev->origin_type = ZPCI_TABLE_TYPE_RSX;
+		zdev->max_table_size = ZPCI_TABLE_SIZE_RS - 1;
+	} else if (zdev->dtsm & ZPCI_IOTA_DT_RF) {
+		zdev->origin_type = ZPCI_TABLE_TYPE_RFX;
+		zdev->max_table_size = U64_MAX;
+	} else {
+		/* Assume RTX available */
+		zdev->origin_type = ZPCI_TABLE_TYPE_RTX;
+		zdev->max_table_size = ZPCI_TABLE_SIZE_RT - 1;
+	}
 	zdev->end_dma = zdev->start_dma + aperture_size - 1;
-	zdev->origin_type = ZPCI_TABLE_TYPE_RTX;
-	zdev->max_table_size = ZPCI_TABLE_SIZE_RT - 1;
 
 	return 0;
 
-- 
2.48.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH v2 1/6] iommu/s390: add initial fields to track table size
  2025-01-24 20:18 ` [PATCH v2 1/6] iommu/s390: add initial fields to track table size Matthew Rosato
@ 2025-02-05 15:08   ` Niklas Schnelle
  0 siblings, 0 replies; 8+ messages in thread
From: Niklas Schnelle @ 2025-02-05 15:08 UTC (permalink / raw)
  To: Matthew Rosato, joro, will, robin.murphy, gerald.schaefer
  Cc: hca, gor, agordeev, svens, borntraeger, clegoate, iommu,
	linux-kernel, linux-s390

On Fri, 2025-01-24 at 15:18 -0500, Matthew Rosato wrote:
> In preparation for allowing for additional table levels, add the
> necessary information to the zdev and s390 domain to track table
> type.  For now, these values will always be set to signify a
> region third table which is what s390-iommu always uses today.
> 
> Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com>
> ---
>  arch/s390/include/asm/pci.h |  2 ++
>  drivers/iommu/s390-iommu.c  | 17 +++++++++++------
>  2 files changed, 13 insertions(+), 6 deletions(-)
> 
> diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
> index 474e1f8d1d3c..8f8ebaeec60a 100644
> --- a/arch/s390/include/asm/pci.h
> +++ b/arch/s390/include/asm/pci.h
> @@ -189,6 +189,8 @@ struct zpci_dev {
>  	struct kvm_zdev *kzdev;
>  	struct mutex kzdev_lock;
>  	spinlock_t dom_lock;		/* protect s390_domain change */
> +	u64 max_table_size;
> +	u8 origin_type;

I think ideally we would keep the origin_type in the struct s390_domain
only. For max_table_size I think we could just have an inline helper
returning the max table size given its origin_type. Especially in
struct zpci_dev, it feels wasteful to use 8 bytes in an already large
struct for something we can recalculate and that doesn't even change
often.

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2025-02-05 15:09 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz follow: Atom feed)
-- links below jump to the message on this page --
2025-01-24 20:18 [PATCH v2 0/6] iommu/s390: support additional table regions Matthew Rosato
2025-01-24 20:18 ` [PATCH v2 1/6] iommu/s390: add initial fields to track table size Matthew Rosato
2025-02-05 15:08   ` Niklas Schnelle
2025-01-24 20:18 ` [PATCH v2 2/6] s390/pci: set appropriate IOTA region type Matthew Rosato
2025-01-24 20:18 ` [PATCH v2 3/6] iommu/s390: support cleanup of additional table regions Matthew Rosato
2025-01-24 20:18 ` [PATCH v2 4/6] iommu/s390: support iova_to_phys for " Matthew Rosato
2025-01-24 20:18 ` [PATCH v2 5/6] iommu/s390: support map/unmap " Matthew Rosato
2025-01-24 20:18 ` [PATCH v2 6/6] iommu/s390: allow larger region tables Matthew Rosato

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox