* [PATCH v4 1/4] s390/pci: check for relaxed translation capability
2025-02-07 20:53 [PATCH v4 0/4] iommu/s390: add support for IOMMU passthrough Matthew Rosato
@ 2025-02-07 20:53 ` Matthew Rosato
2025-02-07 20:53 ` [PATCH v4 2/4] s390/pci: store DMA offset in bus_dma_region Matthew Rosato
` (2 subsequent siblings)
3 siblings, 0 replies; 10+ messages in thread
From: Matthew Rosato @ 2025-02-07 20:53 UTC (permalink / raw)
To: joro, will, robin.murphy, gerald.schaefer, schnelle
Cc: hca, gor, agordeev, svens, borntraeger, farman, clegoate, jgg,
iommu, linux-kernel, linux-s390
For each zdev, record whether or not CLP indicates relaxed translation
capability for the associated device group.
Reviewed-by: Niklas Schnelle <schnelle@linux.ibm.com>
Tested-by: Niklas Schnelle <schnelle@linux.ibm.com>
Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com>
---
arch/s390/include/asm/pci.h | 2 +-
arch/s390/include/asm/pci_clp.h | 4 +++-
arch/s390/pci/pci_clp.c | 1 +
3 files changed, 5 insertions(+), 2 deletions(-)
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index 474e1f8d1d3c..8fe4c7a72c0b 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -144,7 +144,7 @@ struct zpci_dev {
u8 util_str_avail : 1;
u8 irqs_registered : 1;
u8 tid_avail : 1;
- u8 reserved : 1;
+ u8 rtr_avail : 1; /* Relaxed translation allowed */
unsigned int devfn; /* DEVFN part of the RID*/
u8 pfip[CLP_PFIP_NR_SEGMENTS]; /* pci function internal path */
diff --git a/arch/s390/include/asm/pci_clp.h b/arch/s390/include/asm/pci_clp.h
index 3fff2f7095c8..7ebff39c84b3 100644
--- a/arch/s390/include/asm/pci_clp.h
+++ b/arch/s390/include/asm/pci_clp.h
@@ -156,7 +156,9 @@ struct clp_rsp_query_pci_grp {
u16 : 4;
u16 noi : 12; /* number of interrupts */
u8 version;
- u8 : 6;
+ u8 : 2;
+ u8 rtr : 1; /* Relaxed translation requirement */
+ u8 : 3;
u8 frame : 1;
u8 refresh : 1; /* TLB refresh mode */
u16 : 3;
diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c
index 14bf7e8d06b7..27248686e588 100644
--- a/arch/s390/pci/pci_clp.c
+++ b/arch/s390/pci/pci_clp.c
@@ -112,6 +112,7 @@ static void clp_store_query_pci_fngrp(struct zpci_dev *zdev,
zdev->version = response->version;
zdev->maxstbl = response->maxstbl;
zdev->dtsm = response->dtsm;
+ zdev->rtr_avail = response->rtr;
switch (response->version) {
case 1:
--
2.48.1
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH v4 2/4] s390/pci: store DMA offset in bus_dma_region
2025-02-07 20:53 [PATCH v4 0/4] iommu/s390: add support for IOMMU passthrough Matthew Rosato
2025-02-07 20:53 ` [PATCH v4 1/4] s390/pci: check for relaxed translation capability Matthew Rosato
@ 2025-02-07 20:53 ` Matthew Rosato
2025-02-07 22:04 ` Niklas Schnelle
2025-02-07 20:53 ` [PATCH v4 3/4] iommu/s390: handle IOAT registration based on domain Matthew Rosato
2025-02-07 20:53 ` [PATCH v4 4/4] iommu/s390: implement iommu passthrough via identity domain Matthew Rosato
3 siblings, 1 reply; 10+ messages in thread
From: Matthew Rosato @ 2025-02-07 20:53 UTC (permalink / raw)
To: joro, will, robin.murphy, gerald.schaefer, schnelle
Cc: hca, gor, agordeev, svens, borntraeger, farman, clegoate, jgg,
iommu, linux-kernel, linux-s390
PCI devices on s390 have a DMA offset that is reported via CLP. In
preparation for allowing identity domains, setup the bus_dma_region
for all PCI devices using the reported CLP value.
Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com>
---
arch/s390/pci/pci_bus.c | 18 ++++++++++++++++++
1 file changed, 18 insertions(+)
diff --git a/arch/s390/pci/pci_bus.c b/arch/s390/pci/pci_bus.c
index 857afbc4828f..b5c35b8e47a4 100644
--- a/arch/s390/pci/pci_bus.c
+++ b/arch/s390/pci/pci_bus.c
@@ -19,6 +19,7 @@
#include <linux/jump_label.h>
#include <linux/pci.h>
#include <linux/printk.h>
+#include <linux/dma-direct.h>
#include <asm/pci_clp.h>
#include <asm/pci_dma.h>
@@ -283,10 +284,27 @@ static struct zpci_bus *zpci_bus_alloc(int topo, bool topo_is_tid)
return zbus;
}
+static void pci_dma_range_setup(struct pci_dev *pdev)
+{
+ struct zpci_dev *zdev = to_zpci(pdev);
+ struct bus_dma_region *map;
+
+ map = kzalloc(sizeof(*map), GFP_KERNEL);
+ if (!map)
+ return;
+
+ map->cpu_start = 0;
+ map->dma_start = PAGE_ALIGN(zdev->start_dma);
+ map->size = max(PAGE_ALIGN_DOWN(zdev->end_dma + 1) - map->dma_start, 0);
+ pdev->dev.dma_range_map = map;
+}
+
void pcibios_bus_add_device(struct pci_dev *pdev)
{
struct zpci_dev *zdev = to_zpci(pdev);
+ pci_dma_range_setup(pdev);
+
/*
* With pdev->no_vf_scan the common PCI probing code does not
* perform PF/VF linking.
--
2.48.1
^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [PATCH v4 2/4] s390/pci: store DMA offset in bus_dma_region
2025-02-07 20:53 ` [PATCH v4 2/4] s390/pci: store DMA offset in bus_dma_region Matthew Rosato
@ 2025-02-07 22:04 ` Niklas Schnelle
2025-02-12 15:23 ` Matthew Rosato
0 siblings, 1 reply; 10+ messages in thread
From: Niklas Schnelle @ 2025-02-07 22:04 UTC (permalink / raw)
To: Matthew Rosato, joro, will, robin.murphy, gerald.schaefer
Cc: hca, gor, agordeev, svens, borntraeger, farman, clegoate, jgg,
iommu, linux-kernel, linux-s390, Halil Pasic
On Fri, 2025-02-07 at 15:53 -0500, Matthew Rosato wrote:
> PCI devices on s390 have a DMA offset that is reported via CLP. In
> preparation for allowing identity domains, setup the bus_dma_region
> for all PCI devices using the reported CLP value.
>
> Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com>
> ---
> arch/s390/pci/pci_bus.c | 18 ++++++++++++++++++
> 1 file changed, 18 insertions(+)
>
> diff --git a/arch/s390/pci/pci_bus.c b/arch/s390/pci/pci_bus.c
> index 857afbc4828f..b5c35b8e47a4 100644
> --- a/arch/s390/pci/pci_bus.c
> +++ b/arch/s390/pci/pci_bus.c
> @@ -19,6 +19,7 @@
> #include <linux/jump_label.h>
> #include <linux/pci.h>
> #include <linux/printk.h>
> +#include <linux/dma-direct.h>
>
> #include <asm/pci_clp.h>
> #include <asm/pci_dma.h>
> @@ -283,10 +284,27 @@ static struct zpci_bus *zpci_bus_alloc(int topo, bool topo_is_tid)
> return zbus;
> }
>
> +static void pci_dma_range_setup(struct pci_dev *pdev)
> +{
> + struct zpci_dev *zdev = to_zpci(pdev);
> + struct bus_dma_region *map;
> +
> + map = kzalloc(sizeof(*map), GFP_KERNEL);
> + if (!map)
> + return;
> +
> + map->cpu_start = 0;
> + map->dma_start = PAGE_ALIGN(zdev->start_dma);
> + map->size = max(PAGE_ALIGN_DOWN(zdev->end_dma + 1) - map->dma_start, 0);
Ugh, this is my fault as I suggested it, but this max() doesn't work
here. The zdev->end_dma is unsigned and so is map->dma_start so if the
former is smaller underflow will occur and the max() won't save us.
It's largely a theoretical issue since zdev->end_dma + 1 should always
be larger than zdev->start_dma, but now the max() looks like we thought
of that, but then it doesn't work.
If we handle it maybe just go with:
aligned_end = PAGE_ALIGN_DOWN(zdev->end_dma + 1);
if (aligned_end >= map->dma_start)
map->size = aligned_end - map->dma_start;
else
map->size = 0;
> + pdev->dev.dma_range_map = map;
> +}
> +
> void pcibios_bus_add_device(struct pci_dev *pdev)
> {
> struct zpci_dev *zdev = to_zpci(pdev);
>
> + pci_dma_range_setup(pdev);
> +
> /*
> * With pdev->no_vf_scan the common PCI probing code does not
> * perform PF/VF linking.
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH v4 2/4] s390/pci: store DMA offset in bus_dma_region
2025-02-07 22:04 ` Niklas Schnelle
@ 2025-02-12 15:23 ` Matthew Rosato
2025-02-12 16:15 ` Niklas Schnelle
0 siblings, 1 reply; 10+ messages in thread
From: Matthew Rosato @ 2025-02-12 15:23 UTC (permalink / raw)
To: Niklas Schnelle, joro, will, robin.murphy, gerald.schaefer
Cc: hca, gor, agordeev, svens, borntraeger, farman, clegoate, jgg,
iommu, linux-kernel, linux-s390, Halil Pasic
>> +static void pci_dma_range_setup(struct pci_dev *pdev)
>> +{
>> + struct zpci_dev *zdev = to_zpci(pdev);
>> + struct bus_dma_region *map;
>> +
>> + map = kzalloc(sizeof(*map), GFP_KERNEL);
>> + if (!map)
>> + return;
>> +
>> + map->cpu_start = 0;
>> + map->dma_start = PAGE_ALIGN(zdev->start_dma);
>> + map->size = max(PAGE_ALIGN_DOWN(zdev->end_dma + 1) - map->dma_start, 0);
>
> Ugh, this is my fault as I suggested it, but this max() doesn't work
> here. The zdev->end_dma is unsigned and so is map->dma_start so if the
> former is smaller underflow will occur and the max() won't save us.
> It's largely a theoretical issue since zdev->end_dma + 1 should always
> be larger than zdev->start_dma, but now the max() looks like we thought
> of that, but then it doesn't work.
>
> If we handle it maybe just go with:
>
> aligned_end = PAGE_ALIGN_DOWN(zdev->end_dma + 1);
> if (aligned_end >= map->dma_start)
> map->size = aligned_end - map->dma_start;
> else
> map->size = 0;
>
Given that it's not really something that's supposed to happen, would it make sense then to add a
WARN_ON_ONCE(map->size == 0);
At the end of this?
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH v4 2/4] s390/pci: store DMA offset in bus_dma_region
2025-02-12 15:23 ` Matthew Rosato
@ 2025-02-12 16:15 ` Niklas Schnelle
0 siblings, 0 replies; 10+ messages in thread
From: Niklas Schnelle @ 2025-02-12 16:15 UTC (permalink / raw)
To: Matthew Rosato, joro, will, robin.murphy, gerald.schaefer
Cc: hca, gor, agordeev, svens, borntraeger, farman, clegoate, jgg,
iommu, linux-kernel, linux-s390, Halil Pasic
On Wed, 2025-02-12 at 10:23 -0500, Matthew Rosato wrote:
> > > +static void pci_dma_range_setup(struct pci_dev *pdev)
> > > +{
> > > + struct zpci_dev *zdev = to_zpci(pdev);
> > > + struct bus_dma_region *map;
> > > +
> > > + map = kzalloc(sizeof(*map), GFP_KERNEL);
> > > + if (!map)
> > > + return;
> > > +
> > > + map->cpu_start = 0;
> > > + map->dma_start = PAGE_ALIGN(zdev->start_dma);
> > > + map->size = max(PAGE_ALIGN_DOWN(zdev->end_dma + 1) - map->dma_start, 0);
> >
> > Ugh, this is my fault as I suggested it, but this max() doesn't work
> > here. The zdev->end_dma is unsigned and so is map->dma_start so if the
> > former is smaller underflow will occur and the max() won't save us.
> > It's largely a theoretical issue since zdev->end_dma + 1 should always
> > be larger than zdev->start_dma, but now the max() looks like we thought
> > of that, but then it doesn't work.
> >
> > If we handle it maybe just go with:
> >
> > aligned_end = PAGE_ALIGN_DOWN(zdev->end_dma + 1);
> > if (aligned_end >= map->dma_start)
> > map->size = aligned_end - map->dma_start;
> > else
> > map->size = 0;
> >
>
>
> Given that it's not really something that's supposed to happen, would it make sense then to add a
>
> WARN_ON_ONCE(map->size == 0);
>
> At the end of this?
Yes that makes sense to me
^ permalink raw reply [flat|nested] 10+ messages in thread
* [PATCH v4 3/4] iommu/s390: handle IOAT registration based on domain
2025-02-07 20:53 [PATCH v4 0/4] iommu/s390: add support for IOMMU passthrough Matthew Rosato
2025-02-07 20:53 ` [PATCH v4 1/4] s390/pci: check for relaxed translation capability Matthew Rosato
2025-02-07 20:53 ` [PATCH v4 2/4] s390/pci: store DMA offset in bus_dma_region Matthew Rosato
@ 2025-02-07 20:53 ` Matthew Rosato
2025-02-10 11:44 ` Niklas Schnelle
2025-02-07 20:53 ` [PATCH v4 4/4] iommu/s390: implement iommu passthrough via identity domain Matthew Rosato
3 siblings, 1 reply; 10+ messages in thread
From: Matthew Rosato @ 2025-02-07 20:53 UTC (permalink / raw)
To: joro, will, robin.murphy, gerald.schaefer, schnelle
Cc: hca, gor, agordeev, svens, borntraeger, farman, clegoate, jgg,
iommu, linux-kernel, linux-s390
At this point, the dma_table is really a property of the s390-iommu
domain. Rather than checking its contents elsewhere in the codebase,
move the code that registers the table with firmware into
s390-iommu and make a decision what to register with firmware based
upon the type of domain in use for the device in question.
Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com>
---
arch/s390/include/asm/pci.h | 2 ++
arch/s390/kvm/pci.c | 17 ++-------------
arch/s390/pci/pci.c | 35 +++++++++++++++++-------------
arch/s390/pci/pci_sysfs.c | 11 +---------
drivers/iommu/s390-iommu.c | 43 +++++++++++++++++++++++++++++++++++--
5 files changed, 66 insertions(+), 42 deletions(-)
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index 8fe4c7a72c0b..7cc6a7cc66e7 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -217,6 +217,7 @@ extern struct airq_iv *zpci_aif_sbv;
struct zpci_dev *zpci_create_device(u32 fid, u32 fh, enum zpci_state state);
int zpci_add_device(struct zpci_dev *zdev);
int zpci_enable_device(struct zpci_dev *);
+int zpci_reenable_device(struct zpci_dev *zdev);
int zpci_disable_device(struct zpci_dev *);
int zpci_scan_configured_device(struct zpci_dev *zdev, u32 fh);
int zpci_deconfigure_device(struct zpci_dev *zdev);
@@ -245,6 +246,7 @@ void update_uid_checking(bool new);
/* IOMMU Interface */
int zpci_init_iommu(struct zpci_dev *zdev);
void zpci_destroy_iommu(struct zpci_dev *zdev);
+int zpci_iommu_register_ioat(struct zpci_dev *zdev, u8 *status);
#ifdef CONFIG_PCI
static inline bool zpci_use_mio(struct zpci_dev *zdev)
diff --git a/arch/s390/kvm/pci.c b/arch/s390/kvm/pci.c
index 9b9e7fdd5380..8c40154ff50f 100644
--- a/arch/s390/kvm/pci.c
+++ b/arch/s390/kvm/pci.c
@@ -433,7 +433,6 @@ static void kvm_s390_pci_dev_release(struct zpci_dev *zdev)
static int kvm_s390_pci_register_kvm(void *opaque, struct kvm *kvm)
{
struct zpci_dev *zdev = opaque;
- u8 status;
int rc;
if (!zdev)
@@ -480,13 +479,7 @@ static int kvm_s390_pci_register_kvm(void *opaque, struct kvm *kvm)
*/
zdev->gisa = (u32)virt_to_phys(&kvm->arch.sie_page2->gisa);
- rc = zpci_enable_device(zdev);
- if (rc)
- goto clear_gisa;
-
- /* Re-register the IOMMU that was already created */
- rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
- virt_to_phys(zdev->dma_table), &status);
+ rc = zpci_reenable_device(zdev);
if (rc)
goto clear_gisa;
@@ -516,7 +509,6 @@ static void kvm_s390_pci_unregister_kvm(void *opaque)
{
struct zpci_dev *zdev = opaque;
struct kvm *kvm;
- u8 status;
if (!zdev)
return;
@@ -550,12 +542,7 @@ static void kvm_s390_pci_unregister_kvm(void *opaque)
goto out;
}
- if (zpci_enable_device(zdev))
- goto out;
-
- /* Re-register the IOMMU that was already created */
- zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
- virt_to_phys(zdev->dma_table), &status);
+ zpci_reenable_device(zdev);
out:
spin_lock(&kvm->arch.kzdev_list_lock);
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 88f72745fa59..fa879351efb5 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -124,14 +124,13 @@ int zpci_register_ioat(struct zpci_dev *zdev, u8 dmaas,
struct zpci_fib fib = {0};
u8 cc;
- WARN_ON_ONCE(iota & 0x3fff);
fib.pba = base;
/* Work around off by one in ISM virt device */
if (zdev->pft == PCI_FUNC_TYPE_ISM && limit > base)
fib.pal = limit + (1 << 12);
else
fib.pal = limit;
- fib.iota = iota | ZPCI_IOTA_RTTO_FLAG;
+ fib.iota = iota;
fib.gd = zdev->gisa;
cc = zpci_mod_fc(req, &fib, status);
if (cc)
@@ -690,6 +689,23 @@ int zpci_enable_device(struct zpci_dev *zdev)
}
EXPORT_SYMBOL_GPL(zpci_enable_device);
+int zpci_reenable_device(struct zpci_dev *zdev)
+{
+ u8 status;
+ int rc;
+
+ rc = zpci_enable_device(zdev);
+ if (rc)
+ return rc;
+
+ rc = zpci_iommu_register_ioat(zdev, &status);
+ if (rc)
+ zpci_disable_device(zdev);
+
+ return rc;
+}
+EXPORT_SYMBOL_GPL(zpci_reenable_device);
+
int zpci_disable_device(struct zpci_dev *zdev)
{
u32 fh = zdev->fh;
@@ -739,7 +755,6 @@ EXPORT_SYMBOL_GPL(zpci_disable_device);
*/
int zpci_hot_reset_device(struct zpci_dev *zdev)
{
- u8 status;
int rc;
lockdep_assert_held(&zdev->state_lock);
@@ -758,19 +773,9 @@ int zpci_hot_reset_device(struct zpci_dev *zdev)
return rc;
}
- rc = zpci_enable_device(zdev);
- if (rc)
- return rc;
-
- if (zdev->dma_table)
- rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
- virt_to_phys(zdev->dma_table), &status);
- if (rc) {
- zpci_disable_device(zdev);
- return rc;
- }
+ rc = zpci_reenable_device(zdev);
- return 0;
+ return rc;
}
/**
diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c
index 2de1ea6c3a8c..0ecad08e1b1e 100644
--- a/arch/s390/pci/pci_sysfs.c
+++ b/arch/s390/pci/pci_sysfs.c
@@ -52,7 +52,6 @@ static DEVICE_ATTR_RO(mio_enabled);
static int _do_recover(struct pci_dev *pdev, struct zpci_dev *zdev)
{
- u8 status;
int ret;
pci_stop_and_remove_bus_device(pdev);
@@ -70,16 +69,8 @@ static int _do_recover(struct pci_dev *pdev, struct zpci_dev *zdev)
return ret;
}
- ret = zpci_enable_device(zdev);
- if (ret)
- return ret;
+ ret = zpci_reenable_device(zdev);
- if (zdev->dma_table) {
- ret = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
- virt_to_phys(zdev->dma_table), &status);
- if (ret)
- zpci_disable_device(zdev);
- }
return ret;
}
diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index fbdeded3d48b..007ccfdad495 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -381,6 +381,46 @@ static void zdev_s390_domain_update(struct zpci_dev *zdev,
spin_unlock_irqrestore(&zdev->dom_lock, flags);
}
+static int s390_iommu_domain_reg_ioat(struct zpci_dev *zdev,
+ struct iommu_domain *domain, u8 *status)
+{
+ struct s390_domain *s390_domain;
+ int rc = 0;
+ u64 iota;
+
+ switch (domain->type) {
+ case IOMMU_DOMAIN_IDENTITY:
+ rc = zpci_register_ioat(zdev, 0, zdev->start_dma,
+ zdev->end_dma, 0, status);
+ break;
+ case IOMMU_DOMAIN_BLOCKED:
+ /* Nothing to do in this case */
+ break;
+ default:
+ s390_domain = to_s390_domain(domain);
+ iota = virt_to_phys(s390_domain->dma_table) |
+ ZPCI_IOTA_RTTO_FLAG;
+ rc = zpci_register_ioat(zdev, 0, zdev->start_dma,
+ zdev->end_dma, iota, status);
+ }
+
+ return rc;
+}
+
+int zpci_iommu_register_ioat(struct zpci_dev *zdev, u8 *status)
+{
+ unsigned long flags;
+ int rc;
+
+ spin_lock_irqsave(&zdev->dom_lock, flags);
+
+ rc = s390_iommu_domain_reg_ioat(zdev, zdev->s390_domain, status);
+
+ spin_unlock_irqrestore(&zdev->dom_lock, flags);
+
+ return rc;
+}
+
static int blocking_domain_attach_device(struct iommu_domain *domain,
struct device *dev)
{
@@ -422,8 +462,7 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
blocking_domain_attach_device(&blocking_domain, dev);
/* If we fail now DMA remains blocked via blocking domain */
- cc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
- virt_to_phys(s390_domain->dma_table), &status);
+ cc = s390_iommu_domain_reg_ioat(zdev, domain, &status);
if (cc && status != ZPCI_PCI_ST_FUNC_NOT_AVAIL)
return -EIO;
zdev->dma_table = s390_domain->dma_table;
--
2.48.1
^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [PATCH v4 3/4] iommu/s390: handle IOAT registration based on domain
2025-02-07 20:53 ` [PATCH v4 3/4] iommu/s390: handle IOAT registration based on domain Matthew Rosato
@ 2025-02-10 11:44 ` Niklas Schnelle
0 siblings, 0 replies; 10+ messages in thread
From: Niklas Schnelle @ 2025-02-10 11:44 UTC (permalink / raw)
To: Matthew Rosato, joro, will, robin.murphy, gerald.schaefer
Cc: hca, gor, agordeev, svens, borntraeger, farman, clegoate, jgg,
iommu, linux-kernel, linux-s390
On Fri, 2025-02-07 at 15:53 -0500, Matthew Rosato wrote:
> At this point, the dma_table is really a property of the s390-iommu
> domain. Rather than checking its contents elsewhere in the codebase,
> move the code that registers the table with firmware into
> s390-iommu and make a decision what to register with firmware based
> upon the type of domain in use for the device in question.
>
> Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com>
> ---
> arch/s390/include/asm/pci.h | 2 ++
> arch/s390/kvm/pci.c | 17 ++-------------
> arch/s390/pci/pci.c | 35 +++++++++++++++++-------------
> arch/s390/pci/pci_sysfs.c | 11 +---------
> drivers/iommu/s390-iommu.c | 43 +++++++++++++++++++++++++++++++++++--
> 5 files changed, 66 insertions(+), 42 deletions(-)
>
>
--8<---
> int zpci_hot_reset_device(struct zpci_dev *zdev)
> {
> - u8 status;
> int rc;
>
> lockdep_assert_held(&zdev->state_lock);
> @@ -758,19 +773,9 @@ int zpci_hot_reset_device(struct zpci_dev *zdev)
> return rc;
> }
>
> - rc = zpci_enable_device(zdev);
> - if (rc)
> - return rc;
> -
> - if (zdev->dma_table)
> - rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
> - virt_to_phys(zdev->dma_table), &status);
> - if (rc) {
> - zpci_disable_device(zdev);
> - return rc;
> - }
> + rc = zpci_reenable_device(zdev);
>
> - return 0;
> + return rc;
I can confirm that this change to re-do the I/O address translation re-
registration fixes the below zpci_hot_reset_device() test which in an
iommu.passthrough=1 guest was causing host IOMMU violations in v3:
# echo 'bus' > /sys/bus/pci/devices/2003\:00\:00.0/reset_method
# echo 1 > /sys/bus/pci/devices/2003\:00\:00.0/reset
> }
>
---8<---
>
> diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
> index fbdeded3d48b..007ccfdad495 100644
> --- a/drivers/iommu/s390-iommu.c
> +++ b/drivers/iommu/s390-iommu.c
> @@ -381,6 +381,46 @@ static void zdev_s390_domain_update(struct zpci_dev *zdev,
> spin_unlock_irqrestore(&zdev->dom_lock, flags);
> }
>
> +static int s390_iommu_domain_reg_ioat(struct zpci_dev *zdev,
> + struct iommu_domain *domain, u8 *status)
> +{
> + struct s390_domain *s390_domain;
> + int rc = 0;
> + u64 iota;
> +
> + switch (domain->type) {
> + case IOMMU_DOMAIN_IDENTITY:
> + rc = zpci_register_ioat(zdev, 0, zdev->start_dma,
> + zdev->end_dma, 0, status);
> + break;
> + case IOMMU_DOMAIN_BLOCKED:
> + /* Nothing to do in this case */
> + break;
> + default:
> + s390_domain = to_s390_domain(domain);
> + iota = virt_to_phys(s390_domain->dma_table) |
> + ZPCI_IOTA_RTTO_FLAG;
> + rc = zpci_register_ioat(zdev, 0, zdev->start_dma,
> + zdev->end_dma, iota, status);
> + }
> +
> + return rc;
> +}
> +
> +int zpci_iommu_register_ioat(struct zpci_dev *zdev, u8 *status)
> +{
> + unsigned long flags;
> + int rc;
> +
> + spin_lock_irqsave(&zdev->dom_lock, flags);
> +
> + rc = s390_iommu_domain_reg_ioat(zdev, zdev->s390_domain, status);
> +
> + spin_unlock_irqrestore(&zdev->dom_lock, flags);
> +
> + return rc;
> +}
> +
>
I really like how this takes out more of the IOMMU details from non-
IOMMU code. Definitely an improvement in terms of on its own. As stated
above I tested that this fixes the one issue I stumbled over in testing
the previous version. So feel free to add:
Tested-by: Niklas Schnelle <schnelle@linux.ibm.com>
Reviewed-by: Niklas Schnelle <schnelle@linux.ibm.com>
^ permalink raw reply [flat|nested] 10+ messages in thread
* [PATCH v4 4/4] iommu/s390: implement iommu passthrough via identity domain
2025-02-07 20:53 [PATCH v4 0/4] iommu/s390: add support for IOMMU passthrough Matthew Rosato
` (2 preceding siblings ...)
2025-02-07 20:53 ` [PATCH v4 3/4] iommu/s390: handle IOAT registration based on domain Matthew Rosato
@ 2025-02-07 20:53 ` Matthew Rosato
2025-02-10 11:47 ` Niklas Schnelle
3 siblings, 1 reply; 10+ messages in thread
From: Matthew Rosato @ 2025-02-07 20:53 UTC (permalink / raw)
To: joro, will, robin.murphy, gerald.schaefer, schnelle
Cc: hca, gor, agordeev, svens, borntraeger, farman, clegoate, jgg,
iommu, linux-kernel, linux-s390
Enabled via the kernel command-line 'iommu.passthrough=1' option.
Introduce the concept of identity domains to s390-iommu, which relies on
the bus_dma_region to offset identity mappings to the start of the DMA
aperture advertized by CLP.
Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com>
---
drivers/iommu/s390-iommu.c | 95 +++++++++++++++++++++++++++++---------
1 file changed, 72 insertions(+), 23 deletions(-)
diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index 007ccfdad495..e1c76e0f9c2b 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -16,7 +16,7 @@
#include "dma-iommu.h"
-static const struct iommu_ops s390_iommu_ops;
+static const struct iommu_ops s390_iommu_ops, s390_iommu_rtr_ops;
static struct kmem_cache *dma_region_table_cache;
static struct kmem_cache *dma_page_table_cache;
@@ -432,9 +432,11 @@ static int blocking_domain_attach_device(struct iommu_domain *domain,
return 0;
s390_domain = to_s390_domain(zdev->s390_domain);
- spin_lock_irqsave(&s390_domain->list_lock, flags);
- list_del_rcu(&zdev->iommu_list);
- spin_unlock_irqrestore(&s390_domain->list_lock, flags);
+ if (zdev->dma_table) {
+ spin_lock_irqsave(&s390_domain->list_lock, flags);
+ list_del_rcu(&zdev->iommu_list);
+ spin_unlock_irqrestore(&s390_domain->list_lock, flags);
+ }
zpci_unregister_ioat(zdev, 0);
zdev->dma_table = NULL;
@@ -762,7 +764,13 @@ int zpci_init_iommu(struct zpci_dev *zdev)
if (rc)
goto out_err;
- rc = iommu_device_register(&zdev->iommu_dev, &s390_iommu_ops, NULL);
+ if (zdev->rtr_avail) {
+ rc = iommu_device_register(&zdev->iommu_dev,
+ &s390_iommu_rtr_ops, NULL);
+ } else {
+ rc = iommu_device_register(&zdev->iommu_dev, &s390_iommu_ops,
+ NULL);
+ }
if (rc)
goto out_sysfs;
@@ -826,6 +834,39 @@ static int __init s390_iommu_init(void)
}
subsys_initcall(s390_iommu_init);
+static int s390_attach_dev_identity(struct iommu_domain *domain,
+ struct device *dev)
+{
+ struct zpci_dev *zdev = to_zpci_dev(dev);
+ u8 status;
+ int cc;
+
+ blocking_domain_attach_device(&blocking_domain, dev);
+
+ /* If we fail now DMA remains blocked via blocking domain */
+ cc = s390_iommu_domain_reg_ioat(zdev, domain, &status);
+
+ /*
+ * If the device is undergoing error recovery the reset code
+ * will re-establish the new domain.
+ */
+ if (cc && status != ZPCI_PCI_ST_FUNC_NOT_AVAIL)
+ return -EIO;
+
+ zdev_s390_domain_update(zdev, domain);
+
+ return 0;
+}
+
+static const struct iommu_domain_ops s390_identity_ops = {
+ .attach_dev = s390_attach_dev_identity,
+};
+
+static struct iommu_domain s390_identity_domain = {
+ .type = IOMMU_DOMAIN_IDENTITY,
+ .ops = &s390_identity_ops,
+};
+
static struct iommu_domain blocking_domain = {
.type = IOMMU_DOMAIN_BLOCKED,
.ops = &(const struct iommu_domain_ops) {
@@ -833,23 +874,31 @@ static struct iommu_domain blocking_domain = {
}
};
-static const struct iommu_ops s390_iommu_ops = {
- .blocked_domain = &blocking_domain,
- .release_domain = &blocking_domain,
- .capable = s390_iommu_capable,
- .domain_alloc_paging = s390_domain_alloc_paging,
- .probe_device = s390_iommu_probe_device,
- .device_group = generic_device_group,
- .pgsize_bitmap = SZ_4K,
- .get_resv_regions = s390_iommu_get_resv_regions,
- .default_domain_ops = &(const struct iommu_domain_ops) {
- .attach_dev = s390_iommu_attach_device,
- .map_pages = s390_iommu_map_pages,
- .unmap_pages = s390_iommu_unmap_pages,
- .flush_iotlb_all = s390_iommu_flush_iotlb_all,
- .iotlb_sync = s390_iommu_iotlb_sync,
- .iotlb_sync_map = s390_iommu_iotlb_sync_map,
- .iova_to_phys = s390_iommu_iova_to_phys,
- .free = s390_domain_free,
+#define S390_IOMMU_COMMON_OPS() \
+ .blocked_domain = &blocking_domain, \
+ .release_domain = &blocking_domain, \
+ .capable = s390_iommu_capable, \
+ .domain_alloc_paging = s390_domain_alloc_paging, \
+ .probe_device = s390_iommu_probe_device, \
+ .device_group = generic_device_group, \
+ .pgsize_bitmap = SZ_4K, \
+ .get_resv_regions = s390_iommu_get_resv_regions, \
+ .default_domain_ops = &(const struct iommu_domain_ops) { \
+ .attach_dev = s390_iommu_attach_device, \
+ .map_pages = s390_iommu_map_pages, \
+ .unmap_pages = s390_iommu_unmap_pages, \
+ .flush_iotlb_all = s390_iommu_flush_iotlb_all, \
+ .iotlb_sync = s390_iommu_iotlb_sync, \
+ .iotlb_sync_map = s390_iommu_iotlb_sync_map, \
+ .iova_to_phys = s390_iommu_iova_to_phys, \
+ .free = s390_domain_free, \
}
+
+static const struct iommu_ops s390_iommu_ops = {
+ S390_IOMMU_COMMON_OPS()
+};
+
+static const struct iommu_ops s390_iommu_rtr_ops = {
+ .identity_domain = &s390_identity_domain,
+ S390_IOMMU_COMMON_OPS()
};
--
2.48.1
^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [PATCH v4 4/4] iommu/s390: implement iommu passthrough via identity domain
2025-02-07 20:53 ` [PATCH v4 4/4] iommu/s390: implement iommu passthrough via identity domain Matthew Rosato
@ 2025-02-10 11:47 ` Niklas Schnelle
0 siblings, 0 replies; 10+ messages in thread
From: Niklas Schnelle @ 2025-02-10 11:47 UTC (permalink / raw)
To: Matthew Rosato, joro, will, robin.murphy, gerald.schaefer
Cc: hca, gor, agordeev, svens, borntraeger, farman, clegoate, jgg,
iommu, linux-kernel, linux-s390
On Fri, 2025-02-07 at 15:53 -0500, Matthew Rosato wrote:
> Enabled via the kernel command-line 'iommu.passthrough=1' option.
>
> Introduce the concept of identity domains to s390-iommu, which relies on
> the bus_dma_region to offset identity mappings to the start of the DMA
> aperture advertized by CLP.
>
> Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com>
> ---
> drivers/iommu/s390-iommu.c | 95 +++++++++++++++++++++++++++++---------
> 1 file changed, 72 insertions(+), 23 deletions(-)
>
> diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
> index 007ccfdad495..e1c76e0f9c2b 100644
> --- a/drivers/iommu/s390-iommu.c
> +++ b/drivers/iommu/s390-iommu.c
> @@ -16,7 +16,7 @@
>
> #include "dma-iommu.h"
>
> -static const struct iommu_ops s390_iommu_ops;
> +static const struct iommu_ops s390_iommu_ops, s390_iommu_rtr_ops;
>
> static struct kmem_cache *dma_region_table_cache;
> static struct kmem_cache *dma_page_table_cache;
> @@ -432,9 +432,11 @@ static int blocking_domain_attach_device(struct iommu_domain *domain,
> return 0;
>
> s390_domain = to_s390_domain(zdev->s390_domain);
> - spin_lock_irqsave(&s390_domain->list_lock, flags);
> - list_del_rcu(&zdev->iommu_list);
> - spin_unlock_irqrestore(&s390_domain->list_lock, flags);
> + if (zdev->dma_table) {
> + spin_lock_irqsave(&s390_domain->list_lock, flags);
> + list_del_rcu(&zdev->iommu_list);
> + spin_unlock_irqrestore(&s390_domain->list_lock, flags);
> + }
>
> zpci_unregister_ioat(zdev, 0);
> zdev->dma_table = NULL;
> @@ -762,7 +764,13 @@ int zpci_init_iommu(struct zpci_dev *zdev)
> if (rc)
> goto out_err;
>
> - rc = iommu_device_register(&zdev->iommu_dev, &s390_iommu_ops, NULL);
> + if (zdev->rtr_avail) {
> + rc = iommu_device_register(&zdev->iommu_dev,
> + &s390_iommu_rtr_ops, NULL);
> + } else {
> + rc = iommu_device_register(&zdev->iommu_dev, &s390_iommu_ops,
> + NULL);
> + }
> if (rc)
> goto out_sysfs;
>
> @@ -826,6 +834,39 @@ static int __init s390_iommu_init(void)
> }
> subsys_initcall(s390_iommu_init);
>
> +static int s390_attach_dev_identity(struct iommu_domain *domain,
> + struct device *dev)
> +{
> + struct zpci_dev *zdev = to_zpci_dev(dev);
> + u8 status;
> + int cc;
> +
> + blocking_domain_attach_device(&blocking_domain, dev);
> +
> + /* If we fail now DMA remains blocked via blocking domain */
> + cc = s390_iommu_domain_reg_ioat(zdev, domain, &status);
> +
> + /*
> + * If the device is undergoing error recovery the reset code
> + * will re-establish the new domain.
> + */
> + if (cc && status != ZPCI_PCI_ST_FUNC_NOT_AVAIL)
> + return -EIO;
> +
> + zdev_s390_domain_update(zdev, domain);
> +
> + return 0;
> +}
> +
> +static const struct iommu_domain_ops s390_identity_ops = {
> + .attach_dev = s390_attach_dev_identity,
> +};
> +
> +static struct iommu_domain s390_identity_domain = {
> + .type = IOMMU_DOMAIN_IDENTITY,
> + .ops = &s390_identity_ops,
> +};
> +
> static struct iommu_domain blocking_domain = {
> .type = IOMMU_DOMAIN_BLOCKED,
> .ops = &(const struct iommu_domain_ops) {
> @@ -833,23 +874,31 @@ static struct iommu_domain blocking_domain = {
> }
> };
>
> -static const struct iommu_ops s390_iommu_ops = {
> - .blocked_domain = &blocking_domain,
> - .release_domain = &blocking_domain,
> - .capable = s390_iommu_capable,
> - .domain_alloc_paging = s390_domain_alloc_paging,
> - .probe_device = s390_iommu_probe_device,
> - .device_group = generic_device_group,
> - .pgsize_bitmap = SZ_4K,
> - .get_resv_regions = s390_iommu_get_resv_regions,
> - .default_domain_ops = &(const struct iommu_domain_ops) {
> - .attach_dev = s390_iommu_attach_device,
> - .map_pages = s390_iommu_map_pages,
> - .unmap_pages = s390_iommu_unmap_pages,
> - .flush_iotlb_all = s390_iommu_flush_iotlb_all,
> - .iotlb_sync = s390_iommu_iotlb_sync,
> - .iotlb_sync_map = s390_iommu_iotlb_sync_map,
> - .iova_to_phys = s390_iommu_iova_to_phys,
> - .free = s390_domain_free,
> +#define S390_IOMMU_COMMON_OPS() \
> + .blocked_domain = &blocking_domain, \
> + .release_domain = &blocking_domain, \
> + .capable = s390_iommu_capable, \
> + .domain_alloc_paging = s390_domain_alloc_paging, \
> + .probe_device = s390_iommu_probe_device, \
> + .device_group = generic_device_group, \
> + .pgsize_bitmap = SZ_4K, \
> + .get_resv_regions = s390_iommu_get_resv_regions, \
> + .default_domain_ops = &(const struct iommu_domain_ops) { \
> + .attach_dev = s390_iommu_attach_device, \
> + .map_pages = s390_iommu_map_pages, \
> + .unmap_pages = s390_iommu_unmap_pages, \
> + .flush_iotlb_all = s390_iommu_flush_iotlb_all, \
> + .iotlb_sync = s390_iommu_iotlb_sync, \
> + .iotlb_sync_map = s390_iommu_iotlb_sync_map, \
> + .iova_to_phys = s390_iommu_iova_to_phys, \
> + .free = s390_domain_free, \
> }
> +
> +static const struct iommu_ops s390_iommu_ops = {
> + S390_IOMMU_COMMON_OPS()
> +};
> +
> +static const struct iommu_ops s390_iommu_rtr_ops = {
> + .identity_domain = &s390_identity_domain,
> + S390_IOMMU_COMMON_OPS()
> };
Looks good to me here and worked well in my tests too. Thank you!
Tested-by: Niklas Schnelle <schnelle@linux.ibm.com>
Reviewed-by: Niklas Schnelle <schnelle@linux.ibm.com>
^ permalink raw reply [flat|nested] 10+ messages in thread