public inbox for linux-s390@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] iommu/s390: Fix race with release_device ops
@ 2022-08-23 20:30 Matthew Rosato
  2022-08-24  8:37 ` Pierre Morel
  0 siblings, 1 reply; 6+ messages in thread
From: Matthew Rosato @ 2022-08-23 20:30 UTC (permalink / raw)
  To: iommu
  Cc: linux-s390, schnelle, pmorel, borntraeger, hca, gor,
	gerald.schaefer, agordeev, svens, joro, will, robin.murphy, jgg,
	linux-kernel

With commit fa7e9ecc5e1c ("iommu/s390: Tolerate repeat attach_dev
calls") s390-iommu is supposed to handle dynamic switching between IOMMU
domains and the DMA API handling.  However, this commit does not
sufficiently handle the case where the device is released via a call
to the release_device op as it may occur at the same time as an opposing
attach_dev or detach_dev since the group mutex is not held over
release_device.  This was observed if the device is deconfigured during a
small window during vfio-pci initialization and can result in WARNs and
potential kernel panics.

Handle this by tracking when the device is probed/released via
dev_iommu_priv_set/get().  Ensure that once the device is released only
release_device handles the re-init of the device DMA.

Fixes: fa7e9ecc5e1c ("iommu/s390: Tolerate repeat attach_dev calls")
Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com>
---
 arch/s390/include/asm/pci.h |  1 +
 arch/s390/pci/pci.c         |  1 +
 drivers/iommu/s390-iommu.c  | 68 ++++++++++++++++++++++++++++---------
 3 files changed, 54 insertions(+), 16 deletions(-)

diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index 7b4cdadbc023..1295b6900e4b 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -157,6 +157,7 @@ struct zpci_dev {
 	/* DMA stuff */
 	unsigned long	*dma_table;
 	spinlock_t	dma_table_lock;
+	spinlock_t	dma_domain_lock;
 	int		tlb_refresh;
 
 	spinlock_t	iommu_bitmap_lock;
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 73cdc5539384..61901c1be3cc 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -832,6 +832,7 @@ struct zpci_dev *zpci_create_device(u32 fid, u32 fh, enum zpci_state state)
 	kref_init(&zdev->kref);
 	mutex_init(&zdev->lock);
 	mutex_init(&zdev->kzdev_lock);
+	spin_lock_init(&zdev->dma_domain_lock);
 
 	rc = zpci_init_iommu(zdev);
 	if (rc)
diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index c898bcbbce11..513a7ebd23b3 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -90,15 +90,39 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
 	struct zpci_dev *zdev = to_zpci_dev(dev);
 	struct s390_domain_device *domain_device;
 	unsigned long flags;
-	int cc, rc;
+	int cc, rc = 0;
 
 	if (!zdev)
 		return -ENODEV;
 
+	/* First check compatibility */
+	spin_lock_irqsave(&s390_domain->list_lock, flags);
+	/* First device defines the DMA range limits */
+	if (list_empty(&s390_domain->devices)) {
+		domain->geometry.aperture_start = zdev->start_dma;
+		domain->geometry.aperture_end = zdev->end_dma;
+		domain->geometry.force_aperture = true;
+	/* Allow only devices with identical DMA range limits */
+	} else if (domain->geometry.aperture_start != zdev->start_dma ||
+		   domain->geometry.aperture_end != zdev->end_dma) {
+		rc = -EINVAL;
+	}
+	spin_unlock_irqrestore(&s390_domain->list_lock, flags);
+	if (rc)
+		return rc;
+
 	domain_device = kzalloc(sizeof(*domain_device), GFP_KERNEL);
 	if (!domain_device)
 		return -ENOMEM;
 
+	/* Leave now if the device has already been released */
+	spin_lock_irqsave(&zdev->dma_domain_lock, flags);
+	if (!dev_iommu_priv_get(dev)) {
+		spin_unlock_irqrestore(&zdev->dma_domain_lock, flags);
+		kfree(domain_device);
+		return 0;
+	}
+
 	if (zdev->dma_table && !zdev->s390_domain) {
 		cc = zpci_dma_exit_device(zdev);
 		if (cc) {
@@ -117,22 +141,11 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
 		rc = -EIO;
 		goto out_restore;
 	}
+	zdev->s390_domain = s390_domain;
+	spin_unlock_irqrestore(&zdev->dma_domain_lock, flags);
 
 	spin_lock_irqsave(&s390_domain->list_lock, flags);
-	/* First device defines the DMA range limits */
-	if (list_empty(&s390_domain->devices)) {
-		domain->geometry.aperture_start = zdev->start_dma;
-		domain->geometry.aperture_end = zdev->end_dma;
-		domain->geometry.force_aperture = true;
-	/* Allow only devices with identical DMA range limits */
-	} else if (domain->geometry.aperture_start != zdev->start_dma ||
-		   domain->geometry.aperture_end != zdev->end_dma) {
-		rc = -EINVAL;
-		spin_unlock_irqrestore(&s390_domain->list_lock, flags);
-		goto out_restore;
-	}
 	domain_device->zdev = zdev;
-	zdev->s390_domain = s390_domain;
 	list_add(&domain_device->list, &s390_domain->devices);
 	spin_unlock_irqrestore(&s390_domain->list_lock, flags);
 
@@ -147,6 +160,7 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
 				   virt_to_phys(zdev->dma_table));
 	}
 out_free:
+	spin_unlock_irqrestore(&zdev->dma_domain_lock, flags);
 	kfree(domain_device);
 
 	return rc;
@@ -176,17 +190,22 @@ static void s390_iommu_detach_device(struct iommu_domain *domain,
 	}
 	spin_unlock_irqrestore(&s390_domain->list_lock, flags);
 
-	if (found && (zdev->s390_domain == s390_domain)) {
+	spin_lock_irqsave(&zdev->dma_domain_lock, flags);
+	if (found && (zdev->s390_domain == s390_domain) &&
+	    dev_iommu_priv_get(dev)) {
 		zdev->s390_domain = NULL;
 		zpci_unregister_ioat(zdev, 0);
 		zpci_dma_init_device(zdev);
 	}
+	spin_unlock_irqrestore(&zdev->dma_domain_lock, flags);
 }
 
 static struct iommu_device *s390_iommu_probe_device(struct device *dev)
 {
 	struct zpci_dev *zdev = to_zpci_dev(dev);
 
+	dev_iommu_priv_set(dev, zdev);
+
 	return &zdev->iommu_dev;
 }
 
@@ -194,6 +213,7 @@ static void s390_iommu_release_device(struct device *dev)
 {
 	struct zpci_dev *zdev = to_zpci_dev(dev);
 	struct iommu_domain *domain;
+	unsigned long flags;
 
 	/*
 	 * This is a workaround for a scenario where the IOMMU API common code
@@ -206,10 +226,26 @@ static void s390_iommu_release_device(struct device *dev)
 	 *
 	 * So let's call detach_dev from here if it hasn't been called before.
 	 */
-	if (zdev && zdev->s390_domain) {
+	if (zdev) {
+		/*
+		 * Clear priv to block further attaches for this device,
+		 * ensure detaches don't init DMA
+		 */
+		spin_lock_irqsave(&zdev->dma_domain_lock, flags);
+		dev_iommu_priv_set(dev, NULL);
+		spin_unlock_irqrestore(&zdev->dma_domain_lock, flags);
+		/* Make sure this device is removed from the domain list */
 		domain = iommu_get_domain_for_dev(dev);
 		if (domain)
 			s390_iommu_detach_device(domain, dev);
+		/* Now ensure DMA is initialized from here */
+		spin_lock_irqsave(&zdev->dma_domain_lock, flags);
+		if (zdev->s390_domain) {
+			zdev->s390_domain = NULL;
+			zpci_unregister_ioat(zdev, 0);
+			zpci_dma_init_device(zdev);
+		}
+		spin_unlock_irqrestore(&zdev->dma_domain_lock, flags);
 	}
 }
 
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH] iommu/s390: Fix race with release_device ops
  2022-08-23 20:30 [PATCH] iommu/s390: Fix race with release_device ops Matthew Rosato
@ 2022-08-24  8:37 ` Pierre Morel
  2022-08-24 20:25   ` Matthew Rosato
  0 siblings, 1 reply; 6+ messages in thread
From: Pierre Morel @ 2022-08-24  8:37 UTC (permalink / raw)
  To: Matthew Rosato, iommu
  Cc: linux-s390, schnelle, borntraeger, hca, gor, gerald.schaefer,
	agordeev, svens, joro, will, robin.murphy, jgg, linux-kernel



On 8/23/22 22:30, Matthew Rosato wrote:
> With commit fa7e9ecc5e1c ("iommu/s390: Tolerate repeat attach_dev
> calls") s390-iommu is supposed to handle dynamic switching between IOMMU
> domains and the DMA API handling.  However, this commit does not
> sufficiently handle the case where the device is released via a call
> to the release_device op as it may occur at the same time as an opposing
> attach_dev or detach_dev since the group mutex is not held over
> release_device.  This was observed if the device is deconfigured during a
> small window during vfio-pci initialization and can result in WARNs and
> potential kernel panics.
> 
> Handle this by tracking when the device is probed/released via
> dev_iommu_priv_set/get().  Ensure that once the device is released only
> release_device handles the re-init of the device DMA.
> 
> Fixes: fa7e9ecc5e1c ("iommu/s390: Tolerate repeat attach_dev calls")
> Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com>
> ---
>   arch/s390/include/asm/pci.h |  1 +
>   arch/s390/pci/pci.c         |  1 +
>   drivers/iommu/s390-iommu.c  | 68 ++++++++++++++++++++++++++++---------
>   3 files changed, 54 insertions(+), 16 deletions(-)
> 
> diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
> index 7b4cdadbc023..1295b6900e4b 100644
> --- a/arch/s390/include/asm/pci.h
> +++ b/arch/s390/include/asm/pci.h
> @@ -157,6 +157,7 @@ struct zpci_dev {
>   	/* DMA stuff */
>   	unsigned long	*dma_table;
>   	spinlock_t	dma_table_lock;
> +	spinlock_t	dma_domain_lock;
>   	int		tlb_refresh;
>   
>   	spinlock_t	iommu_bitmap_lock;
> diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
> index 73cdc5539384..61901c1be3cc 100644
> --- a/arch/s390/pci/pci.c
> +++ b/arch/s390/pci/pci.c
> @@ -832,6 +832,7 @@ struct zpci_dev *zpci_create_device(u32 fid, u32 fh, enum zpci_state state)
>   	kref_init(&zdev->kref);
>   	mutex_init(&zdev->lock);
>   	mutex_init(&zdev->kzdev_lock);
> +	spin_lock_init(&zdev->dma_domain_lock);
>   
>   	rc = zpci_init_iommu(zdev);
>   	if (rc)
> diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
> index c898bcbbce11..513a7ebd23b3 100644
> --- a/drivers/iommu/s390-iommu.c
> +++ b/drivers/iommu/s390-iommu.c
> @@ -90,15 +90,39 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
>   	struct zpci_dev *zdev = to_zpci_dev(dev);
>   	struct s390_domain_device *domain_device;
>   	unsigned long flags;
> -	int cc, rc;
> +	int cc, rc = 0;
>   
>   	if (!zdev)
>   		return -ENODEV;
>   
> +	/* First check compatibility */
> +	spin_lock_irqsave(&s390_domain->list_lock, flags);
> +	/* First device defines the DMA range limits */
> +	if (list_empty(&s390_domain->devices)) {
> +		domain->geometry.aperture_start = zdev->start_dma;
> +		domain->geometry.aperture_end = zdev->end_dma;
> +		domain->geometry.force_aperture = true;
> +	/* Allow only devices with identical DMA range limits */
> +	} else if (domain->geometry.aperture_start != zdev->start_dma ||
> +		   domain->geometry.aperture_end != zdev->end_dma) {
> +		rc = -EINVAL;
> +	}
> +	spin_unlock_irqrestore(&s390_domain->list_lock, flags);
> +	if (rc)
> +		return rc;
> +
>   	domain_device = kzalloc(sizeof(*domain_device), GFP_KERNEL);
>   	if (!domain_device)
>   		return -ENOMEM;
>   
> +	/* Leave now if the device has already been released */
> +	spin_lock_irqsave(&zdev->dma_domain_lock, flags);
> +	if (!dev_iommu_priv_get(dev)) {
> +		spin_unlock_irqrestore(&zdev->dma_domain_lock, flags);
> +		kfree(domain_device);
> +		return 0;
> +	}
> +
>   	if (zdev->dma_table && !zdev->s390_domain) {
>   		cc = zpci_dma_exit_device(zdev);
>   		if (cc) {

Am I wrong? It seems to me that zpci_dma_exit_device here is called with 
the spin_lock locked but this function zpci_dma_exit_device calls vfree 
which may sleep.


> @@ -117,22 +141,11 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
>   		rc = -EIO;
>   		goto out_restore;
>   	}
> +	zdev->s390_domain = s390_domain;
> +	spin_unlock_irqrestore(&zdev->dma_domain_lock, flags);
>   
>   	spin_lock_irqsave(&s390_domain->list_lock, flags);
> -	/* First device defines the DMA range limits */
> -	if (list_empty(&s390_domain->devices)) {
> -		domain->geometry.aperture_start = zdev->start_dma;
> -		domain->geometry.aperture_end = zdev->end_dma;
> -		domain->geometry.force_aperture = true;
> -	/* Allow only devices with identical DMA range limits */
> -	} else if (domain->geometry.aperture_start != zdev->start_dma ||
> -		   domain->geometry.aperture_end != zdev->end_dma) {
> -		rc = -EINVAL;
> -		spin_unlock_irqrestore(&s390_domain->list_lock, flags);
> -		goto out_restore;
> -	}
>   	domain_device->zdev = zdev;
> -	zdev->s390_domain = s390_domain;
>   	list_add(&domain_device->list, &s390_domain->devices);
>   	spin_unlock_irqrestore(&s390_domain->list_lock, flags);
>   
> @@ -147,6 +160,7 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
>   				   virt_to_phys(zdev->dma_table));
>   	}
>   out_free:
> +	spin_unlock_irqrestore(&zdev->dma_domain_lock, flags);
>   	kfree(domain_device);
>   
>   	return rc;
> @@ -176,17 +190,22 @@ static void s390_iommu_detach_device(struct iommu_domain *domain,
>   	}
>   	spin_unlock_irqrestore(&s390_domain->list_lock, flags);
>   
> -	if (found && (zdev->s390_domain == s390_domain)) {
> +	spin_lock_irqsave(&zdev->dma_domain_lock, flags);
> +	if (found && (zdev->s390_domain == s390_domain) &&
> +	    dev_iommu_priv_get(dev)) {
>   		zdev->s390_domain = NULL;
>   		zpci_unregister_ioat(zdev, 0);
>   		zpci_dma_init_device(zdev);
>   	}
> +	spin_unlock_irqrestore(&zdev->dma_domain_lock, flags);
>   }
>   
>   static struct iommu_device *s390_iommu_probe_device(struct device *dev)
>   {
>   	struct zpci_dev *zdev = to_zpci_dev(dev);
>   
> +	dev_iommu_priv_set(dev, zdev);
> +
>   	return &zdev->iommu_dev;
>   }
>   
> @@ -194,6 +213,7 @@ static void s390_iommu_release_device(struct device *dev)
>   {
>   	struct zpci_dev *zdev = to_zpci_dev(dev);
>   	struct iommu_domain *domain;
> +	unsigned long flags;
>   
>   	/*
>   	 * This is a workaround for a scenario where the IOMMU API common code
> @@ -206,10 +226,26 @@ static void s390_iommu_release_device(struct device *dev)
>   	 *
>   	 * So let's call detach_dev from here if it hasn't been called before.
>   	 */
> -	if (zdev && zdev->s390_domain) {
> +	if (zdev) {
> +		/*
> +		 * Clear priv to block further attaches for this device,
> +		 * ensure detaches don't init DMA
> +		 */
> +		spin_lock_irqsave(&zdev->dma_domain_lock, flags);
> +		dev_iommu_priv_set(dev, NULL);
> +		spin_unlock_irqrestore(&zdev->dma_domain_lock, flags);
> +		/* Make sure this device is removed from the domain list */
>   		domain = iommu_get_domain_for_dev(dev);
>   		if (domain)
>   			s390_iommu_detach_device(domain, dev);
> +		/* Now ensure DMA is initialized from here */
> +		spin_lock_irqsave(&zdev->dma_domain_lock, flags);
> +		if (zdev->s390_domain) {
> +			zdev->s390_domain = NULL;
> +			zpci_unregister_ioat(zdev, 0);
> +			zpci_dma_init_device(zdev);


If I do not make a mistake, zpci_dma_init_device() calls vzalloc() and 
dma_alloc_cpu_table() which both could sleep.


> +		}
> +		spin_unlock_irqrestore(&zdev->dma_domain_lock, flags);
>   	}
>   }
>   
> 

-- 
Pierre Morel
IBM Lab Boeblingen

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] iommu/s390: Fix race with release_device ops
  2022-08-24  8:37 ` Pierre Morel
@ 2022-08-24 20:25   ` Matthew Rosato
  2022-08-25  7:22     ` Alexander Gordeev
  0 siblings, 1 reply; 6+ messages in thread
From: Matthew Rosato @ 2022-08-24 20:25 UTC (permalink / raw)
  To: Pierre Morel, iommu
  Cc: linux-s390, schnelle, borntraeger, hca, gor, gerald.schaefer,
	agordeev, svens, joro, will, robin.murphy, jgg, linux-kernel

On 8/24/22 4:37 AM, Pierre Morel wrote:
> 
> 
> On 8/23/22 22:30, Matthew Rosato wrote:
>> With commit fa7e9ecc5e1c ("iommu/s390: Tolerate repeat attach_dev
>> calls") s390-iommu is supposed to handle dynamic switching between IOMMU
>> domains and the DMA API handling.  However, this commit does not
>> sufficiently handle the case where the device is released via a call
>> to the release_device op as it may occur at the same time as an opposing
>> attach_dev or detach_dev since the group mutex is not held over
>> release_device.  This was observed if the device is deconfigured during a
>> small window during vfio-pci initialization and can result in WARNs and
>> potential kernel panics.
>>
>> Handle this by tracking when the device is probed/released via
>> dev_iommu_priv_set/get().  Ensure that once the device is released only
>> release_device handles the re-init of the device DMA.
>>
>> Fixes: fa7e9ecc5e1c ("iommu/s390: Tolerate repeat attach_dev calls")
>> Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com>
>> ---
>>   arch/s390/include/asm/pci.h |  1 +
>>   arch/s390/pci/pci.c         |  1 +
>>   drivers/iommu/s390-iommu.c  | 68 ++++++++++++++++++++++++++++---------
>>   3 files changed, 54 insertions(+), 16 deletions(-)
>>
>> diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
>> index 7b4cdadbc023..1295b6900e4b 100644
>> --- a/arch/s390/include/asm/pci.h
>> +++ b/arch/s390/include/asm/pci.h
>> @@ -157,6 +157,7 @@ struct zpci_dev {
>>       /* DMA stuff */
>>       unsigned long    *dma_table;
>>       spinlock_t    dma_table_lock;
>> +    spinlock_t    dma_domain_lock;
>>       int        tlb_refresh;
>>         spinlock_t    iommu_bitmap_lock;
>> diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
>> index 73cdc5539384..61901c1be3cc 100644
>> --- a/arch/s390/pci/pci.c
>> +++ b/arch/s390/pci/pci.c
>> @@ -832,6 +832,7 @@ struct zpci_dev *zpci_create_device(u32 fid, u32 fh, enum zpci_state state)
>>       kref_init(&zdev->kref);
>>       mutex_init(&zdev->lock);
>>       mutex_init(&zdev->kzdev_lock);
>> +    spin_lock_init(&zdev->dma_domain_lock);
>>         rc = zpci_init_iommu(zdev);
>>       if (rc)
>> diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
>> index c898bcbbce11..513a7ebd23b3 100644
>> --- a/drivers/iommu/s390-iommu.c
>> +++ b/drivers/iommu/s390-iommu.c
>> @@ -90,15 +90,39 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
>>       struct zpci_dev *zdev = to_zpci_dev(dev);
>>       struct s390_domain_device *domain_device;
>>       unsigned long flags;
>> -    int cc, rc;
>> +    int cc, rc = 0;
>>         if (!zdev)
>>           return -ENODEV;
>>   +    /* First check compatibility */
>> +    spin_lock_irqsave(&s390_domain->list_lock, flags);
>> +    /* First device defines the DMA range limits */
>> +    if (list_empty(&s390_domain->devices)) {
>> +        domain->geometry.aperture_start = zdev->start_dma;
>> +        domain->geometry.aperture_end = zdev->end_dma;
>> +        domain->geometry.force_aperture = true;
>> +    /* Allow only devices with identical DMA range limits */
>> +    } else if (domain->geometry.aperture_start != zdev->start_dma ||
>> +           domain->geometry.aperture_end != zdev->end_dma) {
>> +        rc = -EINVAL;
>> +    }
>> +    spin_unlock_irqrestore(&s390_domain->list_lock, flags);
>> +    if (rc)
>> +        return rc;
>> +
>>       domain_device = kzalloc(sizeof(*domain_device), GFP_KERNEL);
>>       if (!domain_device)
>>           return -ENOMEM;
>>   +    /* Leave now if the device has already been released */
>> +    spin_lock_irqsave(&zdev->dma_domain_lock, flags);
>> +    if (!dev_iommu_priv_get(dev)) {
>> +        spin_unlock_irqrestore(&zdev->dma_domain_lock, flags);
>> +        kfree(domain_device);
>> +        return 0;
>> +    }
>> +
>>       if (zdev->dma_table && !zdev->s390_domain) {
>>           cc = zpci_dma_exit_device(zdev);
>>           if (cc) {
> 
> Am I wrong? It seems to me that zpci_dma_exit_device here is called with the spin_lock locked but this function zpci_dma_exit_device calls vfree which may sleep.
> 

Oh, good point, I just enabled lockdep to verify that.

I think we could just replace this with a mutex instead, it's not a performance path.  I've been running tests successfully today with this patch modified to instead use a mutex for dma_domain_lock.


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] iommu/s390: Fix race with release_device ops
  2022-08-24 20:25   ` Matthew Rosato
@ 2022-08-25  7:22     ` Alexander Gordeev
  2022-08-25 11:11       ` Niklas Schnelle
  0 siblings, 1 reply; 6+ messages in thread
From: Alexander Gordeev @ 2022-08-25  7:22 UTC (permalink / raw)
  To: Matthew Rosato
  Cc: Pierre Morel, iommu, linux-s390, schnelle, borntraeger, hca, gor,
	gerald.schaefer, svens, joro, will, robin.murphy, jgg,
	linux-kernel

On Wed, Aug 24, 2022 at 04:25:19PM -0400, Matthew Rosato wrote:
> >> @@ -90,15 +90,39 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
> >>       struct zpci_dev *zdev = to_zpci_dev(dev);
> >>       struct s390_domain_device *domain_device;
> >>       unsigned long flags;
> >> -    int cc, rc;
> >> +    int cc, rc = 0;
> >>         if (!zdev)
> >>           return -ENODEV;
> >>   +    /* First check compatibility */
> >> +    spin_lock_irqsave(&s390_domain->list_lock, flags);
> >> +    /* First device defines the DMA range limits */
> >> +    if (list_empty(&s390_domain->devices)) {
> >> +        domain->geometry.aperture_start = zdev->start_dma;
> >> +        domain->geometry.aperture_end = zdev->end_dma;
> >> +        domain->geometry.force_aperture = true;
> >> +    /* Allow only devices with identical DMA range limits */
> >> +    } else if (domain->geometry.aperture_start != zdev->start_dma ||
> >> +           domain->geometry.aperture_end != zdev->end_dma) {
> >> +        rc = -EINVAL;
> >> +    }
> >> +    spin_unlock_irqrestore(&s390_domain->list_lock, flags);
> >> +    if (rc)
> >> +        return rc;
> >> +
> >>       domain_device = kzalloc(sizeof(*domain_device), GFP_KERNEL);
> >>       if (!domain_device)
> >>           return -ENOMEM;
> >>   +    /* Leave now if the device has already been released */
> >> +    spin_lock_irqsave(&zdev->dma_domain_lock, flags);
> >> +    if (!dev_iommu_priv_get(dev)) {
> >> +        spin_unlock_irqrestore(&zdev->dma_domain_lock, flags);
> >> +        kfree(domain_device);
> >> +        return 0;
> >> +    }
> >> +
> >>       if (zdev->dma_table && !zdev->s390_domain) {
> >>           cc = zpci_dma_exit_device(zdev);
> >>           if (cc) {
> > 
> > Am I wrong? It seems to me that zpci_dma_exit_device here is called with the spin_lock locked but this function zpci_dma_exit_device calls vfree which may sleep.
> > 
> 
> Oh, good point, I just enabled lockdep to verify that.
> 
> I think we could just replace this with a mutex instead, it's not a performance path.  I've been running tests successfully today with this patch modified to instead use a mutex for dma_domain_lock.

But your original version uses irq-savvy spinlocks.
Are there data that need to be protected against interrupts?

Thanks!

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] iommu/s390: Fix race with release_device ops
  2022-08-25  7:22     ` Alexander Gordeev
@ 2022-08-25 11:11       ` Niklas Schnelle
  2022-08-25 11:26         ` Robin Murphy
  0 siblings, 1 reply; 6+ messages in thread
From: Niklas Schnelle @ 2022-08-25 11:11 UTC (permalink / raw)
  To: Alexander Gordeev, Matthew Rosato
  Cc: Pierre Morel, iommu, linux-s390, borntraeger, hca, gor,
	gerald.schaefer, svens, joro, will, robin.murphy, jgg,
	linux-kernel

On Thu, 2022-08-25 at 09:22 +0200, Alexander Gordeev wrote:
> On Wed, Aug 24, 2022 at 04:25:19PM -0400, Matthew Rosato wrote:
> > > > @@ -90,15 +90,39 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
> > > >       struct zpci_dev *zdev = to_zpci_dev(dev);
> > > >       struct s390_domain_device *domain_device;
> > > >       unsigned long flags;
> > > > -    int cc, rc;
> > > > +    int cc, rc = 0;
> > > >         if (!zdev)
> > > >           return -ENODEV;
> > > >   +    /* First check compatibility */
> > > > +    spin_lock_irqsave(&s390_domain->list_lock, flags);
> > > > +    /* First device defines the DMA range limits */
> > > > +    if (list_empty(&s390_domain->devices)) {
> > > > +        domain->geometry.aperture_start = zdev->start_dma;
> > > > +        domain->geometry.aperture_end = zdev->end_dma;
> > > > +        domain->geometry.force_aperture = true;
> > > > +    /* Allow only devices with identical DMA range limits */
> > > > +    } else if (domain->geometry.aperture_start != zdev->start_dma ||
> > > > +           domain->geometry.aperture_end != zdev->end_dma) {
> > > > +        rc = -EINVAL;
> > > > +    }
> > > > +    spin_unlock_irqrestore(&s390_domain->list_lock, flags);
> > > > +    if (rc)
> > > > +        return rc;
> > > > +
> > > >       domain_device = kzalloc(sizeof(*domain_device), GFP_KERNEL);
> > > >       if (!domain_device)
> > > >           return -ENOMEM;
> > > >   +    /* Leave now if the device has already been released */
> > > > +    spin_lock_irqsave(&zdev->dma_domain_lock, flags);
> > > > +    if (!dev_iommu_priv_get(dev)) {
> > > > +        spin_unlock_irqrestore(&zdev->dma_domain_lock, flags);
> > > > +        kfree(domain_device);
> > > > +        return 0;
> > > > +    }
> > > > +
> > > >       if (zdev->dma_table && !zdev->s390_domain) {
> > > >           cc = zpci_dma_exit_device(zdev);
> > > >           if (cc) {
> > > 
> > > Am I wrong? It seems to me that zpci_dma_exit_device here is called with the spin_lock locked but this function zpci_dma_exit_device calls vfree which may sleep.
> > > 
> > 
> > Oh, good point, I just enabled lockdep to verify that.
> > 
> > I think we could just replace this with a mutex instead, it's not a performance path.  I've been running tests successfully today with this patch modified to instead use a mutex for dma_domain_lock.
> 
> But your original version uses irq-savvy spinlocks.
> Are there data that need to be protected against interrupts?
> 
> Thanks!

I think that was a carry over from my original attempt that used the
zdev->dma_domain_lock in some more places including in interrupt
context. I think these are gone now so I think Matt is right in his
version this can be a mutex.


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] iommu/s390: Fix race with release_device ops
  2022-08-25 11:11       ` Niklas Schnelle
@ 2022-08-25 11:26         ` Robin Murphy
  0 siblings, 0 replies; 6+ messages in thread
From: Robin Murphy @ 2022-08-25 11:26 UTC (permalink / raw)
  To: Niklas Schnelle, Alexander Gordeev, Matthew Rosato
  Cc: Pierre Morel, iommu, linux-s390, borntraeger, hca, gor,
	gerald.schaefer, svens, joro, will, jgg, linux-kernel

On 2022-08-25 12:11, Niklas Schnelle wrote:
> On Thu, 2022-08-25 at 09:22 +0200, Alexander Gordeev wrote:
>> On Wed, Aug 24, 2022 at 04:25:19PM -0400, Matthew Rosato wrote:
>>>>> @@ -90,15 +90,39 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
>>>>>        struct zpci_dev *zdev = to_zpci_dev(dev);
>>>>>        struct s390_domain_device *domain_device;
>>>>>        unsigned long flags;
>>>>> -    int cc, rc;
>>>>> +    int cc, rc = 0;
>>>>>          if (!zdev)
>>>>>            return -ENODEV;
>>>>>    +    /* First check compatibility */
>>>>> +    spin_lock_irqsave(&s390_domain->list_lock, flags);
>>>>> +    /* First device defines the DMA range limits */
>>>>> +    if (list_empty(&s390_domain->devices)) {
>>>>> +        domain->geometry.aperture_start = zdev->start_dma;
>>>>> +        domain->geometry.aperture_end = zdev->end_dma;
>>>>> +        domain->geometry.force_aperture = true;
>>>>> +    /* Allow only devices with identical DMA range limits */
>>>>> +    } else if (domain->geometry.aperture_start != zdev->start_dma ||
>>>>> +           domain->geometry.aperture_end != zdev->end_dma) {
>>>>> +        rc = -EINVAL;
>>>>> +    }
>>>>> +    spin_unlock_irqrestore(&s390_domain->list_lock, flags);
>>>>> +    if (rc)
>>>>> +        return rc;
>>>>> +
>>>>>        domain_device = kzalloc(sizeof(*domain_device), GFP_KERNEL);
>>>>>        if (!domain_device)
>>>>>            return -ENOMEM;
>>>>>    +    /* Leave now if the device has already been released */
>>>>> +    spin_lock_irqsave(&zdev->dma_domain_lock, flags);
>>>>> +    if (!dev_iommu_priv_get(dev)) {
>>>>> +        spin_unlock_irqrestore(&zdev->dma_domain_lock, flags);
>>>>> +        kfree(domain_device);
>>>>> +        return 0;
>>>>> +    }
>>>>> +
>>>>>        if (zdev->dma_table && !zdev->s390_domain) {
>>>>>            cc = zpci_dma_exit_device(zdev);
>>>>>            if (cc) {
>>>>
>>>> Am I wrong? It seems to me that zpci_dma_exit_device here is called with the spin_lock locked but this function zpci_dma_exit_device calls vfree which may sleep.
>>>>
>>>
>>> Oh, good point, I just enabled lockdep to verify that.
>>>
>>> I think we could just replace this with a mutex instead, it's not a performance path.  I've been running tests successfully today with this patch modified to instead use a mutex for dma_domain_lock.
>>
>> But your original version uses irq-savvy spinlocks.
>> Are there data that need to be protected against interrupts?
>>
>> Thanks!
> 
> I think that was a carry over from my original attempt that used the
> zdev->dma_domain_lock in some more places including in interrupt
> context. I think these are gone now so I think Matt is right in his
> version this can be a mutex.

Yes, probe/release/attach/detach should absolutely not be happening from 
atomic/IRQ context. At the very least, the IOMMU core itself needs to 
take the group mutex in those paths.

Cheers,
Robin.

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2022-08-25 11:27 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2022-08-23 20:30 [PATCH] iommu/s390: Fix race with release_device ops Matthew Rosato
2022-08-24  8:37 ` Pierre Morel
2022-08-24 20:25   ` Matthew Rosato
2022-08-25  7:22     ` Alexander Gordeev
2022-08-25 11:11       ` Niklas Schnelle
2022-08-25 11:26         ` Robin Murphy

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox