Linux-ARM-Kernel Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v4 2/2] drm/nouveau: tegra: Detach from ARM DMA/IOMMU mapping
From: Thierry Reding @ 2018-05-30 14:06 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20180530140625.21247-1-thierry.reding@gmail.com>

From: Thierry Reding <treding@nvidia.com>

Depending on the kernel configuration, early ARM architecture setup code
may have attached the GPU to a DMA/IOMMU mapping that transparently uses
the IOMMU to back the DMA API. Tegra requires special handling for IOMMU
backed buffers (a special bit in the GPU's MMU page tables indicates the
memory path to take: via the SMMU or directly to the memory controller).
Transparently backing DMA memory with an IOMMU prevents Nouveau from
properly handling such memory accesses and causes memory access faults.

As a side-note: buffers other than those allocated in instance memory
don't need to be physically contiguous from the GPU's perspective since
the GPU can map them into contiguous buffers using its own MMU. Mapping
these buffers through the IOMMU is unnecessary and will even lead to
performance degradation because of the additional translation. One
exception to this are compressible buffers which need large pages. In
order to enable these large pages, multiple small pages will have to be
combined into one large (I/O virtually contiguous) mapping via the
IOMMU. However, that is a topic outside the scope of this fix and isn't
currently supported. An implementation will want to explicitly create
these large pages in the Nouveau driver, so detaching from a DMA/IOMMU
mapping would still be required.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
Changes in v4:
- use existing APIs to detach from a DMA/IOMMU mapping

Changes in v3:
- clarify the use of IOMMU mapping for compressible buffers
- squash multiple patches into this

 drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
index 78597da6313a..0e372a190d3f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
@@ -23,6 +23,10 @@
 #ifdef CONFIG_NOUVEAU_PLATFORM_DRIVER
 #include "priv.h"
 
+#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)
+#include <asm/dma-iommu.h>
+#endif
+
 static int
 nvkm_device_tegra_power_up(struct nvkm_device_tegra *tdev)
 {
@@ -105,6 +109,15 @@ nvkm_device_tegra_probe_iommu(struct nvkm_device_tegra *tdev)
 	unsigned long pgsize_bitmap;
 	int ret;
 
+#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)
+	if (dev->archdata.mapping) {
+		struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
+
+		arm_iommu_detach_device(dev);
+		arm_iommu_release_mapping(mapping);
+	}
+#endif
+
 	if (!tdev->func->iommu_bit)
 		return;
 
-- 
2.17.0

^ permalink raw reply related

* [PATCH v4 1/2] ARM: dma-mapping: Set proper DMA ops in arm_iommu_detach_device()
From: Thierry Reding @ 2018-05-30 14:06 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20180530140625.21247-1-thierry.reding@gmail.com>

From: Thierry Reding <treding@nvidia.com>

Instead of setting the DMA ops pointer to NULL, set the correct,
non-IOMMU ops depending on the device's coherency setting.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
Changes in v4:
- new patch to fix existing arm_iommu_detach_device() to do what we need

 arch/arm/mm/dma-mapping.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index af27f1c22d93..87a0037574e4 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -1151,6 +1151,11 @@ int arm_dma_supported(struct device *dev, u64 mask)
 	return __dma_supported(dev, mask, false);
 }
 
+static const struct dma_map_ops *arm_get_dma_map_ops(bool coherent)
+{
+	return coherent ? &arm_coherent_dma_ops : &arm_dma_ops;
+}
+
 #ifdef CONFIG_ARM_DMA_USE_IOMMU
 
 static int __dma_info_to_prot(enum dma_data_direction dir, unsigned long attrs)
@@ -2296,7 +2301,7 @@ void arm_iommu_detach_device(struct device *dev)
 	iommu_detach_device(mapping->domain, dev);
 	kref_put(&mapping->kref, release_iommu_mapping);
 	to_dma_iommu_mapping(dev) = NULL;
-	set_dma_ops(dev, NULL);
+	set_dma_ops(dev, arm_get_dma_map_ops(dev->archdata.dma_coherent));
 
 	pr_debug("Detached IOMMU controller from %s device.\n", dev_name(dev));
 }
@@ -2357,11 +2362,6 @@ static void arm_teardown_iommu_dma_ops(struct device *dev) { }
 
 #endif	/* CONFIG_ARM_DMA_USE_IOMMU */
 
-static const struct dma_map_ops *arm_get_dma_map_ops(bool coherent)
-{
-	return coherent ? &arm_coherent_dma_ops : &arm_dma_ops;
-}
-
 void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
 			const struct iommu_ops *iommu, bool coherent)
 {
-- 
2.17.0

^ permalink raw reply related

* [PATCH v4 0/2] drm/nouveau: tegra: Detach from ARM DMA/IOMMU mapping
From: Thierry Reding @ 2018-05-30 14:06 UTC (permalink / raw)
  To: linux-arm-kernel

From: Thierry Reding <treding@nvidia.com>

An unfortunate interaction between the 32-bit ARM DMA/IOMMU mapping code
and Tegra SMMU driver changes to support IOMMU groups introduced a boot-
time regression on Tegra124. This was caught very late because none of
the standard configurations that are tested on Tegra enable the ARM DMA/
IOMMU mapping code since it is not needed.

The reason for the failure is that the GPU found on Tegra uses a special
bit in physical addresses to determine whether or not a buffer is mapped
through the SMMU. In order to achieve this, the Nouveau driver needs to
explicitly understand which buffers are mapped through the SMMU and
which aren't. Hiding usage of the SMMU behind the DMA API is bound to
fail because the knowledge doesn't exist. Furthermore, the GPU has its
own IOMMU and in most cases doesn't need buffers to be physically or
virtually contiguous. One notable exception is for compressible buffers
which need to be mapped with large pages, which in turn require all the
small pages in a large page to be contiguous. This can be achieved with
an SMMU mapping, though it isn't currently supported in Nouveau.
Translating through the SMMU is unnecessary and can have a negative
impact on performance for the common case, so we want to avoid it when
possible.

This series of patches fixes the existing 32-bit ARM specific
arm_iommu_detach_device() API so that a driver can detach the device
from the DMA/IOMMU mapping and provide its own implementation for
dealing with the SMMU. The second patch makes use of that API in the
Nouveau driver to fix the regression.

Thierry

Thierry Reding (2):
  ARM: dma-mapping: Set proper DMA ops in arm_iommu_detach_device()
  drm/nouveau: tegra: Detach from ARM DMA/IOMMU mapping

 arch/arm/mm/dma-mapping.c                          | 12 ++++++------
 drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c | 13 +++++++++++++
 2 files changed, 19 insertions(+), 6 deletions(-)

-- 
2.17.0

^ permalink raw reply

* Regression in Linux next again
From: Maciej Purski @ 2018-05-30 14:03 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20180530091314.GB6920@sirena.org.uk>


On 05/30/2018 11:13 AM, Mark Brown wrote:
> On Tue, May 29, 2018 at 03:15:01PM -0700, Tony Lindgren wrote:
> 
>> I think I bisected this same issue for the second time now
>> but for a different merge window. What's up with that?
> 
> Last time we just reverted it as Maciej was unable to reproduce your
> problem, he's tried again with some alterations.
> 
>> Reverting both patches fixes the issue for me. I could
>> not debug it further because of the compile error(s).
> 
> OK, unless this gets fixed really quickly I'll revert again.
> 

I'm afraid I have no idea how to fix it quickly. You can revert it and
in the next version I'll fix the build error and split the last patch even
more, so we could perform a more precise bisection. I'd be grateful if you
could push it on your test coupled branch and Tony could test it again before
merging it with next again.

Best regards,

Maciej Purski

^ permalink raw reply

* [PATCH v3 2/2] drm/nouveau: tegra: Detach from ARM DMA/IOMMU mapping
From: Robin Murphy @ 2018-05-30 13:46 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20180530134116.GB5400@ulmo>

On 30/05/18 14:41, Thierry Reding wrote:
> On Wed, May 30, 2018 at 02:30:51PM +0100, Robin Murphy wrote:
>> On 30/05/18 14:00, Thierry Reding wrote:
>>> On Wed, May 30, 2018 at 11:30:25AM +0100, Robin Murphy wrote:
>>>> On 30/05/18 09:03, Thierry Reding wrote:
>>>>> From: Thierry Reding <treding@nvidia.com>
>>>>>
>>>>> Depending on the kernel configuration, early ARM architecture setup code
>>>>> may have attached the GPU to a DMA/IOMMU mapping that transparently uses
>>>>> the IOMMU to back the DMA API. Tegra requires special handling for IOMMU
>>>>> backed buffers (a special bit in the GPU's MMU page tables indicates the
>>>>> memory path to take: via the SMMU or directly to the memory controller).
>>>>> Transparently backing DMA memory with an IOMMU prevents Nouveau from
>>>>> properly handling such memory accesses and causes memory access faults.
>>>>>
>>>>> As a side-note: buffers other than those allocated in instance memory
>>>>> don't need to be physically contiguous from the GPU's perspective since
>>>>> the GPU can map them into contiguous buffers using its own MMU. Mapping
>>>>> these buffers through the IOMMU is unnecessary and will even lead to
>>>>> performance degradation because of the additional translation. One
>>>>> exception to this are compressible buffers which need large pages. In
>>>>> order to enable these large pages, multiple small pages will have to be
>>>>> combined into one large (I/O virtually contiguous) mapping via the
>>>>> IOMMU. However, that is a topic outside the scope of this fix and isn't
>>>>> currently supported. An implementation will want to explicitly create
>>>>> these large pages in the Nouveau driver, so detaching from a DMA/IOMMU
>>>>> mapping would still be required.
>>>>
>>>> I wonder if it might make sense to have a hook in iommu_attach_device() to
>>>> notify the arch DMA API code when moving devices between unmanaged and DMA
>>>> ops domains? That seems like it might be the most low-impact way to address
>>>> the overall problem long-term.
>>>>
>>>>> Signed-off-by: Thierry Reding <treding@nvidia.com>
>>>>> ---
>>>>> Changes in v3:
>>>>> - clarify the use of IOMMU mapping for compressible buffers
>>>>> - squash multiple patches into this
>>>>>
>>>>>     drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c | 5 +++++
>>>>>     1 file changed, 5 insertions(+)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
>>>>> index 78597da6313a..d0538af1b967 100644
>>>>> --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
>>>>> +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
>>>>> @@ -105,6 +105,11 @@ nvkm_device_tegra_probe_iommu(struct nvkm_device_tegra *tdev)
>>>>>     	unsigned long pgsize_bitmap;
>>>>>     	int ret;
>>>>> +#if IS_ENABLED(CONFIG_ARM)
>>>>
>>>> Wouldn't CONFIG_ARM_DMA_USE_IOMMU be even more appropriate?
>>>
>>> Not necessarily. arm_dma_iommu_detach_device() is always defined on ARM,
>>> only with CONFIG_ARM_DMA_USE_IOMMU=n it will be empty. So this check is
>>> a guard to make sure we don't call the function when it isn't available,
>>> but it may still not do anything.
>>
>> Calling a function under condition A, which only does anything under
>> condition B, when B depends on A, is identical in behaviour to only calling
>> the function under condition B, except needlessly harder to follow.
>>
>>>>> +	/* make sure we can use the IOMMU exclusively */
>>>>> +	arm_dma_iommu_detach_device(dev);
>>>>
>>>> As before, I would just use the existing infrastructure the same way the
>>>> Exynos DRM driver currently does in __exynos_iommu_attach() (albeit without
>>>> then reattaching to another DMA ops mapping).
>>>
>>> That's pretty much what I initially did and which was shot down by
>>> Christoph. As I said earlier, at this point I don't really care what
>>> color the shed will be. Can you and Christoph come to an agreement
>>> on what it should be?
>>
>> What I was getting at is that arm_iommu_detach_device() already *is* the
>> exact function Christoph was asking for, it just needs a minor fix instead
>> of adding explicit set_dma_ops() fiddling at its callsites which only
>> obfuscates the fact that it's supposed to be responsible for resetting the
>> device's DMA ops already.
> 
> It still has the downside of callers having to explicitly check for the
> existence of a mapping, otherwise they'll cause a warning to be printed
> to the kernel log.

Or we could look at the way it's actually used, and reconsider whether 
the warning is really appropriate. That's always an option ;)

Robin.

> That's not all that bad, though. I'll prepare version 4 with those
> changes.
> 
> Thierry
> 

^ permalink raw reply

* [PATCH v3 1/2] ARM: dma-mapping: Implement arm_dma_iommu_detach_device()
From: Robin Murphy @ 2018-05-30 13:42 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20180530131239.GA5400@ulmo>

On 30/05/18 14:12, Thierry Reding wrote:
> On Wed, May 30, 2018 at 02:54:46PM +0200, Thierry Reding wrote:
>> On Wed, May 30, 2018 at 10:59:30AM +0100, Robin Murphy wrote:
>>> On 30/05/18 09:03, Thierry Reding wrote:
>>>> From: Thierry Reding <treding@nvidia.com>
>>>>
>>>> Implement this function to enable drivers to detach from any IOMMU
>>>> domains that architecture code might have attached them to so that they
>>>> can take exclusive control of the IOMMU via the IOMMU API.
>>>>
>>>> Signed-off-by: Thierry Reding <treding@nvidia.com>
>>>> ---
>>>> Changes in v3:
>>>> - make API 32-bit ARM specific
>>>> - avoid extra local variable
>>>>
>>>> Changes in v2:
>>>> - fix compilation
>>>>
>>>>    arch/arm/include/asm/dma-mapping.h |  3 +++
>>>>    arch/arm/mm/dma-mapping-nommu.c    |  4 ++++
>>>>    arch/arm/mm/dma-mapping.c          | 16 ++++++++++++++++
>>>>    3 files changed, 23 insertions(+)
>>>>
>>>> diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h
>>>> index 8436f6ade57d..5960e9f3a9d0 100644
>>>> --- a/arch/arm/include/asm/dma-mapping.h
>>>> +++ b/arch/arm/include/asm/dma-mapping.h
>>>> @@ -103,6 +103,9 @@ extern void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
>>>>    #define arch_teardown_dma_ops arch_teardown_dma_ops
>>>>    extern void arch_teardown_dma_ops(struct device *dev);
>>>> +#define arm_dma_iommu_detach_device arm_dma_iommu_detach_device
>>>> +extern void arm_dma_iommu_detach_device(struct device *dev);
>>>> +
>>>>    /* do not use this function in a driver */
>>>>    static inline bool is_device_dma_coherent(struct device *dev)
>>>>    {
>>>> diff --git a/arch/arm/mm/dma-mapping-nommu.c b/arch/arm/mm/dma-mapping-nommu.c
>>>> index f448a0663b10..eb781369377b 100644
>>>> --- a/arch/arm/mm/dma-mapping-nommu.c
>>>> +++ b/arch/arm/mm/dma-mapping-nommu.c
>>>> @@ -241,3 +241,7 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
>>>>    void arch_teardown_dma_ops(struct device *dev)
>>>>    {
>>>>    }
>>>> +
>>>> +void arm_dma_iommu_detach_device(struct device *dev)
>>>> +{
>>>> +}
>>>> diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
>>>> index af27f1c22d93..6d8af08b3e7d 100644
>>>> --- a/arch/arm/mm/dma-mapping.c
>>>> +++ b/arch/arm/mm/dma-mapping.c
>>>> @@ -2400,3 +2400,19 @@ void arch_teardown_dma_ops(struct device *dev)
>>>>    	arm_teardown_iommu_dma_ops(dev);
>>>>    }
>>>> +
>>>> +void arm_dma_iommu_detach_device(struct device *dev)
>>>> +{
>>>> +#ifdef CONFIG_ARM_DMA_USE_IOMMU
>>>> +	struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
>>>> +
>>>> +	if (!mapping)
>>>> +		return;
>>>> +
>>>> +	arm_iommu_release_mapping(mapping);
>>>
>>> Potentially freeing the mapping before you try to operate on it is never the
>>> best idea. Plus arm_iommu_detach_device() already releases a reference
>>> appropriately anyway, so it's a double-free.
>>
>> But the reference released by arm_iommu_detach_device() is to balance
>> out the reference acquired by arm_iommu_attach_device(), isn't it? In
>> the above, the arm_iommu_release_mapping() is supposed to drop the
>> final reference which was obtained by arm_iommu_create_mapping(). The
>> mapping shouldn't go away irrespective of the order in which these
>> will be called.
> 
> Going over the DMA/IOMMU code I just remembered that I drew inspiration
> from arm_teardown_iommu_dma_ops() for the initial proposal which also
> calls both arm_iommu_detach_device() and arm_iommu_release_mapping().
> That said, one other possibility to implement this would be to export
> the 32-bit and 64-bit ARM implementations of arch_teardown_dma_ops()
> and use that instead. linux/dma-mapping.h implements a stub for
> architectures that don't provide one, so it should work without any
> #ifdef guards.
> 
> That combined with the set_dma_ops() fix in arm_iommu_detach_device()
> should fix this pretty nicely.

OK, having a second look at the ARM code I see I had indeed overlooked 
that extra reference held until arm_teardown_iommu_dma_ops() - mea culpa 
- but frankly that looks wrong anyway, as it basically defeats the point 
of refcounting the mapping at all. AFAICS arm_setup_iommu_dma_ops() 
should just be made to behave 'normally' by unconditionally dropping the 
initial reference after calling __arm_iommu_attach_device(), then we 
don't need all these odd and confusing release calls dotted around at all.

Robin.

^ permalink raw reply

* [PATCH v3 2/2] drm/nouveau: tegra: Detach from ARM DMA/IOMMU mapping
From: Thierry Reding @ 2018-05-30 13:41 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <856db04f-592c-1618-36a8-100808693813@arm.com>

On Wed, May 30, 2018 at 02:30:51PM +0100, Robin Murphy wrote:
> On 30/05/18 14:00, Thierry Reding wrote:
> > On Wed, May 30, 2018 at 11:30:25AM +0100, Robin Murphy wrote:
> > > On 30/05/18 09:03, Thierry Reding wrote:
> > > > From: Thierry Reding <treding@nvidia.com>
> > > > 
> > > > Depending on the kernel configuration, early ARM architecture setup code
> > > > may have attached the GPU to a DMA/IOMMU mapping that transparently uses
> > > > the IOMMU to back the DMA API. Tegra requires special handling for IOMMU
> > > > backed buffers (a special bit in the GPU's MMU page tables indicates the
> > > > memory path to take: via the SMMU or directly to the memory controller).
> > > > Transparently backing DMA memory with an IOMMU prevents Nouveau from
> > > > properly handling such memory accesses and causes memory access faults.
> > > > 
> > > > As a side-note: buffers other than those allocated in instance memory
> > > > don't need to be physically contiguous from the GPU's perspective since
> > > > the GPU can map them into contiguous buffers using its own MMU. Mapping
> > > > these buffers through the IOMMU is unnecessary and will even lead to
> > > > performance degradation because of the additional translation. One
> > > > exception to this are compressible buffers which need large pages. In
> > > > order to enable these large pages, multiple small pages will have to be
> > > > combined into one large (I/O virtually contiguous) mapping via the
> > > > IOMMU. However, that is a topic outside the scope of this fix and isn't
> > > > currently supported. An implementation will want to explicitly create
> > > > these large pages in the Nouveau driver, so detaching from a DMA/IOMMU
> > > > mapping would still be required.
> > > 
> > > I wonder if it might make sense to have a hook in iommu_attach_device() to
> > > notify the arch DMA API code when moving devices between unmanaged and DMA
> > > ops domains? That seems like it might be the most low-impact way to address
> > > the overall problem long-term.
> > > 
> > > > Signed-off-by: Thierry Reding <treding@nvidia.com>
> > > > ---
> > > > Changes in v3:
> > > > - clarify the use of IOMMU mapping for compressible buffers
> > > > - squash multiple patches into this
> > > > 
> > > >    drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c | 5 +++++
> > > >    1 file changed, 5 insertions(+)
> > > > 
> > > > diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
> > > > index 78597da6313a..d0538af1b967 100644
> > > > --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
> > > > +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
> > > > @@ -105,6 +105,11 @@ nvkm_device_tegra_probe_iommu(struct nvkm_device_tegra *tdev)
> > > >    	unsigned long pgsize_bitmap;
> > > >    	int ret;
> > > > +#if IS_ENABLED(CONFIG_ARM)
> > > 
> > > Wouldn't CONFIG_ARM_DMA_USE_IOMMU be even more appropriate?
> > 
> > Not necessarily. arm_dma_iommu_detach_device() is always defined on ARM,
> > only with CONFIG_ARM_DMA_USE_IOMMU=n it will be empty. So this check is
> > a guard to make sure we don't call the function when it isn't available,
> > but it may still not do anything.
> 
> Calling a function under condition A, which only does anything under
> condition B, when B depends on A, is identical in behaviour to only calling
> the function under condition B, except needlessly harder to follow.
> 
> > > > +	/* make sure we can use the IOMMU exclusively */
> > > > +	arm_dma_iommu_detach_device(dev);
> > > 
> > > As before, I would just use the existing infrastructure the same way the
> > > Exynos DRM driver currently does in __exynos_iommu_attach() (albeit without
> > > then reattaching to another DMA ops mapping).
> > 
> > That's pretty much what I initially did and which was shot down by
> > Christoph. As I said earlier, at this point I don't really care what
> > color the shed will be. Can you and Christoph come to an agreement
> > on what it should be?
> 
> What I was getting at is that arm_iommu_detach_device() already *is* the
> exact function Christoph was asking for, it just needs a minor fix instead
> of adding explicit set_dma_ops() fiddling at its callsites which only
> obfuscates the fact that it's supposed to be responsible for resetting the
> device's DMA ops already.

It still has the downside of callers having to explicitly check for the
existence of a mapping, otherwise they'll cause a warning to be printed
to the kernel log.

That's not all that bad, though. I'll prepare version 4 with those
changes.

Thierry
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 833 bytes
Desc: not available
URL: <http://lists.infradead.org/pipermail/linux-arm-kernel/attachments/20180530/02b467fa/attachment.sig>

^ permalink raw reply

* [PATCH v3 2/2] drm/nouveau: tegra: Detach from ARM DMA/IOMMU mapping
From: Robin Murphy @ 2018-05-30 13:30 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20180530130045.GB1595@ulmo>

On 30/05/18 14:00, Thierry Reding wrote:
> On Wed, May 30, 2018 at 11:30:25AM +0100, Robin Murphy wrote:
>> On 30/05/18 09:03, Thierry Reding wrote:
>>> From: Thierry Reding <treding@nvidia.com>
>>>
>>> Depending on the kernel configuration, early ARM architecture setup code
>>> may have attached the GPU to a DMA/IOMMU mapping that transparently uses
>>> the IOMMU to back the DMA API. Tegra requires special handling for IOMMU
>>> backed buffers (a special bit in the GPU's MMU page tables indicates the
>>> memory path to take: via the SMMU or directly to the memory controller).
>>> Transparently backing DMA memory with an IOMMU prevents Nouveau from
>>> properly handling such memory accesses and causes memory access faults.
>>>
>>> As a side-note: buffers other than those allocated in instance memory
>>> don't need to be physically contiguous from the GPU's perspective since
>>> the GPU can map them into contiguous buffers using its own MMU. Mapping
>>> these buffers through the IOMMU is unnecessary and will even lead to
>>> performance degradation because of the additional translation. One
>>> exception to this are compressible buffers which need large pages. In
>>> order to enable these large pages, multiple small pages will have to be
>>> combined into one large (I/O virtually contiguous) mapping via the
>>> IOMMU. However, that is a topic outside the scope of this fix and isn't
>>> currently supported. An implementation will want to explicitly create
>>> these large pages in the Nouveau driver, so detaching from a DMA/IOMMU
>>> mapping would still be required.
>>
>> I wonder if it might make sense to have a hook in iommu_attach_device() to
>> notify the arch DMA API code when moving devices between unmanaged and DMA
>> ops domains? That seems like it might be the most low-impact way to address
>> the overall problem long-term.
>>
>>> Signed-off-by: Thierry Reding <treding@nvidia.com>
>>> ---
>>> Changes in v3:
>>> - clarify the use of IOMMU mapping for compressible buffers
>>> - squash multiple patches into this
>>>
>>>    drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c | 5 +++++
>>>    1 file changed, 5 insertions(+)
>>>
>>> diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
>>> index 78597da6313a..d0538af1b967 100644
>>> --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
>>> +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
>>> @@ -105,6 +105,11 @@ nvkm_device_tegra_probe_iommu(struct nvkm_device_tegra *tdev)
>>>    	unsigned long pgsize_bitmap;
>>>    	int ret;
>>> +#if IS_ENABLED(CONFIG_ARM)
>>
>> Wouldn't CONFIG_ARM_DMA_USE_IOMMU be even more appropriate?
> 
> Not necessarily. arm_dma_iommu_detach_device() is always defined on ARM,
> only with CONFIG_ARM_DMA_USE_IOMMU=n it will be empty. So this check is
> a guard to make sure we don't call the function when it isn't available,
> but it may still not do anything.

Calling a function under condition A, which only does anything under 
condition B, when B depends on A, is identical in behaviour to only 
calling the function under condition B, except needlessly harder to follow.

>>> +	/* make sure we can use the IOMMU exclusively */
>>> +	arm_dma_iommu_detach_device(dev);
>>
>> As before, I would just use the existing infrastructure the same way the
>> Exynos DRM driver currently does in __exynos_iommu_attach() (albeit without
>> then reattaching to another DMA ops mapping).
> 
> That's pretty much what I initially did and which was shot down by
> Christoph. As I said earlier, at this point I don't really care what
> color the shed will be. Can you and Christoph come to an agreement
> on what it should be?

What I was getting at is that arm_iommu_detach_device() already *is* the 
exact function Christoph was asking for, it just needs a minor fix 
instead of adding explicit set_dma_ops() fiddling at its callsites which 
only obfuscates the fact that it's supposed to be responsible for 
resetting the device's DMA ops already.

Robin.

^ permalink raw reply

* [PATCH v9 00/12] Support PPTT for ARM64
From: Sudeep Holla @ 2018-05-30 13:24 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <a79f5acc-51f2-cd7c-5abf-c7c1df8064a1@arm.com>



On 29/05/18 22:52, Jeremy Linton wrote:
> Hi,
> 
> On 05/29/2018 10:51 AM, Geert Uytterhoeven wrote:
>> Hi Will,
>>
>> On Tue, May 29, 2018 at 5:08 PM, Will Deacon <will.deacon@arm.com> wrote:
>>> On Tue, May 29, 2018 at 02:18:40PM +0100, Sudeep Holla wrote:
>>>> On 29/05/18 12:56, Geert Uytterhoeven wrote:
>>>>> On Tue, May 29, 2018 at 1:14 PM, Sudeep Holla
>>>>> <sudeep.holla@arm.com> wrote:
>>>>>> On 29/05/18 11:48, Geert Uytterhoeven wrote:
>>>>>>> System suspend still works fine on systems with big cores only:
>>>>>>>
>>>>>>>     R-Car H3 ES1.0 (4xCA57 (4xCA53 disabled in firmware))
>>>>>>>     R-Car M3-N (2xCA57)
>>>>>>>
>>>>>>> Reverting this commit fixes the issue for me.
>>>>>>
>>>>>> I can't find anything that relates to system suspend in these patches
>>>>>> unless they are messing with something during CPU hot plug-in back
>>>>>> during resume.
>>>>>
>>>>> It's only the last patch that introduces the breakage.
>>>>>
>>>>
>>>> As specified in the commit log, it won't change any behavior for DT
>>>> systems if it's non-NUMA or single node system. So I am still wondering
>>>> what could trigger this regression.
>>>
>>> I wonder if we're somehow giving an uninitialised/invalid NUMA
>>> configuration
>>> to the scheduler, although I can't see how this would happen.
>>>
>>> Geert -- if you enable CONFIG_DEBUG_PER_CPU_MAPS=y and apply the diff
>>> below
>>> do you see anything shouting in dmesg?
>>
>> Thanks, but unfortunately it doesn't help.
>> I added some debug code to print cpumask, but so far I don't see anything
>> suspicious.
> 
> I suspect most of the problem is related to the node mask changing at
> unexpected times (particularly cores being removed from the mask). Once
> I understand that more, there may be a simpler patch.
> 
> OTOH, I've been testing with this, and with it, I can't seem to
> duplicate the problem with CONFIG_NUMA disabled I found.
> 

I am also giving it a run on my Juno(defconfig - CONFIG_NUMA) and CPU
hotplug tests are fine with this change.

-- 
Regards,
Sudeep

^ permalink raw reply

* [PATCH 07/12] dt-bindings: tc358754: add DT bindings
From: Laurent Pinchart @ 2018-05-30 13:20 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20180530130731eucas1p160d53c3f8500bcecd000bd8895843817~zbgGgQw3b1079710797eucas1p1H@eucas1p1.samsung.com>

Hi Andrzej,

On Wednesday, 30 May 2018 16:07:29 EEST Andrzej Hajda wrote:
> On 30.05.2018 14:35, Laurent Pinchart wrote:
> > On Wednesday, 30 May 2018 12:59:12 EEST Andrzej Hajda wrote:
> >> On 28.05.2018 12:18, Laurent Pinchart wrote:
> >>> On Monday, 28 May 2018 12:47:11 EEST Maciej Purski wrote:
> >>>> The patch adds bindings to Toshiba DSI/LVDS bridge TC358764.
> >>>> Bindings describe power supplies, reset gpio and video interfaces.
> >>>> 
> >>>> Signed-off-by: Andrzej Hajda <a.hajda@samsung.com>
> >>>> Signed-off-by: Maciej Purski <m.purski@samsung.com>
> >>>> ---
> >>>> 
> >>>>  .../bindings/display/bridge/toshiba,tc358764.txt   | 42
> >>>>  ++++++++++++++++
> >>>>  1 file changed, 42 insertions(+)
> >>>>  create mode 100644
> >>>> 
> >>>> Documentation/devicetree/bindings/display/bridge/toshiba,tc358764.txt
> >>>> 
> >>>> diff --git
> >>>> a/Documentation/devicetree/bindings/display/bridge/toshiba,tc358764.txt
> >>>> b/Documentation/devicetree/bindings/display/bridge/toshiba,tc358764.txt
> >>>> new
> >>>> file mode 100644
> >>>> index 0000000..d09bdc2
> >>>> --- /dev/null
> >>>> +++
> >>>> b/Documentation/devicetree/bindings/display/bridge/toshiba,tc358764.txt
> >>>> @@ -0,0 +1,42 @@
> >>>> +TC358764 MIPI-DSI to LVDS panel bridge
> >>>> +
> >>>> +Required properties:
> >>>> +  - compatible: "toshiba,tc358764"
> >>>> +  - reg: the virtual channel number of a DSI peripheral
> >>>> +  - vddc-supply: core voltage supply
> >>>> +  - vddio-supply: I/O voltage supply
> >>>> +  - vddmipi-supply: MIPI voltage supply
> >>>> +  - vddlvds133-supply: LVDS1 3.3V voltage supply
> >>>> +  - vddlvds112-supply: LVDS1 1.2V voltage supply
> >>> 
> >>> That's a lot of power supplies. Could some of them be merged together ?
> >>> See https://patchwork.freedesktop.org/patch/216058/ for an earlier
> >>> discussion on the same subject.
> >> 
> >> Specs says about 3 supply voltage values:
> >> - 1.2V - digital core, DSI-RX PHY
> >> - 1.8-3.3V - digital I/O
> >> - 3.3V - LVDS-TX PHY
> >> 
> >> So I guess it should be minimal number of supplies. Natural candidates:
> >> 
> >> - vddc-supply: core voltage supply, 1.2V
> >> - vddio-supply: I/O voltage supply, 1.8V or 3.3V
> >> - vddlvds-supply: LVDS1/2 voltage supply, 3.3V
> >> 
> >> I have changed name of the latest supply to be more consistent with
> >> other supplies, and changed 1.8-3.3 (which incorrectly suggest voltage
> >> range), to more precise voltage alternative.
> > 
> > This looks fine to me.
> > 
> >>>> +  - reset-gpios: a GPIO spec for the reset pin
> >>>> +
> >>>> +The device node can contain zero to two 'port' child nodes, each with
> >>>> one
> >>>> +child
> >>>> +'endpoint' node, according to the bindings defined in [1].
> >>>> +The following are properties specific to those nodes.
> >>>> +
> >>>> +port:
> >>>> +  - reg: (required) can be 0 for DSI port or 1 for LVDS port;
> >>> 
> >>> This seems pretty vague to me. It could be read as meaning that ports
> >>> are
> >>> completely optional, and that the port number you list can be used, but
> >>> that something else could be used too.
> >>> 
> >>> Let's make the port nodes mandatory. I propose the following.
> >>> 
> >>> Required nodes:
> >>> 
> >>> The TC358764 has DSI and LVDS ports whose connections are described
> >>> using
> >>> the OF graph bindings defined in
> >>> Documentation/devicetree/bindings/graph.txt. The device node must
> >>> contain
> >>> one 'port' child node per DSI and LVDS port. The port nodes are numbered
> >>> as follows.
> >>> 
> >>>   Port                  Number
> >>> 
> >>> -------------------------------------------------------------------
> >>> 
> >>>   DSI Input             0
> >>>   LVDS Output           1
> >>> 
> >>> Each port node must contain endpoint nodes describing the hardware
> >>> connections.
> >> 
> >> Since the bridge is controlled via DSI bus, DSI input port is not
> >> necessary.
> > 
> > I don't agree with this. Regardless of how the bridge is controlled, I
> > think we should always use ports to describe the data connections.
> > Otherwise it would get more complicated for display controller drivers to
> > use different types of bridges.
> 
> It was discussed already, and DT guideline is to skip graphs in simple
> case of parent/child nodes, see for example [1].
> 
> [1]: https://marc.info/?l=dri-devel&m=148354108702517&w=2

That's when the child has no other connection at all. I don't think it makes 
sense to mix description of connections through parent/child relationships and 
through ports for a single device.

And that being said, I don't agree with Rob's comment there. Having two 
different methods to describe connections means that you have to implement 
them both in all display controller drivers (and even all bridge drivers in 
the case of chained bridges). That's an extra complexity that can easily be 
avoided by standardizing DT bindings.

I also wonder whether Rob's position hasn't been reconsidered since then; I 
vaguely recall another more recent discussion on this topic. I'm a bit too 
busy right now to try and dig it up, I'm afraid :-/

> >>>> +[1]: Documentation/devicetree/bindings/media/video-interfaces.txt
> >>>> +
> >>>> +Example:
> >>>> +
> >>>> +	bridge@0 {
> >>>> +		reg = <0>;
> >>>> +		compatible = "toshiba,tc358764";
> >>>> +		vddc-supply = <&vcc_1v2_reg>;
> >>>> +		vddio-supply = <&vcc_1v8_reg>;
> >>>> +		vddmipi-supply = <&vcc_1v2_reg>;
> >>>> +		vddlvds133-supply = <&vcc_3v3_reg>;
> >>>> +		vddlvds112-supply = <&vcc_1v2_reg>;
> >>>> +		reset-gpios = <&gpd1 6 GPIO_ACTIVE_LOW>;
> >>>> +		#address-cells = <1>;
> >>>> +		#size-cells = <0>;
> >>>> +		port@1 {
> >>>> +			reg = <1>;
> >>>> +			lvds_ep: endpoint {
> >>>> +				remote-endpoint = <&panel_ep>;
> >>>> +			};
> >>>> +		};
> >>>> +	};

-- 
Regards,

Laurent Pinchart

^ permalink raw reply

* [PATCH v2 2/2] ARM: debug: fix BCM2836 order entry
From: Clément Péron @ 2018-05-30 13:19 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20180530131956.13972-1-peron.clem@gmail.com>

From: Clément Peron <clement.peron@devialet.com>

Entries are sorted by their address value, except the BCM2836/KONA
which are not in the proper order.

Signed-off-by: Clément Peron <clement.peron@devialet.com>
---
 arch/arm/Kconfig.debug | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug
index 4ea9d5793b91..1571d6c8f40e 100644
--- a/arch/arm/Kconfig.debug
+++ b/arch/arm/Kconfig.debug
@@ -1572,8 +1572,8 @@ config DEBUG_UART_PHYS
 	default 0x20064000 if DEBUG_RK29_UART1 || DEBUG_RK3X_UART2
 	default 0x20068000 if DEBUG_RK29_UART2 || DEBUG_RK3X_UART3
 	default 0x20201000 if DEBUG_BCM2835
-	default 0x3f201000 if DEBUG_BCM2836
 	default 0x3e000000 if DEBUG_BCM_KONA_UART
+	default 0x3f201000 if DEBUG_BCM2836
 	default 0x4000e400 if DEBUG_LL_UART_EFM32
 	default 0x40028000 if DEBUG_AT91_SAMV7_USART1
 	default 0x40081000 if DEBUG_LPC18XX_UART0
-- 
2.17.0

^ permalink raw reply related

* [PATCH v2 1/2] ARM: debug: Add Iproc UART3 debug addresses
From: Clément Péron @ 2018-05-30 13:19 UTC (permalink / raw)
  To: linux-arm-kernel

From: Clément Peron <clement.peron@devialet.com>

Broadcom Iproc SoCs typically use the UART3 for
debug/console, provide a known good location for that.

Signed-off-by: Clément Peron <clement.peron@devialet.com>
---

 arch/arm/Kconfig.debug | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug
index 199ebc1c4538..4ea9d5793b91 100644
--- a/arch/arm/Kconfig.debug
+++ b/arch/arm/Kconfig.debug
@@ -207,6 +207,14 @@ choice
 		depends on ARCH_BCM_HR2
 		select DEBUG_UART_8250
 
+	config DEBUG_BCM_IPROC_UART3
+		bool "Kernel low-level debugging on BCM IPROC UART3"
+		depends on ARCH_BCM_CYGNUS
+		select DEBUG_UART_8250
+		help
+		  Say Y here if you want the debug print routines to direct
+		  their output to the third serial port on these devices.
+
 	config DEBUG_BCM_KONA_UART
 		bool "Kernel low-level debugging messages via BCM KONA UART"
 		depends on ARCH_BCM_MOBILE
@@ -1557,6 +1565,7 @@ config DEBUG_UART_PHYS
 	default 0x18000400 if DEBUG_BCM_HR2
 	default 0x18010000 if DEBUG_SIRFATLAS7_UART0
 	default 0x18020000 if DEBUG_SIRFATLAS7_UART1
+	default 0x18023000 if DEBUG_BCM_IPROC_UART3
 	default 0x1c090000 if DEBUG_VEXPRESS_UART0_RS1
 	default 0x20001000 if DEBUG_HIP01_UART
 	default 0x20060000 if DEBUG_RK29_UART0
@@ -1676,6 +1685,7 @@ config DEBUG_UART_VIRT
 	default 0xf1002000 if DEBUG_MT8127_UART0
 	default 0xf1006000 if DEBUG_MT6589_UART0
 	default 0xf1009000 if DEBUG_MT8135_UART3
+	default 0xf1023000 if DEBUG_BCM_IPROC_UART3
 	default 0xf11f1000 if DEBUG_VERSATILE
 	default 0xf1600000 if DEBUG_INTEGRATOR
 	default 0xf1c28000 if DEBUG_SUNXI_UART0
@@ -1791,7 +1801,7 @@ config DEBUG_UART_8250_WORD
 		DEBUG_KEYSTONE_UART0 || DEBUG_KEYSTONE_UART1 || \
 		DEBUG_ALPINE_UART0 || \
 		DEBUG_DAVINCI_DMx_UART0 || DEBUG_DAVINCI_DA8XX_UART1 || \
-		DEBUG_DAVINCI_DA8XX_UART2 || \
+		DEBUG_DAVINCI_DA8XX_UART2 || DEBUG_BCM_IPROC_UART3 || \
 		DEBUG_BCM_KONA_UART || DEBUG_RK32_UART2
 
 config DEBUG_UART_8250_PALMCHIP
-- 
2.17.0

^ permalink raw reply related

* [PATCH 10/11] ARM64: dts: ls1046a: Remove fsl, qspi-has-second-chip as it is not used
From: Frieder Schrempf @ 2018-05-30 13:14 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1527686082-15142-1-git-send-email-frieder.schrempf@exceet.de>

After switching to the new FSL QSPI driver the property
'fsl,qspi-has-second-chip' is not needed anymore.

The driver now uses the 'reg' property to determine the bus and
the chipselect.

Signed-off-by: Frieder Schrempf <frieder.schrempf@exceet.de>
---
 arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi
index 136ebfa..871189e 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi
@@ -247,7 +247,6 @@
 			clock-names = "qspi_en", "qspi";
 			clocks = <&clockgen 4 1>, <&clockgen 4 1>;
 			big-endian;
-			fsl,qspi-has-second-chip;
 			status = "disabled";
 		};
 
-- 
2.7.4

^ permalink raw reply related

* [PATCH 07/11] ARM: defconfig: Use the new FSL QSPI driver under the SPI framework
From: Frieder Schrempf @ 2018-05-30 13:14 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1527686082-15142-1-git-send-email-frieder.schrempf@exceet.de>

The new driver at spi/spi-fsl-qspi.c replaces the old SPI NOR driver
at mtd/fsl-quadspi.c. Switch to the new driver in the defconfigs.

Signed-off-by: Frieder Schrempf <frieder.schrempf@exceet.de>
---
 arch/arm/configs/imx_v6_v7_defconfig | 2 +-
 arch/arm/configs/multi_v7_defconfig  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm/configs/imx_v6_v7_defconfig b/arch/arm/configs/imx_v6_v7_defconfig
index f70507a..d07a535 100644
--- a/arch/arm/configs/imx_v6_v7_defconfig
+++ b/arch/arm/configs/imx_v6_v7_defconfig
@@ -111,7 +111,6 @@ CONFIG_MTD_NAND_GPMI_NAND=y
 CONFIG_MTD_NAND_VF610_NFC=y
 CONFIG_MTD_NAND_MXC=y
 CONFIG_MTD_SPI_NOR=y
-CONFIG_SPI_FSL_QUADSPI=y
 CONFIG_MTD_UBI=y
 CONFIG_MTD_UBI_FASTMAP=y
 CONFIG_MTD_UBI_BLOCK=y
@@ -199,6 +198,7 @@ CONFIG_I2C_ALGOPCA=m
 CONFIG_I2C_GPIO=y
 CONFIG_I2C_IMX=y
 CONFIG_SPI=y
+CONFIG_SPI_FSL_QSPI=y
 CONFIG_SPI_GPIO=y
 CONFIG_SPI_IMX=y
 CONFIG_SPI_FSL_DSPI=y
diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig
index 7b2283a..1423ec3 100644
--- a/arch/arm/configs/multi_v7_defconfig
+++ b/arch/arm/configs/multi_v7_defconfig
@@ -191,7 +191,6 @@ CONFIG_MTD_NAND_BRCMNAND=y
 CONFIG_MTD_NAND_VF610_NFC=y
 CONFIG_MTD_NAND_DAVINCI=y
 CONFIG_MTD_SPI_NOR=y
-CONFIG_SPI_FSL_QUADSPI=m
 CONFIG_MTD_UBI=y
 CONFIG_BLK_DEV_LOOP=y
 CONFIG_BLK_DEV_RAM=y
@@ -379,6 +378,7 @@ CONFIG_SPI_BCM2835=y
 CONFIG_SPI_BCM2835AUX=y
 CONFIG_SPI_CADENCE=y
 CONFIG_SPI_DAVINCI=y
+CONFIG_SPI_FSL_QSPI=m
 CONFIG_SPI_GPIO=m
 CONFIG_SPI_FSL_DSPI=m
 CONFIG_SPI_OMAP24XX=y
-- 
2.7.4

^ permalink raw reply related

* [PATCH 06/11] arm64: dts: Reflect change of FSL QSPI driver and remove unused properties
From: Frieder Schrempf @ 2018-05-30 13:14 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1527686082-15142-1-git-send-email-frieder.schrempf@exceet.de>

The FSL QSPI driver was moved to the SPI framework and it now
acts as a SPI controller. Therefore the subnodes need to set
spi-[rx/tx]-bus-width = <4>, so quad mode is used just as before.

Also the properties 'num-cs' and 'bus-num' were never read by the
driver and can be removed.

The property 'fsl,qspi-has-second-chip' is not needed anymore
and will be removed after the old driver was disabled to avoid
breaking fsl-ls1046a-rdb.dts.

Signed-off-by: Frieder Schrempf <frieder.schrempf@exceet.de>
---
 arch/arm64/boot/dts/freescale/fsl-ls1043a-qds.dts  | 3 ++-
 arch/arm64/boot/dts/freescale/fsl-ls1046a-qds.dts  | 4 ++--
 arch/arm64/boot/dts/freescale/fsl-ls1046a-rdb.dts  | 6 ++++--
 arch/arm64/boot/dts/freescale/fsl-ls208xa-qds.dtsi | 4 ++++
 4 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1043a-qds.dts b/arch/arm64/boot/dts/freescale/fsl-ls1043a-qds.dts
index 6341281..31e7b31 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1043a-qds.dts
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1043a-qds.dts
@@ -170,7 +170,6 @@
 };
 
 &qspi {
-	bus-num = <0>;
 	status = "okay";
 
 	qflash0: s25fl128s@0 {
@@ -178,6 +177,8 @@
 		#address-cells = <1>;
 		#size-cells = <1>;
 		spi-max-frequency = <20000000>;
+		spi-rx-bus-width = <4>;
+		spi-tx-bus-width = <4>;
 		reg = <0>;
 	};
 };
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1046a-qds.dts b/arch/arm64/boot/dts/freescale/fsl-ls1046a-qds.dts
index 434383b..dc10105 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1046a-qds.dts
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1046a-qds.dts
@@ -198,8 +198,6 @@
 };
 
 &qspi {
-	num-cs = <2>;
-	bus-num = <0>;
 	status = "okay";
 
 	qflash0: s25fl128s@0 {
@@ -207,6 +205,8 @@
 		#address-cells = <1>;
 		#size-cells = <1>;
 		spi-max-frequency = <20000000>;
+		spi-rx-bus-width = <4>;
+		spi-tx-bus-width = <4>;
 		reg = <0>;
 	};
 };
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1046a-rdb.dts b/arch/arm64/boot/dts/freescale/fsl-ls1046a-rdb.dts
index 5dc2782..1848c33 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1046a-rdb.dts
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1046a-rdb.dts
@@ -136,8 +136,6 @@
 };
 
 &qspi {
-	num-cs = <2>;
-	bus-num = <0>;
 	status = "okay";
 
 	qflash0: s25fs512s@0 {
@@ -145,6 +143,8 @@
 		#address-cells = <1>;
 		#size-cells = <1>;
 		spi-max-frequency = <20000000>;
+		spi-rx-bus-width = <4>;
+		spi-tx-bus-width = <4>;
 		reg = <0>;
 	};
 
@@ -153,6 +153,8 @@
 		#address-cells = <1>;
 		#size-cells = <1>;
 		spi-max-frequency = <20000000>;
+		spi-rx-bus-width = <4>;
+		spi-tx-bus-width = <4>;
 		reg = <1>;
 	};
 };
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls208xa-qds.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls208xa-qds.dtsi
index 1de6188..fc62ed9 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls208xa-qds.dtsi
+++ b/arch/arm64/boot/dts/freescale/fsl-ls208xa-qds.dtsi
@@ -170,6 +170,8 @@
 		#size-cells = <1>;
 		compatible = "st,m25p80";
 		spi-max-frequency = <20000000>;
+		spi-rx-bus-width = <4>;
+		spi-tx-bus-width = <4>;
 		reg = <0>;
 	};
 	flash2: s25fl256s1@2 {
@@ -177,6 +179,8 @@
 		#size-cells = <1>;
 		compatible = "st,m25p80";
 		spi-max-frequency = <20000000>;
+		spi-rx-bus-width = <4>;
+		spi-tx-bus-width = <4>;
 		reg = <2>;
 	};
 };
-- 
2.7.4

^ permalink raw reply related

* [PATCH 05/11] ARM: dts: Reflect change of FSL QSPI driver and remove unused properties
From: Frieder Schrempf @ 2018-05-30 13:14 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1527686082-15142-1-git-send-email-frieder.schrempf@exceet.de>

The FSL QSPI driver was moved to the SPI framework and it now
acts as a SPI controller. Therefore the subnodes need to set
spi-[rx/tx]-bus-width = <4>, so quad mode is used just as before.

Also the properties 'bus-num', 'fsl,spi-num-chipselects' and
'fsl,spi-flash-chipselects' were never read by the driver and
can be removed.

The 'reg' properties are adjusted to reflect what bus and
chipselect the flash is connected to, as the new driver needs
this information.

The property 'fsl,qspi-has-second-chip' is not needed anymore
and will be removed after the old driver was disabled to avoid
breaking ls1021a-moxa-uc-8410a.dts.

Signed-off-by: Frieder Schrempf <frieder.schrempf@exceet.de>
---
 arch/arm/boot/dts/imx6sx-sdb-reva.dts       | 8 ++++++--
 arch/arm/boot/dts/imx6sx-sdb.dts            | 8 ++++++--
 arch/arm/boot/dts/imx6ul-14x14-evk.dtsi     | 2 ++
 arch/arm/boot/dts/ls1021a-moxa-uc-8410a.dts | 5 ++---
 4 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/arch/arm/boot/dts/imx6sx-sdb-reva.dts b/arch/arm/boot/dts/imx6sx-sdb-reva.dts
index e3533e7..1a6f680 100644
--- a/arch/arm/boot/dts/imx6sx-sdb-reva.dts
+++ b/arch/arm/boot/dts/imx6sx-sdb-reva.dts
@@ -131,13 +131,17 @@
 		#size-cells = <1>;
 		compatible = "spansion,s25fl128s", "jedec,spi-nor";
 		spi-max-frequency = <66000000>;
+		spi-rx-bus-width = <4>;
+		spi-tx-bus-width = <4>;
 	};
 
-	flash1: s25fl128s@1 {
-		reg = <1>;
+	flash1: s25fl128s@2 {
+		reg = <2>;
 		#address-cells = <1>;
 		#size-cells = <1>;
 		compatible = "spansion,s25fl128s", "jedec,spi-nor";
 		spi-max-frequency = <66000000>;
+		spi-rx-bus-width = <4>;
+		spi-tx-bus-width = <4>;
 	};
 };
diff --git a/arch/arm/boot/dts/imx6sx-sdb.dts b/arch/arm/boot/dts/imx6sx-sdb.dts
index 6dd9beb..9acfda8 100644
--- a/arch/arm/boot/dts/imx6sx-sdb.dts
+++ b/arch/arm/boot/dts/imx6sx-sdb.dts
@@ -117,15 +117,19 @@
 		#size-cells = <1>;
 		compatible = "micron,n25q256a", "jedec,spi-nor";
 		spi-max-frequency = <29000000>;
+		spi-rx-bus-width = <4>;
+		spi-tx-bus-width = <4>;
 		reg = <0>;
 	};
 
-	flash1: n25q256a@1 {
+	flash1: n25q256a@2 {
 		#address-cells = <1>;
 		#size-cells = <1>;
 		compatible = "micron,n25q256a", "jedec,spi-nor";
 		spi-max-frequency = <29000000>;
-		reg = <1>;
+		spi-rx-bus-width = <4>;
+		spi-tx-bus-width = <4>;
+		reg = <2>;
 	};
 };
 
diff --git a/arch/arm/boot/dts/imx6ul-14x14-evk.dtsi b/arch/arm/boot/dts/imx6ul-14x14-evk.dtsi
index 32a0723..c2c9a2a 100644
--- a/arch/arm/boot/dts/imx6ul-14x14-evk.dtsi
+++ b/arch/arm/boot/dts/imx6ul-14x14-evk.dtsi
@@ -176,6 +176,8 @@
 		#size-cells = <1>;
 		compatible = "micron,n25q256a";
 		spi-max-frequency = <29000000>;
+		spi-rx-bus-width = <4>;
+		spi-tx-bus-width = <4>;
 		reg = <0>;
 	};
 };
diff --git a/arch/arm/boot/dts/ls1021a-moxa-uc-8410a.dts b/arch/arm/boot/dts/ls1021a-moxa-uc-8410a.dts
index d01f64b..6a83f30 100644
--- a/arch/arm/boot/dts/ls1021a-moxa-uc-8410a.dts
+++ b/arch/arm/boot/dts/ls1021a-moxa-uc-8410a.dts
@@ -203,9 +203,6 @@
 };
 
 &qspi {
-	bus-num = <0>;
-	fsl,spi-num-chipselects = <2>;
-	fsl,spi-flash-chipselects = <0>;
 	fsl,qspi-has-second-chip;
 	status = "okay";
 
@@ -214,6 +211,8 @@
 		#address-cells = <1>;
 		#size-cells = <1>;
 		spi-max-frequency = <20000000>;
+		spi-rx-bus-width = <4>;
+		spi-tx-bus-width = <4>;
 		reg = <0>;
 
 		partitions@0 {
-- 
2.7.4

^ permalink raw reply related

* [PATCH v3 1/2] ARM: dma-mapping: Implement arm_dma_iommu_detach_device()
From: Thierry Reding @ 2018-05-30 13:12 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20180530125446.GA1595@ulmo>

On Wed, May 30, 2018 at 02:54:46PM +0200, Thierry Reding wrote:
> On Wed, May 30, 2018 at 10:59:30AM +0100, Robin Murphy wrote:
> > On 30/05/18 09:03, Thierry Reding wrote:
> > > From: Thierry Reding <treding@nvidia.com>
> > > 
> > > Implement this function to enable drivers to detach from any IOMMU
> > > domains that architecture code might have attached them to so that they
> > > can take exclusive control of the IOMMU via the IOMMU API.
> > > 
> > > Signed-off-by: Thierry Reding <treding@nvidia.com>
> > > ---
> > > Changes in v3:
> > > - make API 32-bit ARM specific
> > > - avoid extra local variable
> > > 
> > > Changes in v2:
> > > - fix compilation
> > > 
> > >   arch/arm/include/asm/dma-mapping.h |  3 +++
> > >   arch/arm/mm/dma-mapping-nommu.c    |  4 ++++
> > >   arch/arm/mm/dma-mapping.c          | 16 ++++++++++++++++
> > >   3 files changed, 23 insertions(+)
> > > 
> > > diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h
> > > index 8436f6ade57d..5960e9f3a9d0 100644
> > > --- a/arch/arm/include/asm/dma-mapping.h
> > > +++ b/arch/arm/include/asm/dma-mapping.h
> > > @@ -103,6 +103,9 @@ extern void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
> > >   #define arch_teardown_dma_ops arch_teardown_dma_ops
> > >   extern void arch_teardown_dma_ops(struct device *dev);
> > > +#define arm_dma_iommu_detach_device arm_dma_iommu_detach_device
> > > +extern void arm_dma_iommu_detach_device(struct device *dev);
> > > +
> > >   /* do not use this function in a driver */
> > >   static inline bool is_device_dma_coherent(struct device *dev)
> > >   {
> > > diff --git a/arch/arm/mm/dma-mapping-nommu.c b/arch/arm/mm/dma-mapping-nommu.c
> > > index f448a0663b10..eb781369377b 100644
> > > --- a/arch/arm/mm/dma-mapping-nommu.c
> > > +++ b/arch/arm/mm/dma-mapping-nommu.c
> > > @@ -241,3 +241,7 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
> > >   void arch_teardown_dma_ops(struct device *dev)
> > >   {
> > >   }
> > > +
> > > +void arm_dma_iommu_detach_device(struct device *dev)
> > > +{
> > > +}
> > > diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
> > > index af27f1c22d93..6d8af08b3e7d 100644
> > > --- a/arch/arm/mm/dma-mapping.c
> > > +++ b/arch/arm/mm/dma-mapping.c
> > > @@ -2400,3 +2400,19 @@ void arch_teardown_dma_ops(struct device *dev)
> > >   	arm_teardown_iommu_dma_ops(dev);
> > >   }
> > > +
> > > +void arm_dma_iommu_detach_device(struct device *dev)
> > > +{
> > > +#ifdef CONFIG_ARM_DMA_USE_IOMMU
> > > +	struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
> > > +
> > > +	if (!mapping)
> > > +		return;
> > > +
> > > +	arm_iommu_release_mapping(mapping);
> > 
> > Potentially freeing the mapping before you try to operate on it is never the
> > best idea. Plus arm_iommu_detach_device() already releases a reference
> > appropriately anyway, so it's a double-free.
> 
> But the reference released by arm_iommu_detach_device() is to balance
> out the reference acquired by arm_iommu_attach_device(), isn't it? In
> the above, the arm_iommu_release_mapping() is supposed to drop the
> final reference which was obtained by arm_iommu_create_mapping(). The
> mapping shouldn't go away irrespective of the order in which these
> will be called.

Going over the DMA/IOMMU code I just remembered that I drew inspiration
from arm_teardown_iommu_dma_ops() for the initial proposal which also
calls both arm_iommu_detach_device() and arm_iommu_release_mapping().
That said, one other possibility to implement this would be to export
the 32-bit and 64-bit ARM implementations of arch_teardown_dma_ops()
and use that instead. linux/dma-mapping.h implements a stub for
architectures that don't provide one, so it should work without any
#ifdef guards.

That combined with the set_dma_ops() fix in arm_iommu_detach_device()
should fix this pretty nicely.

Thierry
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 833 bytes
Desc: not available
URL: <http://lists.infradead.org/pipermail/linux-arm-kernel/attachments/20180530/64c953e2/attachment.sig>

^ permalink raw reply

* [PATCH] ARM: debug: Add Iproc UART3 debug addresses
From: Baruch Siach @ 2018-05-30 13:07 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <CAJiuCcdJi6Ln3+Eip+Nc=-4aZu4bsTfT-MPVZisec7e10jO4XQ@mail.gmail.com>

Hi Clément,

On Wed, May 30, 2018 at 02:55:28PM +0200, Clément Péron wrote:
> On Wed, 30 May 2018 at 14:47, Baruch Siach <baruch@tkos.co.il> wrote:
> > On Wed, May 30, 2018 at 02:29:22PM +0200, Clément Péron wrote:
> > > From: Clément Peron <clement.peron@devialet.com>
> > >
> > > Broadcom Iproc SoCs typically use the UART3 for
> > > debug/console, provide a known good location for that.
> > >
> > > Signed-off-by: Clément Peron <clement.peron@devialet.com>
> > > ---
> > >  arch/arm/Kconfig.debug | 12 +++++++++++-
> > >  1 file changed, 11 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug
> > > index 199ebc1c4538..fa6fa1dae94d 100644
> > > --- a/arch/arm/Kconfig.debug
> > > +++ b/arch/arm/Kconfig.debug
> > > @@ -207,6 +207,14 @@ choice
> > >               depends on ARCH_BCM_HR2
> > >               select DEBUG_UART_8250
> > >
> > > +     config DEBUG_BCM_IPROC_UART3
> > > +             bool "Kernel low-level debugging on BCM IPROC UART3"
> > > +             depends on ARCH_BCM_CYGNUS
> > > +             select DEBUG_UART_8250
> > > +             help
> > > +               Say Y here if you want the debug print routines to
> direct
> > > +               their output to the third serial port on these devices.
> > > +
> > >       config DEBUG_BCM_KONA_UART
> > >               bool "Kernel low-level debugging messages via BCM KONA
> UART"
> > >               depends on ARCH_BCM_MOBILE
> > > @@ -1564,6 +1572,7 @@ config DEBUG_UART_PHYS
> > >       default 0x20068000 if DEBUG_RK29_UART2 || DEBUG_RK3X_UART3
> > >       default 0x20201000 if DEBUG_BCM2835
> > >       default 0x3f201000 if DEBUG_BCM2836
> > > +     default 0x18023000 if DEBUG_BCM_IPROC_UART3
> 
> > Entries are sorted by the address value. Except that DEBUG_BCM_KONA_UART
> > should be listed above DEBUG_BCM2836.
> 
> Indeed, can I fix the DEBUG_BCM_KONA_UART entry in the same commit ?

That is an unrelated fix, so a separate patch is preferred.

baruch

> > >       default 0x3e000000 if DEBUG_BCM_KONA_UART
> > >       default 0x4000e400 if DEBUG_LL_UART_EFM32
> > >       default 0x40028000 if DEBUG_AT91_SAMV7_USART1
> > > @@ -1730,6 +1739,7 @@ config DEBUG_UART_VIRT
> > >       default 0xfe018000 if DEBUG_MMP_UART3
> > >       default 0xfe100000 if DEBUG_IMX23_UART || DEBUG_IMX28_UART
> > >       default 0xfe230000 if DEBUG_PICOXCELL_UART
> > > +     default 0xf1023000 if DEBUG_BCM_IPROC_UART3
> 
> > Same here.
> 
> > >       default 0xfe300000 if DEBUG_BCM_KONA_UART
> > >       default 0xfe800000 if ARCH_IOP32X
> > >       default 0xfeb00000 if DEBUG_HI3620_UART || DEBUG_HIX5HD2_UART
> > > @@ -1791,7 +1801,7 @@ config DEBUG_UART_8250_WORD
> > >               DEBUG_KEYSTONE_UART0 || DEBUG_KEYSTONE_UART1 || \
> > >               DEBUG_ALPINE_UART0 || \
> > >               DEBUG_DAVINCI_DMx_UART0 || DEBUG_DAVINCI_DA8XX_UART1 || \
> > > -             DEBUG_DAVINCI_DA8XX_UART2 || \
> > > +             DEBUG_DAVINCI_DA8XX_UART2 || DEBUG_BCM_IPROC_UART3 || \
> > >               DEBUG_BCM_KONA_UART || DEBUG_RK32_UART2

-- 
     http://baruch.siach.name/blog/                  ~. .~   Tk Open Systems
=}------------------------------------------------ooO--U--Ooo------------{=
   - baruch at tkos.co.il - tel: +972.2.679.5364, http://www.tkos.co.il -

^ permalink raw reply

* [PATCH 07/12] dt-bindings: tc358754: add DT bindings
From: Andrzej Hajda @ 2018-05-30 13:07 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1928297.lKUBOH9NhR@avalon>

On 30.05.2018 14:35, Laurent Pinchart wrote:
> Hi Andrzej,
>
> On Wednesday, 30 May 2018 12:59:12 EEST Andrzej Hajda wrote:
>> On 28.05.2018 12:18, Laurent Pinchart wrote:
>>> On Monday, 28 May 2018 12:47:11 EEST Maciej Purski wrote:
>>>> The patch adds bindings to Toshiba DSI/LVDS bridge TC358764.
>>>> Bindings describe power supplies, reset gpio and video interfaces.
>>>>
>>>> Signed-off-by: Andrzej Hajda <a.hajda@samsung.com>
>>>> Signed-off-by: Maciej Purski <m.purski@samsung.com>
>>>> ---
>>>>
>>>>  .../bindings/display/bridge/toshiba,tc358764.txt   | 42 ++++++++++++++++
>>>>  1 file changed, 42 insertions(+)
>>>>  create mode 100644
>>>>
>>>> Documentation/devicetree/bindings/display/bridge/toshiba,tc358764.txt
>>>>
>>>> diff --git
>>>> a/Documentation/devicetree/bindings/display/bridge/toshiba,tc358764.txt
>>>> b/Documentation/devicetree/bindings/display/bridge/toshiba,tc358764.txt
>>>> new
>>>> file mode 100644
>>>> index 0000000..d09bdc2
>>>> --- /dev/null
>>>> +++
>>>> b/Documentation/devicetree/bindings/display/bridge/toshiba,tc358764.txt
>>>> @@ -0,0 +1,42 @@
>>>> +TC358764 MIPI-DSI to LVDS panel bridge
>>>> +
>>>> +Required properties:
>>>> +  - compatible: "toshiba,tc358764"
>>>> +  - reg: the virtual channel number of a DSI peripheral
>>>> +  - vddc-supply: core voltage supply
>>>> +  - vddio-supply: I/O voltage supply
>>>> +  - vddmipi-supply: MIPI voltage supply
>>>> +  - vddlvds133-supply: LVDS1 3.3V voltage supply
>>>> +  - vddlvds112-supply: LVDS1 1.2V voltage supply
>>> That's a lot of power supplies. Could some of them be merged together ?
>>> See https://patchwork.freedesktop.org/patch/216058/ for an earlier
>>> discussion on the same subject.
>> Specs says about 3 supply voltage values:
>> - 1.2V - digital core, DSI-RX PHY
>> - 1.8-3.3V - digital I/O
>> - 3.3V - LVDS-TX PHY
>>
>> So I guess it should be minimal number of supplies. Natural candidates:
>>
>> - vddc-supply: core voltage supply, 1.2V
>> - vddio-supply: I/O voltage supply, 1.8V or 3.3V
>> - vddlvds-supply: LVDS1/2 voltage supply, 3.3V
>>
>> I have changed name of the latest supply to be more consistent with
>> other supplies, and changed 1.8-3.3 (which incorrectly suggest voltage
>> range), to more precise voltage alternative.
> This looks fine to me.
>
>>>> +  - reset-gpios: a GPIO spec for the reset pin
>>>> +
>>>> +The device node can contain zero to two 'port' child nodes, each with
>>>> one
>>>> +child
>>>> +'endpoint' node, according to the bindings defined in [1].
>>>> +The following are properties specific to those nodes.
>>>> +
>>>> +port:
>>>> +  - reg: (required) can be 0 for DSI port or 1 for LVDS port;
>>> This seems pretty vague to me. It could be read as meaning that ports are
>>> completely optional, and that the port number you list can be used, but
>>> that something else could be used too.
>>>
>>> Let's make the port nodes mandatory. I propose the following.
>>>
>>> Required nodes:
>>>
>>> The TC358764 has DSI and LVDS ports whose connections are described using
>>> the OF graph bindings defined in
>>> Documentation/devicetree/bindings/graph.txt. The device node must contain
>>> one 'port' child node per DSI and LVDS port. The port nodes are numbered
>>> as follows.
>>>
>>>   Port                  Number
>>> -------------------------------------------------------------------
>>>   DSI Input             0
>>>   LVDS Output           1
>>>
>>> Each port node must contain endpoint nodes describing the hardware
>>> connections.
>> Since the bridge is controlled via DSI bus, DSI input port is not necessary.
> I don't agree with this. Regardless of how the bridge is controlled, I think 
> we should always use ports to describe the data connections. Otherwise it 
> would get more complicated for display controller drivers to use different 
> types of bridges.


It was discussed already, and DT guideline is to skip graphs in simple
case of parent/child nodes, see for example [1].

[1]: https://marc.info/?l=dri-devel&m=148354108702517&w=2

Regards
Andrzej

>>>> +[1]: Documentation/devicetree/bindings/media/video-interfaces.txt
>>>> +
>>>> +Example:
>>>> +
>>>> +	bridge@0 {
>>>> +		reg = <0>;
>>>> +		compatible = "toshiba,tc358764";
>>>> +		vddc-supply = <&vcc_1v2_reg>;
>>>> +		vddio-supply = <&vcc_1v8_reg>;
>>>> +		vddmipi-supply = <&vcc_1v2_reg>;
>>>> +		vddlvds133-supply = <&vcc_3v3_reg>;
>>>> +		vddlvds112-supply = <&vcc_1v2_reg>;
>>>> +		reset-gpios = <&gpd1 6 GPIO_ACTIVE_LOW>;
>>>> +		#address-cells = <1>;
>>>> +		#size-cells = <0>;
>>>> +		port@1 {
>>>> +			reg = <1>;
>>>> +			lvds_ep: endpoint {
>>>> +				remote-endpoint = <&panel_ep>;
>>>> +			};
>>>> +		};
>>>> +	};

^ permalink raw reply

* [PATCH v3 2/2] drm/nouveau: tegra: Detach from ARM DMA/IOMMU mapping
From: Thierry Reding @ 2018-05-30 13:00 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <7960e4fc-f680-f8d1-0c5a-3ff1e13b3154@arm.com>

On Wed, May 30, 2018 at 11:30:25AM +0100, Robin Murphy wrote:
> On 30/05/18 09:03, Thierry Reding wrote:
> > From: Thierry Reding <treding@nvidia.com>
> > 
> > Depending on the kernel configuration, early ARM architecture setup code
> > may have attached the GPU to a DMA/IOMMU mapping that transparently uses
> > the IOMMU to back the DMA API. Tegra requires special handling for IOMMU
> > backed buffers (a special bit in the GPU's MMU page tables indicates the
> > memory path to take: via the SMMU or directly to the memory controller).
> > Transparently backing DMA memory with an IOMMU prevents Nouveau from
> > properly handling such memory accesses and causes memory access faults.
> > 
> > As a side-note: buffers other than those allocated in instance memory
> > don't need to be physically contiguous from the GPU's perspective since
> > the GPU can map them into contiguous buffers using its own MMU. Mapping
> > these buffers through the IOMMU is unnecessary and will even lead to
> > performance degradation because of the additional translation. One
> > exception to this are compressible buffers which need large pages. In
> > order to enable these large pages, multiple small pages will have to be
> > combined into one large (I/O virtually contiguous) mapping via the
> > IOMMU. However, that is a topic outside the scope of this fix and isn't
> > currently supported. An implementation will want to explicitly create
> > these large pages in the Nouveau driver, so detaching from a DMA/IOMMU
> > mapping would still be required.
> 
> I wonder if it might make sense to have a hook in iommu_attach_device() to
> notify the arch DMA API code when moving devices between unmanaged and DMA
> ops domains? That seems like it might be the most low-impact way to address
> the overall problem long-term.
> 
> > Signed-off-by: Thierry Reding <treding@nvidia.com>
> > ---
> > Changes in v3:
> > - clarify the use of IOMMU mapping for compressible buffers
> > - squash multiple patches into this
> > 
> >   drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c | 5 +++++
> >   1 file changed, 5 insertions(+)
> > 
> > diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
> > index 78597da6313a..d0538af1b967 100644
> > --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
> > +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
> > @@ -105,6 +105,11 @@ nvkm_device_tegra_probe_iommu(struct nvkm_device_tegra *tdev)
> >   	unsigned long pgsize_bitmap;
> >   	int ret;
> > +#if IS_ENABLED(CONFIG_ARM)
> 
> Wouldn't CONFIG_ARM_DMA_USE_IOMMU be even more appropriate?

Not necessarily. arm_dma_iommu_detach_device() is always defined on ARM,
only with CONFIG_ARM_DMA_USE_IOMMU=n it will be empty. So this check is
a guard to make sure we don't call the function when it isn't available,
but it may still not do anything.

> 
> > +	/* make sure we can use the IOMMU exclusively */
> > +	arm_dma_iommu_detach_device(dev);
> 
> As before, I would just use the existing infrastructure the same way the
> Exynos DRM driver currently does in __exynos_iommu_attach() (albeit without
> then reattaching to another DMA ops mapping).

That's pretty much what I initially did, and it was shot down by
Christoph. As I said earlier, at this point I don't really care what
color the shed will be. Can you and Christoph come to an agreement
on what it should be?

Thierry
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 833 bytes
Desc: not available
URL: <http://lists.infradead.org/pipermail/linux-arm-kernel/attachments/20180530/b6e5bbcc/attachment.sig>

^ permalink raw reply

* [PATCH] ARM: debug: Add Iproc UART3 debug addresses
From: Clément Péron @ 2018-05-30 12:55 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20180530123809.khyzehg62ct6jzws@sapphire.tkos.co.il>

Hi Baruch,

On Wed, 30 May 2018 at 14:47, Baruch Siach <baruch@tkos.co.il> wrote:

> Hi Clément,

> On Wed, May 30, 2018 at 02:29:22PM +0200, Clément Péron wrote:
> > From: Clément Peron <clement.peron@devialet.com>
> >
> > Broadcom Iproc SoCs typically use the UART3 for
> > debug/console, provide a known good location for that.
> >
> > Signed-off-by: Clément Peron <clement.peron@devialet.com>
> > ---
> >  arch/arm/Kconfig.debug | 12 +++++++++++-
> >  1 file changed, 11 insertions(+), 1 deletion(-)
> >
> > diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug
> > index 199ebc1c4538..fa6fa1dae94d 100644
> > --- a/arch/arm/Kconfig.debug
> > +++ b/arch/arm/Kconfig.debug
> > @@ -207,6 +207,14 @@ choice
> >               depends on ARCH_BCM_HR2
> >               select DEBUG_UART_8250
> >
> > +     config DEBUG_BCM_IPROC_UART3
> > +             bool "Kernel low-level debugging on BCM IPROC UART3"
> > +             depends on ARCH_BCM_CYGNUS
> > +             select DEBUG_UART_8250
> > +             help
> > +               Say Y here if you want the debug print routines to direct
> > +               their output to the third serial port on these devices.
> > +
> >       config DEBUG_BCM_KONA_UART
> >               bool "Kernel low-level debugging messages via BCM KONA UART"
> >               depends on ARCH_BCM_MOBILE
> > @@ -1564,6 +1572,7 @@ config DEBUG_UART_PHYS
> >       default 0x20068000 if DEBUG_RK29_UART2 || DEBUG_RK3X_UART3
> >       default 0x20201000 if DEBUG_BCM2835
> >       default 0x3f201000 if DEBUG_BCM2836
> > +     default 0x18023000 if DEBUG_BCM_IPROC_UART3

> Entries are sorted by the address value. Except that DEBUG_BCM_KONA_UART
> should be listed above DEBUG_BCM2836.

Indeed, can I fix the DEBUG_BCM_KONA_UART entry in the same commit ?


> >       default 0x3e000000 if DEBUG_BCM_KONA_UART
> >       default 0x4000e400 if DEBUG_LL_UART_EFM32
> >       default 0x40028000 if DEBUG_AT91_SAMV7_USART1
> > @@ -1730,6 +1739,7 @@ config DEBUG_UART_VIRT
> >       default 0xfe018000 if DEBUG_MMP_UART3
> >       default 0xfe100000 if DEBUG_IMX23_UART || DEBUG_IMX28_UART
> >       default 0xfe230000 if DEBUG_PICOXCELL_UART
> > +     default 0xf1023000 if DEBUG_BCM_IPROC_UART3

> Same here.

> >       default 0xfe300000 if DEBUG_BCM_KONA_UART
> >       default 0xfe800000 if ARCH_IOP32X
> >       default 0xfeb00000 if DEBUG_HI3620_UART || DEBUG_HIX5HD2_UART
> > @@ -1791,7 +1801,7 @@ config DEBUG_UART_8250_WORD
> >               DEBUG_KEYSTONE_UART0 || DEBUG_KEYSTONE_UART1 || \
> >               DEBUG_ALPINE_UART0 || \
> >               DEBUG_DAVINCI_DMx_UART0 || DEBUG_DAVINCI_DA8XX_UART1 || \
> > -             DEBUG_DAVINCI_DA8XX_UART2 || \
> > +             DEBUG_DAVINCI_DA8XX_UART2 || DEBUG_BCM_IPROC_UART3 || \
> >               DEBUG_BCM_KONA_UART || DEBUG_RK32_UART2

> baruch

> --
>       http://baruch.siach.name/blog/                  ~. .~   Tk Open Systems
> =}------------------------------------------------ooO--U--Ooo------------{=
>     - baruch at tkos.co.il - tel: +972.2.679.5364, http://www.tkos.co.il -

Clement

^ permalink raw reply

* [PATCH v3 1/2] ARM: dma-mapping: Implement arm_dma_iommu_detach_device()
From: Thierry Reding @ 2018-05-30 12:54 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <eee02391-aa25-da84-f98a-b5fed6c69599@arm.com>

On Wed, May 30, 2018 at 10:59:30AM +0100, Robin Murphy wrote:
> On 30/05/18 09:03, Thierry Reding wrote:
> > From: Thierry Reding <treding@nvidia.com>
> > 
> > Implement this function to enable drivers to detach from any IOMMU
> > domains that architecture code might have attached them to so that they
> > can take exclusive control of the IOMMU via the IOMMU API.
> > 
> > Signed-off-by: Thierry Reding <treding@nvidia.com>
> > ---
> > Changes in v3:
> > - make API 32-bit ARM specific
> > - avoid extra local variable
> > 
> > Changes in v2:
> > - fix compilation
> > 
> >   arch/arm/include/asm/dma-mapping.h |  3 +++
> >   arch/arm/mm/dma-mapping-nommu.c    |  4 ++++
> >   arch/arm/mm/dma-mapping.c          | 16 ++++++++++++++++
> >   3 files changed, 23 insertions(+)
> > 
> > diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h
> > index 8436f6ade57d..5960e9f3a9d0 100644
> > --- a/arch/arm/include/asm/dma-mapping.h
> > +++ b/arch/arm/include/asm/dma-mapping.h
> > @@ -103,6 +103,9 @@ extern void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
> >   #define arch_teardown_dma_ops arch_teardown_dma_ops
> >   extern void arch_teardown_dma_ops(struct device *dev);
> > +#define arm_dma_iommu_detach_device arm_dma_iommu_detach_device
> > +extern void arm_dma_iommu_detach_device(struct device *dev);
> > +
> >   /* do not use this function in a driver */
> >   static inline bool is_device_dma_coherent(struct device *dev)
> >   {
> > diff --git a/arch/arm/mm/dma-mapping-nommu.c b/arch/arm/mm/dma-mapping-nommu.c
> > index f448a0663b10..eb781369377b 100644
> > --- a/arch/arm/mm/dma-mapping-nommu.c
> > +++ b/arch/arm/mm/dma-mapping-nommu.c
> > @@ -241,3 +241,7 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
> >   void arch_teardown_dma_ops(struct device *dev)
> >   {
> >   }
> > +
> > +void arm_dma_iommu_detach_device(struct device *dev)
> > +{
> > +}
> > diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
> > index af27f1c22d93..6d8af08b3e7d 100644
> > --- a/arch/arm/mm/dma-mapping.c
> > +++ b/arch/arm/mm/dma-mapping.c
> > @@ -2400,3 +2400,19 @@ void arch_teardown_dma_ops(struct device *dev)
> >   	arm_teardown_iommu_dma_ops(dev);
> >   }
> > +
> > +void arm_dma_iommu_detach_device(struct device *dev)
> > +{
> > +#ifdef CONFIG_ARM_DMA_USE_IOMMU
> > +	struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
> > +
> > +	if (!mapping)
> > +		return;
> > +
> > +	arm_iommu_release_mapping(mapping);
> 
> Potentially freeing the mapping before you try to operate on it is never the
> best idea. Plus arm_iommu_detach_device() already releases a reference
> appropriately anyway, so it's a double-free.

But the reference released by arm_iommu_detach_device() is to balance
out the reference acquired by arm_iommu_attach_device(), isn't it? In
the above, the arm_iommu_release_mapping() is supposed to drop the
final reference which was obtained by arm_iommu_create_mapping(). The
mapping shouldn't go away irrespective of the order in which these
will be called.

> > +	arm_iommu_detach_device(dev);
> > +
> > +	set_dma_ops(dev, arm_get_dma_map_ops(dev->archdata.dma_coherent));
> > +#endif
> > +}
> > +EXPORT_SYMBOL(arm_dma_iommu_detach_device);
> 
> I really don't see why we need an extra function that essentially just
> duplicates arm_iommu_detach_device(). The only real difference here is that
> here you reset the DMA ops more appropriately, but I think the existing
> function should be fixed to do that anyway, since set_dma_ops(dev, NULL) now
> just behaves as an unconditional fallback to the noncoherent arm_dma_ops,
> which clearly isn't always right.

The idea behind making this an extra function is that we can call it
unconditionally and it will do the right things. Granted, that already
doesn't quite work as elegantly anymore as I had hoped since this is
now an ARM specific function, so we need an #ifdef guard anyway.

I don't care very strongly either way, so if you and Christoph can both
agree that we just want arm_iommu_detach_device() to call the proper
variant of set_dma_ops(), that's fine with me, too.

Thierry
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 833 bytes
Desc: not available
URL: <http://lists.infradead.org/pipermail/linux-arm-kernel/attachments/20180530/38fbec80/attachment-0001.sig>

^ permalink raw reply

* [PATCH V2] soc: imx: gpcv2: correct PGC offset
From: Fabio Estevam @ 2018-05-30 12:50 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1527643842-17643-1-git-send-email-Anson.Huang@nxp.com>

Hi Anson,

On Tue, May 29, 2018 at 10:30 PM, Anson Huang <Anson.Huang@nxp.com> wrote:
> Correct MIPI/PCIe/USB_HSIC's PGC offset based on
> design RTL, the values in the Reference Manual
> (Rev. 1, 01/2018 and the older ones) are incorrect.
>
> The correct offset values should be as below:
>
> 0x800 ~ 0x83F: PGC for core0 of A7 platform;
> 0x840 ~ 0x87F: PGC for core1 of A7 platform;
> 0x880 ~ 0x8BF: PGC for SCU of A7 platform;
> 0xA00 ~ 0xA3F: PGC for fastmix/megamix;
> 0xC00 ~ 0xC3F: PGC for MIPI PHY;
> 0xC40 ~ 0xC7F: PGC for PCIe_PHY;
> 0xC80 ~ 0xCBF: PGC for USB OTG1 PHY;
> 0xCC0 ~ 0xCFF: PGC for USB OTG2 PHY;
> 0xD00 ~ 0xD3F: PGC for USB HSIC PHY;
>
> Signed-off-by: Anson Huang <Anson.Huang@nxp.com>
> Acked-by: Andrey Smirnov <andrew.smirnov@gmail.com>

Thanks for the fix:

Reviewed-by: Fabio Estevam <fabio.estevam@nxp.com>

^ permalink raw reply

* [PATCH v2 6/6] KVM: arm/arm64: Remove unnecessary CMOs when creating HYP page tables
From: Marc Zyngier @ 2018-05-30 12:47 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20180530124706.25284-1-marc.zyngier@arm.com>

There is no need to perform cache maintenance operations when
creating the HYP page tables if we have the multiprocessing
extensions. ARMv7 mandates them with the virtualization support,
and ARMv8 just mandates them unconditionally.

Let's remove these operations.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 virt/kvm/arm/mmu.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index ad1980d2118a..ccdf544d44c0 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -607,7 +607,6 @@ static void create_hyp_pte_mappings(pmd_t *pmd, unsigned long start,
 		pte = pte_offset_kernel(pmd, addr);
 		kvm_set_pte(pte, pfn_pte(pfn, prot));
 		get_page(virt_to_page(pte));
-		kvm_flush_dcache_to_poc(pte, sizeof(*pte));
 		pfn++;
 	} while (addr += PAGE_SIZE, addr != end);
 }
@@ -634,7 +633,6 @@ static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start,
 			}
 			kvm_pmd_populate(pmd, pte);
 			get_page(virt_to_page(pmd));
-			kvm_flush_dcache_to_poc(pmd, sizeof(*pmd));
 		}
 
 		next = pmd_addr_end(addr, end);
@@ -667,7 +665,6 @@ static int create_hyp_pud_mappings(pgd_t *pgd, unsigned long start,
 			}
 			kvm_pud_populate(pud, pmd);
 			get_page(virt_to_page(pud));
-			kvm_flush_dcache_to_poc(pud, sizeof(*pud));
 		}
 
 		next = pud_addr_end(addr, end);
@@ -704,7 +701,6 @@ static int __create_hyp_mappings(pgd_t *pgdp, unsigned long ptrs_per_pgd,
 			}
 			kvm_pgd_populate(pgd, pud);
 			get_page(virt_to_page(pgd));
-			kvm_flush_dcache_to_poc(pgd, sizeof(*pgd));
 		}
 
 		next = pgd_addr_end(addr, end);
-- 
2.17.1

^ permalink raw reply related

* [PATCH v2 5/6] KVM: arm/arm64: Stop using {pmd,pud,pgd}_populate
From: Marc Zyngier @ 2018-05-30 12:47 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20180530124706.25284-1-marc.zyngier@arm.com>

The usage of the {pmd,pud,pgd}_populate accessors has always been a
bit weird in KVM. We don't have a struct mm to pass (and neither does
the kernel most of the time, but still...), and the 32bit code has
all kinds of cache maintenance that doesn't make sense on ARMv7+ when
MP extensions are mandatory (which is the case when the VEs are
present).

Let's bite the bullet and provide our own implementations. The
only bit of architectural code left has to do with building the table
entry itself (arm64 having up to 52bit PA, arm lacking PUD level).

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 arch/arm/include/asm/kvm_mmu.h   | 4 ++++
 arch/arm64/include/asm/kvm_mmu.h | 7 +++++++
 virt/kvm/arm/mmu.c               | 8 +++++---
 3 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 468ff945efa0..a94ef9833bd3 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -75,6 +75,10 @@ phys_addr_t kvm_get_idmap_vector(void);
 int kvm_mmu_init(void);
 void kvm_clear_hyp_idmap(void);
 
+#define kvm_mk_pmd(ptep)	__pmd(__pa(ptep) | PMD_TYPE_TABLE)
+#define kvm_mk_pud(pmdp)	__pud(__pa(pmdp) | PMD_TYPE_TABLE)
+#define kvm_mk_pgd(pudp)	({ BUILD_BUG(); 0; })
+
 static inline pte_t kvm_s2pte_mkwrite(pte_t pte)
 {
 	pte_val(pte) |= L_PTE_S2_RDWR;
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 26c89b63f604..22c9f7cfdf93 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -170,6 +170,13 @@ phys_addr_t kvm_get_idmap_vector(void);
 int kvm_mmu_init(void);
 void kvm_clear_hyp_idmap(void);
 
+#define kvm_mk_pmd(ptep)					\
+	__pmd(__phys_to_pmd_val(__pa(ptep) | PMD_TYPE_TABLE))
+#define kvm_mk_pud(pmdp)					\
+	__pud(__phys_to_pud_val(__pa(pmdp) | PMD_TYPE_TABLE))
+#define kvm_mk_pgd(pudp)					\
+	__pgd(__phys_to_pgd_val(__pa(pudp) | PUD_TYPE_TABLE))
+
 static inline pte_t kvm_s2pte_mkwrite(pte_t pte)
 {
 	pte_val(pte) |= PTE_S2_RDWR;
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index c9ed239c0840..ad1980d2118a 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -191,17 +191,19 @@ static inline void kvm_set_pmd(pmd_t *pmdp, pmd_t new_pmd)
 
 static inline void kvm_pmd_populate(pmd_t *pmdp, pte_t *ptep)
 {
-	pmd_populate_kernel(NULL, pmdp, ptep);
+	kvm_set_pmd(pmdp, kvm_mk_pmd(ptep));
 }
 
 static inline void kvm_pud_populate(pud_t *pudp, pmd_t *pmdp)
 {
-	pud_populate(NULL, pudp, pmdp);
+	WRITE_ONCE(*pudp, kvm_mk_pud(pmdp));
+	dsb(ishst);
 }
 
 static inline void kvm_pgd_populate(pgd_t *pgdp, pud_t *pudp)
 {
-	pgd_populate(NULL, pgdp, pudp);
+	WRITE_ONCE(*pgdp, kvm_mk_pgd(pudp));
+	dsb(ishst);
 }
 
 /*
-- 
2.17.1

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox