* [PATCH v3 2/3] arm64: Increase ARCH_DMA_MINALIGN to 128
2018-05-11 13:55 [PATCH v3 0/3] arm64: Revert L1_CACHE_SHIFT back to 6 (64-byte cache line size) Catalin Marinas
2018-05-11 13:55 ` [PATCH v3 1/3] Revert "arm64: Increase the max granular size" Catalin Marinas
@ 2018-05-11 13:55 ` Catalin Marinas
2018-05-11 13:55 ` [PATCH v3 3/3] arm64: Force swiotlb bounce buffering for non-coherent DMA with large CWG Catalin Marinas
2 siblings, 0 replies; 6+ messages in thread
From: Catalin Marinas @ 2018-05-11 13:55 UTC (permalink / raw)
To: linux-arm-kernel
This patch increases ARCH_DMA_MINALIGN to 128 bytes so that it covers the
currently known Cache Writeback Granule (CTR_EL0.CWG) values on arm64, and
moves the fallback in cache_line_size() from L1_CACHE_BYTES to this constant.
Cc: Will Deacon <will.deacon@arm.com>
Cc: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
arch/arm64/include/asm/cache.h | 4 ++--
arch/arm64/kernel/cpufeature.c | 9 ++-------
2 files changed, 4 insertions(+), 9 deletions(-)
diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h
index 1dd2c2db0010..5df5cfe1c143 100644
--- a/arch/arm64/include/asm/cache.h
+++ b/arch/arm64/include/asm/cache.h
@@ -43,7 +43,7 @@
* cache before the transfer is done, causing old data to be seen by
* the CPU.
*/
-#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
+#define ARCH_DMA_MINALIGN (128)
#ifndef __ASSEMBLY__
@@ -77,7 +77,7 @@ static inline u32 cache_type_cwg(void)
static inline int cache_line_size(void)
{
u32 cwg = cache_type_cwg();
- return cwg ? 4 << cwg : L1_CACHE_BYTES;
+ return cwg ? 4 << cwg : ARCH_DMA_MINALIGN;
}
#endif /* __ASSEMBLY__ */
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 9d1b06d67c53..fbee8c17a4e6 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -1606,7 +1606,6 @@ static void __init setup_system_capabilities(void)
void __init setup_cpu_features(void)
{
u32 cwg;
- int cls;
setup_system_capabilities();
mark_const_caps_ready();
@@ -1627,13 +1626,9 @@ void __init setup_cpu_features(void)
* Check for sane CTR_EL0.CWG value.
*/
cwg = cache_type_cwg();
- cls = cache_line_size();
if (!cwg)
- pr_warn("No Cache Writeback Granule information, assuming cache line size %d\n",
- cls);
- if (L1_CACHE_BYTES < cls)
- pr_warn("L1_CACHE_BYTES smaller than the Cache Writeback Granule (%d < %d)\n",
- L1_CACHE_BYTES, cls);
+ pr_warn("No Cache Writeback Granule information, assuming %d\n",
+ ARCH_DMA_MINALIGN);
}
static bool __maybe_unused
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH v3 3/3] arm64: Force swiotlb bounce buffering for non-coherent DMA with large CWG
2018-05-11 13:55 [PATCH v3 0/3] arm64: Revert L1_CACHE_SHIFT back to 6 (64-byte cache line size) Catalin Marinas
2018-05-11 13:55 ` [PATCH v3 1/3] Revert "arm64: Increase the max granular size" Catalin Marinas
2018-05-11 13:55 ` [PATCH v3 2/3] arm64: Increase ARCH_DMA_MINALIGN to 128 Catalin Marinas
@ 2018-05-11 13:55 ` Catalin Marinas
2018-05-12 12:38 ` Christoph Hellwig
2 siblings, 1 reply; 6+ messages in thread
From: Catalin Marinas @ 2018-05-11 13:55 UTC (permalink / raw)
To: linux-arm-kernel
On systems with a Cache Writeback Granule (CTR_EL0.CWG) greater than
ARCH_DMA_MINALIGN, DMA cache maintenance on sub-CWG ranges is not safe,
leading to data corruption. If such a configuration is detected, the
kernel will force swiotlb bounce buffering for all non-coherent devices.
Cc: Will Deacon <will.deacon@arm.com>
Cc: Robin Murphy <robin.murphy@arm.com>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
arch/arm64/Kconfig | 1 +
arch/arm64/include/asm/dma-direct.h | 43 +++++++++++++++++++++++++++++++++++++
arch/arm64/mm/dma-mapping.c | 17 +++++++++++++++
arch/arm64/mm/init.c | 3 ++-
4 files changed, 63 insertions(+), 1 deletion(-)
create mode 100644 arch/arm64/include/asm/dma-direct.h
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index eb2cf4938f6d..ef56b2478205 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -17,6 +17,7 @@ config ARM64
select ARCH_HAS_GIGANTIC_PAGE if (MEMORY_ISOLATION && COMPACTION) || CMA
select ARCH_HAS_KCOV
select ARCH_HAS_MEMBARRIER_SYNC_CORE
+ select ARCH_HAS_PHYS_TO_DMA
select ARCH_HAS_SET_MEMORY
select ARCH_HAS_SG_CHAIN
select ARCH_HAS_STRICT_KERNEL_RWX
diff --git a/arch/arm64/include/asm/dma-direct.h b/arch/arm64/include/asm/dma-direct.h
new file mode 100644
index 000000000000..0c18a4d56702
--- /dev/null
+++ b/arch/arm64/include/asm/dma-direct.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_DMA_DIRECT_H
+#define __ASM_DMA_DIRECT_H
+
+#include <linux/jump_label.h>
+#include <linux/swiotlb.h>
+
+#include <asm/cache.h>
+
+DECLARE_STATIC_KEY_FALSE(swiotlb_noncoherent_bounce);
+
+static inline dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr)
+{
+ dma_addr_t dev_addr = (dma_addr_t)paddr;
+
+ return dev_addr - ((dma_addr_t)dev->dma_pfn_offset << PAGE_SHIFT);
+}
+
+static inline phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t dev_addr)
+{
+ phys_addr_t paddr = (phys_addr_t)dev_addr;
+
+ return paddr + ((phys_addr_t)dev->dma_pfn_offset << PAGE_SHIFT);
+}
+
+static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
+{
+ if (!dev->dma_mask)
+ return false;
+
+ /*
+ * Force swiotlb buffer bouncing when ARCH_DMA_MINALIGN < CWG. The
+ * swiotlb bounce buffers are aligned to (1 << IO_TLB_SHIFT).
+ */
+ if (static_branch_unlikely(&swiotlb_noncoherent_bounce) &&
+ !is_device_dma_coherent(dev) &&
+ !is_swiotlb_buffer(__dma_to_phys(dev, addr)))
+ return false;
+
+ return addr + size - 1 <= *dev->dma_mask;
+}
+
+#endif /* __ASM_DMA_DIRECT_H */
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index a96ec0181818..1e9dac8684ca 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -33,6 +33,7 @@
#include <asm/cacheflush.h>
static int swiotlb __ro_after_init;
+DEFINE_STATIC_KEY_FALSE(swiotlb_noncoherent_bounce);
static pgprot_t __get_dma_pgprot(unsigned long attrs, pgprot_t prot,
bool coherent)
@@ -504,6 +505,14 @@ static int __init arm64_dma_init(void)
max_pfn > (arm64_dma_phys_limit >> PAGE_SHIFT))
swiotlb = 1;
+ if (WARN_TAINT(ARCH_DMA_MINALIGN < cache_line_size(),
+ TAINT_CPU_OUT_OF_SPEC,
+ "ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (%d < %d)",
+ ARCH_DMA_MINALIGN, cache_line_size())) {
+ swiotlb = 1;
+ static_branch_enable(&swiotlb_noncoherent_bounce);
+ }
+
return atomic_pool_init();
}
arch_initcall(arm64_dma_init);
@@ -882,6 +891,14 @@ static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
const struct iommu_ops *iommu, bool coherent)
{
+ /*
+ * Enable swiotlb for buffer bouncing if ARCH_DMA_MINALIGN < CWG.
+ * dma_capable() forces the actual bounce if the device is
+ * non-coherent.
+ */
+ if (static_branch_unlikely(&swiotlb_noncoherent_bounce) && !coherent)
+ iommu = NULL;
+
if (!dev->dma_ops)
dev->dma_ops = &arm64_swiotlb_dma_ops;
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 9f3c47acf8ff..664acf177799 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -586,7 +586,8 @@ static void __init free_unused_memmap(void)
void __init mem_init(void)
{
if (swiotlb_force == SWIOTLB_FORCE ||
- max_pfn > (arm64_dma_phys_limit >> PAGE_SHIFT))
+ max_pfn > (arm64_dma_phys_limit >> PAGE_SHIFT) ||
+ ARCH_DMA_MINALIGN < cache_line_size())
swiotlb_init(1);
else
swiotlb_force = SWIOTLB_NO_FORCE;
^ permalink raw reply related [flat|nested] 6+ messages in thread