LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 08/20] powerpc/dma: remove the unused dma_nommu_ops export
From: Christoph Hellwig @ 2018-07-30 16:38 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
	Tony Luck, Fenghua Yu
  Cc: Konrad Rzeszutek Wilk, Robin Murphy, linuxppc-dev, iommu,
	linux-ia64
In-Reply-To: <20180730163824.10064-1-hch@lst.de>

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 arch/powerpc/kernel/dma.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c
index dbfc7056d7df..3939589aab04 100644
--- a/arch/powerpc/kernel/dma.c
+++ b/arch/powerpc/kernel/dma.c
@@ -286,7 +286,6 @@ const struct dma_map_ops dma_nommu_ops = {
 	.sync_sg_for_device 		= dma_nommu_sync_sg,
 #endif
 };
-EXPORT_SYMBOL(dma_nommu_ops);
 
 int dma_set_coherent_mask(struct device *dev, u64 mask)
 {
-- 
2.18.0

^ permalink raw reply related

* [PATCH 09/20] powerpc/dma: remove the unused ISA_DMA_THRESHOLD export
From: Christoph Hellwig @ 2018-07-30 16:38 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
	Tony Luck, Fenghua Yu
  Cc: Konrad Rzeszutek Wilk, Robin Murphy, linuxppc-dev, iommu,
	linux-ia64
In-Reply-To: <20180730163824.10064-1-hch@lst.de>

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 arch/powerpc/kernel/setup_32.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index 74457485574b..3c2d093f74c7 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -55,7 +55,6 @@ unsigned long ISA_DMA_THRESHOLD;
 unsigned int DMA_MODE_READ;
 unsigned int DMA_MODE_WRITE;
 
-EXPORT_SYMBOL(ISA_DMA_THRESHOLD);
 EXPORT_SYMBOL(DMA_MODE_READ);
 EXPORT_SYMBOL(DMA_MODE_WRITE);
 
-- 
2.18.0

^ permalink raw reply related

* [PATCH 11/20] powerpc/dma: split the two __dma_alloc_coherent implementations
From: Christoph Hellwig @ 2018-07-30 16:38 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
	Tony Luck, Fenghua Yu
  Cc: Konrad Rzeszutek Wilk, Robin Murphy, linuxppc-dev, iommu,
	linux-ia64
In-Reply-To: <20180730163824.10064-1-hch@lst.de>

The implemementation for the CONFIG_NOT_COHERENT_CACHE case doesn't share
any code with the one for systems with coherent caches.  Split it off
and merge it with the helpers in dma-noncoherent.c that have no other
callers.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 arch/powerpc/include/asm/dma-mapping.h |  5 -----
 arch/powerpc/kernel/dma.c              | 14 ++------------
 arch/powerpc/mm/dma-noncoherent.c      | 15 +++++++--------
 arch/powerpc/platforms/44x/warp.c      |  2 +-
 4 files changed, 10 insertions(+), 26 deletions(-)

diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h
index f2a4a7142b1e..dacd0f93f2b2 100644
--- a/arch/powerpc/include/asm/dma-mapping.h
+++ b/arch/powerpc/include/asm/dma-mapping.h
@@ -39,9 +39,6 @@ extern int dma_nommu_mmap_coherent(struct device *dev,
  * to ensure it is consistent.
  */
 struct device;
-extern void *__dma_alloc_coherent(struct device *dev, size_t size,
-				  dma_addr_t *handle, gfp_t gfp);
-extern void __dma_free_coherent(size_t size, void *vaddr);
 extern void __dma_sync(void *vaddr, size_t size, int direction);
 extern void __dma_sync_page(struct page *page, unsigned long offset,
 				 size_t size, int direction);
@@ -52,8 +49,6 @@ extern unsigned long __dma_get_coherent_pfn(unsigned long cpu_addr);
  * Cache coherent cores.
  */
 
-#define __dma_alloc_coherent(dev, gfp, size, handle)	NULL
-#define __dma_free_coherent(size, addr)		((void)0)
 #define __dma_sync(addr, size, rw)		((void)0)
 #define __dma_sync_page(pg, off, sz, rw)	((void)0)
 
diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c
index 3939589aab04..eceaa92e6986 100644
--- a/arch/powerpc/kernel/dma.c
+++ b/arch/powerpc/kernel/dma.c
@@ -62,18 +62,12 @@ static int dma_nommu_dma_supported(struct device *dev, u64 mask)
 #endif
 }
 
+#ifndef CONFIG_NOT_COHERENT_CACHE
 void *__dma_nommu_alloc_coherent(struct device *dev, size_t size,
 				  dma_addr_t *dma_handle, gfp_t flag,
 				  unsigned long attrs)
 {
 	void *ret;
-#ifdef CONFIG_NOT_COHERENT_CACHE
-	ret = __dma_alloc_coherent(dev, size, dma_handle, flag);
-	if (ret == NULL)
-		return NULL;
-	*dma_handle += get_dma_offset(dev);
-	return ret;
-#else
 	struct page *page;
 	int node = dev_to_node(dev);
 #ifdef CONFIG_FSL_SOC
@@ -113,19 +107,15 @@ void *__dma_nommu_alloc_coherent(struct device *dev, size_t size,
 	*dma_handle = __pa(ret) + get_dma_offset(dev);
 
 	return ret;
-#endif
 }
 
 void __dma_nommu_free_coherent(struct device *dev, size_t size,
 				void *vaddr, dma_addr_t dma_handle,
 				unsigned long attrs)
 {
-#ifdef CONFIG_NOT_COHERENT_CACHE
-	__dma_free_coherent(size, vaddr);
-#else
 	free_pages((unsigned long)vaddr, get_order(size));
-#endif
 }
+#endif /* !CONFIG_NOT_COHERENT_CACHE */
 
 static void *dma_nommu_alloc_coherent(struct device *dev, size_t size,
 				       dma_addr_t *dma_handle, gfp_t flag,
diff --git a/arch/powerpc/mm/dma-noncoherent.c b/arch/powerpc/mm/dma-noncoherent.c
index d1c16456abac..cfc48a253707 100644
--- a/arch/powerpc/mm/dma-noncoherent.c
+++ b/arch/powerpc/mm/dma-noncoherent.c
@@ -29,7 +29,7 @@
 #include <linux/string.h>
 #include <linux/types.h>
 #include <linux/highmem.h>
-#include <linux/dma-mapping.h>
+#include <linux/dma-direct.h>
 #include <linux/export.h>
 
 #include <asm/tlbflush.h>
@@ -151,8 +151,8 @@ static struct ppc_vm_region *ppc_vm_region_find(struct ppc_vm_region *head, unsi
  * Allocate DMA-coherent memory space and return both the kernel remapped
  * virtual and bus address for that space.
  */
-void *
-__dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp)
+void *__dma_nommu_alloc_coherent(struct device *dev, size_t size,
+		dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
 {
 	struct page *page;
 	struct ppc_vm_region *c;
@@ -223,7 +223,7 @@ __dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, gfp_t
 		/*
 		 * Set the "dma handle"
 		 */
-		*handle = page_to_phys(page);
+		*dma_handle = phys_to_dma(dev, page_to_phys(page));
 
 		do {
 			SetPageReserved(page);
@@ -249,12 +249,12 @@ __dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, gfp_t
  no_page:
 	return NULL;
 }
-EXPORT_SYMBOL(__dma_alloc_coherent);
 
 /*
  * free a page as defined by the above mapping.
  */
-void __dma_free_coherent(size_t size, void *vaddr)
+void __dma_nommu_free_coherent(struct device *dev, size_t size, void *vaddr,
+		dma_addr_t dma_handle, unsigned long attrs)
 {
 	struct ppc_vm_region *c;
 	unsigned long flags, addr;
@@ -309,7 +309,6 @@ void __dma_free_coherent(size_t size, void *vaddr)
 	       __func__, vaddr);
 	dump_stack();
 }
-EXPORT_SYMBOL(__dma_free_coherent);
 
 /*
  * make an area consistent.
@@ -397,7 +396,7 @@ EXPORT_SYMBOL(__dma_sync_page);
 
 /*
  * Return the PFN for a given cpu virtual address returned by
- * __dma_alloc_coherent. This is used by dma_mmap_coherent()
+ * __dma_nommu_alloc_coherent. This is used by dma_mmap_coherent()
  */
 unsigned long __dma_get_coherent_pfn(unsigned long cpu_addr)
 {
diff --git a/arch/powerpc/platforms/44x/warp.c b/arch/powerpc/platforms/44x/warp.c
index a886c2c22097..7e4f8ca19ce8 100644
--- a/arch/powerpc/platforms/44x/warp.c
+++ b/arch/powerpc/platforms/44x/warp.c
@@ -47,7 +47,7 @@ static int __init warp_probe(void)
 	if (!of_machine_is_compatible("pika,warp"))
 		return 0;
 
-	/* For __dma_alloc_coherent */
+	/* For __dma_nommu_alloc_coherent */
 	ISA_DMA_THRESHOLD = ~0L;
 
 	return 1;
-- 
2.18.0

^ permalink raw reply related

* [PATCH 10/20] powerpc/dma-noncoherent: don't disable irqs over kmap_atomic
From: Christoph Hellwig @ 2018-07-30 16:38 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
	Tony Luck, Fenghua Yu
  Cc: Konrad Rzeszutek Wilk, Robin Murphy, linuxppc-dev, iommu,
	linux-ia64
In-Reply-To: <20180730163824.10064-1-hch@lst.de>

The requirement to disable local irqs over kmap_atomic is long gone,
so remove those calls.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 arch/powerpc/mm/dma-noncoherent.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/arch/powerpc/mm/dma-noncoherent.c b/arch/powerpc/mm/dma-noncoherent.c
index 382528475433..d1c16456abac 100644
--- a/arch/powerpc/mm/dma-noncoherent.c
+++ b/arch/powerpc/mm/dma-noncoherent.c
@@ -357,12 +357,10 @@ static inline void __dma_sync_page_highmem(struct page *page,
 {
 	size_t seg_size = min((size_t)(PAGE_SIZE - offset), size);
 	size_t cur_size = seg_size;
-	unsigned long flags, start, seg_offset = offset;
+	unsigned long start, seg_offset = offset;
 	int nr_segs = 1 + ((size - seg_size) + PAGE_SIZE - 1)/PAGE_SIZE;
 	int seg_nr = 0;
 
-	local_irq_save(flags);
-
 	do {
 		start = (unsigned long)kmap_atomic(page + seg_nr) + seg_offset;
 
@@ -378,8 +376,6 @@ static inline void __dma_sync_page_highmem(struct page *page,
 		cur_size += seg_size;
 		seg_offset = 0;
 	} while (seg_nr < nr_segs);
-
-	local_irq_restore(flags);
 }
 #endif /* CONFIG_HIGHMEM */
 
-- 
2.18.0

^ permalink raw reply related

* [PATCH 12/20] powerpc/dma: use phys_to_dma instead of get_dma_offset
From: Christoph Hellwig @ 2018-07-30 16:38 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
	Tony Luck, Fenghua Yu
  Cc: Konrad Rzeszutek Wilk, Robin Murphy, linuxppc-dev, iommu,
	linux-ia64
In-Reply-To: <20180730163824.10064-1-hch@lst.de>

Use the standard portable helper instead of the powerpc specific one,
which is about to go away.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 arch/powerpc/kernel/dma-swiotlb.c |  5 ++---
 arch/powerpc/kernel/dma.c         | 12 ++++++------
 2 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c
index 88f3963ca30f..f6e0701c5303 100644
--- a/arch/powerpc/kernel/dma-swiotlb.c
+++ b/arch/powerpc/kernel/dma-swiotlb.c
@@ -11,7 +11,7 @@
  *
  */
 
-#include <linux/dma-mapping.h>
+#include <linux/dma-direct.h>
 #include <linux/memblock.h>
 #include <linux/pfn.h>
 #include <linux/of_platform.h>
@@ -31,9 +31,8 @@ static u64 swiotlb_powerpc_get_required(struct device *dev)
 	end = memblock_end_of_DRAM();
 	if (max_direct_dma_addr && end > max_direct_dma_addr)
 		end = max_direct_dma_addr;
-	end += get_dma_offset(dev);
 
-	mask = 1ULL << (fls64(end) - 1);
+	mask = 1ULL << (fls64(phys_to_dma(dev, end)) - 1);
 	mask += mask - 1;
 
 	return mask;
diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c
index eceaa92e6986..3487de83bb37 100644
--- a/arch/powerpc/kernel/dma.c
+++ b/arch/powerpc/kernel/dma.c
@@ -6,7 +6,7 @@
  */
 
 #include <linux/device.h>
-#include <linux/dma-mapping.h>
+#include <linux/dma-direct.h>
 #include <linux/dma-debug.h>
 #include <linux/gfp.h>
 #include <linux/memblock.h>
@@ -43,7 +43,7 @@ static u64 __maybe_unused get_pfn_limit(struct device *dev)
 static int dma_nommu_dma_supported(struct device *dev, u64 mask)
 {
 #ifdef CONFIG_PPC64
-	u64 limit = get_dma_offset(dev) + (memblock_end_of_DRAM() - 1);
+	u64 limit = phys_to_dma(dev, (memblock_end_of_DRAM() - 1));
 
 	/* Limit fits in the mask, we are good */
 	if (mask >= limit)
@@ -104,7 +104,7 @@ void *__dma_nommu_alloc_coherent(struct device *dev, size_t size,
 		return NULL;
 	ret = page_address(page);
 	memset(ret, 0, size);
-	*dma_handle = __pa(ret) + get_dma_offset(dev);
+	*dma_handle = phys_to_dma(dev,__pa(ret));
 
 	return ret;
 }
@@ -188,7 +188,7 @@ static int dma_nommu_map_sg(struct device *dev, struct scatterlist *sgl,
 	int i;
 
 	for_each_sg(sgl, sg, nents, i) {
-		sg->dma_address = sg_phys(sg) + get_dma_offset(dev);
+		sg->dma_address = phys_to_dma(dev, sg_phys(sg));
 		sg->dma_length = sg->length;
 
 		if (attrs & DMA_ATTR_SKIP_CPU_SYNC)
@@ -210,7 +210,7 @@ static u64 dma_nommu_get_required_mask(struct device *dev)
 {
 	u64 end, mask;
 
-	end = memblock_end_of_DRAM() + get_dma_offset(dev);
+	end = phys_to_dma(dev, memblock_end_of_DRAM());
 
 	mask = 1ULL << (fls64(end) - 1);
 	mask += mask - 1;
@@ -228,7 +228,7 @@ static inline dma_addr_t dma_nommu_map_page(struct device *dev,
 	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
 		__dma_sync_page(page, offset, size, dir);
 
-	return page_to_phys(page) + offset + get_dma_offset(dev);
+	return phys_to_dma(dev, page_to_phys(page)) + offset;
 }
 
 static inline void dma_nommu_unmap_page(struct device *dev,
-- 
2.18.0

^ permalink raw reply related

* [PATCH 13/20] powerpc/dma: remove get_dma_offset
From: Christoph Hellwig @ 2018-07-30 16:38 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
	Tony Luck, Fenghua Yu
  Cc: Konrad Rzeszutek Wilk, Robin Murphy, linuxppc-dev, iommu,
	linux-ia64
In-Reply-To: <20180730163824.10064-1-hch@lst.de>

Just fold the calculation into __phys_to_dma/__dma_to_phys as those are
the only places that should know about it.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 arch/powerpc/include/asm/dma-direct.h  |  8 ++++++--
 arch/powerpc/include/asm/dma-mapping.h | 16 ----------------
 2 files changed, 6 insertions(+), 18 deletions(-)

diff --git a/arch/powerpc/include/asm/dma-direct.h b/arch/powerpc/include/asm/dma-direct.h
index 7702875aabb7..0fba19445ae8 100644
--- a/arch/powerpc/include/asm/dma-direct.h
+++ b/arch/powerpc/include/asm/dma-direct.h
@@ -19,11 +19,15 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
 
 static inline dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr)
 {
-	return paddr + get_dma_offset(dev);
+	if (!dev)
+		return paddr + PCI_DRAM_OFFSET;
+	return paddr + dev->archdata.dma_offset;
 }
 
 static inline phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr)
 {
-	return daddr - get_dma_offset(dev);
+	if (!dev)
+		return daddr - PCI_DRAM_OFFSET;
+	return daddr - dev->archdata.dma_offset;
 }
 #endif /* ASM_POWERPC_DMA_DIRECT_H */
diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h
index dacd0f93f2b2..f0bf7ac2686c 100644
--- a/arch/powerpc/include/asm/dma-mapping.h
+++ b/arch/powerpc/include/asm/dma-mapping.h
@@ -80,22 +80,6 @@ static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
 	return NULL;
 }
 
-/*
- * get_dma_offset()
- *
- * Get the dma offset on configurations where the dma address can be determined
- * from the physical address by looking at a simple offset.  Direct dma and
- * swiotlb use this function, but it is typically not used by implementations
- * with an iommu.
- */
-static inline dma_addr_t get_dma_offset(struct device *dev)
-{
-	if (dev)
-		return dev->archdata.dma_offset;
-
-	return PCI_DRAM_OFFSET;
-}
-
 static inline void set_dma_offset(struct device *dev, dma_addr_t off)
 {
 	if (dev)
-- 
2.18.0

^ permalink raw reply related

* [PATCH 14/20] powerpc/dma: replace dma_nommu_dma_supported with dma_direct_supported
From: Christoph Hellwig @ 2018-07-30 16:38 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
	Tony Luck, Fenghua Yu
  Cc: Konrad Rzeszutek Wilk, Robin Murphy, linuxppc-dev, iommu,
	linux-ia64
In-Reply-To: <20180730163824.10064-1-hch@lst.de>

The ppc32 case of dma_nommu_dma_supported already was a no-op, and the
64-bit case came to the same conclusion as dma_direct_supported, so
replace it with the generic version.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 arch/powerpc/Kconfig      |  1 +
 arch/powerpc/kernel/dma.c | 28 +++-------------------------
 2 files changed, 4 insertions(+), 25 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index f9cae7edd735..bbfa6a8df4da 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -158,6 +158,7 @@ config PPC
 	select CLONE_BACKWARDS
 	select DCACHE_WORD_ACCESS		if PPC64 && CPU_LITTLE_ENDIAN
 	select DYNAMIC_FTRACE			if FUNCTION_TRACER
+	select DMA_DIRECT_OPS
 	select EDAC_ATOMIC_SCRUB
 	select EDAC_SUPPORT
 	select GENERIC_ATOMIC64			if PPC32
diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c
index 3487de83bb37..511a4972560d 100644
--- a/arch/powerpc/kernel/dma.c
+++ b/arch/powerpc/kernel/dma.c
@@ -40,28 +40,6 @@ static u64 __maybe_unused get_pfn_limit(struct device *dev)
 	return pfn;
 }
 
-static int dma_nommu_dma_supported(struct device *dev, u64 mask)
-{
-#ifdef CONFIG_PPC64
-	u64 limit = phys_to_dma(dev, (memblock_end_of_DRAM() - 1));
-
-	/* Limit fits in the mask, we are good */
-	if (mask >= limit)
-		return 1;
-
-#ifdef CONFIG_FSL_SOC
-	/* Freescale gets another chance via ZONE_DMA/ZONE_DMA32, however
-	 * that will have to be refined if/when they support iommus
-	 */
-	return 1;
-#endif
-	/* Sorry ... */
-	return 0;
-#else
-	return 1;
-#endif
-}
-
 #ifndef CONFIG_NOT_COHERENT_CACHE
 void *__dma_nommu_alloc_coherent(struct device *dev, size_t size,
 				  dma_addr_t *dma_handle, gfp_t flag,
@@ -126,7 +104,7 @@ static void *dma_nommu_alloc_coherent(struct device *dev, size_t size,
 	/* The coherent mask may be smaller than the real mask, check if
 	 * we can really use the direct ops
 	 */
-	if (dma_nommu_dma_supported(dev, dev->coherent_dma_mask))
+	if (dma_direct_supported(dev, dev->coherent_dma_mask))
 		return __dma_nommu_alloc_coherent(dev, size, dma_handle,
 						   flag, attrs);
 
@@ -148,7 +126,7 @@ static void dma_nommu_free_coherent(struct device *dev, size_t size,
 	struct iommu_table *iommu;
 
 	/* See comments in dma_nommu_alloc_coherent() */
-	if (dma_nommu_dma_supported(dev, dev->coherent_dma_mask))
+	if (dma_direct_supported(dev, dev->coherent_dma_mask))
 		return __dma_nommu_free_coherent(dev, size, vaddr, dma_handle,
 						  attrs);
 	/* Maybe we used an iommu ... */
@@ -265,7 +243,7 @@ const struct dma_map_ops dma_nommu_ops = {
 	.mmap				= dma_nommu_mmap_coherent,
 	.map_sg				= dma_nommu_map_sg,
 	.unmap_sg			= dma_nommu_unmap_sg,
-	.dma_supported			= dma_nommu_dma_supported,
+	.dma_supported			= dma_direct_supported,
 	.map_page			= dma_nommu_map_page,
 	.unmap_page			= dma_nommu_unmap_page,
 	.get_required_mask		= dma_nommu_get_required_mask,
-- 
2.18.0

^ permalink raw reply related

* [PATCH 16/20] powerpc/dma: use dma_direct_{alloc,free}
From: Christoph Hellwig @ 2018-07-30 16:38 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
	Tony Luck, Fenghua Yu
  Cc: Konrad Rzeszutek Wilk, Robin Murphy, linuxppc-dev, iommu,
	linux-ia64
In-Reply-To: <20180730163824.10064-1-hch@lst.de>

These do the same functionality as the existing helpers, but do it
simpler, and also allow the (optional) use of CMA.

Note that the swiotlb code now calls into the dma_direct code directly,
given that it doesn't work with noncoherent caches at all, and isn't called
when we have an iommu either, so the iommu special case in
dma_nommu_alloc_coherent isn't required for swiotlb.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 arch/powerpc/include/asm/pgtable.h |  1 -
 arch/powerpc/kernel/dma-swiotlb.c  |  4 +-
 arch/powerpc/kernel/dma.c          | 78 ++++--------------------------
 arch/powerpc/mm/mem.c              | 19 --------
 4 files changed, 11 insertions(+), 91 deletions(-)

diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index 14c79a7dc855..123de4958d2e 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -38,7 +38,6 @@ extern unsigned long empty_zero_page[];
 extern pgd_t swapper_pg_dir[];
 
 void limit_zone_pfn(enum zone_type zone, unsigned long max_pfn);
-int dma_pfn_limit_to_zone(u64 pfn_limit);
 extern void paging_init(void);
 
 /*
diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c
index f6e0701c5303..25986fcd1e5e 100644
--- a/arch/powerpc/kernel/dma-swiotlb.c
+++ b/arch/powerpc/kernel/dma-swiotlb.c
@@ -46,8 +46,8 @@ static u64 swiotlb_powerpc_get_required(struct device *dev)
  * for everything else.
  */
 const struct dma_map_ops powerpc_swiotlb_dma_ops = {
-	.alloc = __dma_nommu_alloc_coherent,
-	.free = __dma_nommu_free_coherent,
+	.alloc = dma_direct_alloc,
+	.free = dma_direct_free,
 	.mmap = dma_nommu_mmap_coherent,
 	.map_sg = swiotlb_map_sg_attrs,
 	.unmap_sg = swiotlb_unmap_sg_attrs,
diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c
index 2cfc45acbb52..2b90a403cdac 100644
--- a/arch/powerpc/kernel/dma.c
+++ b/arch/powerpc/kernel/dma.c
@@ -26,75 +26,6 @@
  * can set archdata.dma_data to an unsigned long holding the offset. By
  * default the offset is PCI_DRAM_OFFSET.
  */
-
-static u64 __maybe_unused get_pfn_limit(struct device *dev)
-{
-	u64 pfn = (dev->coherent_dma_mask >> PAGE_SHIFT) + 1;
-	struct dev_archdata __maybe_unused *sd = &dev->archdata;
-
-#ifdef CONFIG_SWIOTLB
-	if (sd->max_direct_dma_addr && dev->dma_ops == &powerpc_swiotlb_dma_ops)
-		pfn = min_t(u64, pfn, sd->max_direct_dma_addr >> PAGE_SHIFT);
-#endif
-
-	return pfn;
-}
-
-#ifndef CONFIG_NOT_COHERENT_CACHE
-void *__dma_nommu_alloc_coherent(struct device *dev, size_t size,
-				  dma_addr_t *dma_handle, gfp_t flag,
-				  unsigned long attrs)
-{
-	void *ret;
-	struct page *page;
-	int node = dev_to_node(dev);
-#ifdef CONFIG_FSL_SOC
-	u64 pfn = get_pfn_limit(dev);
-	int zone;
-
-	/*
-	 * This code should be OK on other platforms, but we have drivers that
-	 * don't set coherent_dma_mask. As a workaround we just ifdef it. This
-	 * whole routine needs some serious cleanup.
-	 */
-
-	zone = dma_pfn_limit_to_zone(pfn);
-	if (zone < 0) {
-		dev_err(dev, "%s: No suitable zone for pfn %#llx\n",
-			__func__, pfn);
-		return NULL;
-	}
-
-	switch (zone) {
-	case ZONE_DMA:
-		flag |= GFP_DMA;
-		break;
-#ifdef CONFIG_ZONE_DMA32
-	case ZONE_DMA32:
-		flag |= GFP_DMA32;
-		break;
-#endif
-	};
-#endif /* CONFIG_FSL_SOC */
-
-	page = alloc_pages_node(node, flag, get_order(size));
-	if (page == NULL)
-		return NULL;
-	ret = page_address(page);
-	memset(ret, 0, size);
-	*dma_handle = phys_to_dma(dev,__pa(ret));
-
-	return ret;
-}
-
-void __dma_nommu_free_coherent(struct device *dev, size_t size,
-				void *vaddr, dma_addr_t dma_handle,
-				unsigned long attrs)
-{
-	free_pages((unsigned long)vaddr, get_order(size));
-}
-#endif /* !CONFIG_NOT_COHERENT_CACHE */
-
 static void *dma_nommu_alloc_coherent(struct device *dev, size_t size,
 				       dma_addr_t *dma_handle, gfp_t flag,
 				       unsigned long attrs)
@@ -105,8 +36,12 @@ static void *dma_nommu_alloc_coherent(struct device *dev, size_t size,
 	 * we can really use the direct ops
 	 */
 	if (dma_direct_supported(dev, dev->coherent_dma_mask))
+#ifdef CONFIG_NOT_COHERENT_CACHE
 		return __dma_nommu_alloc_coherent(dev, size, dma_handle,
 						   flag, attrs);
+#else
+		return dma_direct_alloc(dev, size, dma_handle, flag, attrs);
+#endif
 
 	/* Ok we can't ... do we have an iommu ? If not, fail */
 	iommu = get_iommu_table_base(dev);
@@ -127,8 +62,13 @@ static void dma_nommu_free_coherent(struct device *dev, size_t size,
 
 	/* See comments in dma_nommu_alloc_coherent() */
 	if (dma_direct_supported(dev, dev->coherent_dma_mask))
+#ifdef CONFIG_NOT_COHERENT_CACHE
 		return __dma_nommu_free_coherent(dev, size, vaddr, dma_handle,
 						  attrs);
+#else
+		return dma_direct_free(dev, size, vaddr, dma_handle, attrs);
+#endif
+
 	/* Maybe we used an iommu ... */
 	iommu = get_iommu_table_base(dev);
 
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 5c8530d0c611..ec8ed9d7abef 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -276,25 +276,6 @@ void __init limit_zone_pfn(enum zone_type zone, unsigned long pfn_limit)
 	}
 }
 
-/*
- * Find the least restrictive zone that is entirely below the
- * specified pfn limit.  Returns < 0 if no suitable zone is found.
- *
- * pfn_limit must be u64 because it can exceed 32 bits even on 32-bit
- * systems -- the DMA limit can be higher than any possible real pfn.
- */
-int dma_pfn_limit_to_zone(u64 pfn_limit)
-{
-	int i;
-
-	for (i = TOP_ZONE; i >= 0; i--) {
-		if (max_zone_pfns[i] <= pfn_limit)
-			return i;
-	}
-
-	return -EPERM;
-}
-
 /*
  * paging_init() sets up the page tables - in fact we've already done this.
  */
-- 
2.18.0

^ permalink raw reply related

* [PATCH 15/20] powerpc/dma: remove the unused unmap_page and unmap_sg methods
From: Christoph Hellwig @ 2018-07-30 16:38 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
	Tony Luck, Fenghua Yu
  Cc: Konrad Rzeszutek Wilk, Robin Murphy, linuxppc-dev, iommu,
	linux-ia64
In-Reply-To: <20180730163824.10064-1-hch@lst.de>

These methods are optional to start with, no need to implement no-op
versions.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 arch/powerpc/kernel/dma.c | 16 ----------------
 1 file changed, 16 deletions(-)

diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c
index 511a4972560d..2cfc45acbb52 100644
--- a/arch/powerpc/kernel/dma.c
+++ b/arch/powerpc/kernel/dma.c
@@ -178,12 +178,6 @@ static int dma_nommu_map_sg(struct device *dev, struct scatterlist *sgl,
 	return nents;
 }
 
-static void dma_nommu_unmap_sg(struct device *dev, struct scatterlist *sg,
-				int nents, enum dma_data_direction direction,
-				unsigned long attrs)
-{
-}
-
 static u64 dma_nommu_get_required_mask(struct device *dev)
 {
 	u64 end, mask;
@@ -209,14 +203,6 @@ static inline dma_addr_t dma_nommu_map_page(struct device *dev,
 	return phys_to_dma(dev, page_to_phys(page)) + offset;
 }
 
-static inline void dma_nommu_unmap_page(struct device *dev,
-					 dma_addr_t dma_address,
-					 size_t size,
-					 enum dma_data_direction direction,
-					 unsigned long attrs)
-{
-}
-
 #ifdef CONFIG_NOT_COHERENT_CACHE
 static inline void dma_nommu_sync_sg(struct device *dev,
 		struct scatterlist *sgl, int nents,
@@ -242,10 +228,8 @@ const struct dma_map_ops dma_nommu_ops = {
 	.free				= dma_nommu_free_coherent,
 	.mmap				= dma_nommu_mmap_coherent,
 	.map_sg				= dma_nommu_map_sg,
-	.unmap_sg			= dma_nommu_unmap_sg,
 	.dma_supported			= dma_direct_supported,
 	.map_page			= dma_nommu_map_page,
-	.unmap_page			= dma_nommu_unmap_page,
 	.get_required_mask		= dma_nommu_get_required_mask,
 #ifdef CONFIG_NOT_COHERENT_CACHE
 	.sync_single_for_cpu 		= dma_nommu_sync_single,
-- 
2.18.0

^ permalink raw reply related

* [PATCH 17/20] powerpc/dma-swiotlb: use generic swiotlb_dma_ops
From: Christoph Hellwig @ 2018-07-30 16:38 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
	Tony Luck, Fenghua Yu
  Cc: Konrad Rzeszutek Wilk, Robin Murphy, linuxppc-dev, iommu,
	linux-ia64
In-Reply-To: <20180730163824.10064-1-hch@lst.de>

These are identical to the arch specific ones, so remove them.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 arch/powerpc/include/asm/dma-direct.h |  4 ++++
 arch/powerpc/include/asm/swiotlb.h    |  2 --
 arch/powerpc/kernel/dma-swiotlb.c     | 28 ++-------------------------
 arch/powerpc/sysdev/fsl_pci.c         |  2 +-
 4 files changed, 7 insertions(+), 29 deletions(-)

diff --git a/arch/powerpc/include/asm/dma-direct.h b/arch/powerpc/include/asm/dma-direct.h
index 0fba19445ae8..657f84ddb20d 100644
--- a/arch/powerpc/include/asm/dma-direct.h
+++ b/arch/powerpc/include/asm/dma-direct.h
@@ -30,4 +30,8 @@ static inline phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr)
 		return daddr - PCI_DRAM_OFFSET;
 	return daddr - dev->archdata.dma_offset;
 }
+
+u64 swiotlb_powerpc_get_required(struct device *dev);
+#define swiotlb_get_required_mask swiotlb_powerpc_get_required
+
 #endif /* ASM_POWERPC_DMA_DIRECT_H */
diff --git a/arch/powerpc/include/asm/swiotlb.h b/arch/powerpc/include/asm/swiotlb.h
index f65ecf57b66c..1d8c1da26ab3 100644
--- a/arch/powerpc/include/asm/swiotlb.h
+++ b/arch/powerpc/include/asm/swiotlb.h
@@ -13,8 +13,6 @@
 
 #include <linux/swiotlb.h>
 
-extern const struct dma_map_ops powerpc_swiotlb_dma_ops;
-
 extern unsigned int ppc_swiotlb_enable;
 int __init swiotlb_setup_bus_notifier(void);
 
diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c
index 25986fcd1e5e..0c269de61f39 100644
--- a/arch/powerpc/kernel/dma-swiotlb.c
+++ b/arch/powerpc/kernel/dma-swiotlb.c
@@ -24,7 +24,7 @@
 
 unsigned int ppc_swiotlb_enable;
 
-static u64 swiotlb_powerpc_get_required(struct device *dev)
+u64 swiotlb_powerpc_get_required(struct device *dev)
 {
 	u64 end, mask, max_direct_dma_addr = dev->archdata.max_direct_dma_addr;
 
@@ -38,30 +38,6 @@ static u64 swiotlb_powerpc_get_required(struct device *dev)
 	return mask;
 }
 
-/*
- * At the moment, all platforms that use this code only require
- * swiotlb to be used if we're operating on HIGHMEM.  Since
- * we don't ever call anything other than map_sg, unmap_sg,
- * map_page, and unmap_page on highmem, use normal dma_ops
- * for everything else.
- */
-const struct dma_map_ops powerpc_swiotlb_dma_ops = {
-	.alloc = dma_direct_alloc,
-	.free = dma_direct_free,
-	.mmap = dma_nommu_mmap_coherent,
-	.map_sg = swiotlb_map_sg_attrs,
-	.unmap_sg = swiotlb_unmap_sg_attrs,
-	.dma_supported = swiotlb_dma_supported,
-	.map_page = swiotlb_map_page,
-	.unmap_page = swiotlb_unmap_page,
-	.sync_single_for_cpu = swiotlb_sync_single_for_cpu,
-	.sync_single_for_device = swiotlb_sync_single_for_device,
-	.sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
-	.sync_sg_for_device = swiotlb_sync_sg_for_device,
-	.mapping_error = swiotlb_dma_mapping_error,
-	.get_required_mask = swiotlb_powerpc_get_required,
-};
-
 void pci_dma_dev_setup_swiotlb(struct pci_dev *pdev)
 {
 	struct pci_controller *hose;
@@ -88,7 +64,7 @@ static int ppc_swiotlb_bus_notify(struct notifier_block *nb,
 
 	/* May need to bounce if the device can't address all of DRAM */
 	if ((dma_get_mask(dev) + 1) < memblock_end_of_DRAM())
-		set_dma_ops(dev, &powerpc_swiotlb_dma_ops);
+		set_dma_ops(dev, &swiotlb_dma_ops);
 
 	return NOTIFY_DONE;
 }
diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c
index 918be816b097..daf44bc0108d 100644
--- a/arch/powerpc/sysdev/fsl_pci.c
+++ b/arch/powerpc/sysdev/fsl_pci.c
@@ -118,7 +118,7 @@ static void setup_swiotlb_ops(struct pci_controller *hose)
 {
 	if (ppc_swiotlb_enable) {
 		hose->controller_ops.dma_dev_setup = pci_dma_dev_setup_swiotlb;
-		set_pci_dma_ops(&powerpc_swiotlb_dma_ops);
+		set_pci_dma_ops(&swiotlb_dma_ops);
 	}
 }
 #else
-- 
2.18.0

^ permalink raw reply related

* [PATCH 18/20] powerpc/dma-noncoherent: use generic dma_noncoherent_ops
From: Christoph Hellwig @ 2018-07-30 16:38 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
	Tony Luck, Fenghua Yu
  Cc: Konrad Rzeszutek Wilk, Robin Murphy, linuxppc-dev, iommu,
	linux-ia64
In-Reply-To: <20180730163824.10064-1-hch@lst.de>

The generic dma-noncoherent code provides all that is needed by powerpc.

Note that the cache maintainance in the existing code is a bit odd
as it implements both the sync_to_device and sync_to_cpu callouts,
but never flushes caches when unmapping.  This patch keeps both
directions arounds, which will lead to more flushing than the previous
implementation.  Someone more familar with the required CPUs should
eventually take a look and optimize the cache flush handling if needed.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 arch/powerpc/Kconfig                   |  2 +-
 arch/powerpc/include/asm/dma-mapping.h | 29 -------------
 arch/powerpc/kernel/dma.c              | 59 +++-----------------------
 arch/powerpc/kernel/pci-common.c       |  5 ++-
 arch/powerpc/kernel/setup-common.c     |  4 ++
 arch/powerpc/mm/dma-noncoherent.c      | 52 +++++++++++++++++------
 arch/powerpc/platforms/44x/warp.c      |  2 +-
 arch/powerpc/platforms/Kconfig.cputype |  6 ++-
 8 files changed, 60 insertions(+), 99 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index bbfa6a8df4da..33c6017ffce6 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -129,7 +129,7 @@ config PPC
 	# Please keep this list sorted alphabetically.
 	#
 	select ARCH_HAS_DEVMEM_IS_ALLOWED
-	select ARCH_HAS_DMA_SET_COHERENT_MASK
+	select ARCH_HAS_DMA_SET_COHERENT_MASK if !NOT_COHERENT_CACHE
 	select ARCH_HAS_ELF_RANDOMIZE
 	select ARCH_HAS_FORTIFY_SOURCE
 	select ARCH_HAS_GCOV_PROFILE_ALL
diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h
index f0bf7ac2686c..879c4efba785 100644
--- a/arch/powerpc/include/asm/dma-mapping.h
+++ b/arch/powerpc/include/asm/dma-mapping.h
@@ -19,40 +19,11 @@
 #include <asm/swiotlb.h>
 
 /* Some dma direct funcs must be visible for use in other dma_ops */
-extern void *__dma_nommu_alloc_coherent(struct device *dev, size_t size,
-					 dma_addr_t *dma_handle, gfp_t flag,
-					 unsigned long attrs);
-extern void __dma_nommu_free_coherent(struct device *dev, size_t size,
-				       void *vaddr, dma_addr_t dma_handle,
-				       unsigned long attrs);
 extern int dma_nommu_mmap_coherent(struct device *dev,
 				    struct vm_area_struct *vma,
 				    void *cpu_addr, dma_addr_t handle,
 				    size_t size, unsigned long attrs);
 
-#ifdef CONFIG_NOT_COHERENT_CACHE
-/*
- * DMA-consistent mapping functions for PowerPCs that don't support
- * cache snooping.  These allocate/free a region of uncached mapped
- * memory space for use with DMA devices.  Alternatively, you could
- * allocate the space "normally" and use the cache management functions
- * to ensure it is consistent.
- */
-struct device;
-extern void __dma_sync(void *vaddr, size_t size, int direction);
-extern void __dma_sync_page(struct page *page, unsigned long offset,
-				 size_t size, int direction);
-extern unsigned long __dma_get_coherent_pfn(unsigned long cpu_addr);
-
-#else /* ! CONFIG_NOT_COHERENT_CACHE */
-/*
- * Cache coherent cores.
- */
-
-#define __dma_sync(addr, size, rw)		((void)0)
-#define __dma_sync_page(pg, off, sz, rw)	((void)0)
-
-#endif /* ! CONFIG_NOT_COHERENT_CACHE */
 
 static inline unsigned long device_to_mask(struct device *dev)
 {
diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c
index 2b90a403cdac..b2e88075b2ea 100644
--- a/arch/powerpc/kernel/dma.c
+++ b/arch/powerpc/kernel/dma.c
@@ -36,12 +36,7 @@ static void *dma_nommu_alloc_coherent(struct device *dev, size_t size,
 	 * we can really use the direct ops
 	 */
 	if (dma_direct_supported(dev, dev->coherent_dma_mask))
-#ifdef CONFIG_NOT_COHERENT_CACHE
-		return __dma_nommu_alloc_coherent(dev, size, dma_handle,
-						   flag, attrs);
-#else
 		return dma_direct_alloc(dev, size, dma_handle, flag, attrs);
-#endif
 
 	/* Ok we can't ... do we have an iommu ? If not, fail */
 	iommu = get_iommu_table_base(dev);
@@ -62,12 +57,7 @@ static void dma_nommu_free_coherent(struct device *dev, size_t size,
 
 	/* See comments in dma_nommu_alloc_coherent() */
 	if (dma_direct_supported(dev, dev->coherent_dma_mask))
-#ifdef CONFIG_NOT_COHERENT_CACHE
-		return __dma_nommu_free_coherent(dev, size, vaddr, dma_handle,
-						  attrs);
-#else
 		return dma_direct_free(dev, size, vaddr, dma_handle, attrs);
-#endif
 
 	/* Maybe we used an iommu ... */
 	iommu = get_iommu_table_base(dev);
@@ -84,14 +74,8 @@ int dma_nommu_mmap_coherent(struct device *dev, struct vm_area_struct *vma,
 			     void *cpu_addr, dma_addr_t handle, size_t size,
 			     unsigned long attrs)
 {
-	unsigned long pfn;
+	unsigned long pfn = page_to_pfn(virt_to_page(cpu_addr));
 
-#ifdef CONFIG_NOT_COHERENT_CACHE
-	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-	pfn = __dma_get_coherent_pfn((unsigned long)cpu_addr);
-#else
-	pfn = page_to_pfn(virt_to_page(cpu_addr));
-#endif
 	return remap_pfn_range(vma, vma->vm_start,
 			       pfn + vma->vm_pgoff,
 			       vma->vm_end - vma->vm_start,
@@ -108,17 +92,13 @@ static int dma_nommu_map_sg(struct device *dev, struct scatterlist *sgl,
 	for_each_sg(sgl, sg, nents, i) {
 		sg->dma_address = phys_to_dma(dev, sg_phys(sg));
 		sg->dma_length = sg->length;
-
-		if (attrs & DMA_ATTR_SKIP_CPU_SYNC)
-			continue;
-
-		__dma_sync_page(sg_page(sg), sg->offset, sg->length, direction);
 	}
 
 	return nents;
 }
 
-static u64 dma_nommu_get_required_mask(struct device *dev)
+/* note: needs to be called arch_get_required_mask for dma-noncoherent.c */
+u64 arch_get_required_mask(struct device *dev)
 {
 	u64 end, mask;
 
@@ -137,32 +117,9 @@ static inline dma_addr_t dma_nommu_map_page(struct device *dev,
 					     enum dma_data_direction dir,
 					     unsigned long attrs)
 {
-	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
-		__dma_sync_page(page, offset, size, dir);
-
 	return phys_to_dma(dev, page_to_phys(page)) + offset;
 }
 
-#ifdef CONFIG_NOT_COHERENT_CACHE
-static inline void dma_nommu_sync_sg(struct device *dev,
-		struct scatterlist *sgl, int nents,
-		enum dma_data_direction direction)
-{
-	struct scatterlist *sg;
-	int i;
-
-	for_each_sg(sgl, sg, nents, i)
-		__dma_sync_page(sg_page(sg), sg->offset, sg->length, direction);
-}
-
-static inline void dma_nommu_sync_single(struct device *dev,
-					  dma_addr_t dma_handle, size_t size,
-					  enum dma_data_direction direction)
-{
-	__dma_sync(bus_to_virt(dma_handle), size, direction);
-}
-#endif
-
 const struct dma_map_ops dma_nommu_ops = {
 	.alloc				= dma_nommu_alloc_coherent,
 	.free				= dma_nommu_free_coherent,
@@ -170,15 +127,10 @@ const struct dma_map_ops dma_nommu_ops = {
 	.map_sg				= dma_nommu_map_sg,
 	.dma_supported			= dma_direct_supported,
 	.map_page			= dma_nommu_map_page,
-	.get_required_mask		= dma_nommu_get_required_mask,
-#ifdef CONFIG_NOT_COHERENT_CACHE
-	.sync_single_for_cpu 		= dma_nommu_sync_single,
-	.sync_single_for_device 	= dma_nommu_sync_single,
-	.sync_sg_for_cpu 		= dma_nommu_sync_sg,
-	.sync_sg_for_device 		= dma_nommu_sync_sg,
-#endif
+	.get_required_mask		= arch_get_required_mask,
 };
 
+#ifndef CONFIG_NOT_COHERENT_CACHE
 int dma_set_coherent_mask(struct device *dev, u64 mask)
 {
 	if (!dma_supported(dev, mask)) {
@@ -197,6 +149,7 @@ int dma_set_coherent_mask(struct device *dev, u64 mask)
 	return 0;
 }
 EXPORT_SYMBOL(dma_set_coherent_mask);
+#endif
 
 int dma_set_mask(struct device *dev, u64 dma_mask)
 {
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index fe9733ffffaa..898ffb636b75 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -59,8 +59,11 @@ static DECLARE_BITMAP(phb_bitmap, MAX_PHBS);
 resource_size_t isa_mem_base;
 EXPORT_SYMBOL(isa_mem_base);
 
-
+#ifdef CONFIG_NOT_COHERENT_CACHE
+static const struct dma_map_ops *pci_dma_ops = &dma_noncoherent_ops;
+#else
 static const struct dma_map_ops *pci_dma_ops = &dma_nommu_ops;
+#endif
 
 void set_pci_dma_ops(const struct dma_map_ops *dma_ops)
 {
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 40b44bb53a4e..2488826fa543 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -792,7 +792,11 @@ void arch_setup_pdev_archdata(struct platform_device *pdev)
 {
 	pdev->archdata.dma_mask = DMA_BIT_MASK(32);
 	pdev->dev.dma_mask = &pdev->archdata.dma_mask;
+#ifdef CONFIG_NOT_COHERENT_CACHE
+	set_dma_ops(&pdev->dev, &dma_noncoherent_ops);
+#else
  	set_dma_ops(&pdev->dev, &dma_nommu_ops);
+#endif
 }
 
 static __init void print_system_info(void)
diff --git a/arch/powerpc/mm/dma-noncoherent.c b/arch/powerpc/mm/dma-noncoherent.c
index cfc48a253707..1ceea32c0112 100644
--- a/arch/powerpc/mm/dma-noncoherent.c
+++ b/arch/powerpc/mm/dma-noncoherent.c
@@ -30,6 +30,7 @@
 #include <linux/types.h>
 #include <linux/highmem.h>
 #include <linux/dma-direct.h>
+#include <linux/dma-noncoherent.h>
 #include <linux/export.h>
 
 #include <asm/tlbflush.h>
@@ -151,8 +152,8 @@ static struct ppc_vm_region *ppc_vm_region_find(struct ppc_vm_region *head, unsi
  * Allocate DMA-coherent memory space and return both the kernel remapped
  * virtual and bus address for that space.
  */
-void *__dma_nommu_alloc_coherent(struct device *dev, size_t size,
-		dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
+void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
+		gfp_t gfp, unsigned long attrs)
 {
 	struct page *page;
 	struct ppc_vm_region *c;
@@ -253,7 +254,7 @@ void *__dma_nommu_alloc_coherent(struct device *dev, size_t size,
 /*
  * free a page as defined by the above mapping.
  */
-void __dma_nommu_free_coherent(struct device *dev, size_t size, void *vaddr,
+void arch_dma_free(struct device *dev, size_t size, void *vaddr,
 		dma_addr_t dma_handle, unsigned long attrs)
 {
 	struct ppc_vm_region *c;
@@ -313,7 +314,7 @@ void __dma_nommu_free_coherent(struct device *dev, size_t size, void *vaddr,
 /*
  * make an area consistent.
  */
-void __dma_sync(void *vaddr, size_t size, int direction)
+static void __dma_sync(void *vaddr, size_t size, int direction)
 {
 	unsigned long start = (unsigned long)vaddr;
 	unsigned long end   = start + size;
@@ -339,7 +340,6 @@ void __dma_sync(void *vaddr, size_t size, int direction)
 		break;
 	}
 }
-EXPORT_SYMBOL(__dma_sync);
 
 #ifdef CONFIG_HIGHMEM
 /*
@@ -382,23 +382,36 @@ static inline void __dma_sync_page_highmem(struct page *page,
  * __dma_sync_page makes memory consistent. identical to __dma_sync, but
  * takes a struct page instead of a virtual address
  */
-void __dma_sync_page(struct page *page, unsigned long offset,
-	size_t size, int direction)
+static void __dma_sync_page(phys_addr_t paddr, size_t size, int dir)
 {
+	struct page *page = pfn_to_page(paddr >> PAGE_SHIFT);
+	unsigned offset = paddr & ~PAGE_MASK;
+
 #ifdef CONFIG_HIGHMEM
-	__dma_sync_page_highmem(page, offset, size, direction);
+	__dma_sync_page_highmem(page, offset, size, dir);
 #else
 	unsigned long start = (unsigned long)page_address(page) + offset;
-	__dma_sync((void *)start, size, direction);
+	__dma_sync((void *)start, size, dir);
 #endif
 }
-EXPORT_SYMBOL(__dma_sync_page);
+
+void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr,
+		size_t size, enum dma_data_direction dir)
+{
+	__dma_sync_page(paddr, size, dir);
+}
+
+void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
+		size_t size, enum dma_data_direction dir)
+{
+	__dma_sync_page(paddr, size, dir);
+}
 
 /*
- * Return the PFN for a given cpu virtual address returned by
- * __dma_nommu_alloc_coherent. This is used by dma_mmap_coherent()
+ * Return the PFN for a given cpu virtual address returned by __arch_dma_alloc.
+ * This is used by dma_mmap_coherent()
  */
-unsigned long __dma_get_coherent_pfn(unsigned long cpu_addr)
+static unsigned long __dma_get_coherent_pfn(unsigned long cpu_addr)
 {
 	/* This should always be populated, so we don't test every
 	 * level. If that fails, we'll have a nice crash which
@@ -413,3 +426,16 @@ unsigned long __dma_get_coherent_pfn(unsigned long cpu_addr)
 		return 0;
 	return pte_pfn(*ptep);
 }
+
+int arch_dma_mmap(struct device *dev, struct vm_area_struct *vma,
+			     void *cpu_addr, dma_addr_t handle, size_t size,
+			     unsigned long attrs)
+{
+	unsigned long pfn = __dma_get_coherent_pfn((unsigned long)cpu_addr);
+
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+	return remap_pfn_range(vma, vma->vm_start,
+			       pfn + vma->vm_pgoff,
+			       vma->vm_end - vma->vm_start,
+			       vma->vm_page_prot);
+}
diff --git a/arch/powerpc/platforms/44x/warp.c b/arch/powerpc/platforms/44x/warp.c
index 7e4f8ca19ce8..c0e6fb270d59 100644
--- a/arch/powerpc/platforms/44x/warp.c
+++ b/arch/powerpc/platforms/44x/warp.c
@@ -47,7 +47,7 @@ static int __init warp_probe(void)
 	if (!of_machine_is_compatible("pika,warp"))
 		return 0;
 
-	/* For __dma_nommu_alloc_coherent */
+	/* For arch_dma_alloc */
 	ISA_DMA_THRESHOLD = ~0L;
 
 	return 1;
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index a2578bf8d560..9d83f54ccf11 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -387,7 +387,11 @@ config NOT_COHERENT_CACHE
 	depends on 4xx || PPC_8xx || E200 || PPC_MPC512x || GAMECUBE_COMMON
 	default n if PPC_47x
 	default y
-	select NEED_DMA_MAP_STATE
+	select ARCH_HAS_SYNC_DMA_FOR_DEVICE
+	select ARCH_HAS_SYNC_DMA_FOR_CPU
+	select DMA_NONCOHERENT_GET_REQUIRED
+	select DMA_NONCOHERENT_MMAP
+	select DMA_NONCOHERENT_OPS
 
 config CHECK_CACHE_COHERENCY
 	bool
-- 
2.18.0

^ permalink raw reply related

* [PATCH 19/20] powerpc/dma: use the generic dma-direct map_page and map_sg routines
From: Christoph Hellwig @ 2018-07-30 16:38 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
	Tony Luck, Fenghua Yu
  Cc: Konrad Rzeszutek Wilk, Robin Murphy, linuxppc-dev, iommu,
	linux-ia64
In-Reply-To: <20180730163824.10064-1-hch@lst.de>

These are indentical except for additional error checking, so migrate
to the common code, and wire up the get_mapping_error method as well.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 arch/powerpc/kernel/dma.c | 32 ++++----------------------------
 1 file changed, 4 insertions(+), 28 deletions(-)

diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c
index b2e88075b2ea..08b12cbd7abf 100644
--- a/arch/powerpc/kernel/dma.c
+++ b/arch/powerpc/kernel/dma.c
@@ -82,21 +82,6 @@ int dma_nommu_mmap_coherent(struct device *dev, struct vm_area_struct *vma,
 			       vma->vm_page_prot);
 }
 
-static int dma_nommu_map_sg(struct device *dev, struct scatterlist *sgl,
-			     int nents, enum dma_data_direction direction,
-			     unsigned long attrs)
-{
-	struct scatterlist *sg;
-	int i;
-
-	for_each_sg(sgl, sg, nents, i) {
-		sg->dma_address = phys_to_dma(dev, sg_phys(sg));
-		sg->dma_length = sg->length;
-	}
-
-	return nents;
-}
-
 /* note: needs to be called arch_get_required_mask for dma-noncoherent.c */
 u64 arch_get_required_mask(struct device *dev)
 {
@@ -110,24 +95,15 @@ u64 arch_get_required_mask(struct device *dev)
 	return mask;
 }
 
-static inline dma_addr_t dma_nommu_map_page(struct device *dev,
-					     struct page *page,
-					     unsigned long offset,
-					     size_t size,
-					     enum dma_data_direction dir,
-					     unsigned long attrs)
-{
-	return phys_to_dma(dev, page_to_phys(page)) + offset;
-}
-
 const struct dma_map_ops dma_nommu_ops = {
 	.alloc				= dma_nommu_alloc_coherent,
 	.free				= dma_nommu_free_coherent,
 	.mmap				= dma_nommu_mmap_coherent,
-	.map_sg				= dma_nommu_map_sg,
-	.dma_supported			= dma_direct_supported,
-	.map_page			= dma_nommu_map_page,
+	.map_sg				= dma_direct_map_sg,
+	.map_page			= dma_direct_map_page,
 	.get_required_mask		= arch_get_required_mask,
+	.dma_supported			= dma_direct_supported,
+	.mapping_error			= dma_direct_mapping_error,
 };
 
 #ifndef CONFIG_NOT_COHERENT_CACHE
-- 
2.18.0

^ permalink raw reply related

* [PATCH 20/20] powerpc/dma: remove dma_nommu_mmap_coherent
From: Christoph Hellwig @ 2018-07-30 16:38 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
	Tony Luck, Fenghua Yu
  Cc: Konrad Rzeszutek Wilk, Robin Murphy, linuxppc-dev, iommu,
	linux-ia64
In-Reply-To: <20180730163824.10064-1-hch@lst.de>

The remaining implementation for coherent caches is functionally
identical to the default provided in common code.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 arch/powerpc/include/asm/dma-mapping.h |  7 -------
 arch/powerpc/kernel/dma-iommu.c        |  1 -
 arch/powerpc/kernel/dma.c              | 13 -------------
 arch/powerpc/platforms/pseries/vio.c   |  1 -
 4 files changed, 22 deletions(-)

diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h
index 879c4efba785..e62e23aa3714 100644
--- a/arch/powerpc/include/asm/dma-mapping.h
+++ b/arch/powerpc/include/asm/dma-mapping.h
@@ -18,13 +18,6 @@
 #include <asm/io.h>
 #include <asm/swiotlb.h>
 
-/* Some dma direct funcs must be visible for use in other dma_ops */
-extern int dma_nommu_mmap_coherent(struct device *dev,
-				    struct vm_area_struct *vma,
-				    void *cpu_addr, dma_addr_t handle,
-				    size_t size, unsigned long attrs);
-
-
 static inline unsigned long device_to_mask(struct device *dev)
 {
 	if (dev->dma_mask && *dev->dma_mask)
diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c
index f9fe2080ceb9..bf5234e1f71b 100644
--- a/arch/powerpc/kernel/dma-iommu.c
+++ b/arch/powerpc/kernel/dma-iommu.c
@@ -114,7 +114,6 @@ int dma_iommu_mapping_error(struct device *dev, dma_addr_t dma_addr)
 struct dma_map_ops dma_iommu_ops = {
 	.alloc			= dma_iommu_alloc_coherent,
 	.free			= dma_iommu_free_coherent,
-	.mmap			= dma_nommu_mmap_coherent,
 	.map_sg			= dma_iommu_map_sg,
 	.unmap_sg		= dma_iommu_unmap_sg,
 	.dma_supported		= dma_iommu_dma_supported,
diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c
index 08b12cbd7abf..5b71c9d1b8cc 100644
--- a/arch/powerpc/kernel/dma.c
+++ b/arch/powerpc/kernel/dma.c
@@ -70,18 +70,6 @@ static void dma_nommu_free_coherent(struct device *dev, size_t size,
 	iommu_free_coherent(iommu, size, vaddr, dma_handle);
 }
 
-int dma_nommu_mmap_coherent(struct device *dev, struct vm_area_struct *vma,
-			     void *cpu_addr, dma_addr_t handle, size_t size,
-			     unsigned long attrs)
-{
-	unsigned long pfn = page_to_pfn(virt_to_page(cpu_addr));
-
-	return remap_pfn_range(vma, vma->vm_start,
-			       pfn + vma->vm_pgoff,
-			       vma->vm_end - vma->vm_start,
-			       vma->vm_page_prot);
-}
-
 /* note: needs to be called arch_get_required_mask for dma-noncoherent.c */
 u64 arch_get_required_mask(struct device *dev)
 {
@@ -98,7 +86,6 @@ u64 arch_get_required_mask(struct device *dev)
 const struct dma_map_ops dma_nommu_ops = {
 	.alloc				= dma_nommu_alloc_coherent,
 	.free				= dma_nommu_free_coherent,
-	.mmap				= dma_nommu_mmap_coherent,
 	.map_sg				= dma_direct_map_sg,
 	.map_page			= dma_direct_map_page,
 	.get_required_mask		= arch_get_required_mask,
diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c
index 49e04ec19238..51d564313bd0 100644
--- a/arch/powerpc/platforms/pseries/vio.c
+++ b/arch/powerpc/platforms/pseries/vio.c
@@ -618,7 +618,6 @@ static u64 vio_dma_get_required_mask(struct device *dev)
 static const struct dma_map_ops vio_dma_mapping_ops = {
 	.alloc             = vio_dma_iommu_alloc_coherent,
 	.free              = vio_dma_iommu_free_coherent,
-	.mmap		   = dma_nommu_mmap_coherent,
 	.map_sg            = vio_dma_iommu_map_sg,
 	.unmap_sg          = vio_dma_iommu_unmap_sg,
 	.map_page          = vio_dma_iommu_map_page,
-- 
2.18.0

^ permalink raw reply related

* Re: [PATCH] of/fdt: Remove PPC32 longtrail hack in memory scan
From: Rob Herring @ 2018-07-30 17:19 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: devicetree, Frank Rowand, Paul Mackerras, linuxppc-dev
In-Reply-To: <87in4xhufu.fsf@concordia.ellerman.id.au>

On Mon, Jul 30, 2018 at 4:47 AM Michael Ellerman <mpe@ellerman.id.au> wrote:
>
> Rob Herring <robh+dt@kernel.org> writes:
> > On Thu, Jul 26, 2018 at 11:36 PM Michael Ellerman <mpe@ellerman.id.au> wrote:
> >> When the OF code was originally made common by Grant in commit
> >> 51975db0b733 ("of/flattree: merge early_init_dt_scan_memory() common
> >> code") (Feb 2010), the common code inherited a hack to handle
> >> PPC "longtrail" machines, which had a "memory@0" node with no
> >> device_type.
> >>
> >> That check was then made to only apply to PPC32 in b44aa25d20e2 ("of:
> >> Handle memory@0 node on PPC32 only") (May 2014).
> >>
> >> But according to Paul Mackerras the "longtrail" machines are long
> >> dead, if they were ever seen in the wild at all. If someone does still
> >> have one, we can handle this firmware wart in powerpc platform code.
> >>
> >> So remove the hack once and for all.
> >
> > Yay. I guess Power Macs and other quirks will never die...
>
> Not soon.
>
> In base.c I see:
>  - the hack in arch_find_n_match_cpu_physical_id()
>    - we should just move that into arch code, it's a __weak arch hook
>      after all.

Except then we'd have to export __of_find_n_match_cpu_property. I
somewhat prefer it like it is because arch specific functions tend to
encourage duplication. But if the implementation is completely
different like sparc, then yes, a separate implementation makes sense.

>  - a PPC hack in of_alias_scan(), I guess we need to retain that
>    behaviour, but it's pretty minor anyway.

It would be nice to know what platform(s) needs this as I don't have a
clue. It would also be nice if I had some dumps of DTs for some of
these OpenFirmware systems.

Rob

^ permalink raw reply

* [GIT PULL 0/5] perf/urgent fixes
From: Arnaldo Carvalho de Melo @ 2018-07-30 20:50 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Clark Williams, linux-kernel, linux-perf-users,
	Arnaldo Carvalho de Melo, Adrian Hunter, Alexander Shishkin,
	Breno Leitao, Daniel Borkmann, Dan Williams, David Ahern,
	Hendrik Brueckner, Jiri Olsa, Josh Poimboeuf, linuxppc-dev,
	Michael Ellerman, Mika Penttilä, Namhyung Kim,
	Peter Zijlstra, Prashant Bhole, Ravi Bangoria, Stephane Eranian,
	Thomas Gleixner, Thomas Richter, Tony Luck, Vince Weaver,
	Wang Nan, Arnaldo Carvalho de Melo

Hi Ingo,

	Please consider pulling, just to get the build without warnings
and finishing successfully in all my test environments,

- Arnaldo

Test results at the end of this message, as usual.

The following changes since commit 7f635ff187ab6be0b350b3ec06791e376af238ab:

  perf/core: Fix crash when using HW tracing kernel filters (2018-07-25 11:46:22 +0200)

are available in the Git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux.git tags/perf-urgent-for-mingo-4.18-20180730

for you to fetch changes up to 44fe619b1418ff4e9d2f9518a940fbe2fb686a08:

  perf tools: Fix the build on the alpine:edge distro (2018-07-30 13:15:03 -0300)

----------------------------------------------------------------
perf/urgent fixes: (Arnaldo Carvalho de Melo)

- Update the tools copy of several files, including perf_event.h,
  powerpc's asm/unistd.h (new io_pgetevents syscall), bpf.h and
  x86's memcpy_64.s (used in 'perf bench mem'), silencing the
  respective warnings during the perf tools build.

- Fix the build on the alpine:edge distro.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

----------------------------------------------------------------
Arnaldo Carvalho de Melo (5):
      tools headers uapi: Update tools's copy of linux/perf_event.h
      tools headers powerpc: Update asm/unistd.h copy to pick new
      tools headers uapi: Refresh linux/bpf.h copy
      tools arch: Update arch/x86/lib/memcpy_64.S copy used in 'perf bench mem memcpy'
      perf tools: Fix the build on the alpine:edge distro

 tools/arch/powerpc/include/uapi/asm/unistd.h |   1 +
 tools/arch/x86/include/asm/mcsafe_test.h     |  13 ++++
 tools/arch/x86/lib/memcpy_64.S               | 112 +++++++++++++--------------
 tools/include/uapi/linux/bpf.h               |  28 +++++--
 tools/include/uapi/linux/perf_event.h        |   2 +
 tools/perf/arch/x86/util/pmu.c               |   1 +
 tools/perf/arch/x86/util/tsc.c               |   1 +
 tools/perf/bench/Build                       |   1 +
 tools/perf/bench/mem-memcpy-x86-64-asm.S     |   1 +
 tools/perf/bench/mem-memcpy-x86-64-lib.c     |  24 ++++++
 tools/perf/perf.h                            |   1 +
 tools/perf/util/header.h                     |   1 +
 tools/perf/util/namespaces.h                 |   1 +
 13 files changed, 124 insertions(+), 63 deletions(-)
 create mode 100644 tools/arch/x86/include/asm/mcsafe_test.h
 create mode 100644 tools/perf/bench/mem-memcpy-x86-64-lib.c

Test results:

The first ones are container (docker) based builds of tools/perf with
and without libelf support.  Where clang is available, it is also used
to build perf with/without libelf, and building with LIBCLANGLLVM=1
(built-in clang) with gcc and clang when clang and its devel libraries
are installed.

The objtool and samples/bpf/ builds are disabled now that I'm switching from
using the sources in a local volume to fetching them from a http server to
build it inside the container, to make it easier to build in a container cluster.
Those will come back later.

Several are cross builds, the ones with -x-ARCH and the android one, and those
may not have all the features built, due to lack of multi-arch devel packages,
available and being used so far on just a few, like
debian:experimental-x-{arm64,mipsel}.

The 'perf test' one will perform a variety of tests exercising
tools/perf/util/, tools/lib/{bpf,traceevent,etc}, as well as run perf commands
with a variety of command line event specifications to then intercept the
sys_perf_event syscall to check that the perf_event_attr fields are set up as
expected, among a variety of other unit tests.

Then there is the 'make -C tools/perf build-test' ones, that build tools/perf/
with a variety of feature sets, exercising the build with an incomplete set of
features as well as with a complete one. It is planned to have it run on each
of the containers mentioned above, using some container orchestration
infrastructure. Get in contact if interested in helping having this in place.

  # dm
   1 alpine:3.4                    : Ok   gcc (Alpine 5.3.0) 5.3.0
   2 alpine:3.5                    : Ok   gcc (Alpine 6.2.1) 6.2.1 20160822
   3 alpine:3.6                    : Ok   gcc (Alpine 6.3.0) 6.3.0
   4 alpine:3.7                    : Ok   gcc (Alpine 6.4.0) 6.4.0
   5 alpine:edge                   : Ok   gcc (Alpine 6.4.0) 6.4.0
   6 amazonlinux:1                 : Ok   gcc (GCC) 4.8.5 20150623 (Red Hat 4.8.5-28)
   7 amazonlinux:2                 : Ok   gcc (GCC) 7.3.1 20180303 (Red Hat 7.3.1-5)
   8 android-ndk:r12b-arm          : Ok   arm-linux-androideabi-gcc (GCC) 4.9.x 20150123 (prerelease)
   9 android-ndk:r15c-arm          : Ok   arm-linux-androideabi-gcc (GCC) 4.9.x 20150123 (prerelease)
  10 centos:5                      : Ok   gcc (GCC) 4.1.2 20080704 (Red Hat 4.1.2-55)
  11 centos:6                      : Ok   gcc (GCC) 4.4.7 20120313 (Red Hat 4.4.7-18)
  12 centos:7                      : Ok   gcc (GCC) 4.8.5 20150623 (Red Hat 4.8.5-28)
  13 debian:7                      : Ok   gcc (Debian 4.7.2-5) 4.7.2
  14 debian:8                      : Ok   gcc (Debian 4.9.2-10+deb8u1) 4.9.2
  15 debian:9                      : Ok   gcc (Debian 6.3.0-18+deb9u1) 6.3.0 20170516
  16 debian:experimental           : Ok   gcc (Debian 8.2.0-1) 8.2.0
  17 debian:experimental-x-arm64   : Ok   aarch64-linux-gnu-gcc (Debian 8.1.0-12) 8.1.0
  18 debian:experimental-x-mips    : Ok   mips-linux-gnu-gcc (Debian 8.1.0-12) 8.1.0
  19 debian:experimental-x-mips64  : Ok   mips64-linux-gnuabi64-gcc (Debian 7.3.0-18) 7.3.0
  20 debian:experimental-x-mipsel  : Ok   mipsel-linux-gnu-gcc (Debian 8.1.0-12) 8.1.0
  21 fedora:20                     : Ok   gcc (GCC) 4.8.3 20140911 (Red Hat 4.8.3-7)
  22 fedora:21                     : Ok   gcc (GCC) 4.9.2 20150212 (Red Hat 4.9.2-6)
  23 fedora:22                     : Ok   gcc (GCC) 5.3.1 20160406 (Red Hat 5.3.1-6)
  24 fedora:23                     : Ok   gcc (GCC) 5.3.1 20160406 (Red Hat 5.3.1-6)
  25 fedora:24                     : Ok   gcc (GCC) 6.3.1 20161221 (Red Hat 6.3.1-1)
  26 fedora:24-x-ARC-uClibc        : Ok   arc-linux-gcc (ARCompact ISA Linux uClibc toolchain 2017.09-rc2) 7.1.1 20170710
  27 fedora:25                     : Ok   gcc (GCC) 6.4.1 20170727 (Red Hat 6.4.1-1)
  28 fedora:26                     : Ok   gcc (GCC) 7.3.1 20180130 (Red Hat 7.3.1-2)
  29 fedora:27                     : Ok   gcc (GCC) 7.3.1 20180303 (Red Hat 7.3.1-5)
  30 fedora:28                     : Ok   gcc (GCC) 8.1.1 20180502 (Red Hat 8.1.1-1)
  31 fedora:rawhide                : Ok   gcc (GCC) 8.0.1 20180324 (Red Hat 8.0.1-0.20)
  32 gentoo-stage3-amd64:latest    : Ok   gcc (Gentoo 7.3.0-r3 p1.4) 7.3.0
  33 mageia:5                      : Ok   gcc (GCC) 4.9.2
  34 mageia:6                      : Ok   gcc (Mageia 5.5.0-1.mga6) 5.5.0
  35 opensuse:42.1                 : Ok   gcc (SUSE Linux) 4.8.5
  36 opensuse:42.2                 : Ok   gcc (SUSE Linux) 4.8.5
  37 opensuse:42.3                 : Ok   gcc (SUSE Linux) 4.8.5
  38 opensuse:tumbleweed           : Ok   gcc (SUSE Linux) 7.3.1 20180323 [gcc-7-branch revision 258812]
  39 oraclelinux:6                 : Ok   gcc (GCC) 4.4.7 20120313 (Red Hat 4.4.7-23.0.1)
  40 oraclelinux:7                 : Ok   gcc (GCC) 4.8.5 20150623 (Red Hat 4.8.5-28.0.1)
  41 ubuntu:12.04.5                : Ok   gcc (Ubuntu/Linaro 4.6.3-1ubuntu5) 4.6.3
  42 ubuntu:14.04.4                : Ok   gcc (Ubuntu 4.8.4-2ubuntu1~14.04.3) 4.8.4
  43 ubuntu:14.04.4-x-linaro-arm64 : Ok   aarch64-linux-gnu-gcc (Linaro GCC 5.5-2017.10) 5.5.0
  44 ubuntu:16.04                  : Ok   gcc (Ubuntu 5.4.0-6ubuntu1~16.04.10) 5.4.0 20160609
  45 ubuntu:16.04-x-arm            : Ok   arm-linux-gnueabihf-gcc (Ubuntu/Linaro 5.4.0-6ubuntu1~16.04.9) 5.4.0 20160609
  46 ubuntu:16.04-x-arm64          : Ok   aarch64-linux-gnu-gcc (Ubuntu/Linaro 5.4.0-6ubuntu1~16.04.9) 5.4.0 20160609
  47 ubuntu:16.04-x-powerpc        : Ok   powerpc-linux-gnu-gcc (Ubuntu 5.4.0-6ubuntu1~16.04.9) 5.4.0 20160609
  48 ubuntu:16.04-x-powerpc64      : Ok   powerpc64-linux-gnu-gcc (Ubuntu/IBM 5.4.0-6ubuntu1~16.04.9) 5.4.0 20
  49 ubuntu:16.04-x-powerpc64el    : Ok   powerpc64le-linux-gnu-gcc (Ubuntu/IBM 5.4.0-6ubuntu1~16.04.9) 5.4.0 20160609
  50 ubuntu:16.04-x-s390           : Ok   s390x-linux-gnu-gcc (Ubuntu 5.4.0-6ubuntu1~16.04.9) 5.4.0 20160609
  51 ubuntu:16.10                  : Ok   gcc (Ubuntu 6.2.0-5ubuntu12) 6.2.0 20161005
  52 ubuntu:17.04                  : Ok   gcc (Ubuntu 6.3.0-12ubuntu2) 6.3.0 20170406
  53 ubuntu:17.10                  : Ok   gcc (Ubuntu 7.2.0-8ubuntu3.2) 7.2.0
  54 ubuntu:18.04                  : Ok   gcc (Ubuntu 7.3.0-16ubuntu3) 7.3.0
  #

  # uname -a
  Linux seventh 4.18.0-rc7 #3 SMP Mon Jul 30 11:37:40 -03 2018 x86_64 x86_64 x86_64 GNU/Linux
  # git log --oneline -1
  44fe619b1418 (HEAD -> perf/urgent) perf tools: Fix the build on the alpine:edge distro
  # perf version --build-options
  perf version 4.18.rc6.g44fe61
                   dwarf: [ on  ]  # HAVE_DWARF_SUPPORT
      dwarf_getlocations: [ on  ]  # HAVE_DWARF_GETLOCATIONS_SUPPORT
                   glibc: [ on  ]  # HAVE_GLIBC_SUPPORT
                    gtk2: [ on  ]  # HAVE_GTK2_SUPPORT
           syscall_table: [ on  ]  # HAVE_SYSCALL_TABLE_SUPPORT
                  libbfd: [ on  ]  # HAVE_LIBBFD_SUPPORT
                  libelf: [ on  ]  # HAVE_LIBELF_SUPPORT
                 libnuma: [ on  ]  # HAVE_LIBNUMA_SUPPORT
  numa_num_possible_cpus: [ on  ]  # HAVE_LIBNUMA_SUPPORT
                 libperl: [ on  ]  # HAVE_LIBPERL_SUPPORT
               libpython: [ on  ]  # HAVE_LIBPYTHON_SUPPORT
                libslang: [ on  ]  # HAVE_SLANG_SUPPORT
               libcrypto: [ on  ]  # HAVE_LIBCRYPTO_SUPPORT
               libunwind: [ on  ]  # HAVE_LIBUNWIND_SUPPORT
      libdw-dwarf-unwind: [ on  ]  # HAVE_DWARF_SUPPORT
                    zlib: [ on  ]  # HAVE_ZLIB_SUPPORT
                    lzma: [ on  ]  # HAVE_LZMA_SUPPORT
               get_cpuid: [ on  ]  # HAVE_AUXTRACE_SUPPORT
                     bpf: [ on  ]  # HAVE_LIBBPF_SUPPORT
  # perf test
   1: vmlinux symtab matches kallsyms                       : Ok
   2: Detect openat syscall event                           : Ok
   3: Detect openat syscall event on all cpus               : Ok
   4: Read samples using the mmap interface                 : Ok
   5: Test data source output                               : Ok
   6: Parse event definition strings                        : Ok
   7: Simple expression parser                              : Ok
   8: PERF_RECORD_* events & perf_sample fields             : Ok
   9: Parse perf pmu format                                 : Ok
  10: DSO data read                                         : Ok
  11: DSO data cache                                        : Ok
  12: DSO data reopen                                       : Ok
  13: Roundtrip evsel->name                                 : Ok
  14: Parse sched tracepoints fields                        : Ok
  15: syscalls:sys_enter_openat event fields                : Ok
  16: Setup struct perf_event_attr                          : Ok
  17: Match and link multiple hists                         : Ok
  18: 'import perf' in python                               : Ok
  19: Breakpoint overflow signal handler                    : Ok
  20: Breakpoint overflow sampling                          : Ok
  21: Breakpoint accounting                                 : Ok
  22: Number of exit events of a simple workload            : Ok
  23: Software clock events period values                   : Ok
  24: Object code reading                                   : Ok
  25: Sample parsing                                        : Ok
  26: Use a dummy software event to keep tracking           : Ok
  27: Parse with no sample_id_all bit set                   : Ok
  28: Filter hist entries                                   : Ok
  29: Lookup mmap thread                                    : Ok
  30: Share thread mg                                       : Ok
  31: Sort output of hist entries                           : Ok
  32: Cumulate child hist entries                           : Ok
  33: Track with sched_switch                               : Ok
  34: Filter fds with revents mask in a fdarray             : Ok
  35: Add fd to a fdarray, making it autogrow               : Ok
  36: kmod_path__parse                                      : Ok
  37: Thread map                                            : Ok
  38: LLVM search and compile                               :
  38.1: Basic BPF llvm compile                              : Ok
  38.2: kbuild searching                                    : Ok
  38.3: Compile source for BPF prologue generation          : Ok
  38.4: Compile source for BPF relocation                   : Ok
  39: Session topology                                      : Ok
  40: BPF filter                                            :
  40.1: Basic BPF filtering                                 : Ok
  40.2: BPF pinning                                         : Ok
  40.3: BPF prologue generation                             : Ok
  40.4: BPF relocation checker                              : Ok
  41: Synthesize thread map                                 : Ok
  42: Remove thread map                                     : Ok
  43: Synthesize cpu map                                    : Ok
  44: Synthesize stat config                                : Ok
  45: Synthesize stat                                       : Ok
  46: Synthesize stat round                                 : Ok
  47: Synthesize attr update                                : Ok
  48: Event times                                           : Ok
  49: Read backward ring buffer                             : Ok
  50: Print cpu map                                         : Ok
  51: Probe SDT events                                      : Ok
  52: is_printable_array                                    : Ok
  53: Print bitmap                                          : Ok
  54: perf hooks                                            : Ok
  55: builtin clang support                                 : Skip (not compiled in)
  56: unit_number__scnprintf                                : Ok
  57: mem2node                                              : Ok
  58: x86 rdpmc                                             : Ok
  59: Convert perf time to TSC                              : Ok
  60: DWARF unwind                                          : Ok
  61: x86 instruction decoder - new instructions            : Ok
  62: probe libc's inet_pton & backtrace it with ping       : Ok
  63: Check open filename arg using perf trace + vfs_getname: Ok
  64: Use vfs_getname probe to get syscall args filenames   : Ok
  65: Add vfs_getname probe to get syscall args filenames   : Ok
  # 

  $ make -C tools/perf build-test
  make: Entering directory '/home/acme/git/perf/tools/perf'
  - tarpkg: ./tests/perf-targz-src-pkg .
             make_no_scripts_O: make NO_LIBPYTHON=1 NO_LIBPERL=1
                 make_static_O: make LDFLAGS=-static
   make_install_prefix_slash_O: make install prefix=/tmp/krava/
        make_with_babeltrace_O: make LIBBABELTRACE=1
             make_no_libnuma_O: make NO_LIBNUMA=1
           make_no_libpython_O: make NO_LIBPYTHON=1
                make_install_O: make install
           make_no_libbionic_O: make NO_LIBBIONIC=1
            make_no_auxtrace_O: make NO_AUXTRACE=1
              make_no_libelf_O: make NO_LIBELF=1
                make_minimal_O: make NO_LIBPERL=1 NO_LIBPYTHON=1 NO_NEWT=1 NO_GTK2=1 NO_DEMANGLE=1 NO_LIBELF=1 NO_LIBUNWIND=1 NO_BACKTRACE=1 NO_LIBNUMA=1 NO_LIBAUDIT=1 NO_LIBBIONIC=1 NO_LIBDW_DWARF_UNWIND=1 NO_AUXTRACE=1 NO_LIBBPF=1 NO_LIBCRYPTO=1 NO_SDT=1 NO_JVMTI=1
           make_no_libunwind_O: make NO_LIBUNWIND=1
         make_with_clangllvm_O: make LIBCLANGLLVM=1
                  make_no_ui_O: make NO_NEWT=1 NO_SLANG=1 NO_GTK2=1
                   make_help_O: make help
                    make_doc_O: make doc
              make_no_libbpf_O: make NO_LIBBPF=1
           make_no_backtrace_O: make NO_BACKTRACE=1
                 make_cscope_O: make cscope
            make_no_libaudit_O: make NO_LIBAUDIT=1
            make_install_bin_O: make install-bin
                make_no_newt_O: make NO_NEWT=1
            make_no_demangle_O: make NO_DEMANGLE=1
                   make_tags_O: make tags
                 make_perf_o_O: make perf.o
       make_util_pmu_bison_o_O: make util/pmu-bison.o
                  make_debug_O: make DEBUG=1
                   make_pure_O: make
               make_no_slang_O: make NO_SLANG=1
                make_no_gtk2_O: make NO_GTK2=1
              make_clean_all_O: make clean all
             make_no_libperl_O: make NO_LIBPERL=1
             make_util_map_o_O: make util/map.o
         make_install_prefix_O: make install prefix=/tmp/krava
  make_no_libdw_dwarf_unwind_O: make NO_LIBDW_DWARF_UNWIND=1
  OK
  make: Leaving directory '/home/acme/git/perf/tools/perf'
  $

^ permalink raw reply

* [PATCH 2/5] tools headers powerpc: Update asm/unistd.h copy to pick new
From: Arnaldo Carvalho de Melo @ 2018-07-30 20:50 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Clark Williams, linux-kernel, linux-perf-users,
	Arnaldo Carvalho de Melo, Alexander Shishkin, Breno Leitao,
	Hendrik Brueckner, Jiri Olsa, linuxppc-dev, Michael Ellerman,
	Namhyung Kim, Ravi Bangoria, Thomas Richter
In-Reply-To: <20180730205031.12853-1-acme@kernel.org>

From: Arnaldo Carvalho de Melo <acme@redhat.com>

The new 'io_pgetevents' syscall was wired up in PowerPC in the following
cset:

  b2f82565f2ca ("powerpc: Wire up io_pgetevents")

Update tools/arch/powerpc/ copy of the asm/unistd.h file so that 'perf
trace' on PowerPC gets it in its syscall table.

This elliminated the following perf build warning:

  Warning: Kernel ABI header at 'tools/arch/powerpc/include/uapi/asm/unistd.h' differs from latest version at 'arch/powerpc/include/uapi/asm/unistd.h'

Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Breno Leitao <leitao@debian.org>
Cc: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: linuxppc-dev@lists.ozlabs.org
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
Cc: Thomas Richter <tmricht@linux.vnet.ibm.com>
Link: https://lkml.kernel.org/n/tip-9uvu7tz4ud3bxxfyxwryuz47@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/arch/powerpc/include/uapi/asm/unistd.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/arch/powerpc/include/uapi/asm/unistd.h b/tools/arch/powerpc/include/uapi/asm/unistd.h
index ac5ba55066dd..985534d0b448 100644
--- a/tools/arch/powerpc/include/uapi/asm/unistd.h
+++ b/tools/arch/powerpc/include/uapi/asm/unistd.h
@@ -399,5 +399,6 @@
 #define __NR_pkey_free		385
 #define __NR_pkey_mprotect	386
 #define __NR_rseq		387
+#define __NR_io_pgetevents	388
 
 #endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */
-- 
2.14.4

^ permalink raw reply related

* Re: [PATCH] powerpc/mobility: Fix node detach/rename problem
From: Michael Bringmann @ 2018-07-30 21:02 UTC (permalink / raw)
  To: linuxppc-dev
In-Reply-To: <8736w1jkte.fsf@concordia.ellerman.id.au>

See below.

On 07/30/2018 01:31 AM, Michael Ellerman wrote:
> Michael Bringmann <mwb@linux.vnet.ibm.com> writes:
> 
>> During LPAR migration, the content of the device tree/sysfs may
>> be updated including deletion and replacement of nodes in the
>> tree.  When nodes are added to the internal node structures, they
>> are appended in FIFO order to a list of nodes maintained by the
>> OF code APIs.
> 
> That hasn't been true for several years. The data structure is an n-ary
> tree. What kernel version are you working on?

Sorry for an error in my description.  I oversimplified based on the
name of a search iterator.  Let me try to provide a better explanation
of the problem, here.

This is the problem.  The PPC mobility code receives RTAS requests to
delete nodes with platform-/hardware-specific attributes when restarting
the kernel after a migration.  My example is for migration between a
P8 Alpine and a P8 Brazos.   Nodes to be deleted may include 'ibm,random-v1',
'ibm,compression-v1', 'ibm,platform-facilities', 'ibm,sym-encryption-v1',
or others.

The mobility.c code calls 'of_detach_node' for the nodes and their children.
This makes calls to detach the properties and to try to remove the associated
sysfs/kernfs files.

Then new copies of the same nodes are next provided by the PHYP, local
copies are built, and a pointer to the 'struct device_node' is passed to
of_attach_node.  Before the call to of_attach_node, the phandle is initialized
to 0 when the data structure is alloced.  During the call to of_attach_node,
it calls __of_attach_node which pulls the actual name and phandle from just
created sub-properties named something like 'name' and 'ibm,phandle'.

This is all fine for the first migration.  The problem occurs with the
second and subsequent migrations when the PHYP on the new system wants to
replace the same set of nodes again, referenced with the same names and
phandle values.

> 
>> When nodes are removed from the device tree, they
>> are marked OF_DETACHED, but not actually deleted from the system
>> to allow for pointers cached elsewhere in the kernel.  The order
>> and content of the entries in the list of nodes is not altered,
>> though.
> 
> Something is going wrong if this is actually happening.
> 
> When the node is detached it should be *detached* from the tree of all
> nodes, so it should not be discoverable other than by having an existing
> pointer to it.
On the second and subsequent migrations, the PHYP tells the system
to again delete the nodes 'ibm,platform-facilities', 'ibm,random-v1',
'ibm,compression-v1', 'ibm,sym-encryption-v1'.  It specifies these
nodes by its known set of phandle values -- the same handles used
by the PHYP on the source system are known on the target system.
The mobility.c code calls of_find_node_by_phandle() with these values
and ends up locating the first instance of each node that was added
during the original boot, instead of the second instance of each node
created after the first migration.  The detach during the second
migration fails with errors like,

[ 4565.030704] WARNING: CPU: 3 PID: 4787 at drivers/of/dynamic.c:252 __of_detach_node+0x8/0xa0
[ 4565.030708] Modules linked in: nfsv3 nfs_acl nfs tcp_diag udp_diag inet_diag unix_diag af_packet_diag netlink_diag lockd grace fscache sunrpc xts vmx_crypto sg pseries_rng binfmt_misc ip_tables xfs libcrc32c sd_mod ibmveth ibmvscsi scsi_transport_srp dm_mirror dm_region_hash dm_log dm_mod
[ 4565.030733] CPU: 3 PID: 4787 Comm: drmgr Tainted: G        W         4.18.0-rc1-wi107836-v05-120+ #201
[ 4565.030737] NIP:  c0000000007c1ea8 LR: c0000000007c1fb4 CTR: 0000000000655170
[ 4565.030741] REGS: c0000003f302b690 TRAP: 0700   Tainted: G        W          (4.18.0-rc1-wi107836-v05-120+)
[ 4565.030745] MSR:  800000010282b033 <SF,VEC,VSX,EE,FP,ME,IR,DR,RI,LE,TM[E]>  CR: 22288822  XER: 0000000a
[ 4565.030757] CFAR: c0000000007c1fb0 IRQMASK: 1
[ 4565.030757] GPR00: c0000000007c1fa4 c0000003f302b910 c00000000114bf00 c0000003ffff8e68
[ 4565.030757] GPR04: 0000000000000001 ffffffffffffffff 800000c008e0b4b8 ffffffffffffffff
[ 4565.030757] GPR08: 0000000000000000 0000000000000001 0000000080000003 0000000000002843
[ 4565.030757] GPR12: 0000000000008800 c00000001ec9ae00 0000000040000000 0000000000000000
[ 4565.030757] GPR16: 0000000000000000 0000000000000008 0000000000000000 00000000f6ffffff
[ 4565.030757] GPR20: 0000000000000007 0000000000000000 c0000003e9f1f034 0000000000000001
[ 4565.030757] GPR24: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
[ 4565.030757] GPR28: c000000001549d28 c000000001134828 c0000003ffff8e68 c0000003f302b930
[ 4565.030804] NIP [c0000000007c1ea8] __of_detach_node+0x8/0xa0
[ 4565.030808] LR [c0000000007c1fb4] of_detach_node+0x74/0xd0
[ 4565.030811] Call Trace:
[ 4565.030815] [c0000003f302b910] [c0000000007c1fa4] of_detach_node+0x64/0xd0 (unreliable)
[ 4565.030821] [c0000003f302b980] [c0000000000c33c4] dlpar_detach_node+0xb4/0x150
[ 4565.030826] [c0000003f302ba10] [c0000000000c3ffc] delete_dt_node+0x3c/0x80
[ 4565.030831] [c0000003f302ba40] [c0000000000c4380] pseries_devicetree_update+0x150/0x4f0
[ 4565.030836] [c0000003f302bb70] [c0000000000c479c] post_mobility_fixup+0x7c/0xf0
[ 4565.030841] [c0000003f302bbe0] [c0000000000c4908] migration_store+0xf8/0x130
[ 4565.030847] [c0000003f302bc70] [c000000000998160] kobj_attr_store+0x30/0x60
[ 4565.030852] [c0000003f302bc90] [c000000000412f14] sysfs_kf_write+0x64/0xa0
[ 4565.030857] [c0000003f302bcb0] [c000000000411cac] kernfs_fop_write+0x16c/0x240
[ 4565.030862] [c0000003f302bd00] [c000000000355f20] __vfs_write+0x40/0x220
[ 4565.030867] [c0000003f302bd90] [c000000000356358] vfs_write+0xc8/0x240
[ 4565.030872] [c0000003f302bde0] [c0000000003566cc] ksys_write+0x5c/0x100
[ 4565.030880] [c0000003f302be30] [c00000000000b288] system_call+0x5c/0x70
[ 4565.030884] Instruction dump:
[ 4565.030887] 38210070 38600000 e8010010 eb61ffd8 eb81ffe0 eba1ffe8 ebc1fff0 ebe1fff8
[ 4565.030895] 7c0803a6 4e800020 e9230098 7929f7e2 <0b090000> 2f890000 4cde0020 e9030040
[ 4565.030903] ---[ end trace 5bd54cb1df9d2976 ]---

The mobility.c code continues on during the second migration, accepts the
definitions of the new nodes from the PHYP and ends up renaming the new
properties e.g.

[ 4565.827296] Duplicate name in base, renamed to "ibm,platform-facilities#1"

I don't see any check like 'of_node_check_flag(np, OF_DETACHED)' within
of_find_node_by_phandle to skip nodes that are detached, but still present
due to caching or use count considerations.  Another possibility to consider
is that of_find_node_by_phandle also uses something called 'phandle_cache'
which may have outdated data as of_detach_node() does not have access to
that cache for the 'OF_DETACHED' nodes.

>
> That's what __of_detach_node() does:
> 
> 	parent = np->parent;
> 	if (WARN_ON(!parent))
> 		return;
> 
> 	if (parent->child == np)
> 		parent->child = np->sibling;
> 	else {
> 		struct device_node *prevsib;
> 		for (prevsib = np->parent->child;
> 		     prevsib->sibling != np;
> 		     prevsib = prevsib->sibling)
> 			;
> 		prevsib->sibling = np->sibling;
> 	}
> 
> ie. the node must already have a NULL parent, and then it is spliced out
> of its parent's child list.
> 
> 
> Please give us more info so we can work out what's actually happening.

I duplicated the problem in the 4.18 raw kernel on the second and subsequent
legs of migration of an LPAR back and forth between an Alpine to a Brazos.
Again, entries like  'ibm,random-v1', 'ibm,compression-v1',
'ibm,platform-facilities', 'ibm,sym-encryption-v1', yield errors as described
above.

My patch was attempting to resolve the issue wholly within the powerpc code,
at this time.

> 
> cheers
> 

Michael

-- 
Michael W. Bringmann
Linux Technology Center
IBM Corporation
Tie-Line  363-5196
External: (512) 286-5196
Cell:       (512) 466-0650
mwb@linux.vnet.ibm.com

^ permalink raw reply

* Re: [PATCH] PCI: call dma_debug_add_bus for pci_bus_type in common code
From: Bjorn Helgaas @ 2018-07-30 21:17 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: linux-pci, iommu, linuxppc-dev, x86, linux-sh, linux-kernel,
	Joerg Roedel
In-Reply-To: <20180730073842.16092-1-hch@lst.de>

[+cc Joerg]

On Mon, Jul 30, 2018 at 09:38:42AM +0200, Christoph Hellwig wrote:
> There is nothing arch specific about PCI or dma-debug, so move this
> call to common code just after registering the bus type.

I assume that previously, even if the user set CONFIG_DMA_API_DEBUG=y
we only got PCI DMA debug on powerpc, sh, and x86.  And after this
patch, we'll get PCI DMA debug on *all* arches?

If that's true, I'll add a comment to that effect to the commitlog
since that new functionality might be of interest to other arches.

> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  arch/powerpc/kernel/dma.c | 3 ---
>  arch/sh/drivers/pci/pci.c | 2 --
>  arch/x86/kernel/pci-dma.c | 3 ---
>  drivers/pci/pci-driver.c  | 2 +-
>  4 files changed, 1 insertion(+), 9 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c
> index 155170d70324..dbfc7056d7df 100644
> --- a/arch/powerpc/kernel/dma.c
> +++ b/arch/powerpc/kernel/dma.c
> @@ -357,9 +357,6 @@ EXPORT_SYMBOL_GPL(dma_get_required_mask);
>  
>  static int __init dma_init(void)
>  {
> -#ifdef CONFIG_PCI
> -	dma_debug_add_bus(&pci_bus_type);
> -#endif
>  #ifdef CONFIG_IBMVIO
>  	dma_debug_add_bus(&vio_bus_type);
>  #endif
> diff --git a/arch/sh/drivers/pci/pci.c b/arch/sh/drivers/pci/pci.c
> index e5b7437ab4af..8256626bc53c 100644
> --- a/arch/sh/drivers/pci/pci.c
> +++ b/arch/sh/drivers/pci/pci.c
> @@ -160,8 +160,6 @@ static int __init pcibios_init(void)
>  	for (hose = hose_head; hose; hose = hose->next)
>  		pcibios_scanbus(hose);
>  
> -	dma_debug_add_bus(&pci_bus_type);
> -
>  	pci_initialized = 1;
>  
>  	return 0;
> diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
> index ab5d9dd668d2..43f58632f123 100644
> --- a/arch/x86/kernel/pci-dma.c
> +++ b/arch/x86/kernel/pci-dma.c
> @@ -155,9 +155,6 @@ static int __init pci_iommu_init(void)
>  {
>  	struct iommu_table_entry *p;
>  
> -#ifdef CONFIG_PCI
> -	dma_debug_add_bus(&pci_bus_type);
> -#endif
>  	x86_init.iommu.iommu_init();
>  
>  	for (p = __iommu_table; p < __iommu_table_end; p++) {
> diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
> index 6792292b5fc7..bef17c3fca67 100644
> --- a/drivers/pci/pci-driver.c
> +++ b/drivers/pci/pci-driver.c
> @@ -1668,7 +1668,7 @@ static int __init pci_driver_init(void)
>  	if (ret)
>  		return ret;
>  #endif
> -
> +	dma_debug_add_bus(&pci_bus_type);
>  	return 0;
>  }
>  postcore_initcall(pci_driver_init);
> -- 
> 2.18.0
> 

^ permalink raw reply

* Re: [PATCH v3 1/1] powerpc/pseries: fix EEH recovery of some IOV devices
From: Bjorn Helgaas @ 2018-07-30 21:21 UTC (permalink / raw)
  To: Sam Bobroff; +Cc: linuxppc-dev, linux-pci, mpe, bhelgaas, bryantly
In-Reply-To: <e63eb03c87a1a54257aa2bcc384cf07761824a7e.1532915951.git.sbobroff@linux.ibm.com>

On Mon, Jul 30, 2018 at 11:59:14AM +1000, Sam Bobroff wrote:
> EEH recovery currently fails on pSeries for some IOV capable PCI
> devices, if CONFIG_PCI_IOV is on and the hypervisor doesn't provide
> certain device tree properties for the device. (Found on an IOV
> capable device using the ipr driver.)
> 
> Recovery fails in pci_enable_resources() at the check on r->parent,
> because r->flags is set and r->parent is not.  This state is due to
> sriov_init() setting the start, end and flags members of the IOV BARs
> but the parent not being set later in
> pseries_pci_fixup_iov_resources(), because the
> "ibm,open-sriov-vf-bar-info" property is missing.
> 
> Correct this by zeroing the resource flags for IOV BARs when they
> can't be configured (this is the same method used by sriov_init() and
> __pci_read_base()).
> 
> VFs cleared this way can't be enabled later, because that requires
> another device tree property, "ibm,number-of-configurable-vfs" as well
> as support for the RTAS function "ibm_map_pes". These are all part of
> hypervisor support for IOV and it seems unlikely that a hypervisor
> would ever partially, but not fully, support it. (None are currently
> provided by QEMU/KVM.)
> 
> Signed-off-by: Sam Bobroff <sbobroff@linux.ibm.com>

Michael, I assume you'll take this, since it only touches powerpc.
Let me know if you need anything from me.

> ---
> Hi,
> 
> This is a fix to allow EEH recovery to succeed in a specific situation,
> which I've tried to explain in the commit message.
> 
> As with the RFC version, the IOV BARs are disabled by setting the resource
> flags to 0 but the other fields are now left as-is because that is what is done
> elsewhere (see sriov_init() and __pci_read_base()).
> 
> I've also examined the concern raised by Bjorn Helgaas, that VFs could be
> enabled later after the BARs are disabled, and it already seems safe: enabling
> VFs (on pseries) depends on another device tree property,
> "ibm,number-of-configurable-vfs" as well as support for the RTAS function
> "ibm_map_pes". Since these are all part of the hypervisor's support for IOV it
> seems unlikely that we would ever see some of them but not all. (None are
> currently provided by QEMU/KVM.) (Additionally, the ipr driver on which the EEH
> recovery failure was discovered doesn't even seem to have SR-IOV support so it
> certainly can't enable VFs.)
> 
> Cheers,
> Sam.
> 
> Patch set v3:
> Patch 1/1: powerpc/pseries: fix EEH recovery of some IOV devices
> * Moved some useful information from the cover letter to the commit log.
> 
> Patch set v2:
> Patch 1/1: powerpc/pseries: fix EEH recovery of some IOV devices
> * Moved the BAR disabling code to a function.
> * Also check in pseries_pci_fixup_resources().
> 
> Patch set v1:
> Patch 1/1: powerpc/pseries: fix EEH recovery of IOV devices
> 
>  arch/powerpc/platforms/pseries/setup.c | 25 +++++++++++++++++--------
>  1 file changed, 17 insertions(+), 8 deletions(-)
> 
> diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
> index b55ad4286dc7..0a9e4243ae1d 100644
> --- a/arch/powerpc/platforms/pseries/setup.c
> +++ b/arch/powerpc/platforms/pseries/setup.c
> @@ -645,6 +645,15 @@ void of_pci_parse_iov_addrs(struct pci_dev *dev, const int *indexes)
>  	}
>  }
>  
> +static void pseries_disable_sriov_resources(struct pci_dev *pdev)
> +{
> +	int i;
> +
> +	pci_warn(pdev, "No hypervisor support for SR-IOV on this device, IOV BARs disabled.\n");
> +	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++)
> +		pdev->resource[i + PCI_IOV_RESOURCES].flags = 0;
> +}
> +
>  static void pseries_pci_fixup_resources(struct pci_dev *pdev)
>  {
>  	const int *indexes;
> @@ -652,10 +661,10 @@ static void pseries_pci_fixup_resources(struct pci_dev *pdev)
>  
>  	/*Firmware must support open sriov otherwise dont configure*/
>  	indexes = of_get_property(dn, "ibm,open-sriov-vf-bar-info", NULL);
> -	if (!indexes)
> -		return;
> -	/* Assign the addresses from device tree*/
> -	of_pci_set_vf_bar_size(pdev, indexes);
> +	if (indexes)
> +		of_pci_set_vf_bar_size(pdev, indexes);
> +	else
> +		pseries_disable_sriov_resources(pdev);
>  }
>  
>  static void pseries_pci_fixup_iov_resources(struct pci_dev *pdev)
> @@ -667,10 +676,10 @@ static void pseries_pci_fixup_iov_resources(struct pci_dev *pdev)
>  		return;
>  	/*Firmware must support open sriov otherwise dont configure*/
>  	indexes = of_get_property(dn, "ibm,open-sriov-vf-bar-info", NULL);
> -	if (!indexes)
> -		return;
> -	/* Assign the addresses from device tree*/
> -	of_pci_parse_iov_addrs(pdev, indexes);
> +	if (indexes)
> +		of_pci_parse_iov_addrs(pdev, indexes);
> +	else
> +		pseries_disable_sriov_resources(pdev);
>  }
>  
>  static resource_size_t pseries_pci_iov_resource_alignment(struct pci_dev *pdev,
> -- 
> 2.16.1.74.g9b0b1f47b
> 

^ permalink raw reply

* [PATCH 3/6] powerpc: factor out RapidIO Kconfig menu entry
From: Alexei Colin @ 2018-07-30 22:50 UTC (permalink / raw)
  To: Alexandre Bounine, Benjamin Herrenschmidt, Paul Mackerras
  Cc: Alexei Colin, Andrew Morton, John Paul Walters, linuxppc-dev,
	linux-kernel
In-Reply-To: <20180730225035.28365-1-acolin@isi.edu>

The menu entry is now defined in the rapidio subtree.  Also, re-order
the bus menu so tha the platform-specific RapidIO controller appears
after the entry for the RapidIO subsystem.

Platforms with a PCI bus will be offered the RapidIO menu since they may
be want support for a RapidIO PCI device. Platforms without a PCI bus
that might include a RapidIO IP block will need to "select HAS_RAPIDIO"
in the platform-/machine-specific "config ARCH_*" Kconfig entry.

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: John Paul Walters <jwalters@isi.edu>
Cc: linuxppc-dev@lists.ozlabs.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Alexei Colin <acolin@isi.edu>
---
 arch/powerpc/Kconfig | 13 +------------
 1 file changed, 1 insertion(+), 12 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 25d005af0a5b..17ea8a5f90a0 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -993,16 +993,7 @@ source "drivers/pci/Kconfig"
 
 source "drivers/pcmcia/Kconfig"
 
-config HAS_RAPIDIO
-	bool
-	default n
-
-config RAPIDIO
-	tristate "RapidIO support"
-	depends on HAS_RAPIDIO || PCI
-	help
-	  If you say Y here, the kernel will include drivers and
-	  infrastructure code to support RapidIO interconnect devices.
+source "drivers/rapidio/Kconfig"
 
 config FSL_RIO
 	bool "Freescale Embedded SRIO Controller support"
@@ -1012,8 +1003,6 @@ config FSL_RIO
 	  Include support for RapidIO controller on Freescale embedded
 	  processors (MPC8548, MPC8641, etc).
 
-source "drivers/rapidio/Kconfig"
-
 endmenu
 
 config NONSTATIC_KERNEL
-- 
2.18.0

^ permalink raw reply related

* [PATCH 0/6] rapidio: move Kconfig menu definition to subsystem
From: Alexei Colin @ 2018-07-30 22:50 UTC (permalink / raw)
  To: Alexandre Bounine, Andrew Morton
  Cc: Alexei Colin, John Paul Walters, Catalin Marinas, Russell King,
	Arnd Bergmann, Will Deacon, Ralf Baechle, Paul Burton,
	Alexander Sverdlin, Benjamin Herrenschmidt, Paul Mackerras,
	Thomas Gleixner, Peter Anvin, Matt Porter, x86, linuxppc-dev,
	linux-mips, linux-arm-kernel, linux-kernel

The top-level Kconfig entry for RapidIO subsystem is currently
duplicated in several architecture-specific Kconfig files. This set of
patches does two things:

1. Move the Kconfig menu definition into the RapidIO subsystem and
remove the duplicate definitions from arch Kconfig files.

2. Enable RapidIO Kconfig menu entry for arm and arm64 architectures,
where it was not enabled before. I tested that subsystem and drivers
build successfully for both architectures, and tested that the modules
load on a custom arm64 Qemu model.

For all architectures, RapidIO menu should be offered when either:
(1) The platform has a PCI bus (which host a RapidIO module on the bus).
(2) The platform has a RapidIO IP block (connected to a system bus, e.g.
AXI on ARM). In this case, 'select HAS_RAPIDIO' should be added to the
'config ARCH_*' menu entry for the SoCs that offer the IP block.

Prior to this patchset, different architectures used different criteria:
* powerpc: (1) and (2)
* mips: (1) and (2) after recent commit into next that added (2):
  https://www.linux-mips.org/archives/linux-mips/2018-07/msg00596.html
  fc5d988878942e9b42a4de5204bdd452f3f1ce47
  491ec1553e0075f345fbe476a93775eabcbc40b6
* x86: (1)
* arm,arm64: none (RapidIO menus never offered)

Responses to feedback from prior submission (thanks for the reviews!):
http://lists.infradead.org/pipermail/linux-arm-kernel/2018-July/593347.html
http://lists.infradead.org/pipermail/linux-arm-kernel/2018-July/593349.html

Changelog:
  * Moved Kconfig entry into RapidIO subsystem instead of duplicating

In the current patchset, I took the approach of adding '|| PCI' to the
depends in the subsystem. I did try the alterantive approach mentioned
in the reviews for v1 of this patch, where the subsystem Kconfig does
not add a '|| PCI' and each per-architecture Kconfig has to add a
'select HAS_RAPIDIO if PCI' and SoCs with IP blocks have to also add
'select HAS_RAPIDIO'. This works too but requires each architecture's
Kconfig to add the line for RapidIO (whereas current approach does not
require that involvement) and also may create a false impression that
the dependency on PCI is strict.

We appreciate the suggestion for also selecting the RapdiIO subsystem for
compilation with COMPILE_TEST, but hope to address it in a separate
patchset, localized to the subsystem, since it will need to change
depends on all drivers, not just on the top level, and since this
patch now spans multiple architectures.


Alexei Colin (6):
  rapidio: define top Kconfig menu in driver subtree
  x86: factor out RapidIO Kconfig menu
  powerpc: factor out RapidIO Kconfig menu entry
  mips: factor out RapidIO Kconfig entry
  arm: enable RapidIO menu in Kconfig
  arm64: enable RapidIO menu in Kconfig

 arch/arm/Kconfig        |  2 ++
 arch/arm64/Kconfig      |  2 ++
 arch/mips/Kconfig       | 11 -----------
 arch/powerpc/Kconfig    | 13 +------------
 arch/x86/Kconfig        |  8 --------
 drivers/rapidio/Kconfig | 15 +++++++++++++++
 6 files changed, 20 insertions(+), 31 deletions(-)

-- 
2.18.0

^ permalink raw reply

* Re: [PATCH 0/6] rapidio: move Kconfig menu definition to subsystem
From: Russell King - ARM Linux @ 2018-07-30 22:59 UTC (permalink / raw)
  To: Alexei Colin
  Cc: Alexandre Bounine, Andrew Morton, John Paul Walters,
	Catalin Marinas, Arnd Bergmann, Will Deacon, Ralf Baechle,
	Paul Burton, Alexander Sverdlin, Benjamin Herrenschmidt,
	Paul Mackerras, Thomas Gleixner, Peter Anvin, Matt Porter, x86,
	linuxppc-dev, linux-mips, linux-arm-kernel, linux-kernel
In-Reply-To: <20180730225035.28365-1-acolin@isi.edu>

I only have this message, and patches 5 and 6.  This is meaningless
for me to review - I can't tell what you've done as far as my comments
on your previous iteration.

Please arrange to _at least_ copy all patches the appropriate mailing
lists for the set with your complete patch set if you aren't going to
copy everyone on all patches in the set.

On Mon, Jul 30, 2018 at 06:50:28PM -0400, Alexei Colin wrote:
> In the current patchset, I took the approach of adding '|| PCI' to the
> depends in the subsystem. I did try the alterantive approach mentioned
> in the reviews for v1 of this patch, where the subsystem Kconfig does
> not add a '|| PCI' and each per-architecture Kconfig has to add a
> 'select HAS_RAPIDIO if PCI' and SoCs with IP blocks have to also add
> 'select HAS_RAPIDIO'. This works too but requires each architecture's
> Kconfig to add the line for RapidIO (whereas current approach does not
> require that involvement) and also may create a false impression that
> the dependency on PCI is strict.

... which means, as it stands, I've no idea what you actually came up
with for this.

-- 
RMK's Patch system: http://www.armlinux.org.uk/developer/patches/
FTTC broadband for 0.8mile line in suburbia: sync at 13.8Mbps down 630kbps up
According to speedtest.net: 13Mbps down 490kbps up

^ permalink raw reply

* Re: [PATCH v2 04/10] powerpc/traps: Use REG_FMT in show_signal_msg()
From: Murilo Opsfelder Araujo @ 2018-07-30 23:17 UTC (permalink / raw)
  To: LEROY Christophe
  Cc: linux-kernel, Alastair D'Silva, Andrew Donnellan,
	Balbir Singh, Benjamin Herrenschmidt, Cyril Bur,
	Eric W . Biederman, Joe Perches, Michael Ellerman,
	Michael Neuling, Nicholas Piggin, Paul Mackerras, Simon Guo,
	Sukadev Bhattiprolu, Tobin C . Harding, linuxppc-dev
In-Reply-To: <20180730183047.Horde.g9hQ_3EXmF6St0upNs2DDw1@messagerie.si.c-s.fr>

Hi, Christophe.

On Mon, Jul 30, 2018 at 06:30:47PM +0200, LEROY Christophe wrote:
> Murilo Opsfelder Araujo <muriloo@linux.ibm.com> a écrit :
>
> > Hi, Christophe.
> >
> > On Fri, Jul 27, 2018 at 06:40:23PM +0200, LEROY Christophe wrote:
> > > Murilo Opsfelder Araujo <muriloo@linux.ibm.com> a écrit :
> > >
> > > > Simplify the message format by using REG_FMT as the register format.  This
> > > > avoids having two different formats and avoids checking for MSR_64BIT.
> > >
> > > Are you sure it is what we want ?
> >
> > Yes.
> >
> > > Won't it change the behaviour for a 32 bits app running on a 64bits kernel ?
> >
> > In fact, this changes how many zeroes are prefixed when displaying the
> > registers
> > (%016lx vs. %08lx format).  For example, 32-bits userspace, 64-bits kernel:
>
> Indeed that's what I suspected. What is the real benefit of this change ?
> Why not keep the current format for 32bits userspace ? All those leading
> zeroes are pointless to me.

One of the benefits is simplifying the code by removing some checks.  Another is
deduplicating almost identical format strings in favor of a unified one.

After reading Joe's comment [1], %px seems to be the format we're looking for.
An extract from Documentation/core-api/printk-formats.rst:

  "%px is functionally equivalent to %lx (or %lu). %px is preferred because it
  is more uniquely grep'able."

So I guess we don't need to worry about the format (%016lx vs. %08lx), let's
just use %px, as per the guideline.

[1] https://lore.kernel.org/lkml/26f07092cdde378ebb42c1034badde1b56521c36.camel@perches.com/

Cheers
Murilo

^ permalink raw reply

* Re: [PATCH 2/3] selftests/powerpc: Only run some tests on ppc64le
From: Gustavo Romero @ 2018-07-31  0:35 UTC (permalink / raw)
  To: Michael Ellerman, linuxppc-dev
In-Reply-To: <87pnz9z0n8.fsf@concordia.ellerman.id.au>

Hi Michael,

On 07/27/2018 02:51 AM, Michael Ellerman wrote:
> Gustavo Romero <gromero@linux.vnet.ibm.com> writes:
> 
>> Hi Michael,
>>
>> On 07/26/2018 09:24 AM, Michael Ellerman wrote:
>>> These tests are currently failing on (some) big endian systems. Until
>>> we can fix that, skip them unless we're on ppc64le.
>>
>> I can take a look at this.
> 
> Thanks!
> 
>> Is that the same issue related to the gcc version we discussed a month ago?
> 
> Maybe, I've forgotten :)
> 
>> If not, could you provide the crash logs as a starting point?
> 
> There's not much:
> 
>    test: tm_tar
>    tags: git_version:9794259
>    failure: tm_tar
>    run-parts: ./tm-tar exited with return code 1
>    
>    test: tm_vmxcopy
>    tags: git_version:9794259
>    !! child died by signal 11
>    failure: tm_vmxcopy
>    run-parts: ./tm-vmxcopy exited with return code 1
> 
> And now I can't get the sigreturn one to fail :/

OK. I _think_ there is a chance it's the same issue we've discussed.

Hence, just for the record (in case somebody else wants to take a look at it as
well), on the occasion we had the following log about tm-sigreturn, that
although is not listed above (only tm-tar and tm-vmxcopy are ), it's marked to
run only on LE as per this patch, just like tm-tar and tm-vmxcopy:

[  403.458953] Unexpected TM Bad Thing exception at c00000000006da4c (msr 0x201032)
13:59:41 console: [  403.459865] Oops: Unrecoverable exception, sig: 6 [#1]
13:59:41 console: [  403.460286] BE SMP NR_CPUS=32 NUMA pSeries
13:59:41 console: [  403.460611] Modules linked in:
13:59:41 console: [  403.460929] CPU: 2 PID: 25233 Comm: tm-sigreturn Not tainted 4.16.0-gcc6x-g709b973 #1
13:59:41 console: [  403.461530] NIP:  c00000000006da4c LR: c00000000001d6a4 CTR: 0000000000000000
13:59:41 console: [  403.461782] REGS: c00000003ffcbd80 TRAP: 0700   Not tainted  (4.16.0-gcc6x-g709b973)
13:59:41 console: [  403.461943] MSR:  8000000300201032 <SF,ME,IR,DR,RI,TM[SE]>  CR: 48004288  XER: 20000000
13:59:41 console: [  403.462112] CFAR: c00000000001d6a0 SOFTE: 3
13:59:41 console: [  403.462112] PACATMSCRATCH: 000000034280f032
13:59:41 console: [  403.462112] GPR00: d40000018c000001 c0000000f09bfc20 c00000000165bd00 c0000000fbd5c900
13:59:41 console: [  403.462112] GPR04: 8000000300009032 0000000000000000 0000000000000000 0000000000000000
13:59:41 console: [  403.462112] GPR08: 0000000000000000 0000000000000001 0000000000000001 0000000000000000
13:59:41 console: [  403.462112] GPR12: 0000000000000000 c00000003fffdf00 000000000000000f 000000001003a338
13:59:41 console: [  403.462112] GPR16: 0000000010002e5c 0000000010002e44 0000000010002d08 0000000010002e24
13:59:41 console: [  403.462112] GPR20: 0000000010002eac 0000000000000000 c0000000fbd5cdc8 c000000000df6578
13:59:41 console: [  403.462112] GPR24: 0000000000000000 bfffffffffffffff 00000000fffef394 00000000fffeee10
13:59:41 console: [  403.462112] GPR28: c0000000f09bfea0 000000000280d032 0000000000000000 c0000000fbd5c900
13:59:41 console: [  403.463642] NIP [c00000000006da4c] .tm_restore_sprs+0xc/0x1c
13:59:41 console: [  403.463782] LR [c00000000001d6a4] .tm_recheckpoint.part.7+0x54/0x90
13:59:41 console: [  403.463912] Call Trace:
13:59:41 console: [  403.464003] [c0000000f09bfc20] [c000000000312ae0] .__might_fault+0x70/0x90 (unreliable)
13:59:41 console: [  403.464165] [c0000000f09bfca0] [c00000000001b2b8] .restore_tm_user_regs.part.0+0x418/0x6c0
13:59:41 console: [  403.464326] [c0000000f09bfd70] [c00000000001c55c] .compat_sys_sigreturn+0x14c/0x490
13:59:41 console: [  403.464495] [c0000000f09bfe30] [c00000000000c070] system_call+0x58/0x6c
13:59:41 console: [  403.464624] Instruction dump:
13:59:41 console: [  403.464705] 7c800164 4e800020 7c0022a6 f80304b0 7c0222a6 f80304b8 7c0122a6 f80304c0
13:59:41 console: [  403.464868] 4e800020 e80304b0 7c0023a6 e80304b8 <7c0223a6> e80304c0 7c0123a6 4e800020
13:59:41 console: [  403.465030] ---[ end trace 0045efc572a679cf ]---

and some initial debugging by mpe using xmon:

Unexpected TM Bad Thing exception at c000000000069e6c (msr 0x201032)
cpu 0x6: Vector: 700 (Program Check) at [c00000003ff9bd80]
     pc: c000000000069e6c: .tm_restore_sprs+0xc/0x1c
     lr: c00000000001e2a4: .tm_recheckpoint.part.11+0x64/0xf0
     sp: c0000000f2a2bc30
    msr: 8000000300201032
   current = 0xc0000000fa68b000
   paca    = 0xc00000003fff8f00     softe: 3     irq_happened: 0x01
     pid   = 3879, comm = tm-sigreturn
Linux version 4.17.0-rc1-gcc-7.0.1-00005-g56376c5864f8 (michael@ka3.ozlabs.ibm.com) (gcc version 7.0.1 20170213 (experimental) (Custom 8e8a14c238db56c7)) #172 SMP Thu Apr 19 21:48:31 AEST 2018
enter ? for help
[link register   ] c00000000001e2a4 .tm_recheckpoint.part.11+0x64/0xf0
[c0000000f2a2bc30] c0000000f2a2bcb0 (unreliable)
[c0000000f2a2bcb0] c00000000001c30c .restore_tm_user_regs.part.0+0x55c/0x610
[c0000000f2a2bd70] c00000000001d400 .compat_sys_sigreturn+0x130/0x410
[c0000000f2a2be30] c00000000000c268 system_call+0x58/0x6c
--- Exception: 300 (Data Access) at 00000000100009c8
SP (ffd75e90) is in userspace
6:mon> di %pc
c000000000069e6c  7c0223a6    mtspr   130,r0
c000000000069e70  e80304c0    ld      r0,1216(r3)
c000000000069e74  7c0123a6    mtspr   129,r0
c000000000069e78  4e800020    blr
c000000000069e7c  7c03071d    tabort. r3
c000000000069e80  4e800020    blr
c000000000069e84  7ca00026    mfcr    r5
c000000000069e88  7c0802a6    mflr    r0
c000000000069e8c  90a10008    stw     r5,8(r1)
c000000000069e90  f8010010    std     r0,16(r1)
c000000000069e94  f8410028    std     r2,40(r1)
c000000000069e98  f821fe21    stdu    r1,-480(r1)
c000000000069e9c  f8610030    std     r3,48(r1)
c000000000069ea0  f9c100e0    std     r14,224(r1)
c000000000069ea4  f9e100e8    std     r15,232(r1)
c000000000069ea8  fa0100f0    std     r16,240(r1)


Best regards,
Gustavo

^ permalink raw reply

* Re: [PATCH] powerpc/mobility: Fix node detach/rename problem
From: Tyrel Datwyler @ 2018-07-31  0:59 UTC (permalink / raw)
  To: Michael Bringmann, linuxppc-dev
  Cc: Nathan Fontenot, Thomas Falcon, John Allen, mpe
In-Reply-To: <c2fba52a-baac-25fb-c26b-c84b25c3178c@linux.vnet.ibm.com>

On 07/29/2018 06:11 AM, Michael Bringmann wrote:
> During LPAR migration, the content of the device tree/sysfs may
> be updated including deletion and replacement of nodes in the
> tree.  When nodes are added to the internal node structures, they
> are appended in FIFO order to a list of nodes maintained by the
> OF code APIs.  When nodes are removed from the device tree, they
> are marked OF_DETACHED, but not actually deleted from the system
> to allow for pointers cached elsewhere in the kernel.  The order
> and content of the entries in the list of nodes is not altered,
> though.
> 
> During LPAR migration some common nodes are deleted and re-added
> e.g. "ibm,platform-facilities".  If a node is re-added to the OF
> node lists, the of_attach_node function checks to make sure that
> the name + ibm,phandle of the to-be-added data is unique.  As the
> previous copy of a re-added node is not modified beyond the addition
> of a bit flag, the code (1) finds the old copy, (2) prints a WARNING
> notice to the console, (3) renames the to-be-added node to avoid
> filename collisions within a directory, and (3) adds entries to
> the sysfs/kernfs.

So, this patch actually just band aids over the real problem. This is a long standing problem with several PFO drivers leaking references. The issue here is that, during the device tree update that follows a migration. the update of the ibm,platform-facilities node and friends below are always deleted and re-added on the destination lpar and subsequently the leaked references prevent the devices nodes from every actually being properly cleaned up after detach. Thus, leading to the issue you are observing.

As and example a quick look at nx-842-pseries.c reveals several issues.

static int __init nx842_pseries_init(void)
{
        struct nx842_devdata *new_devdata;
        int ret;

        if (!of_find_compatible_node(NULL, NULL, "ibm,compression"))
                return -ENODEV;

This call to of_find_compatible_node() results in a node returned with refcount incremented and therefore immediately leaked.

Further, the reconfig notifier logic makes the assumption that it only needs to deal with node updates, but as I outlined above the post migration device tree update always results in PFO nodes and properties being deleted and re-added.

/**
 * nx842_OF_notifier - Process updates to OF properties for the device
 *
 * @np: notifier block
 * @action: notifier action
 * @update: struct pSeries_reconfig_prop_update pointer if action is
 *      PSERIES_UPDATE_PROPERTY
 *
 * Returns:
 *      NOTIFY_OK on success
 *      NOTIFY_BAD encoded with error number on failure, use
 *              notifier_to_errno() to decode this value
 */
static int nx842_OF_notifier(struct notifier_block *np, unsigned long action,
                             void *data)
{
        struct of_reconfig_data *upd = data;
        struct nx842_devdata *local_devdata;
        struct device_node *node = NULL;

        rcu_read_lock();
        local_devdata = rcu_dereference(devdata);
        if (local_devdata)
                node = local_devdata->dev->of_node;

        if (local_devdata &&
                        action == OF_RECONFIG_UPDATE_PROPERTY &&
                        !strcmp(upd->dn->name, node->name)) {
                rcu_read_unlock();
                nx842_OF_upd(upd->prop);
        } else
                rcu_read_unlock();

        return NOTIFY_OK;
}

I expect to find the same problems in pseries-rng.c and nx.c.

-Tyrel

> 
> This patch fixes the 'migration add' problem by changing the
> stored 'phandle' of the OF_DETACHed node to 0 (reserved value for
> of_find_node_by_phandle), so that subsequent re-add operations,
> such as those during migration, do not find the detached node,
> do not observe duplicate names, do not rename them,  and the
> extra WARNING notices are removed from the console output.
> 
> In addition, it erases the 'name' field of the OF_DETACHED node,
> to prevent any future calls to of_find_node_by_name() or
> of_find_node_by_path() from matching this node.
> 
> Signed-off-by: Michael Bringmann <mwb@linux.vnet.ibm.com>
> ---
>  arch/powerpc/platforms/pseries/dlpar.c |    3 +++
>  1 file changed, 3 insertions(+)
> 
> diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c
> index 2de0f0d..9d82c28 100644
> --- a/arch/powerpc/platforms/pseries/dlpar.c
> +++ b/arch/powerpc/platforms/pseries/dlpar.c
> @@ -274,6 +274,9 @@ int dlpar_detach_node(struct device_node *dn)
>  	if (rc)
>  		return rc;
> 
> +	dn->phandle = 0;
> +	memset(dn->name, 0, strlen(dn->name));
> +
>  	return 0;
>  }
> 
> 

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox