LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 3/4] powerpc/32: define helpers to get L1 cache sizes.
From: Christophe Leroy @ 2019-05-14  9:05 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
	Oliver O'Halloran, Segher Boessenkool
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <239d1c8f15b8bedc161a234f9f1a22a07160dbdf.1557824379.git.christophe.leroy@c-s.fr>

This patch defines C helpers to retrieve the size of
cache blocks and uses them in the cacheflush functions.

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
 arch/powerpc/include/asm/cache.h      | 16 ++++++++++++++--
 arch/powerpc/include/asm/cacheflush.h | 24 +++++++++++++++---------
 2 files changed, 29 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/include/asm/cache.h b/arch/powerpc/include/asm/cache.h
index 40ea5b3781c6..0009a0a82e86 100644
--- a/arch/powerpc/include/asm/cache.h
+++ b/arch/powerpc/include/asm/cache.h
@@ -33,7 +33,8 @@
 
 #define IFETCH_ALIGN_BYTES	(1 << IFETCH_ALIGN_SHIFT)
 
-#if defined(__powerpc64__) && !defined(__ASSEMBLY__)
+#if !defined(__ASSEMBLY__)
+#ifdef CONFIG_PPC64
 
 struct ppc_cache_info {
 	u32 size;
@@ -53,7 +54,18 @@ struct ppc64_caches {
 };
 
 extern struct ppc64_caches ppc64_caches;
-#endif /* __powerpc64__ && ! __ASSEMBLY__ */
+#else
+static inline u32 l1_cache_shift(void)
+{
+	return L1_CACHE_SHIFT;
+}
+
+static inline u32 l1_cache_bytes(void)
+{
+	return L1_CACHE_BYTES;
+}
+#endif
+#endif /* ! __ASSEMBLY__ */
 
 #if defined(__ASSEMBLY__)
 /*
diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h
index e9a40b110f1d..d405f18441cd 100644
--- a/arch/powerpc/include/asm/cacheflush.h
+++ b/arch/powerpc/include/asm/cacheflush.h
@@ -64,11 +64,13 @@ static inline void __flush_dcache_icache_phys(unsigned long physaddr)
  */
 static inline void flush_dcache_range(unsigned long start, unsigned long stop)
 {
-	void *addr = (void *)(start & ~(L1_CACHE_BYTES - 1));
-	unsigned long size = stop - (unsigned long)addr + (L1_CACHE_BYTES - 1);
+	unsigned long shift = l1_cache_shift();
+	unsigned long bytes = l1_cache_bytes();
+	void *addr = (void *)(start & ~(bytes - 1));
+	unsigned long size = stop - (unsigned long)addr + (bytes - 1);
 	unsigned long i;
 
-	for (i = 0; i < size >> L1_CACHE_SHIFT; i++, addr += L1_CACHE_BYTES)
+	for (i = 0; i < size >> shift; i++, addr += bytes)
 		dcbf(addr);
 	mb();	/* sync */
 }
@@ -80,11 +82,13 @@ static inline void flush_dcache_range(unsigned long start, unsigned long stop)
  */
 static inline void clean_dcache_range(unsigned long start, unsigned long stop)
 {
-	void *addr = (void *)(start & ~(L1_CACHE_BYTES - 1));
-	unsigned long size = stop - (unsigned long)addr + (L1_CACHE_BYTES - 1);
+	unsigned long shift = l1_cache_shift();
+	unsigned long bytes = l1_cache_bytes();
+	void *addr = (void *)(start & ~(bytes - 1));
+	unsigned long size = stop - (unsigned long)addr + (bytes - 1);
 	unsigned long i;
 
-	for (i = 0; i < size >> L1_CACHE_SHIFT; i++, addr += L1_CACHE_BYTES)
+	for (i = 0; i < size >> shift; i++, addr += bytes)
 		dcbst(addr);
 	mb();	/* sync */
 }
@@ -97,11 +101,13 @@ static inline void clean_dcache_range(unsigned long start, unsigned long stop)
 static inline void invalidate_dcache_range(unsigned long start,
 					   unsigned long stop)
 {
-	void *addr = (void *)(start & ~(L1_CACHE_BYTES - 1));
-	unsigned long size = stop - (unsigned long)addr + (L1_CACHE_BYTES - 1);
+	unsigned long shift = l1_cache_shift();
+	unsigned long bytes = l1_cache_bytes();
+	void *addr = (void *)(start & ~(bytes - 1));
+	unsigned long size = stop - (unsigned long)addr + (bytes - 1);
 	unsigned long i;
 
-	for (i = 0; i < size >> L1_CACHE_SHIFT; i++, addr += L1_CACHE_BYTES)
+	for (i = 0; i < size >> shift; i++, addr += bytes)
 		dcbi(addr);
 	mb();	/* sync */
 }
-- 
2.13.3


^ permalink raw reply related

* [PATCH 4/4] powerpc/64: reuse PPC32 static inline flush_dcache_range()
From: Christophe Leroy @ 2019-05-14  9:05 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
	Oliver O'Halloran, Segher Boessenkool
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <239d1c8f15b8bedc161a234f9f1a22a07160dbdf.1557824379.git.christophe.leroy@c-s.fr>

This patch drops the assembly PPC64 version of flush_dcache_range()
and re-uses the PPC32 static inline version.

With GCC 8.1, the following code is generated:

void flush_test(unsigned long start, unsigned long stop)
{
	flush_dcache_range(start, stop);
}

0000000000000130 <.flush_test>:
 130:	3d 22 00 00 	addis   r9,r2,0
			132: R_PPC64_TOC16_HA	.data+0x8
 134:	81 09 00 00 	lwz     r8,0(r9)
			136: R_PPC64_TOC16_LO	.data+0x8
 138:	3d 22 00 00 	addis   r9,r2,0
			13a: R_PPC64_TOC16_HA	.data+0xc
 13c:	80 e9 00 00 	lwz     r7,0(r9)
			13e: R_PPC64_TOC16_LO	.data+0xc
 140:	7d 48 00 d0 	neg     r10,r8
 144:	7d 43 18 38 	and     r3,r10,r3
 148:	7c 00 04 ac 	hwsync
 14c:	4c 00 01 2c 	isync
 150:	39 28 ff ff 	addi    r9,r8,-1
 154:	7c 89 22 14 	add     r4,r9,r4
 158:	7c 83 20 50 	subf    r4,r3,r4
 15c:	7c 89 3c 37 	srd.    r9,r4,r7
 160:	41 82 00 1c 	beq     17c <.flush_test+0x4c>
 164:	7d 29 03 a6 	mtctr   r9
 168:	60 00 00 00 	nop
 16c:	60 00 00 00 	nop
 170:	7c 00 18 ac 	dcbf    0,r3
 174:	7c 63 42 14 	add     r3,r3,r8
 178:	42 00 ff f8 	bdnz    170 <.flush_test+0x40>
 17c:	7c 00 04 ac 	hwsync
 180:	4c 00 01 2c 	isync
 184:	4e 80 00 20 	blr
 188:	60 00 00 00 	nop
 18c:	60 00 00 00 	nop

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
 arch/powerpc/include/asm/cache.h      | 10 ++++++++++
 arch/powerpc/include/asm/cacheflush.h | 14 ++++++++------
 arch/powerpc/kernel/misc_64.S         | 29 -----------------------------
 3 files changed, 18 insertions(+), 35 deletions(-)

diff --git a/arch/powerpc/include/asm/cache.h b/arch/powerpc/include/asm/cache.h
index 0009a0a82e86..45e3137ccd71 100644
--- a/arch/powerpc/include/asm/cache.h
+++ b/arch/powerpc/include/asm/cache.h
@@ -54,6 +54,16 @@ struct ppc64_caches {
 };
 
 extern struct ppc64_caches ppc64_caches;
+
+static inline u32 l1_cache_shift(void)
+{
+	return ppc64_caches.l1d.log_block_size;
+}
+
+static inline u32 l1_cache_bytes(void)
+{
+	return ppc64_caches.l1d.block_size;
+}
 #else
 static inline u32 l1_cache_shift(void)
 {
diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h
index d405f18441cd..3cd7ce3dec8b 100644
--- a/arch/powerpc/include/asm/cacheflush.h
+++ b/arch/powerpc/include/asm/cacheflush.h
@@ -57,7 +57,6 @@ static inline void __flush_dcache_icache_phys(unsigned long physaddr)
 }
 #endif
 
-#ifdef CONFIG_PPC32
 /*
  * Write any modified data cache blocks out to memory and invalidate them.
  * Does not invalidate the corresponding instruction cache blocks.
@@ -70,9 +69,17 @@ static inline void flush_dcache_range(unsigned long start, unsigned long stop)
 	unsigned long size = stop - (unsigned long)addr + (bytes - 1);
 	unsigned long i;
 
+	if (IS_ENABLED(CONFIG_PPC64)) {
+		mb();	/* sync */
+		isync();
+	}
+
 	for (i = 0; i < size >> shift; i++, addr += bytes)
 		dcbf(addr);
 	mb();	/* sync */
+
+	if (IS_ENABLED(CONFIG_PPC64))
+		isync();
 }
 
 /*
@@ -112,11 +119,6 @@ static inline void invalidate_dcache_range(unsigned long start,
 	mb();	/* sync */
 }
 
-#endif /* CONFIG_PPC32 */
-#ifdef CONFIG_PPC64
-extern void flush_dcache_range(unsigned long start, unsigned long stop);
-#endif
-
 #define copy_to_user_page(vma, page, vaddr, dst, src, len) \
 	do { \
 		memcpy(dst, src, len); \
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index a4fd536efb44..1b0a42c50ef1 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -115,35 +115,6 @@ _ASM_NOKPROBE_SYMBOL(flush_icache_range)
 EXPORT_SYMBOL(flush_icache_range)
 
 /*
- * Like above, but only do the D-cache.
- *
- * flush_dcache_range(unsigned long start, unsigned long stop)
- *
- *    flush all bytes from start to stop-1 inclusive
- */
-
-_GLOBAL_TOC(flush_dcache_range)
- 	ld	r10,PPC64_CACHES@toc(r2)
-	lwz	r7,DCACHEL1BLOCKSIZE(r10)	/* Get dcache block size */
-	addi	r5,r7,-1
-	andc	r6,r3,r5		/* round low to line bdy */
-	subf	r8,r6,r4		/* compute length */
-	add	r8,r8,r5		/* ensure we get enough */
-	lwz	r9,DCACHEL1LOGBLOCKSIZE(r10)/* Get log-2 of dcache block size */
-	srw.	r8,r8,r9		/* compute line count */
-	beqlr				/* nothing to do? */
-	sync
-	isync
-	mtctr	r8
-0:	dcbf	0,r6
-	add	r6,r6,r7
-	bdnz	0b
-	sync
-	isync
-	blr
-EXPORT_SYMBOL(flush_dcache_range)
-
-/*
  * Flush a particular page from the data cache to RAM.
  * Note: this is necessary because the instruction cache does *not*
  * snoop from the data cache.
-- 
2.13.3


^ permalink raw reply related

* [PATCH 1/4] powerpc/64: flush_inval_dcache_range() becomes flush_dcache_range()
From: Christophe Leroy @ 2019-05-14  9:05 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
	Oliver O'Halloran, Segher Boessenkool
  Cc: linuxppc-dev, linux-kernel

On most arches having function flush_dcache_range(), including PPC32,
this function does a writeback and invalidation of the cache bloc.

On PPC64, flush_dcache_range() only does a writeback while
flush_inval_dcache_range() does the invalidation in addition.

In addition it looks like within arch/powerpc/, there are no PPC64
platforms using flush_dcache_range()

This patch drops the existing 64 bits version of flush_dcache_range()
and renames flush_inval_dcache_range() into flush_dcache_range().

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
 arch/powerpc/include/asm/cacheflush.h |  1 -
 arch/powerpc/kernel/misc_64.S         | 27 ++-------------------------
 arch/powerpc/lib/pmem.c               |  8 ++++----
 arch/powerpc/mm/mem.c                 |  4 ++--
 arch/powerpc/sysdev/dart_iommu.c      |  2 +-
 drivers/macintosh/smu.c               |  4 ++--
 6 files changed, 11 insertions(+), 35 deletions(-)

diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h
index d5a8d7bf0759..e9a40b110f1d 100644
--- a/arch/powerpc/include/asm/cacheflush.h
+++ b/arch/powerpc/include/asm/cacheflush.h
@@ -109,7 +109,6 @@ static inline void invalidate_dcache_range(unsigned long start,
 #endif /* CONFIG_PPC32 */
 #ifdef CONFIG_PPC64
 extern void flush_dcache_range(unsigned long start, unsigned long stop);
-extern void flush_inval_dcache_range(unsigned long start, unsigned long stop);
 #endif
 
 #define copy_to_user_page(vma, page, vaddr, dst, src, len) \
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index 262ba9481781..a4fd536efb44 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -121,31 +121,8 @@ EXPORT_SYMBOL(flush_icache_range)
  *
  *    flush all bytes from start to stop-1 inclusive
  */
-_GLOBAL_TOC(flush_dcache_range)
 
-/*
- * Flush the data cache to memory 
- * 
- * Different systems have different cache line sizes
- */
- 	ld	r10,PPC64_CACHES@toc(r2)
-	lwz	r7,DCACHEL1BLOCKSIZE(r10)	/* Get dcache block size */
-	addi	r5,r7,-1
-	andc	r6,r3,r5		/* round low to line bdy */
-	subf	r8,r6,r4		/* compute length */
-	add	r8,r8,r5		/* ensure we get enough */
-	lwz	r9,DCACHEL1LOGBLOCKSIZE(r10)	/* Get log-2 of dcache block size */
-	srw.	r8,r8,r9		/* compute line count */
-	beqlr				/* nothing to do? */
-	mtctr	r8
-0:	dcbst	0,r6
-	add	r6,r6,r7
-	bdnz	0b
-	sync
-	blr
-EXPORT_SYMBOL(flush_dcache_range)
-
-_GLOBAL(flush_inval_dcache_range)
+_GLOBAL_TOC(flush_dcache_range)
  	ld	r10,PPC64_CACHES@toc(r2)
 	lwz	r7,DCACHEL1BLOCKSIZE(r10)	/* Get dcache block size */
 	addi	r5,r7,-1
@@ -164,7 +141,7 @@ _GLOBAL(flush_inval_dcache_range)
 	sync
 	isync
 	blr
-
+EXPORT_SYMBOL(flush_dcache_range)
 
 /*
  * Flush a particular page from the data cache to RAM.
diff --git a/arch/powerpc/lib/pmem.c b/arch/powerpc/lib/pmem.c
index 53c018762e1c..36e08bf850e0 100644
--- a/arch/powerpc/lib/pmem.c
+++ b/arch/powerpc/lib/pmem.c
@@ -23,14 +23,14 @@
 void arch_wb_cache_pmem(void *addr, size_t size)
 {
 	unsigned long start = (unsigned long) addr;
-	flush_inval_dcache_range(start, start + size);
+	flush_dcache_range(start, start + size);
 }
 EXPORT_SYMBOL(arch_wb_cache_pmem);
 
 void arch_invalidate_pmem(void *addr, size_t size)
 {
 	unsigned long start = (unsigned long) addr;
-	flush_inval_dcache_range(start, start + size);
+	flush_dcache_range(start, start + size);
 }
 EXPORT_SYMBOL(arch_invalidate_pmem);
 
@@ -43,7 +43,7 @@ long __copy_from_user_flushcache(void *dest, const void __user *src,
 	unsigned long copied, start = (unsigned long) dest;
 
 	copied = __copy_from_user(dest, src, size);
-	flush_inval_dcache_range(start, start + size);
+	flush_dcache_range(start, start + size);
 
 	return copied;
 }
@@ -53,7 +53,7 @@ void *memcpy_flushcache(void *dest, const void *src, size_t size)
 	unsigned long start = (unsigned long) dest;
 
 	memcpy(dest, src, size);
-	flush_inval_dcache_range(start, start + size);
+	flush_dcache_range(start, start + size);
 
 	return dest;
 }
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index cd525d709072..39e66f033995 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -125,7 +125,7 @@ int __ref arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altm
 			start, start + size, rc);
 		return -EFAULT;
 	}
-	flush_inval_dcache_range(start, start + size);
+	flush_dcache_range(start, start + size);
 
 	return __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock);
 }
@@ -153,7 +153,7 @@ int __ref arch_remove_memory(int nid, u64 start, u64 size,
 
 	/* Remove htab bolted mappings for this section of memory */
 	start = (unsigned long)__va(start);
-	flush_inval_dcache_range(start, start + size);
+	flush_dcache_range(start, start + size);
 	ret = remove_section_mapping(start, start + size);
 
 	/* Ensure all vmalloc mappings are flushed in case they also
diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c
index 2a751795ec1e..bc259a8d3f2d 100644
--- a/arch/powerpc/sysdev/dart_iommu.c
+++ b/arch/powerpc/sysdev/dart_iommu.c
@@ -158,7 +158,7 @@ static void dart_cache_sync(unsigned int *base, unsigned int count)
 	unsigned int tmp;
 
 	/* Perform a standard cache flush */
-	flush_inval_dcache_range(start, end);
+	flush_dcache_range(start, end);
 
 	/*
 	 * Perform the sequence described in the CPC925 manual to
diff --git a/drivers/macintosh/smu.c b/drivers/macintosh/smu.c
index 6a844125cf2d..97758eed03f2 100644
--- a/drivers/macintosh/smu.c
+++ b/drivers/macintosh/smu.c
@@ -133,7 +133,7 @@ static void smu_start_cmd(void)
 	/* Flush command and data to RAM */
 	faddr = (unsigned long)smu->cmd_buf;
 	fend = faddr + smu->cmd_buf->length + 2;
-	flush_inval_dcache_range(faddr, fend);
+	flush_dcache_range(faddr, fend);
 
 
 	/* We also disable NAP mode for the duration of the command
@@ -195,7 +195,7 @@ static irqreturn_t smu_db_intr(int irq, void *arg)
 		 * reply length (it's only 2 cache lines anyway)
 		 */
 		faddr = (unsigned long)smu->cmd_buf;
-		flush_inval_dcache_range(faddr, faddr + 256);
+		flush_dcache_range(faddr, faddr + 256);
 
 		/* Now check ack */
 		ack = (~cmd->cmd) & 0xff;
-- 
2.13.3


^ permalink raw reply related

* [PATCH 2/4] powerpc/32: activate ARCH_HAS_PMEM_API and ARCH_HAS_UACCESS_FLUSHCACHE
From: Christophe Leroy @ 2019-05-14  9:05 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
	Oliver O'Halloran, Segher Boessenkool
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <239d1c8f15b8bedc161a234f9f1a22a07160dbdf.1557824379.git.christophe.leroy@c-s.fr>

PPC32 also have flush_dcache_range() so it can also support
ARCH_HAS_PMEM_API and ARCH_HAS_UACCESS_FLUSHCACHE without changes.

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
 arch/powerpc/Kconfig | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index d7996cfaceca..cf6e30f637be 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -127,13 +127,13 @@ config PPC
 	select ARCH_HAS_KCOV
 	select ARCH_HAS_MMIOWB			if PPC64
 	select ARCH_HAS_PHYS_TO_DMA
-	select ARCH_HAS_PMEM_API                if PPC64
+	select ARCH_HAS_PMEM_API
 	select ARCH_HAS_PTE_SPECIAL
 	select ARCH_HAS_MEMBARRIER_CALLBACKS
 	select ARCH_HAS_SCALED_CPUTIME		if VIRT_CPU_ACCOUNTING_NATIVE && PPC64
 	select ARCH_HAS_STRICT_KERNEL_RWX	if ((PPC_BOOK3S_64 || PPC32) && !RELOCATABLE && !HIBERNATION)
 	select ARCH_HAS_TICK_BROADCAST		if GENERIC_CLOCKEVENTS_BROADCAST
-	select ARCH_HAS_UACCESS_FLUSHCACHE	if PPC64
+	select ARCH_HAS_UACCESS_FLUSHCACHE
 	select ARCH_HAS_UBSAN_SANITIZE_ALL
 	select ARCH_HAS_ZONE_DEVICE		if PPC_BOOK3S_64
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
-- 
2.13.3


^ permalink raw reply related

* Re: [PATCH] vsprintf: Do not break early boot with probing addresses
From: Geert Uytterhoeven @ 2019-05-14  9:02 UTC (permalink / raw)
  To: David Laight
  Cc: Petr Mladek, linux-arch@vger.kernel.org, Sergey Senozhatsky,
	Heiko Carstens, linux-s390@vger.kernel.org,
	linuxppc-dev@lists.ozlabs.org, Rasmus Villemoes,
	linux-kernel@vger.kernel.org, Steven Rostedt, Michal Hocko,
	Sergey Senozhatsky, Stephen Rothwell, Andy Shevchenko,
	Linus Torvalds, Martin Schwidefsky, Tobin C . Harding
In-Reply-To: <45348cf615fe40d383c1a25688d4a88f@AcuMS.aculab.com>

On Tue, May 14, 2019 at 10:29 AM David Laight <David.Laight@aculab.com> wrote:
> > And I like Steven's "(fault)" idea.
> > How about this:
> >
> >       if ptr < PAGE_SIZE              -> "(null)"
> >       if IS_ERR_VALUE(ptr)            -> "(fault)"
> >
> >       -ss
>
> Or:
>         if (ptr < PAGE_SIZE)
>                 return ptr ? "(null+)" : "(null)";
>         if IS_ERR_VALUE(ptr)
>                 return "(errno)"

Do we care about the value? "(-E%u)"?

Gr{oetje,eeting}s,

                        Geert

-- 
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds

^ permalink raw reply

* Re: Kernel OOPS followed by a panic on next20190507 with 4K page size
From: Sachin Sant @ 2019-05-14  8:57 UTC (permalink / raw)
  To: Aneesh Kumar K.V; +Cc: linux-next, linuxppc-dev
In-Reply-To: <0414d06e-1c4e-e9ec-e265-fd9662308df8@linux.ibm.com>



> On 14-May-2019, at 7:00 AM, Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> wrote:
> 
> On 5/8/19 4:30 PM, Sachin Sant wrote:
>> While running LTP tests (specifically futex_wake04) against next-20199597
>> build with 4K page size on a POWER8 LPAR following crash is observed.
>> [ 4233.214876] BUG: Kernel NULL pointer dereference at 0x0000001c
>> [ 4233.214898] Faulting instruction address: 0xc000000001d1e58c
>> [ 4233.214905] Oops: Kernel access of bad area, sig: 11 [#1]
>> [ 4233.214911] LE PAGE_SIZE=4K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries
>> [ 4233.214920] Dumping ftrace buffer:
>> [ 4233.214928]    (ftrace buffer empty)
>> [ 4233.214933] Modules linked in: overlay rpadlpar_io rpaphp iptable_mangle xt_MASQUERADE iptable_nat nf_nat xt_conntrack nf_conntrack nf_defrag_ipv4 ipt_REJECT nf_reject_ipv4 xt_tcpudp tun bridge stp llc kvm iptable_filter pseries_rng rng_core vmx_crypto ip_tables x_tables autofs4 [last unloaded: dummy_del_mod]
>> [ 4233.214973] CPU: 3 PID: 4635 Comm: futex_wake04 Tainted: G        W  O      5.1.0-next-20190507-autotest #1
>> [ 4233.214980] NIP:  c000000001d1e58c LR: c000000001d1e54c CTR: 0000000000000000
>> [ 4233.214987] REGS: c000000004937890 TRAP: 0300   Tainted: G        W  O       (5.1.0-next-20190507-autotest)
>> [ 4233.214993] MSR:  8000000000009033 <SF,EE,ME,IR,DR,RI,LE>  CR: 22424822  XER: 00000000
>> [ 4233.215005] CFAR: c00000000183e9e0 DAR: 000000000000001c DSISR: 40000000 IRQMASK: 0
>> [ 4233.215005] GPR00: c000000001901a80 c000000004937b20 c000000003938700 0000000000000000
>> [ 4233.215005] GPR04: 0000000000400cc0 000000000003efff 000000027966e000 c000000003ba8700
>> [ 4233.215005] GPR08: c000000003ba8700 000000000d601125 c000000003ba8700 0000000080000000
>> [ 4233.215005] GPR12: 0000000022424822 c00000001ecae280 0000000000000000 0000000000000000
>> [ 4233.215005] GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
>> [ 4233.215005] GPR20: 0000000000000018 c0000000039e2d30 c0000000039e2d28 c0000002762da460
>> [ 4233.215005] GPR24: 000000000000001c 0000000000000000 0000000000000001 c000000001901a80
>> [ 4233.215005] GPR28: 0000000000400cc0 0000000000000000 0000000000000000 0000000000400cc0
>> [ 4233.215065] NIP [c000000001d1e58c] kmem_cache_alloc+0xbc/0x5a0
>> [ 4233.215071] LR [c000000001d1e54c] kmem_cache_alloc+0x7c/0x5a0
>> [ 4233.215075] Call Trace:
>> [ 4233.215081] [c000000004937b20] [c000000001c91150] __pud_alloc+0x160/0x200 (unreliable)
>> [ 4233.215090] [c000000004937b80] [c000000001901a80] huge_pte_alloc+0x580/0x950
>> [ 4233.215098] [c000000004937c00] [c000000001cf7910] hugetlb_fault+0x9a0/0x1250
>> [ 4233.215106] [c000000004937ce0] [c000000001c94a80] handle_mm_fault+0x490/0x4a0
>> [ 4233.215114] [c000000004937d20] [c0000000018d529c] __do_page_fault+0x77c/0x1f00
>> [ 4233.215121] [c000000004937e00] [c0000000018d6a48] do_page_fault+0x28/0x50
>> [ 4233.215129] [c000000004937e20] [c00000000183b0d4] handle_page_fault+0x18/0x38
>> [ 4233.215135] Instruction dump:
>> [ 4233.215139] 39290001 f92ac1b0 419e009c 3ce20027 3ba00000 e927c1f0 39290001 f927c1f0
>> [ 4233.215149] 3d420027 e92ac290 39290001 f92ac290 <8359001c> 83390018 60000000 3ce20027
> 
> I did send a patch to the list to handle page allocation failures in this patch. But i guess what we are finding here is get_current() crashing. Any chance to bisect this?
> 

Following commit seems to have introduced this problem.

723f268f19 - powerpc/mm: cleanup ifdef mess in add_huge_page_size()

Reverting this patch allows the test case to execute properly without a crash.

Thanks
-Sachin

^ permalink raw reply

* Re: [PATCH 2/2] powerpc/8xx: Add microcode patch to move SMC parameter RAM.
From: Christophe Leroy @ 2019-05-14  8:35 UTC (permalink / raw)
  To: Michael Ellerman, Benjamin Herrenschmidt, Paul Mackerras,
	Vitaly Bordug, Scott Wood
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <73ea3141-e48a-5647-aabc-370fe57585bc@c-s.fr>

[-- Attachment #1: Type: text/plain, Size: 2328 bytes --]



Le 14/05/2019 à 10:31, Christophe Leroy a écrit :
> 
> 
> Le 14/05/2019 à 08:56, Michael Ellerman a écrit :
>> Christophe Leroy <christophe.leroy@c-s.fr> writes:
>>
>>> Some SCC functions like the QMC requires an extended parameter RAM.
>>> On modern 8xx (ie 866 and 885), SPI area can already be relocated,
>>> allowing the use of those functions on SCC2. But SCC3 and SCC4
>>> parameter RAM collide with SMC1 and SMC2 parameter RAMs.
>>>
>>> This patch adds microcode to allow the relocation of both SMC1 and
>>> SMC2, and relocate them at offsets 0x1ec0 and 0x1fc0.
>>> Those offsets are by default for the CPM1 DSP1 and DSP2, but there
>>> is no kernel driver using them at the moment so this area can be
>>> reused.
>>>
>>> Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
>>> ---
>>>   arch/powerpc/platforms/8xx/Kconfig      |   7 ++
>>>   arch/powerpc/platforms/8xx/micropatch.c | 109 
>>> +++++++++++++++++++++++++++++++-
>>>   2 files changed, 114 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/arch/powerpc/platforms/8xx/micropatch.c 
>>> b/arch/powerpc/platforms/8xx/micropatch.c
>>> index 33a9042fca80..dc4423daf7d4 100644
>>> --- a/arch/powerpc/platforms/8xx/micropatch.c
>>> +++ b/arch/powerpc/platforms/8xx/micropatch.c
>>> @@ -622,6 +622,86 @@ static uint patch_2f00[] __initdata = {
>>>   };
>>>   #endif
>>> +/*
>>> + * SMC relocation patch arrays.
>>> + */
>>> +
>>> +#ifdef CONFIG_SMC_UCODE_PATCH
>>> +
>>> +static uint patch_2000[] __initdata = {
>>> +    0x3fff0000, 0x3ffd0000, 0x3ffb0000, 0x3ff90000,
>>> +    0x5fefeff8, 0x5f91eff8, 0x3ff30000, 0x3ff10000,
>>> +    0x3a11e710, 0xedf0ccb9, 0xf318ed66, 0x7f0e5fe2,
>>
>> Do we have any doc on what these values are?
> 
> No we don't
> 
> 
>>
>> I get that it's microcode but do we have any more detail than that?
>> What's the source etc?
>>
> 
> There is an Engineering Bulletin (EB662) dated 2006 from Freescale which 
> slightly describe things and there are associated S-Record files 
> containing those values.

Find attached the said doc and files. Not sure it will go through the 
list. Can't find it on NXP website thought, that must be too old.

Christophe

> 
> And an old related message in the mailing list 
> https://www.mail-archive.com/linuxppc-dev@lists.ozlabs.org/msg46038.html
> 
> Christophe

[-- Attachment #2: MPC8xxMC01.zip --]
[-- Type: application/octet-stream, Size: 88593 bytes --]

^ permalink raw reply

* Re: [RFC PATCH] powerpc/64/ftrace: mprofile-kernel patch out mflr
From: Naveen N. Rao @ 2019-05-14  8:32 UTC (permalink / raw)
  To: linuxppc-dev, Michael Ellerman, Nicholas Piggin
In-Reply-To: <87tvdytwo0.fsf@concordia.ellerman.id.au>

Michael Ellerman wrote:
> "Naveen N. Rao" <naveen.n.rao@linux.ibm.com> writes:
>> Michael Ellerman wrote:
>>> Nicholas Piggin <npiggin@gmail.com> writes:
>>>> The new mprofile-kernel mcount sequence is
>>>>
>>>>   mflr	r0
>>>>   bl	_mcount
>>>>
>>>> Dynamic ftrace patches the branch instruction with a noop, but leaves
>>>> the mflr. mflr is executed by the branch unit that can only execute one
>>>> per cycle on POWER9 and shared with branches, so it would be nice to
>>>> avoid it where possible.
>>>>
>>>> This patch is a hacky proof of concept to nop out the mflr. Can we do
>>>> this or are there races or other issues with it?
>>> 
>>> There's a race, isn't there?
>>> 
>>> We have a function foo which currently has tracing disabled, so the mflr
>>> and bl are nop'ed out.
>>> 
>>>   CPU 0			CPU 1
>>>   ==================================
>>>   bl foo
>>>   nop (ie. not mflr)
>>>   -> interrupt
>>>   something else	enable tracing for foo
>>>   ...			patch mflr and branch
>>>   <- rfi
>>>   bl _mcount
>>> 
>>> So we end up in _mcount() but with r0 not populated.
>>
>> Good catch! Looks like we need to patch the mflr with a "b +8" similar 
>> to what we do in __ftrace_make_nop().
> 
> Would that actually make it any faster though? Nick?

Ok, how about doing this as a 2-step process?
1. patch 'mflr r0' with a 'b +8'
   synchronize_rcu_tasks()
2. convert 'b +8' to a 'nop'

- Naveen



^ permalink raw reply

* Re: [PATCH 2/2] powerpc/8xx: Add microcode patch to move SMC parameter RAM.
From: Christophe Leroy @ 2019-05-14  8:31 UTC (permalink / raw)
  To: Michael Ellerman, Benjamin Herrenschmidt, Paul Mackerras,
	Vitaly Bordug, Scott Wood
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <87a7fptth7.fsf@concordia.ellerman.id.au>



Le 14/05/2019 à 08:56, Michael Ellerman a écrit :
> Christophe Leroy <christophe.leroy@c-s.fr> writes:
> 
>> Some SCC functions like the QMC requires an extended parameter RAM.
>> On modern 8xx (ie 866 and 885), SPI area can already be relocated,
>> allowing the use of those functions on SCC2. But SCC3 and SCC4
>> parameter RAM collide with SMC1 and SMC2 parameter RAMs.
>>
>> This patch adds microcode to allow the relocation of both SMC1 and
>> SMC2, and relocate them at offsets 0x1ec0 and 0x1fc0.
>> Those offsets are by default for the CPM1 DSP1 and DSP2, but there
>> is no kernel driver using them at the moment so this area can be
>> reused.
>>
>> Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
>> ---
>>   arch/powerpc/platforms/8xx/Kconfig      |   7 ++
>>   arch/powerpc/platforms/8xx/micropatch.c | 109 +++++++++++++++++++++++++++++++-
>>   2 files changed, 114 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/powerpc/platforms/8xx/micropatch.c b/arch/powerpc/platforms/8xx/micropatch.c
>> index 33a9042fca80..dc4423daf7d4 100644
>> --- a/arch/powerpc/platforms/8xx/micropatch.c
>> +++ b/arch/powerpc/platforms/8xx/micropatch.c
>> @@ -622,6 +622,86 @@ static uint patch_2f00[] __initdata = {
>>   };
>>   #endif
>>   
>> +/*
>> + * SMC relocation patch arrays.
>> + */
>> +
>> +#ifdef CONFIG_SMC_UCODE_PATCH
>> +
>> +static uint patch_2000[] __initdata = {
>> +	0x3fff0000, 0x3ffd0000, 0x3ffb0000, 0x3ff90000,
>> +	0x5fefeff8, 0x5f91eff8, 0x3ff30000, 0x3ff10000,
>> +	0x3a11e710, 0xedf0ccb9, 0xf318ed66, 0x7f0e5fe2,
> 
> Do we have any doc on what these values are?

No we don't


> 
> I get that it's microcode but do we have any more detail than that?
> What's the source etc?
> 

There is an Engineering Bulletin (EB662) dated 2006 from Freescale which 
slightly describe things and there are associated S-Record files 
containing those values.

And an old related message in the mailing list 
https://www.mail-archive.com/linuxppc-dev@lists.ozlabs.org/msg46038.html

Christophe


^ permalink raw reply

* Re: [PATCH] EDAC, mpc85xx: Prevent building as a module
From: Borislav Petkov @ 2019-05-14  8:31 UTC (permalink / raw)
  To: Michael Ellerman
  Cc: Johannes Thumshirn, linux-kernel, linuxppc-dev, james.morse,
	mchehab, linux-edac
In-Reply-To: <87d0klttpy.fsf@concordia.ellerman.id.au>

On Tue, May 14, 2019 at 04:50:49PM +1000, Michael Ellerman wrote:
> Looks good. I even booted it :)

Cool, thanks!

-- 
Regards/Gruss,
    Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

^ permalink raw reply

* RE: [PATCH] vsprintf: Do not break early boot with probing addresses
From: David Laight @ 2019-05-14  8:28 UTC (permalink / raw)
  To: 'Sergey Senozhatsky', Petr Mladek
  Cc: linux-arch@vger.kernel.org, Heiko Carstens,
	linux-s390@vger.kernel.org, linuxppc-dev@lists.ozlabs.org,
	Rasmus Villemoes, linux-kernel@vger.kernel.org, Steven Rostedt,
	Michal Hocko, Sergey Senozhatsky, Stephen Rothwell,
	Andy Shevchenko, Linus Torvalds, Martin Schwidefsky,
	Tobin C . Harding
In-Reply-To: <20190514020730.GA651@jagdpanzerIV>

> And I like Steven's "(fault)" idea.
> How about this:
> 
> 	if ptr < PAGE_SIZE		-> "(null)"
> 	if IS_ERR_VALUE(ptr)		-> "(fault)"
> 
> 	-ss

Or:
	if (ptr < PAGE_SIZE)
		return ptr ? "(null+)" : "(null)";
	if IS_ERR_VALUE(ptr)
		return "(errno)"

	David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)


^ permalink raw reply

* Re: [v2 2/2] [PowerPC] Allow use of SIMD in interrupts from kernel code
From: Russell Currey @ 2019-05-14  7:22 UTC (permalink / raw)
  To: Shawn Landden; +Cc: linuxppc-dev, Paul Mackerras
In-Reply-To: <20190514022308.32363-2-shawn@git.icu>

On Mon, 2019-05-13 at 23:23 -0300, Shawn Landden wrote:
> This second patch is separate because it could be wrong,
> like I am not sure about how kernel thread migration works,
> and it is even allowing simd in preemptible kernel code.
> 
> Signed-off-by: Shawn Landden <shawn@git.icu>
> ---

Hi Shawn,

This patch doesn't build on 64-bit embedded (ppc64e_defconfig):

arch/powerpc/kernel/process.c:194:13: error: 'interrupted_kernel_fpu_idle' defined but not used [-Werror=unused-function]
 static bool interrupted_kernel_fpu_idle(void)
             ^~~~~~~~~~~~~~~~~~~~~~~~~~~

and otherwise adds two sparse warnings:

+arch/powerpc/kernel/process.c:356:13: warning: function 'disable_kernel_altivec' with external linkage has definition
+arch/powerpc/kernel/process.c:416:6: warning: symbol 'may_use_simd' was not declared. Should it be static?

There's also some style issues (spaces instead of tabs).

Reported by snowpatch (see https://patchwork.ozlabs.org/patch/1099181/)

- Russell


^ permalink raw reply

* Re: Build failure in v4.4.y.queue (ppc:allmodconfig)
From: Michael Ellerman @ 2019-05-14  7:15 UTC (permalink / raw)
  To: Guenter Roeck; +Cc: Greg Kroah-Hartman, linuxppc-dev, stable
In-Reply-To: <20190509162340.GB24493@roeck-us.net>

Guenter Roeck <linux@roeck-us.net> writes:
> On Fri, May 10, 2019 at 12:31:05AM +1000, Michael Ellerman wrote:
>> Greg Kroah-Hartman <gregkh@linuxfoundation.org> writes:
>> > On Wed, May 08, 2019 at 01:26:42PM -0700, Guenter Roeck wrote:
>> >> I see multiple instances of:
>> >> 
>> >> arch/powerpc/kernel/exceptions-64s.S:839: Error:
>> >> 	attempt to move .org backwards
>> >> 
>> >> in v4.4.y.queue (v4.4.179-143-gc4db218e9451).
>> >> 
>> >> This is due to commit 9b2d4e06d7f1 ("powerpc/64s: Add support for a store
>> >> forwarding barrier at kernel entry/exit"), which is part of a large patch
>> >> series and can not easily be reverted.
>> >> 
>> >> Guess I'll stop doing ppc:allmodconfig builds in v4.4.y ?
>> >
>> > Michael, I thought this patch series was supposed to fix ppc issues, not
>> > add to them :)
>> 
>> Well it fixes some, but creates others :}
>> 
>> > Any ideas on what to do here?
>> 
>> Turning off CONFIG_CBE_RAS (obscure IBM Cell Blade RAS features) is
>> sufficient to get it building. Is that an option for your build tests
>> Guenter?
>> 
> I could turn it off unconditionally, meaning it would affect all other
> branches. I would rather stop building ppc:allmodconfig for v4.4.y.

Yeah fine by me. No one actually runs allmodconfig anyway.

cheers

^ permalink raw reply

* Re: [PATCH v2] powerpc: Fix compile issue with force DAWR
From: Christophe Leroy @ 2019-05-14  7:06 UTC (permalink / raw)
  To: Michael Neuling, mpe; +Cc: linuxppc-dev
In-Reply-To: <eae1df7b0d329f2be6da0322210026c711d38bdc.camel@neuling.org>



Le 14/05/2019 à 08:55, Michael Neuling a écrit :
> 
[...]


> 
>>>>> +
>>>>> +static ssize_t dawr_write_file_bool(struct file *file,
>>>>> +				    const char __user *user_buf,
>>>>> +				    size_t count, loff_t *ppos)
>>>>> +{
>>>>> +	struct arch_hw_breakpoint null_brk = {0, 0, 0};
>>>>> +	size_t rc;
>>>>> +
>>>>> +	/* Send error to user if they hypervisor won't allow us to write
>>>>> DAWR */
>>>>> +	if ((!dawr_force_enable) &&
>>>>> +	    (firmware_has_feature(FW_FEATURE_LPAR)) &&
>>>>> +	    (set_dawr(&null_brk) != H_SUCCESS))
>>>>
>>>> The above is not real clear.
>>>> set_dabr() returns 0, H_SUCCESS is not used there.
>>>
>>> It pseries_set_dawr() will return a hcall number.
>>
>> Right, then it maybe means set_dawr() should be fixes ?
> 
> Sorry, I don't understand this.

I meant set_dawr() should be fixed:

As the above test hide value 0 by using H_SUCCESS for the test, in order 
to ease understanding, set_dawr() should return H_SUCCESS instead of 
return 0;

Christophe

> 
>>> This code hasn't changed. I'm just moving it.
>>
>> Right, but could be an improvment for another patch.
>> As far as I remember you are the one who wrote that code at first place,
>> arent't you ?
> 
> Yep, classic crap Mikey code :-)
> 
> Mikey
> 

^ permalink raw reply

* Re: [Bug 203597] New: kernel 4.9.175 fails to boot on a PowerMac G4 3,6 at early stage
From: Greg KH @ 2019-05-14  7:05 UTC (permalink / raw)
  To: Christophe Leroy; +Cc: erhard_f, linuxppc-dev, stable@vger.kernel.org
In-Reply-To: <e1a6f0c2-c93a-05bd-0623-ccffa733c5a7@c-s.fr>

On Tue, May 14, 2019 at 06:56:03AM +0200, Christophe Leroy wrote:
> Hi Greg,
> 
> Could you please apply b45ba4a51cde29b2939365ef0c07ad34c8321789 to 4.9 since
> 51c3c62b58b357e8d35e4cc32f7b4ec907426fe3 was applied allthought marked for
> #4.13+

It does not apply there (nor to the 4.4.y queue, which will need it as
well), so can you provide a working backport so that I can queue it up?

thanks,

greg k-h

^ permalink raw reply

* Re: [RESEND PATCH] powerpc/pseries: Fix cpu_hotplug_lock acquisition in resize_hpt
From: Michael Ellerman @ 2019-05-14  7:02 UTC (permalink / raw)
  To: Gautham R. Shenoy, Paul Mackerras, Nicholas Piggin,
	Aneesh Kumar K.V
  Cc: Gautham R. Shenoy, linuxppc-dev, linux-kernel
In-Reply-To: <1557480294-808-1-git-send-email-ego@linux.vnet.ibm.com>

"Gautham R. Shenoy" <ego@linux.vnet.ibm.com> writes:
> From: "Gautham R. Shenoy" <ego@linux.vnet.ibm.com>
>
> Subject: Re: [RESEND PATCH] powerpc/pseries: Fix cpu_hotplug_lock acquisition in resize_hpt

ps. A "RESEND" implies the patch is unchanged and you're just resending
it because it was ignored.

In this case it should have just been "PATCH v2", with a note below the "---"
saying "v2: Rebased onto powerpc/next ..."

cheers

> During a memory hotplug operations involving resizing of the HPT, we
> invoke a stop_machine() to perform the resizing. In this code path, we
> end up recursively taking the cpu_hotplug_lock, first in
> memory_hotplug_begin() and then subsequently in stop_machine(). This
> causes the system to hang. With lockdep enabled we get the following
> error message before the hang.
>
>   swapper/0/1 is trying to acquire lock:
>   (____ptrval____) (cpu_hotplug_lock.rw_sem){++++}, at: stop_machine+0x2c/0x60
>
>   but task is already holding lock:
>   (____ptrval____) (cpu_hotplug_lock.rw_sem){++++}, at: mem_hotplug_begin+0x20/0x50
>
>   other info that might help us debug this:
>    Possible unsafe locking scenario:
>
>          CPU0
>          ----
>     lock(cpu_hotplug_lock.rw_sem);
>     lock(cpu_hotplug_lock.rw_sem);
>
>    *** DEADLOCK ***
>
> Fix this issue by
>   1) Requiring all the calls to pseries_lpar_resize_hpt() be made
>      with cpu_hotplug_lock held.
>
>   2) In pseries_lpar_resize_hpt() invoke stop_machine_cpuslocked()
>      as a consequence of 1)
>
>   3) To satisfy 1), in hpt_order_set(), call mmu_hash_ops.resize_hpt()
>      with cpu_hotplug_lock held.
>
> Reported-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
> Signed-off-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
> ---
>
> Rebased this one against powerpc/next instead of linux/master.
>
>  arch/powerpc/mm/book3s64/hash_utils.c | 9 ++++++++-
>  arch/powerpc/platforms/pseries/lpar.c | 8 ++++++--
>  2 files changed, 14 insertions(+), 3 deletions(-)
>
> diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
> index 919a861..d07fcafd 100644
> --- a/arch/powerpc/mm/book3s64/hash_utils.c
> +++ b/arch/powerpc/mm/book3s64/hash_utils.c
> @@ -38,6 +38,7 @@
>  #include <linux/libfdt.h>
>  #include <linux/pkeys.h>
>  #include <linux/hugetlb.h>
> +#include <linux/cpu.h>
>  
>  #include <asm/debugfs.h>
>  #include <asm/processor.h>
> @@ -1928,10 +1929,16 @@ static int hpt_order_get(void *data, u64 *val)
>  
>  static int hpt_order_set(void *data, u64 val)
>  {
> +	int ret;
> +
>  	if (!mmu_hash_ops.resize_hpt)
>  		return -ENODEV;
>  
> -	return mmu_hash_ops.resize_hpt(val);
> +	cpus_read_lock();
> +	ret = mmu_hash_ops.resize_hpt(val);
> +	cpus_read_unlock();
> +
> +	return ret;
>  }
>  
>  DEFINE_DEBUGFS_ATTRIBUTE(fops_hpt_order, hpt_order_get, hpt_order_set, "%llu\n");
> diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
> index 1034ef1..2fc9756 100644
> --- a/arch/powerpc/platforms/pseries/lpar.c
> +++ b/arch/powerpc/platforms/pseries/lpar.c
> @@ -859,7 +859,10 @@ static int pseries_lpar_resize_hpt_commit(void *data)
>  	return 0;
>  }
>  
> -/* Must be called in user context */
> +/*
> + * Must be called in user context. The caller should hold the
> + * cpus_lock.
> + */
>  static int pseries_lpar_resize_hpt(unsigned long shift)
>  {
>  	struct hpt_resize_state state = {
> @@ -913,7 +916,8 @@ static int pseries_lpar_resize_hpt(unsigned long shift)
>  
>  	t1 = ktime_get();
>  
> -	rc = stop_machine(pseries_lpar_resize_hpt_commit, &state, NULL);
> +	rc = stop_machine_cpuslocked(pseries_lpar_resize_hpt_commit,
> +				     &state, NULL);
>  
>  	t2 = ktime_get();
>  
> -- 
> 1.9.4

^ permalink raw reply

* Re: [RESEND PATCH] powerpc/pseries: Fix cpu_hotplug_lock acquisition in resize_hpt
From: Michael Ellerman @ 2019-05-14  7:00 UTC (permalink / raw)
  To: Gautham R. Shenoy, Paul Mackerras, Nicholas Piggin,
	Aneesh Kumar K.V
  Cc: Gautham R. Shenoy, linuxppc-dev, linux-kernel
In-Reply-To: <1557480294-808-1-git-send-email-ego@linux.vnet.ibm.com>

"Gautham R. Shenoy" <ego@linux.vnet.ibm.com> writes:
> From: "Gautham R. Shenoy" <ego@linux.vnet.ibm.com>
>
> During a memory hotplug operations involving resizing of the HPT, we
> invoke a stop_machine() to perform the resizing. In this code path, we
> end up recursively taking the cpu_hotplug_lock, first in
> memory_hotplug_begin() and then subsequently in stop_machine(). This
> causes the system to hang.

This implies we have never tested a memory hotplug that resized the HPT.
Is that really true? Or did something change?

> With lockdep enabled we get the following
> error message before the hang.
>
>   swapper/0/1 is trying to acquire lock:
>   (____ptrval____) (cpu_hotplug_lock.rw_sem){++++}, at: stop_machine+0x2c/0x60
>
>   but task is already holding lock:
>   (____ptrval____) (cpu_hotplug_lock.rw_sem){++++}, at: mem_hotplug_begin+0x20/0x50

Do we have the full stack trace?

>   other info that might help us debug this:
>    Possible unsafe locking scenario:
>
>          CPU0
>          ----
>     lock(cpu_hotplug_lock.rw_sem);
>     lock(cpu_hotplug_lock.rw_sem);
>
>    *** DEADLOCK ***
>
> Fix this issue by
>   1) Requiring all the calls to pseries_lpar_resize_hpt() be made
>      with cpu_hotplug_lock held.
>
>   2) In pseries_lpar_resize_hpt() invoke stop_machine_cpuslocked()
>      as a consequence of 1)
>
>   3) To satisfy 1), in hpt_order_set(), call mmu_hash_ops.resize_hpt()
>      with cpu_hotplug_lock held.
>
> Reported-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
> Signed-off-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
> ---
>
> Rebased this one against powerpc/next instead of linux/master.
>
>  arch/powerpc/mm/book3s64/hash_utils.c | 9 ++++++++-
>  arch/powerpc/platforms/pseries/lpar.c | 8 ++++++--
>  2 files changed, 14 insertions(+), 3 deletions(-)
>
> diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
> index 919a861..d07fcafd 100644
> --- a/arch/powerpc/mm/book3s64/hash_utils.c
> +++ b/arch/powerpc/mm/book3s64/hash_utils.c
> @@ -38,6 +38,7 @@
>  #include <linux/libfdt.h>
>  #include <linux/pkeys.h>
>  #include <linux/hugetlb.h>
> +#include <linux/cpu.h>
>  
>  #include <asm/debugfs.h>
>  #include <asm/processor.h>
> @@ -1928,10 +1929,16 @@ static int hpt_order_get(void *data, u64 *val)
>  
>  static int hpt_order_set(void *data, u64 val)
>  {
> +	int ret;
> +
>  	if (!mmu_hash_ops.resize_hpt)
>  		return -ENODEV;
>  
> -	return mmu_hash_ops.resize_hpt(val);
> +	cpus_read_lock();
> +	ret = mmu_hash_ops.resize_hpt(val);
> +	cpus_read_unlock();
> +
> +	return ret;
>  }
>  
>  DEFINE_DEBUGFS_ATTRIBUTE(fops_hpt_order, hpt_order_get, hpt_order_set, "%llu\n");
> diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
> index 1034ef1..2fc9756 100644
> --- a/arch/powerpc/platforms/pseries/lpar.c
> +++ b/arch/powerpc/platforms/pseries/lpar.c
> @@ -859,7 +859,10 @@ static int pseries_lpar_resize_hpt_commit(void *data)
>  	return 0;
>  }
>  
> -/* Must be called in user context */
> +/*
> + * Must be called in user context. The caller should hold the

I realise you're just copying that comment, but it seems wrong. "user
context" means userspace. I think it means "process context" doesn't it?

Also "should" should be "must" :)

> + * cpus_lock.
> + */
>  static int pseries_lpar_resize_hpt(unsigned long shift)
>  {
>  	struct hpt_resize_state state = {
> @@ -913,7 +916,8 @@ static int pseries_lpar_resize_hpt(unsigned long shift)
>  
>  	t1 = ktime_get();
>  
> -	rc = stop_machine(pseries_lpar_resize_hpt_commit, &state, NULL);
> +	rc = stop_machine_cpuslocked(pseries_lpar_resize_hpt_commit,
> +				     &state, NULL);
>  
>  	t2 = ktime_get();

cheers

^ permalink raw reply

* Re: [PATCH 2/2] powerpc/8xx: Add microcode patch to move SMC parameter RAM.
From: Michael Ellerman @ 2019-05-14  6:56 UTC (permalink / raw)
  To: Christophe Leroy, Benjamin Herrenschmidt, Paul Mackerras,
	Vitaly Bordug, Scott Wood
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <dd715639629639505ef4edd36d5a1aa4361e6edf.1557487355.git.christophe.leroy@c-s.fr>

Christophe Leroy <christophe.leroy@c-s.fr> writes:

> Some SCC functions like the QMC requires an extended parameter RAM.
> On modern 8xx (ie 866 and 885), SPI area can already be relocated,
> allowing the use of those functions on SCC2. But SCC3 and SCC4
> parameter RAM collide with SMC1 and SMC2 parameter RAMs.
>
> This patch adds microcode to allow the relocation of both SMC1 and
> SMC2, and relocate them at offsets 0x1ec0 and 0x1fc0.
> Those offsets are by default for the CPM1 DSP1 and DSP2, but there
> is no kernel driver using them at the moment so this area can be
> reused.
>
> Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
> ---
>  arch/powerpc/platforms/8xx/Kconfig      |   7 ++
>  arch/powerpc/platforms/8xx/micropatch.c | 109 +++++++++++++++++++++++++++++++-
>  2 files changed, 114 insertions(+), 2 deletions(-)
>
> diff --git a/arch/powerpc/platforms/8xx/micropatch.c b/arch/powerpc/platforms/8xx/micropatch.c
> index 33a9042fca80..dc4423daf7d4 100644
> --- a/arch/powerpc/platforms/8xx/micropatch.c
> +++ b/arch/powerpc/platforms/8xx/micropatch.c
> @@ -622,6 +622,86 @@ static uint patch_2f00[] __initdata = {
>  };
>  #endif
>  
> +/*
> + * SMC relocation patch arrays.
> + */
> +
> +#ifdef CONFIG_SMC_UCODE_PATCH
> +
> +static uint patch_2000[] __initdata = {
> +	0x3fff0000, 0x3ffd0000, 0x3ffb0000, 0x3ff90000,
> +	0x5fefeff8, 0x5f91eff8, 0x3ff30000, 0x3ff10000,
> +	0x3a11e710, 0xedf0ccb9, 0xf318ed66, 0x7f0e5fe2,

Do we have any doc on what these values are?

I get that it's microcode but do we have any more detail than that?
What's the source etc?

cheers

^ permalink raw reply

* Re: [PATCH v2] powerpc: Fix compile issue with force DAWR
From: Michael Neuling @ 2019-05-14  6:55 UTC (permalink / raw)
  To: Christophe Leroy, mpe; +Cc: linuxppc-dev
In-Reply-To: <2561c888-1ab5-1cd7-2fe2-509d8d71cae4@c-s.fr>


> > > > --
> > > > v2:
> > > >     Fixes based on Christophe Leroy's comments:
> > > >     - Fix commit message formatting
> > > >     - Move more DAWR code into dawr.c
> > > > ---
> > > >    arch/powerpc/Kconfig                     |  5 ++
> > > >    arch/powerpc/include/asm/hw_breakpoint.h | 20 ++++---
> > > >    arch/powerpc/kernel/Makefile             |  1 +
> > > >    arch/powerpc/kernel/dawr.c               | 75
> > > > ++++++++++++++++++++++++
> > > >    arch/powerpc/kernel/hw_breakpoint.c      | 56 ------------------
> > > >    arch/powerpc/kvm/Kconfig                 |  1 +
> > > >    6 files changed, 94 insertions(+), 64 deletions(-)
> > > >    create mode 100644 arch/powerpc/kernel/dawr.c
> > > > 
> > > > diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
> > > > index 2711aac246..f4b19e48cc 100644
> > > > --- a/arch/powerpc/Kconfig
> > > > +++ b/arch/powerpc/Kconfig
> > > > @@ -242,6 +242,7 @@ config PPC
> > > >    	select SYSCTL_EXCEPTION_TRACE
> > > >    	select THREAD_INFO_IN_TASK
> > > >    	select VIRT_TO_BUS			if !PPC64
> > > > +	select PPC_DAWR_FORCE_ENABLE		if PPC64 || PERF
> > > 
> > > What's PERF ? Did you mean PERF_EVENTS ?
> > > 
> > > Then what you mean is:
> > > - Selected all the time for PPC64
> > > - Selected for PPC32 when PERF is also selected.
> > > 
> > > Is that what you want ? At first I thought it was linked to P9.
> > 
> > This is wrong.  I think we just want PPC64. PERF is a red herring.
> 
> Are you sure ? Michael suggested PERF || KVM as far as I remember.

It was mpe but I think it was wrong.

We could make it more selective with something like:
   PPC64 && (HW_BREAK || PPC_ADV_DEBUG_REGS || PERF)
but I think that will end up back in the larger mess of
  https://github.com/linuxppc/issues/issues/128

I don't think the minor size gain is is worth delving in that mess, hence I made
it simply PPC64 which is hopefully an improvement on what we have since it
eliminates 32bit.

> > > >    #else	/* CONFIG_HAVE_HW_BREAKPOINT */
> > > >    static inline void hw_breakpoint_disable(void) { }
> > > >    static inline void thread_change_pc(struct task_struct *tsk,
> > > >    					struct pt_regs *regs) { }
> > > > -static inline bool dawr_enabled(void) { return false; }
> > > > +
> > > >    #endif	/* CONFIG_HAVE_HW_BREAKPOINT */
> > > > +
> > > > +extern bool dawr_force_enable;
> > > > +
> > > > +#ifdef CONFIG_PPC_DAWR_FORCE_ENABLE
> > > > +extern bool dawr_enabled(void);
> > > 
> > > Functions should not be 'extern'. I'm sure checkpatch --strict will tell
> > > you.
> > 
> > That's not what's currently being done in this header file.  I'm keeping
> > with
> > the style of that file.
> 
> style is not defined on a per file basis. There is the style from the 
> past and the nowadays style. If you keep old style just because the file 
> includes old style statements, then the code will never improve.
> 
> All new patches should come with clean 'checkpatch' report and follow 
> new style. Having mixed styles in a file is not a problem, that's the 
> way to improvement. See arch/powerpc/mm/mmu_decl.h as an exemple.

I'm not sure that's mpe's policy.

mpe?

> > > > +
> > > > +static ssize_t dawr_write_file_bool(struct file *file,
> > > > +				    const char __user *user_buf,
> > > > +				    size_t count, loff_t *ppos)
> > > > +{
> > > > +	struct arch_hw_breakpoint null_brk = {0, 0, 0};
> > > > +	size_t rc;
> > > > +
> > > > +	/* Send error to user if they hypervisor won't allow us to write
> > > > DAWR */
> > > > +	if ((!dawr_force_enable) &&
> > > > +	    (firmware_has_feature(FW_FEATURE_LPAR)) &&
> > > > +	    (set_dawr(&null_brk) != H_SUCCESS))
> > > 
> > > The above is not real clear.
> > > set_dabr() returns 0, H_SUCCESS is not used there.
> > 
> > It pseries_set_dawr() will return a hcall number.
> 
> Right, then it maybe means set_dawr() should be fixes ?

Sorry, I don't understand this.

> > This code hasn't changed. I'm just moving it.
> 
> Right, but could be an improvment for another patch.
> As far as I remember you are the one who wrote that code at first place, 
> arent't you ?

Yep, classic crap Mikey code :-)

Mikey

^ permalink raw reply

* Re: [PATCH] EDAC, mpc85xx: Prevent building as a module
From: Michael Ellerman @ 2019-05-14  6:50 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Johannes Thumshirn, linux-kernel, linuxppc-dev, james.morse,
	mchehab, linux-edac
In-Reply-To: <20190510182512.GG29927@zn.tnic>

Borislav Petkov <bp@alien8.de> writes:
> On Fri, May 10, 2019 at 04:13:20PM +0200, Borislav Petkov wrote:
>> On Fri, May 10, 2019 at 08:50:52PM +1000, Michael Ellerman wrote:
>> > Yeah that looks better to me. I didn't think about the case where EDAC
>> > core is modular.
>> > 
>> > Do you want me to send a new patch?
>> 
>> Nah, I'll fix it up.
>
> I've pushed it here:
>
> https://git.kernel.org/pub/scm/linux/kernel/git/bp/bp.git/commit/?h=edac-fix-for-5.2
>
> in case you wanna throw your build tests on it. My dingy cross-compiler
> can't do much really.

Looks good. I even booted it :)

cheers

^ permalink raw reply

* Re: [PATCH] powerpc/mm: Handle page table allocation failures
From: Michael Ellerman @ 2019-05-14  6:40 UTC (permalink / raw)
  To: Aneesh Kumar K.V, npiggin, paulus
  Cc: Sachin Sant, Aneesh Kumar K.V, linuxppc-dev
In-Reply-To: <20190514010543.29896-1-aneesh.kumar@linux.ibm.com>

"Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com> writes:
> This fix the below crash that arise due to not handling page table allocation
> failures while allocating hugetlb page table.
>
>  BUG: Kernel NULL pointer dereference at 0x0000001c
>  Faulting instruction address: 0xc000000001d1e58c
>  Oops: Kernel access of bad area, sig: 11 [#1]
>  LE PAGE_SIZE=4K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries
>
>  CPU: 3 PID: 4635 Comm: futex_wake04 Tainted: G        W  O      5.1.0-next-20190507-autotest #1
>  NIP:  c000000001d1e58c LR: c000000001d1e54c CTR: 0000000000000000
>  REGS: c000000004937890 TRAP: 0300   Tainted: G        W  O       (5.1.0-next-20190507-autotest)
>  MSR:  8000000000009033 <SF,EE,ME,IR,DR,RI,LE>  CR: 22424822  XER: 00000000
>  CFAR: c00000000183e9e0 DAR: 000000000000001c DSISR: 40000000 IRQMASK: 0
>  GPR00: c000000001901a80 c000000004937b20 c000000003938700 0000000000000000
>  GPR04: 0000000000400cc0 000000000003efff 000000027966e000 c000000003ba8700
>  GPR08: c000000003ba8700 000000000d601125 c000000003ba8700 0000000080000000
>  GPR12: 0000000022424822 c00000001ecae280 0000000000000000 0000000000000000
>  GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
>  GPR20: 0000000000000018 c0000000039e2d30 c0000000039e2d28 c0000002762da460
>  GPR24: 000000000000001c 0000000000000000 0000000000000001 c000000001901a80
>  GPR28: 0000000000400cc0 0000000000000000 0000000000000000 0000000000400cc0
>  NIP [c000000001d1e58c] kmem_cache_alloc+0xbc/0x5a0
>  LR [c000000001d1e54c] kmem_cache_alloc+0x7c/0x5a0
>  Call Trace:
>   [c000000001c91150] __pud_alloc+0x160/0x200 (unreliable)
>   [c000000001901a80] huge_pte_alloc+0x580/0x950
>   [c000000001cf7910] hugetlb_fault+0x9a0/0x1250
>   [c000000001c94a80] handle_mm_fault+0x490/0x4a0
>   [c0000000018d529c] __do_page_fault+0x77c/0x1f00
>   [c0000000018d6a48] do_page_fault+0x28/0x50
>   [c00000000183b0d4] handle_page_fault+0x18/0x38
>
> Fixes: e2b3d202d1db ("powerpc: Switch 16GB and 16MB explicit hugepages to a different page table format")
> Reported-by: Sachin Sant <sachinp@linux.vnet.ibm.com>
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
> ---
>
> Note: I did add a recent commit for the Fixes tag. But in reality we never checked for page table
> allocation failure there. If we want to go to that old commit, then we may need.

If we never checked for failure in that path, is there some reason we've
only just noticed the crashes? Are we just testing under memory pressure
more effectively than we used to?

cheers

^ permalink raw reply

* [PATCH] powerpc/book3s/mm: Clear MMU_FTR_HPTE_TABLE when radix is enabled.
From: Aneesh Kumar K.V @ 2019-05-14  6:02 UTC (permalink / raw)
  To: npiggin, paulus, mpe; +Cc: Aneesh Kumar K.V, linuxppc-dev

Avoids confusion when printing Oops message like below

 Faulting instruction address: 0xc00000000008bdb4
 Oops: Kernel access of bad area, sig: 11 [#1]
 LE PAGE_SIZE=64K MMU=Radix MMU=Hash SMP NR_CPUS=2048 NUMA PowerNV

Either ibm,pa-features or ibm,powerpc-cpu-features can be used to enable the
MMU features. We don't clear related MMU feature bits there. We use the kernel
commandline to determine what translation mode we want to use and clear the
HPTE or radix bit accordingly. On LPAR we do have to renable HASH bit if the
hypervisor can't do radix.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 arch/powerpc/mm/init_64.c | 11 ++++++++++-
 arch/powerpc/mm/pgtable.c |  7 +++----
 2 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 45b02fa11cd8..652d954444ba 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -355,15 +355,18 @@ static void __init early_check_vec5(void)
 	chosen = of_get_flat_dt_subnode_by_name(root, "chosen");
 	if (chosen == -FDT_ERR_NOTFOUND) {
 		cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX;
+		cur_cpu_spec->mmu_features |= MMU_FTR_HPTE_TABLE;
 		return;
 	}
 	vec5 = of_get_flat_dt_prop(chosen, "ibm,architecture-vec-5", &size);
 	if (!vec5) {
 		cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX;
+		cur_cpu_spec->mmu_features |= MMU_FTR_HPTE_TABLE;
 		return;
 	}
 	if (size <= OV5_INDX(OV5_MMU_SUPPORT)) {
 		cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX;
+		cur_cpu_spec->mmu_features |= MMU_FTR_HPTE_TABLE;
 		return;
 	}
 
@@ -381,17 +384,23 @@ static void __init early_check_vec5(void)
 		}
 		/* Do radix anyway - the hypervisor said we had to */
 		cur_cpu_spec->mmu_features |= MMU_FTR_TYPE_RADIX;
+		cur_cpu_spec->mmu_features &= ~MMU_FTR_HPTE_TABLE;
 	} else if (mmu_supported == OV5_FEAT(OV5_MMU_HASH)) {
 		/* Hypervisor only supports hash - disable radix */
 		cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX;
+		cur_cpu_spec->mmu_features |= MMU_FTR_HPTE_TABLE;
 	}
 }
 
 void __init mmu_early_init_devtree(void)
 {
 	/* Disable radix mode based on kernel command line. */
-	if (disable_radix)
+	if (disable_radix) {
 		cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX;
+	} else {
+		if (early_radix_enabled())
+			cur_cpu_spec->mmu_features &= ~MMU_FTR_HPTE_TABLE;
+	}
 
 	/*
 	 * Check /chosen/ibm,architecture-vec-5 if running as a guest.
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index b97aee03924f..0fa6cac3fe82 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -77,9 +77,6 @@ static struct page *maybe_pte_to_page(pte_t pte)
 
 static pte_t set_pte_filter_hash(pte_t pte)
 {
-	if (radix_enabled())
-		return pte;
-
 	pte = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
 	if (pte_looks_normal(pte) && !(cpu_has_feature(CPU_FTR_COHERENT_ICACHE) ||
 				       cpu_has_feature(CPU_FTR_NOEXECUTE))) {
@@ -110,6 +107,8 @@ static pte_t set_pte_filter(pte_t pte)
 
 	if (mmu_has_feature(MMU_FTR_HPTE_TABLE))
 		return set_pte_filter_hash(pte);
+	else if (radix_enabled())
+		return pte;
 
 	/* No exec permission in the first place, move on */
 	if (!pte_exec(pte) || !pte_looks_normal(pte))
@@ -140,7 +139,7 @@ static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma,
 {
 	struct page *pg;
 
-	if (mmu_has_feature(MMU_FTR_HPTE_TABLE))
+	if (mmu_has_feature(MMU_FTR_HPTE_TABLE) || radix_enabled())
 		return pte;
 
 	/* So here, we only care about exec faults, as we use them
-- 
2.21.0


^ permalink raw reply related

* [PATCH 3/3] powerpc/mm: Remove radix dependency on HugeTLB page
From: Aneesh Kumar K.V @ 2019-05-14  6:03 UTC (permalink / raw)
  To: npiggin, paulus, mpe; +Cc: Aneesh Kumar K.V, linuxppc-dev
In-Reply-To: <20190514060302.21505-1-aneesh.kumar@linux.ibm.com>

Now that we have switched the page table walk to use pmd_is_leaf we can now
revert commit 8adddf349fda ("powerpc/mm/radix: Make Radix require HUGETLB_PAGE")

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 arch/powerpc/platforms/Kconfig.cputype | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index d0e172d47574..fa6b03205ae1 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -330,7 +330,7 @@ config ARCH_ENABLE_SPLIT_PMD_PTLOCK
 
 config PPC_RADIX_MMU
 	bool "Radix MMU Support"
-	depends on PPC_BOOK3S_64 && HUGETLB_PAGE
+	depends on PPC_BOOK3S_64
 	select ARCH_HAS_GIGANTIC_PAGE if (MEMORY_ISOLATION && COMPACTION) || CMA
 	select PPC_HAVE_KUEP
 	select PPC_HAVE_KUAP
-- 
2.21.0


^ permalink raw reply related

* [PATCH 2/3] powerpc/mm: pmd_devmap implies pmd_large().
From: Aneesh Kumar K.V @ 2019-05-14  6:03 UTC (permalink / raw)
  To: npiggin, paulus, mpe; +Cc: Aneesh Kumar K.V, linuxppc-dev
In-Reply-To: <20190514060302.21505-1-aneesh.kumar@linux.ibm.com>

large devmap usage is dependent on THP. Hence once check is sufficient.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 arch/powerpc/mm/book3s64/pgtable.c | 2 +-
 arch/powerpc/mm/pgtable_64.c       | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c
index 16bda049187a..471cea271dd3 100644
--- a/arch/powerpc/mm/book3s64/pgtable.c
+++ b/arch/powerpc/mm/book3s64/pgtable.c
@@ -76,7 +76,7 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
 
 	WARN_ON(pte_hw_valid(pmd_pte(*pmdp)) && !pte_protnone(pmd_pte(*pmdp)));
 	assert_spin_locked(pmd_lockptr(mm, pmdp));
-	WARN_ON(!(pmd_large(pmd) || pmd_devmap(pmd)));
+	WARN_ON(!(pmd_large(pmd)));
 #endif
 	trace_hugepage_set_pmd(addr, pmd_val(pmd));
 	return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd));
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index bfb47b037a2f..c1541371f224 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -320,7 +320,7 @@ struct page *pud_page(pud_t pud)
 struct page *pmd_page(pmd_t pmd)
 {
 	if (pmd_is_leaf(pmd)) {
-		VM_WARN_ON(!(pmd_large(pmd) || pmd_huge(pmd) || pmd_devmap(pmd)));
+		VM_WARN_ON(!(pmd_large(pmd) || pmd_huge(pmd)));
 		return pte_page(pmd_pte(pmd));
 	}
 	return virt_to_page(pmd_page_vaddr(pmd));
-- 
2.21.0


^ permalink raw reply related

* [PATCH 1/3] powerpc/book3s: Use config independent helpers for page table walk
From: Aneesh Kumar K.V @ 2019-05-14  6:03 UTC (permalink / raw)
  To: npiggin, paulus, mpe; +Cc: Aneesh Kumar K.V, linuxppc-dev

Even when we have HugeTLB and THP disabled, kernel linear map can still be
mapped with hugepages. This is only an issue with radix translation because hash
MMU doesn't map kernel linear range in linux page table and other kernel
map areas are not mapped using hugepage.

Add config independent helpers and put WARN_ON() when we don't expect things
to be mapped via hugepages.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/pgtable.h | 21 +++++++++++++++
 arch/powerpc/include/asm/pgtable.h           | 24 +++++++++++++++++
 arch/powerpc/include/asm/pte-walk.h          | 28 ++++++++++++++++++--
 arch/powerpc/kvm/book3s_64_mmu_radix.c       | 12 +++------
 arch/powerpc/mm/book3s64/radix_pgtable.c     | 10 +++----
 arch/powerpc/mm/pgtable.c                    | 16 +++++------
 arch/powerpc/mm/pgtable_64.c                 | 12 ++++++---
 arch/powerpc/mm/ptdump/ptdump.c              |  6 ++---
 arch/powerpc/xmon/xmon.c                     |  6 ++---
 9 files changed, 102 insertions(+), 33 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 7dede2e34b70..f402b2a96ef3 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -1313,5 +1313,26 @@ static inline bool is_pte_rw_upgrade(unsigned long old_val, unsigned long new_va
 	return false;
 }
 
+/*
+ * Like pmd_huge() and pmd_large(), but works regardless of config options
+ */
+#define pmd_is_leaf pmd_is_leaf
+static inline bool pmd_is_leaf(pmd_t pmd)
+{
+	return !!(pmd_raw(pmd) & cpu_to_be64(_PAGE_PTE));
+}
+
+#define pud_is_leaf pud_is_leaf
+static inline bool pud_is_leaf(pud_t pud)
+{
+	return !!(pud_raw(pud) & cpu_to_be64(_PAGE_PTE));
+}
+
+#define pgd_is_leaf pgd_is_leaf
+static inline bool pgd_is_leaf(pgd_t pgd)
+{
+	return !!(pgd_raw(pgd) & cpu_to_be64(_PAGE_PTE));
+}
+
 #endif /* __ASSEMBLY__ */
 #endif /* _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ */
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index 3f53be60fb01..bf7d771f342e 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -140,6 +140,30 @@ static inline void pte_frag_set(mm_context_t *ctx, void *p)
 }
 #endif
 
+#ifndef pmd_is_leaf
+#define pmd_is_leaf pmd_is_leaf
+static inline bool pmd_is_leaf(pmd_t pmd)
+{
+	return false;
+}
+#endif
+
+#ifndef pud_is_leaf
+#define pud_is_leaf pud_is_leaf
+static inline bool pud_is_leaf(pud_t pud)
+{
+	return false;
+}
+#endif
+
+#ifndef pgd_is_leaf
+#define pgd_is_leaf pgd_is_leaf
+static inline bool pgd_is_leaf(pgd_t pgd)
+{
+	return false;
+}
+#endif
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* _ASM_POWERPC_PGTABLE_H */
diff --git a/arch/powerpc/include/asm/pte-walk.h b/arch/powerpc/include/asm/pte-walk.h
index 2d633e9d686c..33fa5dd8ee6a 100644
--- a/arch/powerpc/include/asm/pte-walk.h
+++ b/arch/powerpc/include/asm/pte-walk.h
@@ -10,8 +10,20 @@ extern pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea,
 static inline pte_t *find_linux_pte(pgd_t *pgdir, unsigned long ea,
 				    bool *is_thp, unsigned *hshift)
 {
+	pte_t *pte;
+
 	VM_WARN(!arch_irqs_disabled(), "%s called with irq enabled\n", __func__);
-	return __find_linux_pte(pgdir, ea, is_thp, hshift);
+	pte = __find_linux_pte(pgdir, ea, is_thp, hshift);
+
+#if defined(CONFIG_DEBUG_VM) &&						\
+	!(defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE))
+	/*
+	 * We should not find huge page if these configs are not enabled.
+	 */
+	if (hshift)
+		WARN_ON(*hshift);
+#endif
+	return pte;
 }
 
 static inline pte_t *find_init_mm_pte(unsigned long ea, unsigned *hshift)
@@ -26,10 +38,22 @@ static inline pte_t *find_init_mm_pte(unsigned long ea, unsigned *hshift)
 static inline pte_t *find_current_mm_pte(pgd_t *pgdir, unsigned long ea,
 					 bool *is_thp, unsigned *hshift)
 {
+	pte_t *pte;
+
 	VM_WARN(!arch_irqs_disabled(), "%s called with irq enabled\n", __func__);
 	VM_WARN(pgdir != current->mm->pgd,
 		"%s lock less page table lookup called on wrong mm\n", __func__);
-	return __find_linux_pte(pgdir, ea, is_thp, hshift);
+	pte = __find_linux_pte(pgdir, ea, is_thp, hshift);
+
+#if defined(CONFIG_DEBUG_VM) &&						\
+	!(defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE))
+	/*
+	 * We should not find huge page if these configs are not enabled.
+	 */
+	if (hshift)
+		WARN_ON(*hshift);
+#endif
+	return pte;
 }
 
 #endif /* _ASM_POWERPC_PTE_WALK_H */
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index f55ef071883f..91efee7f0329 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -363,12 +363,6 @@ static void kvmppc_pte_free(pte_t *ptep)
 	kmem_cache_free(kvm_pte_cache, ptep);
 }
 
-/* Like pmd_huge() and pmd_large(), but works regardless of config options */
-static inline int pmd_is_leaf(pmd_t pmd)
-{
-	return !!(pmd_val(pmd) & _PAGE_PTE);
-}
-
 static pmd_t *kvmppc_pmd_alloc(void)
 {
 	return kmem_cache_alloc(kvm_pmd_cache, GFP_KERNEL);
@@ -489,7 +483,7 @@ static void kvmppc_unmap_free_pud(struct kvm *kvm, pud_t *pud,
 	for (iu = 0; iu < PTRS_PER_PUD; ++iu, ++p) {
 		if (!pud_present(*p))
 			continue;
-		if (pud_huge(*p)) {
+		if (pud_is_leaf(*p)) {
 			pud_clear(p);
 		} else {
 			pmd_t *pmd;
@@ -588,7 +582,7 @@ int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte,
 		new_pud = pud_alloc_one(kvm->mm, gpa);
 
 	pmd = NULL;
-	if (pud && pud_present(*pud) && !pud_huge(*pud))
+	if (pud && pud_present(*pud) && !pud_is_leaf(*pud))
 		pmd = pmd_offset(pud, gpa);
 	else if (level <= 1)
 		new_pmd = kvmppc_pmd_alloc();
@@ -611,7 +605,7 @@ int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte,
 		new_pud = NULL;
 	}
 	pud = pud_offset(pgd, gpa);
-	if (pud_huge(*pud)) {
+	if (pud_is_leaf(*pud)) {
 		unsigned long hgpa = gpa & PUD_MASK;
 
 		/* Check if we raced and someone else has set the same thing */
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index 247a38a3f3a6..b78f6fbdd307 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -202,14 +202,14 @@ void radix__change_memory_range(unsigned long start, unsigned long end,
 		pudp = pud_alloc(&init_mm, pgdp, idx);
 		if (!pudp)
 			continue;
-		if (pud_huge(*pudp)) {
+		if (pud_is_leaf(*pudp)) {
 			ptep = (pte_t *)pudp;
 			goto update_the_pte;
 		}
 		pmdp = pmd_alloc(&init_mm, pudp, idx);
 		if (!pmdp)
 			continue;
-		if (pmd_huge(*pmdp)) {
+		if (pmd_is_leaf(*pmdp)) {
 			ptep = pmdp_ptep(pmdp);
 			goto update_the_pte;
 		}
@@ -837,7 +837,7 @@ static void remove_pmd_table(pmd_t *pmd_start, unsigned long addr,
 		if (!pmd_present(*pmd))
 			continue;
 
-		if (pmd_huge(*pmd)) {
+		if (pmd_is_leaf(*pmd)) {
 			split_kernel_mapping(addr, end, PMD_SIZE, (pte_t *)pmd);
 			continue;
 		}
@@ -862,7 +862,7 @@ static void remove_pud_table(pud_t *pud_start, unsigned long addr,
 		if (!pud_present(*pud))
 			continue;
 
-		if (pud_huge(*pud)) {
+		if (pud_is_leaf(*pud)) {
 			split_kernel_mapping(addr, end, PUD_SIZE, (pte_t *)pud);
 			continue;
 		}
@@ -888,7 +888,7 @@ static void __meminit remove_pagetable(unsigned long start, unsigned long end)
 		if (!pgd_present(*pgd))
 			continue;
 
-		if (pgd_huge(*pgd)) {
+		if (pgd_is_leaf(*pgd)) {
 			split_kernel_mapping(addr, end, PGDIR_SIZE, (pte_t *)pgd);
 			continue;
 			/* This should fall through? */
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index db4a6253df92..b97aee03924f 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -340,10 +340,11 @@ pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea,
 	if (pgd_none(pgd))
 		return NULL;
 
-	if (pgd_huge(pgd)) {
+	if (pgd_is_leaf(pgd)) {
 		ret_pte = (pte_t *)pgdp;
 		goto out;
 	}
+
 	if (is_hugepd(__hugepd(pgd_val(pgd)))) {
 		hpdp = (hugepd_t *)&pgd;
 		goto out_huge;
@@ -361,14 +362,16 @@ pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea,
 	if (pud_none(pud))
 		return NULL;
 
-	if (pud_huge(pud)) {
+	if (pud_is_leaf(pud)) {
 		ret_pte = (pte_t *)pudp;
 		goto out;
 	}
+
 	if (is_hugepd(__hugepd(pud_val(pud)))) {
 		hpdp = (hugepd_t *)&pud;
 		goto out_huge;
 	}
+
 	pdshift = PMD_SHIFT;
 	pmdp = pmd_offset(&pud, ea);
 	pmd  = READ_ONCE(*pmdp);
@@ -385,15 +388,12 @@ pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea,
 		ret_pte = (pte_t *)pmdp;
 		goto out;
 	}
-	/*
-	 * pmd_large check below will handle the swap pmd pte
-	 * we need to do both the check because they are config
-	 * dependent.
-	 */
-	if (pmd_huge(pmd) || pmd_large(pmd)) {
+
+	if (pmd_is_leaf(pmd)) {
 		ret_pte = (pte_t *)pmdp;
 		goto out;
 	}
+
 	if (is_hugepd(__hugepd(pmd_val(pmd)))) {
 		hpdp = (hugepd_t *)&pmd;
 		goto out_huge;
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index d2d976ff8a0e..bfb47b037a2f 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -296,16 +296,20 @@ EXPORT_SYMBOL(__iounmap_at);
 /* 4 level page table */
 struct page *pgd_page(pgd_t pgd)
 {
-	if (pgd_huge(pgd))
+	if (pgd_is_leaf(pgd)) {
+		VM_WARN_ON(!pgd_huge(pgd));
 		return pte_page(pgd_pte(pgd));
+	}
 	return virt_to_page(pgd_page_vaddr(pgd));
 }
 #endif
 
 struct page *pud_page(pud_t pud)
 {
-	if (pud_huge(pud))
+	if (pud_is_leaf(pud)) {
+		VM_WARN_ON(!pud_huge(pud));
 		return pte_page(pud_pte(pud));
+	}
 	return virt_to_page(pud_page_vaddr(pud));
 }
 
@@ -315,8 +319,10 @@ struct page *pud_page(pud_t pud)
  */
 struct page *pmd_page(pmd_t pmd)
 {
-	if (pmd_large(pmd) || pmd_huge(pmd) || pmd_devmap(pmd))
+	if (pmd_is_leaf(pmd)) {
+		VM_WARN_ON(!(pmd_large(pmd) || pmd_huge(pmd) || pmd_devmap(pmd)));
 		return pte_page(pmd_pte(pmd));
+	}
 	return virt_to_page(pmd_page_vaddr(pmd));
 }
 
diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c
index 646876d9da64..abe60d25b4e6 100644
--- a/arch/powerpc/mm/ptdump/ptdump.c
+++ b/arch/powerpc/mm/ptdump/ptdump.c
@@ -277,7 +277,7 @@ static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start)
 
 	for (i = 0; i < PTRS_PER_PMD; i++, pmd++) {
 		addr = start + i * PMD_SIZE;
-		if (!pmd_none(*pmd) && !pmd_huge(*pmd))
+		if (!pmd_none(*pmd) && !pmd_is_leaf(*pmd))
 			/* pmd exists */
 			walk_pte(st, pmd, addr);
 		else
@@ -293,7 +293,7 @@ static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start)
 
 	for (i = 0; i < PTRS_PER_PUD; i++, pud++) {
 		addr = start + i * PUD_SIZE;
-		if (!pud_none(*pud) && !pud_huge(*pud))
+		if (!pud_none(*pud) && !pud_is_leaf(*pud))
 			/* pud exists */
 			walk_pmd(st, pud, addr);
 		else
@@ -314,7 +314,7 @@ static void walk_pagetables(struct pg_state *st)
 	 * the hash pagetable.
 	 */
 	for (i = 0; i < PTRS_PER_PGD; i++, pgd++, addr += PGDIR_SIZE) {
-		if (!pgd_none(*pgd) && !pgd_huge(*pgd))
+		if (!pgd_none(*pgd) && !pgd_is_leaf(*pgd))
 			/* pgd exists */
 			walk_pud(st, pgd, addr);
 		else
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 1b0149b2bb6c..49353e01edfa 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -3094,7 +3094,7 @@ static void show_pte(unsigned long addr)
 
 	printf("pgd  @ 0x%px\n", pgdir);
 
-	if (pgd_huge(*pgdp)) {
+	if (pgd_is_leaf(*pgdp)) {
 		format_pte(pgdp, pgd_val(*pgdp));
 		return;
 	}
@@ -3107,7 +3107,7 @@ static void show_pte(unsigned long addr)
 		return;
 	}
 
-	if (pud_huge(*pudp)) {
+	if (pud_is_leaf(*pudp)) {
 		format_pte(pudp, pud_val(*pudp));
 		return;
 	}
@@ -3121,7 +3121,7 @@ static void show_pte(unsigned long addr)
 		return;
 	}
 
-	if (pmd_huge(*pmdp)) {
+	if (pmd_is_leaf(*pmdp)) {
 		format_pte(pmdp, pmd_val(*pmdp));
 		return;
 	}
-- 
2.21.0


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox