LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 12/45] powerpc/ptdump: Properly handle non standard page size
From: Christophe Leroy @ 2020-05-06 16:48 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1588783498.git.christophe.leroy@csgroup.eu>

In order to properly display information regardless of the page size,
it is necessary to take into account real page size.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Fixes: cabe8138b23c ("powerpc: dump as a single line areas mapping a single physical page.")
Cc: stable@vger.kernel.org
---
 arch/powerpc/mm/ptdump/ptdump.c | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c
index 1f97668853e3..64434b66f240 100644
--- a/arch/powerpc/mm/ptdump/ptdump.c
+++ b/arch/powerpc/mm/ptdump/ptdump.c
@@ -60,6 +60,7 @@ struct pg_state {
 	unsigned long start_address;
 	unsigned long start_pa;
 	unsigned long last_pa;
+	unsigned long page_size;
 	unsigned int level;
 	u64 current_flags;
 	bool check_wx;
@@ -168,9 +169,9 @@ static void dump_addr(struct pg_state *st, unsigned long addr)
 #endif
 
 	pt_dump_seq_printf(st->seq, REG "-" REG " ", st->start_address, addr - 1);
-	if (st->start_pa == st->last_pa && st->start_address + PAGE_SIZE != addr) {
+	if (st->start_pa == st->last_pa && st->start_address + st->page_size != addr) {
 		pt_dump_seq_printf(st->seq, "[" REG "]", st->start_pa);
-		delta = PAGE_SIZE >> 10;
+		delta = st->page_size >> 10;
 	} else {
 		pt_dump_seq_printf(st->seq, " " REG " ", st->start_pa);
 		delta = (addr - st->start_address) >> 10;
@@ -195,10 +196,11 @@ static void note_prot_wx(struct pg_state *st, unsigned long addr)
 }
 
 static void note_page(struct pg_state *st, unsigned long addr,
-	       unsigned int level, u64 val)
+	       unsigned int level, u64 val, int shift)
 {
 	u64 flag = val & pg_level[level].mask;
 	u64 pa = val & PTE_RPN_MASK;
+	unsigned long page_size = 1 << shift;
 
 	/* At first no level is set */
 	if (!st->level) {
@@ -207,6 +209,7 @@ static void note_page(struct pg_state *st, unsigned long addr,
 		st->start_address = addr;
 		st->start_pa = pa;
 		st->last_pa = pa;
+		st->page_size = page_size;
 		pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
 	/*
 	 * Dump the section of virtual memory when:
@@ -218,7 +221,7 @@ static void note_page(struct pg_state *st, unsigned long addr,
 	 */
 	} else if (flag != st->current_flags || level != st->level ||
 		   addr >= st->marker[1].start_address ||
-		   (pa != st->last_pa + PAGE_SIZE &&
+		   (pa != st->last_pa + st->page_size &&
 		    (pa != st->start_pa || st->start_pa != st->last_pa))) {
 
 		/* Check the PTE flags */
@@ -246,6 +249,7 @@ static void note_page(struct pg_state *st, unsigned long addr,
 		st->start_address = addr;
 		st->start_pa = pa;
 		st->last_pa = pa;
+		st->page_size = page_size;
 		st->current_flags = flag;
 		st->level = level;
 	} else {
@@ -261,7 +265,7 @@ static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start)
 
 	for (i = 0; i < PTRS_PER_PTE; i++, pte++) {
 		addr = start + i * PAGE_SIZE;
-		note_page(st, addr, 4, pte_val(*pte));
+		note_page(st, addr, 4, pte_val(*pte), PAGE_SHIFT);
 
 	}
 }
@@ -278,7 +282,7 @@ static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start)
 			/* pmd exists */
 			walk_pte(st, pmd, addr);
 		else
-			note_page(st, addr, 3, pmd_val(*pmd));
+			note_page(st, addr, 3, pmd_val(*pmd), PTE_SHIFT);
 	}
 }
 
@@ -294,7 +298,7 @@ static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start)
 			/* pud exists */
 			walk_pmd(st, pud, addr);
 		else
-			note_page(st, addr, 2, pud_val(*pud));
+			note_page(st, addr, 2, pud_val(*pud), PMD_SHIFT);
 	}
 }
 
@@ -313,7 +317,7 @@ static void walk_pagetables(struct pg_state *st)
 			/* pgd exists */
 			walk_pud(st, pgd, addr);
 		else
-			note_page(st, addr, 1, pgd_val(*pgd));
+			note_page(st, addr, 1, pgd_val(*pgd), PUD_SHIFT);
 	}
 }
 
@@ -368,7 +372,7 @@ static int ptdump_show(struct seq_file *m, void *v)
 
 	/* Traverse kernel page tables */
 	walk_pagetables(&st);
-	note_page(&st, 0, 0, 0);
+	note_page(&st, 0, 0, 0, 0);
 	return 0;
 }
 
-- 
2.25.0


^ permalink raw reply related

* [PATCH v2 06/45] powerpc/kasan: Declare kasan_init_region() weak
From: Christophe Leroy @ 2020-05-06 16:48 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1588783498.git.christophe.leroy@csgroup.eu>

In order to alloc sub-arches to alloc KASAN regions using optimised
methods (Huge pages on 8xx, BATs on BOOK3S, ...), declare
kasan_init_region() weak.

Also make kasan_init_shadow_page_tables() accessible from outside,
so that it can be called from the specific kasan_init_region()
functions if needed.

And populate remaining KASAN address space only once performed
the region mapping, to allow 8xx to allocate hugepd instead of
standard page tables for mapping via 8M hugepages.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/include/asm/kasan.h      |  3 +++
 arch/powerpc/mm/kasan/kasan_init_32.c | 21 +++++++++++----------
 2 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/include/asm/kasan.h b/arch/powerpc/include/asm/kasan.h
index 4769bbf7173a..107a24c3f7b3 100644
--- a/arch/powerpc/include/asm/kasan.h
+++ b/arch/powerpc/include/asm/kasan.h
@@ -34,5 +34,8 @@ static inline void kasan_init(void) { }
 static inline void kasan_late_init(void) { }
 #endif
 
+int kasan_init_shadow_page_tables(unsigned long k_start, unsigned long k_end);
+int kasan_init_region(void *start, size_t size);
+
 #endif /* __ASSEMBLY */
 #endif
diff --git a/arch/powerpc/mm/kasan/kasan_init_32.c b/arch/powerpc/mm/kasan/kasan_init_32.c
index 10481d904fea..76d418af4ce8 100644
--- a/arch/powerpc/mm/kasan/kasan_init_32.c
+++ b/arch/powerpc/mm/kasan/kasan_init_32.c
@@ -28,7 +28,7 @@ static void __init kasan_populate_pte(pte_t *ptep, pgprot_t prot)
 		__set_pte_at(&init_mm, va, ptep, pfn_pte(PHYS_PFN(pa), prot), 0);
 }
 
-static int __init kasan_init_shadow_page_tables(unsigned long k_start, unsigned long k_end)
+int __init kasan_init_shadow_page_tables(unsigned long k_start, unsigned long k_end)
 {
 	pmd_t *pmd;
 	unsigned long k_cur, k_next;
@@ -52,7 +52,7 @@ static int __init kasan_init_shadow_page_tables(unsigned long k_start, unsigned
 	return 0;
 }
 
-static int __init kasan_init_region(void *start, size_t size)
+int __init __weak kasan_init_region(void *start, size_t size)
 {
 	unsigned long k_start = (unsigned long)kasan_mem_to_shadow(start);
 	unsigned long k_end = (unsigned long)kasan_mem_to_shadow(start + size);
@@ -122,14 +122,6 @@ static void __init kasan_mmu_init(void)
 	int ret;
 	struct memblock_region *reg;
 
-	if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE) ||
-	    IS_ENABLED(CONFIG_KASAN_VMALLOC)) {
-		ret = kasan_init_shadow_page_tables(KASAN_SHADOW_START, KASAN_SHADOW_END);
-
-		if (ret)
-			panic("kasan: kasan_init_shadow_page_tables() failed");
-	}
-
 	for_each_memblock(memory, reg) {
 		phys_addr_t base = reg->base;
 		phys_addr_t top = min(base + reg->size, total_lowmem);
@@ -141,6 +133,15 @@ static void __init kasan_mmu_init(void)
 		if (ret)
 			panic("kasan: kasan_init_region() failed");
 	}
+
+	if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE) ||
+	    IS_ENABLED(CONFIG_KASAN_VMALLOC)) {
+		ret = kasan_init_shadow_page_tables(KASAN_SHADOW_START, KASAN_SHADOW_END);
+
+		if (ret)
+			panic("kasan: kasan_init_shadow_page_tables() failed");
+	}
+
 }
 
 void __init kasan_init(void)
-- 
2.25.0


^ permalink raw reply related

* [PATCH v2 10/45] powerpc/ptdump: Display size of BATs
From: Christophe Leroy @ 2020-05-06 16:48 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1588783498.git.christophe.leroy@csgroup.eu>

Display the size of areas mapped with BATs.

For that, the size display for pages is refactorised.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
v2: Add missing include of linux/seq_file.h (Thanks to kbuild test robot)
---
 arch/powerpc/mm/ptdump/bats.c   |  4 ++++
 arch/powerpc/mm/ptdump/ptdump.c | 23 ++++++++++++++---------
 arch/powerpc/mm/ptdump/ptdump.h |  3 +++
 3 files changed, 21 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/mm/ptdump/bats.c b/arch/powerpc/mm/ptdump/bats.c
index d3a5d6b318d1..d6c660f63d71 100644
--- a/arch/powerpc/mm/ptdump/bats.c
+++ b/arch/powerpc/mm/ptdump/bats.c
@@ -10,6 +10,8 @@
 #include <asm/pgtable.h>
 #include <asm/cpu_has_feature.h>
 
+#include "ptdump.h"
+
 static char *pp_601(int k, int pp)
 {
 	if (pp == 0)
@@ -42,6 +44,7 @@ static void bat_show_601(struct seq_file *m, int idx, u32 lower, u32 upper)
 #else
 	seq_printf(m, "0x%08x ", pbn);
 #endif
+	pt_dump_size(m, size);
 
 	seq_printf(m, "Kernel %s User %s", pp_601(k & 2, pp), pp_601(k & 1, pp));
 
@@ -88,6 +91,7 @@ static void bat_show_603(struct seq_file *m, int idx, u32 lower, u32 upper, bool
 #else
 	seq_printf(m, "0x%08x ", brpn);
 #endif
+	pt_dump_size(m, size);
 
 	if (k == 1)
 		seq_puts(m, "User ");
diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c
index d92bb8ea229c..1f97668853e3 100644
--- a/arch/powerpc/mm/ptdump/ptdump.c
+++ b/arch/powerpc/mm/ptdump/ptdump.c
@@ -112,6 +112,19 @@ static struct addr_marker address_markers[] = {
 		seq_putc(m, c);		\
 })
 
+void pt_dump_size(struct seq_file *m, unsigned long size)
+{
+	static const char units[] = "KMGTPE";
+	const char *unit = units;
+
+	/* Work out what appropriate unit to use */
+	while (!(size & 1023) && unit[1]) {
+		size >>= 10;
+		unit++;
+	}
+	pt_dump_seq_printf(m, "%9lu%c ", size, *unit);
+}
+
 static void dump_flag_info(struct pg_state *st, const struct flag_info
 		*flag, u64 pte, int num)
 {
@@ -146,8 +159,6 @@ static void dump_flag_info(struct pg_state *st, const struct flag_info
 
 static void dump_addr(struct pg_state *st, unsigned long addr)
 {
-	static const char units[] = "KMGTPE";
-	const char *unit = units;
 	unsigned long delta;
 
 #ifdef CONFIG_PPC64
@@ -164,13 +175,7 @@ static void dump_addr(struct pg_state *st, unsigned long addr)
 		pt_dump_seq_printf(st->seq, " " REG " ", st->start_pa);
 		delta = (addr - st->start_address) >> 10;
 	}
-	/* Work out what appropriate unit to use */
-	while (!(delta & 1023) && unit[1]) {
-		delta >>= 10;
-		unit++;
-	}
-	pt_dump_seq_printf(st->seq, "%9lu%c", delta, *unit);
-
+	pt_dump_size(st->seq, delta);
 }
 
 static void note_prot_wx(struct pg_state *st, unsigned long addr)
diff --git a/arch/powerpc/mm/ptdump/ptdump.h b/arch/powerpc/mm/ptdump/ptdump.h
index 5d513636de73..154efae96ae0 100644
--- a/arch/powerpc/mm/ptdump/ptdump.h
+++ b/arch/powerpc/mm/ptdump/ptdump.h
@@ -1,5 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 #include <linux/types.h>
+#include <linux/seq_file.h>
 
 struct flag_info {
 	u64		mask;
@@ -17,3 +18,5 @@ struct pgtable_level {
 };
 
 extern struct pgtable_level pg_level[5];
+
+void pt_dump_size(struct seq_file *m, unsigned long delta);
-- 
2.25.0


^ permalink raw reply related

* [PATCH v2 05/45] powerpc/kasan: Refactor update of early shadow mappings
From: Christophe Leroy @ 2020-05-06 16:48 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1588783498.git.christophe.leroy@csgroup.eu>

kasan_remap_early_shadow_ro() and kasan_unmap_early_shadow_vmalloc()
are both updating the early shadow mapping: the first one sets
the mapping read-only while the other clears the mapping.

Refactor and create kasan_update_early_region()

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/mm/kasan/kasan_init_32.c | 39 +++++++++++++--------------
 1 file changed, 18 insertions(+), 21 deletions(-)

diff --git a/arch/powerpc/mm/kasan/kasan_init_32.c b/arch/powerpc/mm/kasan/kasan_init_32.c
index 91e2ade75192..10481d904fea 100644
--- a/arch/powerpc/mm/kasan/kasan_init_32.c
+++ b/arch/powerpc/mm/kasan/kasan_init_32.c
@@ -79,45 +79,42 @@ static int __init kasan_init_region(void *start, size_t size)
 	return 0;
 }
 
-static void __init kasan_remap_early_shadow_ro(void)
+static void __init
+kasan_update_early_region(unsigned long k_start, unsigned long k_end, pte_t pte)
 {
-	pgprot_t prot = kasan_prot_ro();
-	unsigned long k_start = KASAN_SHADOW_START;
-	unsigned long k_end = KASAN_SHADOW_END;
 	unsigned long k_cur;
 	phys_addr_t pa = __pa(kasan_early_shadow_page);
 
-	kasan_populate_pte(kasan_early_shadow_pte, prot);
-
-	for (k_cur = k_start & PAGE_MASK; k_cur != k_end; k_cur += PAGE_SIZE) {
+	for (k_cur = k_start; k_cur != k_end; k_cur += PAGE_SIZE) {
 		pmd_t *pmd = pmd_ptr_k(k_cur);
 		pte_t *ptep = pte_offset_kernel(pmd, k_cur);
 
 		if ((pte_val(*ptep) & PTE_RPN_MASK) != pa)
 			continue;
 
-		__set_pte_at(&init_mm, k_cur, ptep, pfn_pte(PHYS_PFN(pa), prot), 0);
+		__set_pte_at(&init_mm, k_cur, ptep, pte, 0);
 	}
-	flush_tlb_kernel_range(KASAN_SHADOW_START, KASAN_SHADOW_END);
+
+	flush_tlb_kernel_range(k_start, k_end);
 }
 
-static void __init kasan_unmap_early_shadow_vmalloc(void)
+static void __init kasan_remap_early_shadow_ro(void)
 {
-	unsigned long k_start = (unsigned long)kasan_mem_to_shadow((void *)VMALLOC_START);
-	unsigned long k_end = (unsigned long)kasan_mem_to_shadow((void *)VMALLOC_END);
-	unsigned long k_cur;
+	pgprot_t prot = kasan_prot_ro();
 	phys_addr_t pa = __pa(kasan_early_shadow_page);
 
-	for (k_cur = k_start & PAGE_MASK; k_cur < k_end; k_cur += PAGE_SIZE) {
-		pmd_t *pmd = pmd_offset(pud_offset(pgd_offset_k(k_cur), k_cur), k_cur);
-		pte_t *ptep = pte_offset_kernel(pmd, k_cur);
+	kasan_populate_pte(kasan_early_shadow_pte, prot);
 
-		if ((pte_val(*ptep) & PTE_RPN_MASK) != pa)
-			continue;
+	kasan_update_early_region(KASAN_SHADOW_START, KASAN_SHADOW_END,
+				  pfn_pte(PHYS_PFN(pa), prot));
+}
 
-		__set_pte_at(&init_mm, k_cur, ptep, __pte(0), 0);
-	}
-	flush_tlb_kernel_range(k_start, k_end);
+static void __init kasan_unmap_early_shadow_vmalloc(void)
+{
+	unsigned long k_start = (unsigned long)kasan_mem_to_shadow((void *)VMALLOC_START);
+	unsigned long k_end = (unsigned long)kasan_mem_to_shadow((void *)VMALLOC_END);
+
+	kasan_update_early_region(k_start, k_end, __pte(0));
 }
 
 static void __init kasan_mmu_init(void)
-- 
2.25.0


^ permalink raw reply related

* [PATCH v2 09/45] powerpc/ptdump: Add _PAGE_COHERENT flag
From: Christophe Leroy @ 2020-05-06 16:48 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1588783498.git.christophe.leroy@csgroup.eu>

For platforms using shared.c (4xx, Book3e, Book3s/32),
also handle the _PAGE_COHERENT flag with corresponds to the
M bit of the WIMG flags.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/mm/ptdump/shared.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/arch/powerpc/mm/ptdump/shared.c b/arch/powerpc/mm/ptdump/shared.c
index dab5d8028a9b..634b83aa3487 100644
--- a/arch/powerpc/mm/ptdump/shared.c
+++ b/arch/powerpc/mm/ptdump/shared.c
@@ -40,6 +40,11 @@ static const struct flag_info flag_array[] = {
 		.val	= _PAGE_NO_CACHE,
 		.set	= "i",
 		.clear	= " ",
+	}, {
+		.mask	= _PAGE_COHERENT,
+		.val	= _PAGE_COHERENT,
+		.set	= "m",
+		.clear	= " ",
 	}, {
 		.mask	= _PAGE_GUARDED,
 		.val	= _PAGE_GUARDED,
-- 
2.25.0


^ permalink raw reply related

* [PATCH v2 08/45] powerpc/ptdump: Reorder flags
From: Christophe Leroy @ 2020-05-06 16:48 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1588783498.git.christophe.leroy@csgroup.eu>

Reorder flags in a more logical way:
- Page size (huge) first
- User
- RWX
- Present
- WIMG
- Special
- Dirty and Accessed

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/mm/ptdump/8xx.c    | 30 +++++++++++++++---------------
 arch/powerpc/mm/ptdump/shared.c | 30 +++++++++++++++---------------
 2 files changed, 30 insertions(+), 30 deletions(-)

diff --git a/arch/powerpc/mm/ptdump/8xx.c b/arch/powerpc/mm/ptdump/8xx.c
index ca9ce94672f5..a3169677dced 100644
--- a/arch/powerpc/mm/ptdump/8xx.c
+++ b/arch/powerpc/mm/ptdump/8xx.c
@@ -11,11 +11,6 @@
 
 static const struct flag_info flag_array[] = {
 	{
-		.mask	= _PAGE_SH,
-		.val	= _PAGE_SH,
-		.set	= "sh",
-		.clear	= "  ",
-	}, {
 		.mask	= _PAGE_RO | _PAGE_NA,
 		.val	= 0,
 		.set	= "rw",
@@ -37,11 +32,26 @@ static const struct flag_info flag_array[] = {
 		.val	= _PAGE_PRESENT,
 		.set	= "p",
 		.clear	= " ",
+	}, {
+		.mask	= _PAGE_NO_CACHE,
+		.val	= _PAGE_NO_CACHE,
+		.set	= "i",
+		.clear	= " ",
 	}, {
 		.mask	= _PAGE_GUARDED,
 		.val	= _PAGE_GUARDED,
 		.set	= "g",
 		.clear	= " ",
+	}, {
+		.mask	= _PAGE_SH,
+		.val	= _PAGE_SH,
+		.set	= "sh",
+		.clear	= "  ",
+	}, {
+		.mask	= _PAGE_SPECIAL,
+		.val	= _PAGE_SPECIAL,
+		.set	= "s",
+		.clear	= " ",
 	}, {
 		.mask	= _PAGE_DIRTY,
 		.val	= _PAGE_DIRTY,
@@ -52,16 +62,6 @@ static const struct flag_info flag_array[] = {
 		.val	= _PAGE_ACCESSED,
 		.set	= "a",
 		.clear	= " ",
-	}, {
-		.mask	= _PAGE_NO_CACHE,
-		.val	= _PAGE_NO_CACHE,
-		.set	= "i",
-		.clear	= " ",
-	}, {
-		.mask	= _PAGE_SPECIAL,
-		.val	= _PAGE_SPECIAL,
-		.set	= "s",
-		.clear	= " ",
 	}
 };
 
diff --git a/arch/powerpc/mm/ptdump/shared.c b/arch/powerpc/mm/ptdump/shared.c
index 44a8a64a664f..dab5d8028a9b 100644
--- a/arch/powerpc/mm/ptdump/shared.c
+++ b/arch/powerpc/mm/ptdump/shared.c
@@ -30,21 +30,6 @@ static const struct flag_info flag_array[] = {
 		.val	= _PAGE_PRESENT,
 		.set	= "p",
 		.clear	= " ",
-	}, {
-		.mask	= _PAGE_GUARDED,
-		.val	= _PAGE_GUARDED,
-		.set	= "g",
-		.clear	= " ",
-	}, {
-		.mask	= _PAGE_DIRTY,
-		.val	= _PAGE_DIRTY,
-		.set	= "d",
-		.clear	= " ",
-	}, {
-		.mask	= _PAGE_ACCESSED,
-		.val	= _PAGE_ACCESSED,
-		.set	= "a",
-		.clear	= " ",
 	}, {
 		.mask	= _PAGE_WRITETHRU,
 		.val	= _PAGE_WRITETHRU,
@@ -55,11 +40,26 @@ static const struct flag_info flag_array[] = {
 		.val	= _PAGE_NO_CACHE,
 		.set	= "i",
 		.clear	= " ",
+	}, {
+		.mask	= _PAGE_GUARDED,
+		.val	= _PAGE_GUARDED,
+		.set	= "g",
+		.clear	= " ",
 	}, {
 		.mask	= _PAGE_SPECIAL,
 		.val	= _PAGE_SPECIAL,
 		.set	= "s",
 		.clear	= " ",
+	}, {
+		.mask	= _PAGE_DIRTY,
+		.val	= _PAGE_DIRTY,
+		.set	= "d",
+		.clear	= " ",
+	}, {
+		.mask	= _PAGE_ACCESSED,
+		.val	= _PAGE_ACCESSED,
+		.set	= "a",
+		.clear	= " ",
 	}
 };
 
-- 
2.25.0


^ permalink raw reply related

* [PATCH v2 07/45] powerpc/ptdump: Limit size of flags text to 1/2 chars on PPC32
From: Christophe Leroy @ 2020-05-06 16:48 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1588783498.git.christophe.leroy@csgroup.eu>

In order to have all flags fit on a 80 chars wide screen,
reduce the flags to 1 char (2 where ambiguous).

No cache is 'i'
User is 'ur' (Supervisor would be sr)
Shared (for 8xx) becomes 'sh' (it was 'user' when not shared but
that was ambiguous because that's not entirely right)

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/mm/ptdump/8xx.c    | 33 ++++++++++++++++---------------
 arch/powerpc/mm/ptdump/shared.c | 35 +++++++++++++++++----------------
 2 files changed, 35 insertions(+), 33 deletions(-)

diff --git a/arch/powerpc/mm/ptdump/8xx.c b/arch/powerpc/mm/ptdump/8xx.c
index 9e2d8e847d6e..ca9ce94672f5 100644
--- a/arch/powerpc/mm/ptdump/8xx.c
+++ b/arch/powerpc/mm/ptdump/8xx.c
@@ -12,9 +12,9 @@
 static const struct flag_info flag_array[] = {
 	{
 		.mask	= _PAGE_SH,
-		.val	= 0,
-		.set	= "user",
-		.clear	= "    ",
+		.val	= _PAGE_SH,
+		.set	= "sh",
+		.clear	= "  ",
 	}, {
 		.mask	= _PAGE_RO | _PAGE_NA,
 		.val	= 0,
@@ -30,37 +30,38 @@ static const struct flag_info flag_array[] = {
 	}, {
 		.mask	= _PAGE_EXEC,
 		.val	= _PAGE_EXEC,
-		.set	= " X ",
-		.clear	= "   ",
+		.set	= "x",
+		.clear	= " ",
 	}, {
 		.mask	= _PAGE_PRESENT,
 		.val	= _PAGE_PRESENT,
-		.set	= "present",
-		.clear	= "       ",
+		.set	= "p",
+		.clear	= " ",
 	}, {
 		.mask	= _PAGE_GUARDED,
 		.val	= _PAGE_GUARDED,
-		.set	= "guarded",
-		.clear	= "       ",
+		.set	= "g",
+		.clear	= " ",
 	}, {
 		.mask	= _PAGE_DIRTY,
 		.val	= _PAGE_DIRTY,
-		.set	= "dirty",
-		.clear	= "     ",
+		.set	= "d",
+		.clear	= " ",
 	}, {
 		.mask	= _PAGE_ACCESSED,
 		.val	= _PAGE_ACCESSED,
-		.set	= "accessed",
-		.clear	= "        ",
+		.set	= "a",
+		.clear	= " ",
 	}, {
 		.mask	= _PAGE_NO_CACHE,
 		.val	= _PAGE_NO_CACHE,
-		.set	= "no cache",
-		.clear	= "        ",
+		.set	= "i",
+		.clear	= " ",
 	}, {
 		.mask	= _PAGE_SPECIAL,
 		.val	= _PAGE_SPECIAL,
-		.set	= "special",
+		.set	= "s",
+		.clear	= " ",
 	}
 };
 
diff --git a/arch/powerpc/mm/ptdump/shared.c b/arch/powerpc/mm/ptdump/shared.c
index f7ed2f187cb0..44a8a64a664f 100644
--- a/arch/powerpc/mm/ptdump/shared.c
+++ b/arch/powerpc/mm/ptdump/shared.c
@@ -13,8 +13,8 @@ static const struct flag_info flag_array[] = {
 	{
 		.mask	= _PAGE_USER,
 		.val	= _PAGE_USER,
-		.set	= "user",
-		.clear	= "    ",
+		.set	= "ur",
+		.clear	= "  ",
 	}, {
 		.mask	= _PAGE_RW,
 		.val	= _PAGE_RW,
@@ -23,42 +23,43 @@ static const struct flag_info flag_array[] = {
 	}, {
 		.mask	= _PAGE_EXEC,
 		.val	= _PAGE_EXEC,
-		.set	= " X ",
-		.clear	= "   ",
+		.set	= "x",
+		.clear	= " ",
 	}, {
 		.mask	= _PAGE_PRESENT,
 		.val	= _PAGE_PRESENT,
-		.set	= "present",
-		.clear	= "       ",
+		.set	= "p",
+		.clear	= " ",
 	}, {
 		.mask	= _PAGE_GUARDED,
 		.val	= _PAGE_GUARDED,
-		.set	= "guarded",
-		.clear	= "       ",
+		.set	= "g",
+		.clear	= " ",
 	}, {
 		.mask	= _PAGE_DIRTY,
 		.val	= _PAGE_DIRTY,
-		.set	= "dirty",
-		.clear	= "     ",
+		.set	= "d",
+		.clear	= " ",
 	}, {
 		.mask	= _PAGE_ACCESSED,
 		.val	= _PAGE_ACCESSED,
-		.set	= "accessed",
-		.clear	= "        ",
+		.set	= "a",
+		.clear	= " ",
 	}, {
 		.mask	= _PAGE_WRITETHRU,
 		.val	= _PAGE_WRITETHRU,
-		.set	= "write through",
-		.clear	= "             ",
+		.set	= "w",
+		.clear	= " ",
 	}, {
 		.mask	= _PAGE_NO_CACHE,
 		.val	= _PAGE_NO_CACHE,
-		.set	= "no cache",
-		.clear	= "        ",
+		.set	= "i",
+		.clear	= " ",
 	}, {
 		.mask	= _PAGE_SPECIAL,
 		.val	= _PAGE_SPECIAL,
-		.set	= "special",
+		.set	= "s",
+		.clear	= " ",
 	}
 };
 
-- 
2.25.0


^ permalink raw reply related

* [PATCH v2 03/45] powerpc/kasan: Fix shadow pages allocation failure
From: Christophe Leroy @ 2020-05-06 16:48 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1588783498.git.christophe.leroy@csgroup.eu>

Doing kasan pages allocation in MMU_init is too early, kernel doesn't
have access yet to the entire memory space and memblock_alloc() fails
when the kernel is a bit big.

Do it from kasan_init() instead.

Fixes: 2edb16efc899 ("powerpc/32: Add KASAN support")
Cc: stable@vger.kernel.org
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/include/asm/kasan.h      | 2 --
 arch/powerpc/mm/init_32.c             | 2 --
 arch/powerpc/mm/kasan/kasan_init_32.c | 4 +++-
 3 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/include/asm/kasan.h b/arch/powerpc/include/asm/kasan.h
index fc900937f653..4769bbf7173a 100644
--- a/arch/powerpc/include/asm/kasan.h
+++ b/arch/powerpc/include/asm/kasan.h
@@ -27,12 +27,10 @@
 
 #ifdef CONFIG_KASAN
 void kasan_early_init(void);
-void kasan_mmu_init(void);
 void kasan_init(void);
 void kasan_late_init(void);
 #else
 static inline void kasan_init(void) { }
-static inline void kasan_mmu_init(void) { }
 static inline void kasan_late_init(void) { }
 #endif
 
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
index 872df48ae41b..a6991ef8727d 100644
--- a/arch/powerpc/mm/init_32.c
+++ b/arch/powerpc/mm/init_32.c
@@ -170,8 +170,6 @@ void __init MMU_init(void)
 	btext_unmap();
 #endif
 
-	kasan_mmu_init();
-
 	setup_kup();
 
 	/* Shortly after that, the entire linear mapping will be available */
diff --git a/arch/powerpc/mm/kasan/kasan_init_32.c b/arch/powerpc/mm/kasan/kasan_init_32.c
index 8b15fe09b967..b7c287adfd59 100644
--- a/arch/powerpc/mm/kasan/kasan_init_32.c
+++ b/arch/powerpc/mm/kasan/kasan_init_32.c
@@ -131,7 +131,7 @@ static void __init kasan_unmap_early_shadow_vmalloc(void)
 	flush_tlb_kernel_range(k_start, k_end);
 }
 
-void __init kasan_mmu_init(void)
+static void __init kasan_mmu_init(void)
 {
 	int ret;
 	struct memblock_region *reg;
@@ -159,6 +159,8 @@ void __init kasan_mmu_init(void)
 
 void __init kasan_init(void)
 {
+	kasan_mmu_init();
+
 	kasan_remap_early_shadow_ro();
 
 	clear_page(kasan_early_shadow_page);
-- 
2.25.0


^ permalink raw reply related

* [PATCH v2 04/45] powerpc/kasan: Remove unnecessary page table locking
From: Christophe Leroy @ 2020-05-06 16:48 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1588783498.git.christophe.leroy@csgroup.eu>

Commit 45ff3c559585 ("powerpc/kasan: Fix parallel loading of
modules.") added spinlocks to manage parallele module loading.

Since then commit 47febbeeec44 ("powerpc/32: Force KASAN_VMALLOC for
modules") converted the module loading to KASAN_VMALLOC.

The spinlocking has then become unneeded and can be removed to
simplify kasan_init_shadow_page_tables()

Also remove inclusion of linux/moduleloader.h and linux/vmalloc.h
which are not needed anymore since the removal of modules management.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/mm/kasan/kasan_init_32.c | 19 ++++---------------
 1 file changed, 4 insertions(+), 15 deletions(-)

diff --git a/arch/powerpc/mm/kasan/kasan_init_32.c b/arch/powerpc/mm/kasan/kasan_init_32.c
index b7c287adfd59..91e2ade75192 100644
--- a/arch/powerpc/mm/kasan/kasan_init_32.c
+++ b/arch/powerpc/mm/kasan/kasan_init_32.c
@@ -5,9 +5,7 @@
 #include <linux/kasan.h>
 #include <linux/printk.h>
 #include <linux/memblock.h>
-#include <linux/moduleloader.h>
 #include <linux/sched/task.h>
-#include <linux/vmalloc.h>
 #include <asm/pgalloc.h>
 #include <asm/code-patching.h>
 #include <mm/mmu_decl.h>
@@ -34,31 +32,22 @@ static int __init kasan_init_shadow_page_tables(unsigned long k_start, unsigned
 {
 	pmd_t *pmd;
 	unsigned long k_cur, k_next;
-	pte_t *new = NULL;
 
 	pmd = pmd_ptr_k(k_start);
 
 	for (k_cur = k_start; k_cur != k_end; k_cur = k_next, pmd++) {
+		pte_t *new;
+
 		k_next = pgd_addr_end(k_cur, k_end);
 		if ((void *)pmd_page_vaddr(*pmd) != kasan_early_shadow_pte)
 			continue;
 
-		if (!new)
-			new = memblock_alloc(PTE_FRAG_SIZE, PTE_FRAG_SIZE);
+		new = memblock_alloc(PTE_FRAG_SIZE, PTE_FRAG_SIZE);
 
 		if (!new)
 			return -ENOMEM;
 		kasan_populate_pte(new, PAGE_KERNEL);
-
-		smp_wmb(); /* See comment in __pte_alloc */
-
-		spin_lock(&init_mm.page_table_lock);
-			/* Has another populated it ? */
-		if (likely((void *)pmd_page_vaddr(*pmd) == kasan_early_shadow_pte)) {
-			pmd_populate_kernel(&init_mm, pmd, new);
-			new = NULL;
-		}
-		spin_unlock(&init_mm.page_table_lock);
+		pmd_populate_kernel(&init_mm, pmd, new);
 	}
 	return 0;
 }
-- 
2.25.0


^ permalink raw reply related

* [PATCH v2 01/45] powerpc/kasan: Fix error detection on memory allocation
From: Christophe Leroy @ 2020-05-06 16:48 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1588783498.git.christophe.leroy@csgroup.eu>

In case (k_start & PAGE_MASK) doesn't equal (kstart), 'va' will never be
NULL allthough 'block' is NULL

Check the return of memblock_alloc() directly instead of
the resulting address in the loop.

Fixes: 509cd3f2b473 ("powerpc/32: Simplify KASAN init")
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/mm/kasan/kasan_init_32.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/mm/kasan/kasan_init_32.c b/arch/powerpc/mm/kasan/kasan_init_32.c
index cbcad369fcb2..8b15fe09b967 100644
--- a/arch/powerpc/mm/kasan/kasan_init_32.c
+++ b/arch/powerpc/mm/kasan/kasan_init_32.c
@@ -76,15 +76,14 @@ static int __init kasan_init_region(void *start, size_t size)
 		return ret;
 
 	block = memblock_alloc(k_end - k_start, PAGE_SIZE);
+	if (!block)
+		return -ENOMEM;
 
 	for (k_cur = k_start & PAGE_MASK; k_cur < k_end; k_cur += PAGE_SIZE) {
 		pmd_t *pmd = pmd_ptr_k(k_cur);
 		void *va = block + k_cur - k_start;
 		pte_t pte = pfn_pte(PHYS_PFN(__pa(va)), PAGE_KERNEL);
 
-		if (!va)
-			return -ENOMEM;
-
 		__set_pte_at(&init_mm, k_cur, pte_offset_kernel(pmd, k_cur), pte, 0);
 	}
 	flush_tlb_kernel_range(k_start, k_end);
-- 
2.25.0


^ permalink raw reply related

* [PATCH v2 00/45] Use hugepages to map kernel mem on 8xx
From: Christophe Leroy @ 2020-05-06 16:48 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
  Cc: linuxppc-dev, linux-kernel

The main purpose of this big series is to:
- reorganise huge page handling to avoid using mm_slices.
- use huge pages to map kernel memory on the 8xx.

The 8xx supports 4 page sizes: 4k, 16k, 512k and 8M.
It uses 2 Level page tables, PGD having 1024 entries, each entry
covering 4M address space. Then each page table has 1024 entries.

At the time being, page sizes are managed in PGD entries, implying
the use of mm_slices as it can't mix several pages of the same size
in one page table.

The first purpose of this series is to reorganise things so that
standard page tables can also handle 512k pages. This is done by
adding a new _PAGE_HUGE flag which will be copied into the Level 1
entry in the TLB miss handler. That done, we have 2 types of pages:
- PGD entries to regular page tables handling 4k/16k and 512k pages
- PGD entries to hugepd tables handling 8M pages.

There is no need to mix 8M pages with other sizes, because a 8M page
will use more than what a single PGD covers.

Then comes the second purpose of this series. At the time being, the
8xx has implemented special handling in the TLB miss handlers in order
to transparently map kernel linear address space and the IMMR using
huge pages by building the TLB entries in assembly at the time of the
exception.

As mm_slices is only for user space pages, and also because it would
anyway not be convenient to slice kernel address space, it was not
possible to use huge pages for kernel address space. But after step
one of the series, it is now more flexible to use huge pages.

This series drop all assembly 'just in time' handling of huge pages
and use huge pages in page tables instead.

Once the above is done, then comes the cherry on cake:
- Use huge pages for KASAN shadow mapping
- Allow pinned TLBs with strict kernel rwx
- Allow pinned TLBs with debug pagealloc

Then, last but not least, those modifications for the 8xx allows the
following improvement on book3s/32:
- Mapping KASAN shadow with BATs
- Allowing BATs with debug pagealloc

All this allows to considerably simplify TLB miss handlers and associated
initialisation. The overhead of reading page tables is negligible
compared to the reduction of the miss handlers.

While we were at touching pte_update(), some cleanup was done
there too.

Tested widely on 8xx and 832x. Boot tested on QEMU MAC99.

Changes in v2:
- Selecting HUGETLBFS instead of HUGETLB_PAGE which leads to link failure.
- Rebase on latest powerpc/merge branch
- Reworked the way TLB 28 to 31 are pinned because it was not working.

Christophe Leroy (45):
  powerpc/kasan: Fix error detection on memory allocation
  powerpc/kasan: Fix issues by lowering KASAN_SHADOW_END
  powerpc/kasan: Fix shadow pages allocation failure
  powerpc/kasan: Remove unnecessary page table locking
  powerpc/kasan: Refactor update of early shadow mappings
  powerpc/kasan: Declare kasan_init_region() weak
  powerpc/ptdump: Limit size of flags text to 1/2 chars on PPC32
  powerpc/ptdump: Reorder flags
  powerpc/ptdump: Add _PAGE_COHERENT flag
  powerpc/ptdump: Display size of BATs
  powerpc/ptdump: Standardise display of BAT flags
  powerpc/ptdump: Properly handle non standard page size
  powerpc/ptdump: Handle hugepd at PGD level
  powerpc/32s: Don't warn when mapping RO data ROX.
  powerpc/mm: Allocate static page tables for fixmap
  powerpc/mm: Fix conditions to perform MMU specific management by
    blocks on PPC32.
  powerpc/mm: PTE_ATOMIC_UPDATES is only for 40x
  powerpc/mm: Refactor pte_update() on nohash/32
  powerpc/mm: Refactor pte_update() on book3s/32
  powerpc/mm: Standardise __ptep_test_and_clear_young() params between
    PPC32 and PPC64
  powerpc/mm: Standardise pte_update() prototype between PPC32 and PPC64
  powerpc/mm: Create a dedicated pte_update() for 8xx
  powerpc/mm: Reduce hugepd size for 8M hugepages on 8xx
  powerpc/8xx: Drop CONFIG_8xx_COPYBACK option
  powerpc/8xx: Prepare handlers for _PAGE_HUGE for 512k pages.
  powerpc/8xx: Manage 512k huge pages as standard pages.
  powerpc/8xx: Only 8M pages are hugepte pages now
  powerpc/8xx: MM_SLICE is not needed anymore
  powerpc/8xx: Move PPC_PIN_TLB options into 8xx Kconfig
  powerpc/8xx: Add function to set pinned TLBs
  powerpc/8xx: Don't set IMMR map anymore at boot
  powerpc/8xx: Always pin TLBs at startup.
  powerpc/8xx: Drop special handling of Linear and IMMR mappings in I/D
    TLB handlers
  powerpc/8xx: Remove now unused TLB miss functions
  powerpc/8xx: Move DTLB perf handling closer.
  powerpc/mm: Don't be too strict with _etext alignment on PPC32
  powerpc/8xx: Refactor kernel address boundary comparison
  powerpc/8xx: Add a function to early map kernel via huge pages
  powerpc/8xx: Map IMMR with a huge page
  powerpc/8xx: Map linear memory with huge pages
  powerpc/8xx: Allow STRICT_KERNEL_RwX with pinned TLB
  powerpc/8xx: Allow large TLBs with DEBUG_PAGEALLOC
  powerpc/8xx: Implement dedicated kasan_init_region()
  powerpc/32s: Allow mapping with BATs with DEBUG_PAGEALLOC
  powerpc/32s: Implement dedicated kasan_init_region()

 arch/powerpc/Kconfig                          |  62 +--
 arch/powerpc/configs/adder875_defconfig       |   1 -
 arch/powerpc/configs/ep88xc_defconfig         |   1 -
 arch/powerpc/configs/mpc866_ads_defconfig     |   1 -
 arch/powerpc/configs/mpc885_ads_defconfig     |   1 -
 arch/powerpc/configs/tqm8xx_defconfig         |   1 -
 arch/powerpc/include/asm/book3s/32/pgtable.h  |  78 ++--
 arch/powerpc/include/asm/fixmap.h             |   4 +
 arch/powerpc/include/asm/hugetlb.h            |   4 -
 arch/powerpc/include/asm/kasan.h              |  10 +-
 .../include/asm/nohash/32/hugetlb-8xx.h       |  32 +-
 arch/powerpc/include/asm/nohash/32/mmu-8xx.h  |  74 +---
 arch/powerpc/include/asm/nohash/32/pgtable.h  | 104 +++--
 arch/powerpc/include/asm/nohash/32/pte-8xx.h  |   4 +-
 arch/powerpc/include/asm/nohash/32/slice.h    |  20 -
 arch/powerpc/include/asm/nohash/64/pgtable.h  |  28 +-
 arch/powerpc/include/asm/nohash/pgtable.h     |   2 +-
 arch/powerpc/include/asm/pgtable.h            |   2 +
 arch/powerpc/include/asm/slice.h              |   2 -
 arch/powerpc/kernel/head_8xx.S                | 354 ++++++++----------
 arch/powerpc/kernel/setup_32.c                |   2 +-
 arch/powerpc/kernel/vmlinux.lds.S             |   3 +-
 arch/powerpc/mm/book3s32/mmu.c                |  12 +-
 arch/powerpc/mm/hugetlbpage.c                 |  41 +-
 arch/powerpc/mm/init_32.c                     |  12 +-
 arch/powerpc/mm/kasan/8xx.c                   |  74 ++++
 arch/powerpc/mm/kasan/Makefile                |   2 +
 arch/powerpc/mm/kasan/book3s_32.c             |  57 +++
 arch/powerpc/mm/kasan/kasan_init_32.c         |  88 ++---
 arch/powerpc/mm/mmu_decl.h                    |   4 +
 arch/powerpc/mm/nohash/8xx.c                  | 226 ++++++-----
 arch/powerpc/mm/pgtable.c                     |  34 +-
 arch/powerpc/mm/pgtable_32.c                  |  22 +-
 arch/powerpc/mm/ptdump/8xx.c                  |  52 +--
 arch/powerpc/mm/ptdump/bats.c                 |  41 +-
 arch/powerpc/mm/ptdump/ptdump.c               |  72 +++-
 arch/powerpc/mm/ptdump/ptdump.h               |   3 +
 arch/powerpc/mm/ptdump/shared.c               |  58 +--
 arch/powerpc/perf/8xx-pmu.c                   |  10 -
 arch/powerpc/platforms/8xx/Kconfig            |  50 ++-
 arch/powerpc/platforms/Kconfig.cputype        |   2 +-
 arch/powerpc/sysdev/cpm_common.c              |   2 +
 42 files changed, 854 insertions(+), 798 deletions(-)
 delete mode 100644 arch/powerpc/include/asm/nohash/32/slice.h
 create mode 100644 arch/powerpc/mm/kasan/8xx.c
 create mode 100644 arch/powerpc/mm/kasan/book3s_32.c

-- 
2.25.0


^ permalink raw reply

* [PATCH v2 02/45] powerpc/kasan: Fix issues by lowering KASAN_SHADOW_END
From: Christophe Leroy @ 2020-05-06 16:48 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1588783498.git.christophe.leroy@csgroup.eu>

At the time being, KASAN_SHADOW_END is 0x100000000, which
is 0 in 32 bits representation.

This leads to a couple of issues:
- kasan_remap_early_shadow_ro() does nothing because the comparison
k_cur < k_end is always false.
- In ptdump, address comparison for markers display fails and the
marker's name is printed at the start of the KASAN area instead of
being printed at the end.

However, there is no need to shadow the KASAN shadow area itself,
so the KASAN shadow area can stop shadowing memory at the start
of itself.

With a PAGE_OFFSET set to 0xc0000000, KASAN shadow area is then going
from 0xf8000000 to 0xff000000.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Fixes: cbd18991e24f ("powerpc/mm: Fix an Oops in kasan_mmu_init()")
Cc: stable@vger.kernel.org
---
 arch/powerpc/include/asm/kasan.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/arch/powerpc/include/asm/kasan.h b/arch/powerpc/include/asm/kasan.h
index fbff9ff9032e..fc900937f653 100644
--- a/arch/powerpc/include/asm/kasan.h
+++ b/arch/powerpc/include/asm/kasan.h
@@ -23,9 +23,7 @@
 
 #define KASAN_SHADOW_OFFSET	ASM_CONST(CONFIG_KASAN_SHADOW_OFFSET)
 
-#define KASAN_SHADOW_END	0UL
-
-#define KASAN_SHADOW_SIZE	(KASAN_SHADOW_END - KASAN_SHADOW_START)
+#define KASAN_SHADOW_END	(-(-KASAN_SHADOW_START >> KASAN_SHADOW_SCALE_SHIFT))
 
 #ifdef CONFIG_KASAN
 void kasan_early_init(void);
-- 
2.25.0


^ permalink raw reply related

* Re: remove set_fs calls from the coredump code v6
From: Eric W. Biederman @ 2020-05-06 15:44 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Arnd Bergmann, linuxppc-dev, the arch/x86 maintainers,
	Oleg Nesterov, Linux Kernel Mailing List, Alexander Viro,
	linux-fsdevel, Andrew Morton, Linus Torvalds, Jeremy Kerr
In-Reply-To: <20200506063134.GA11391@lst.de>

Christoph Hellwig <hch@lst.de> writes:

> On Tue, May 05, 2020 at 03:28:50PM -0500, Eric W. Biederman wrote:
>> We probably can.   After introducing a kernel_compat_siginfo that is
>> the size that userspace actually would need.
>> 
>> It isn't something I want to mess with until this code gets merged, as I
>> think the set_fs cleanups are more important.
>> 
>> 
>> Christoph made some good points about how ugly the #ifdefs are in
>> the generic copy_siginfo_to_user32 implementation.
>
> Take a look at the series you are replying to, the magic x86 ifdefs are
> entirely gone from the common code :)

Interesting.

That is a different way of achieving that, and I don't hate it.
  
I still want whatever you are doing to settle before I touch that code
again.  Removing the set_fs is important and I have other fish to fry
at the moment.

Eric




^ permalink raw reply

* [PATCH 21/91] perf vendor events power9: Add hv_24x7 socket/chip level metric events
From: Arnaldo Carvalho de Melo @ 2020-05-06 15:21 UTC (permalink / raw)
  To: Ingo Molnar, Thomas Gleixner
  Cc: Mark Rutland, Madhavan Srinivasan, Peter Zijlstra, Jin Yao,
	Jiri Olsa, Kan Liang, Andi Kleen, Clark Williams, Anju T Sudhakar,
	Mamatha Inamdar, Sukadev Bhattiprolu, Ravi Bangoria, Kajol Jain,
	Arnaldo Carvalho de Melo, Joe Mario, Namhyung Kim, Michael Petlan,
	Greg Kroah-Hartman, linux-kernel, Alexander Shishkin,
	linux-perf-users, Jiri Olsa, linuxppc-dev
In-Reply-To: <20200506152234.21977-1-acme@kernel.org>

From: Kajol Jain <kjain@linux.ibm.com>

The hv_24×7 feature in IBM® POWER9™ processor-based servers provide the
facility to continuously collect large numbers of hardware performance
metrics efficiently and accurately.

This patch adds hv_24x7  metric file for different Socket/chip
resources.

Result:

power9 platform:

  command:# ./perf stat --metric-only -M Memory_RD_BW_Chip -C 0 -I 1000

     1.000096188          0.9           0.3
     2.000285720          0.5           0.1
     3.000424990          0.4           0.1

  command:# ./perf stat --metric-only -M PowerBUS_Frequency -C 0 -I 1000

     1.000097981          2.3           2.3
     2.000291713          2.3           2.3
     3.000421719          2.3           2.3
     4.000550912          2.3           2.3

Signed-off-by: Kajol Jain <kjain@linux.ibm.com>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Anju T Sudhakar <anju@linux.vnet.ibm.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Jin Yao <yao.jin@linux.intel.com>
Cc: Joe Mario <jmario@redhat.com>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
Cc: Mamatha Inamdar <mamatha4@linux.vnet.ibm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Michael Petlan <mpetlan@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@ozlabs.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
Cc: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linuxppc-dev@lists.ozlabs.org
Link: http://lore.kernel.org/lkml/20200401203340.31402-8-kjain@linux.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 .../arch/powerpc/power9/nest_metrics.json     | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)
 create mode 100644 tools/perf/pmu-events/arch/powerpc/power9/nest_metrics.json

diff --git a/tools/perf/pmu-events/arch/powerpc/power9/nest_metrics.json b/tools/perf/pmu-events/arch/powerpc/power9/nest_metrics.json
new file mode 100644
index 000000000000..c121e526442a
--- /dev/null
+++ b/tools/perf/pmu-events/arch/powerpc/power9/nest_metrics.json
@@ -0,0 +1,19 @@
+[
+    {
+        "MetricExpr": "(hv_24x7@PM_MCS01_128B_RD_DISP_PORT01\\,chip\\=?@ + hv_24x7@PM_MCS01_128B_RD_DISP_PORT23\\,chip\\=?@ + hv_24x7@PM_MCS23_128B_RD_DISP_PORT01\\,chip\\=?@ + hv_24x7@PM_MCS23_128B_RD_DISP_PORT23\\,chip\\=?@)",
+        "MetricName": "Memory_RD_BW_Chip",
+        "MetricGroup": "Memory_BW",
+        "ScaleUnit": "1.6e-2MB"
+    },
+    {
+	"MetricExpr": "(hv_24x7@PM_MCS01_128B_WR_DISP_PORT01\\,chip\\=?@ + hv_24x7@PM_MCS01_128B_WR_DISP_PORT23\\,chip\\=?@ + hv_24x7@PM_MCS23_128B_WR_DISP_PORT01\\,chip\\=?@ + hv_24x7@PM_MCS23_128B_WR_DISP_PORT23\\,chip\\=?@ )",
+        "MetricName": "Memory_WR_BW_Chip",
+        "MetricGroup": "Memory_BW",
+        "ScaleUnit": "1.6e-2MB"
+    },
+    {
+	"MetricExpr": "(hv_24x7@PM_PB_CYC\\,chip\\=?@ )",
+        "MetricName": "PowerBUS_Frequency",
+        "ScaleUnit": "2.5e-7GHz"
+    }
+]
-- 
2.21.1


^ permalink raw reply related

* [PATCH 20/91] perf tools: Enable Hz/hz prinitg for --metric-only option
From: Arnaldo Carvalho de Melo @ 2020-05-06 15:21 UTC (permalink / raw)
  To: Ingo Molnar, Thomas Gleixner
  Cc: Mark Rutland, Madhavan Srinivasan, Peter Zijlstra, Jin Yao,
	Jiri Olsa, Kan Liang, Andi Kleen, Clark Williams, Anju T Sudhakar,
	Mamatha Inamdar, Sukadev Bhattiprolu, Ravi Bangoria, Kajol Jain,
	Arnaldo Carvalho de Melo, Joe Mario, Namhyung Kim, Michael Petlan,
	Greg Kroah-Hartman, linux-kernel, Alexander Shishkin,
	linux-perf-users, Jiri Olsa, linuxppc-dev
In-Reply-To: <20200506152234.21977-1-acme@kernel.org>

From: Kajol Jain <kjain@linux.ibm.com>

Commit 54b5091606c18 ("perf stat: Implement --metric-only mode") added
function 'valid_only_metric()' which drops "Hz" or "hz", if it is part
of "ScaleUnit". This patch enable it since hv_24x7 supports couple of
frequency events.

Signed-off-by: Kajol Jain <kjain@linux.ibm.com>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Anju T Sudhakar <anju@linux.vnet.ibm.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Jin Yao <yao.jin@linux.intel.com>
Cc: Joe Mario <jmario@redhat.com>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
Cc: Mamatha Inamdar <mamatha4@linux.vnet.ibm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Michael Petlan <mpetlan@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@ozlabs.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
Cc: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linuxppc-dev@lists.ozlabs.org
Link: http://lore.kernel.org/lkml/20200401203340.31402-7-kjain@linux.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/stat-display.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index 9e757d18d713..679aaa655824 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -237,8 +237,6 @@ static bool valid_only_metric(const char *unit)
 	if (!unit)
 		return false;
 	if (strstr(unit, "/sec") ||
-	    strstr(unit, "hz") ||
-	    strstr(unit, "Hz") ||
 	    strstr(unit, "CPUs utilized"))
 		return false;
 	return true;
-- 
2.21.1


^ permalink raw reply related

* [PATCH 19/91] perf tests expr: Added test for runtime param in metric expression
From: Arnaldo Carvalho de Melo @ 2020-05-06 15:21 UTC (permalink / raw)
  To: Ingo Molnar, Thomas Gleixner
  Cc: Mark Rutland, Madhavan Srinivasan, Peter Zijlstra, Jin Yao,
	Jiri Olsa, Kan Liang, Andi Kleen, Clark Williams, Anju T Sudhakar,
	Mamatha Inamdar, Sukadev Bhattiprolu, Ravi Bangoria, Kajol Jain,
	Arnaldo Carvalho de Melo, Joe Mario, Namhyung Kim, Michael Petlan,
	Greg Kroah-Hartman, linux-kernel, Alexander Shishkin,
	linux-perf-users, Jiri Olsa, linuxppc-dev
In-Reply-To: <20200506152234.21977-1-acme@kernel.org>

From: Kajol Jain <kjain@linux.ibm.com>

Added test case for parsing  "?" in metric expression.

Signed-off-by: Kajol Jain <kjain@linux.ibm.com>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Anju T Sudhakar <anju@linux.vnet.ibm.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Jin Yao <yao.jin@linux.intel.com>
Cc: Joe Mario <jmario@redhat.com>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
Cc: Mamatha Inamdar <mamatha4@linux.vnet.ibm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Michael Petlan <mpetlan@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@ozlabs.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
Cc: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linuxppc-dev@lists.ozlabs.org
Link: http://lore.kernel.org/lkml/20200401203340.31402-6-kjain@linux.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/expr.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/tools/perf/tests/expr.c b/tools/perf/tests/expr.c
index 516504cf0ea5..f9e8e5628836 100644
--- a/tools/perf/tests/expr.c
+++ b/tools/perf/tests/expr.c
@@ -59,6 +59,14 @@ int test__expr(struct test *t __maybe_unused, int subtest __maybe_unused)
 	TEST_ASSERT_VAL("find other", !strcmp(other[2], "BOZO"));
 	TEST_ASSERT_VAL("find other", other[3] == NULL);
 
+	TEST_ASSERT_VAL("find other",
+			expr__find_other("EVENT1\\,param\\=?@ + EVENT2\\,param\\=?@", NULL,
+				   &other, &num_other, 3) == 0);
+	TEST_ASSERT_VAL("find other", num_other == 2);
+	TEST_ASSERT_VAL("find other", !strcmp(other[0], "EVENT1,param=3/"));
+	TEST_ASSERT_VAL("find other", !strcmp(other[1], "EVENT2,param=3/"));
+	TEST_ASSERT_VAL("find other", other[2] == NULL);
+
 	for (i = 0; i < num_other; i++)
 		zfree(&other[i]);
 	free((void *)other);
-- 
2.21.1


^ permalink raw reply related

* [PATCH 18/91] perf metricgroups: Enhance JSON/metric infrastructure to handle "?"
From: Arnaldo Carvalho de Melo @ 2020-05-06 15:21 UTC (permalink / raw)
  To: Ingo Molnar, Thomas Gleixner
  Cc: Mark Rutland, Madhavan Srinivasan, Peter Zijlstra, Jin Yao,
	Jiri Olsa, Kan Liang, Andi Kleen, Clark Williams, Anju T Sudhakar,
	Mamatha Inamdar, Sukadev Bhattiprolu, Ravi Bangoria, Kajol Jain,
	Arnaldo Carvalho de Melo, Joe Mario, Namhyung Kim, Michael Petlan,
	Greg Kroah-Hartman, linux-kernel, Alexander Shishkin,
	linux-perf-users, Jiri Olsa, linuxppc-dev
In-Reply-To: <20200506152234.21977-1-acme@kernel.org>

From: Kajol Jain <kjain@linux.ibm.com>

Patch enhances current metric infrastructure to handle "?" in the metric
expression. The "?" can be use for parameters whose value not known
while creating metric events and which can be replace later at runtime
to the proper value. It also add flexibility to create multiple events
out of single metric event added in JSON file.

Patch adds function 'arch_get_runtimeparam' which is a arch specific
function, returns the count of metric events need to be created.  By
default it return 1.

This infrastructure needed for hv_24x7 socket/chip level events.
"hv_24x7" chip level events needs specific chip-id to which the data is
requested. Function 'arch_get_runtimeparam' implemented in header.c
which extract number of sockets from sysfs file "sockets" under
"/sys/devices/hv_24x7/interface/".

With this patch basically we are trying to create as many metric events
as define by runtime_param.

For that one loop is added in function 'metricgroup__add_metric', which
create multiple events at run time depend on return value of
'arch_get_runtimeparam' and merge that event in 'group_list'.

To achieve that we are actually passing this parameter value as part of
`expr__find_other` function and changing "?" present in metric
expression with this value.

As in our JSON file, there gonna be single metric event, and out of
which we are creating multiple events.

To understand which data count belongs to which parameter value,
we also printing param value in generic_metric function.

For example,

  command:# ./perf stat  -M PowerBUS_Frequency -C 0 -I 1000
    1.000101867  9,356,933  hv_24x7/pm_pb_cyc,chip=0/ #  2.3 GHz  PowerBUS_Frequency_0
    1.000101867  9,366,134  hv_24x7/pm_pb_cyc,chip=1/ #  2.3 GHz  PowerBUS_Frequency_1
    2.000314878  9,365,868  hv_24x7/pm_pb_cyc,chip=0/ #  2.3 GHz  PowerBUS_Frequency_0
    2.000314878  9,366,092  hv_24x7/pm_pb_cyc,chip=1/ #  2.3 GHz  PowerBUS_Frequency_1

So, here _0 and _1 after PowerBUS_Frequency specify parameter value.

Signed-off-by: Kajol Jain <kjain@linux.ibm.com>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Anju T Sudhakar <anju@linux.vnet.ibm.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Jin Yao <yao.jin@linux.intel.com>
Cc: Joe Mario <jmario@redhat.com>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
Cc: Mamatha Inamdar <mamatha4@linux.vnet.ibm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Michael Petlan <mpetlan@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@ozlabs.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
Cc: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linuxppc-dev@lists.ozlabs.org
Link: http://lore.kernel.org/lkml/20200401203340.31402-5-kjain@linux.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/arch/powerpc/util/header.c |  8 ++++++++
 tools/perf/tests/expr.c               |  8 ++++----
 tools/perf/util/expr.c                | 11 ++++++-----
 tools/perf/util/expr.h                |  5 +++--
 tools/perf/util/expr.l                | 27 +++++++++++++++++++-------
 tools/perf/util/metricgroup.c         | 28 ++++++++++++++++++++++++---
 tools/perf/util/metricgroup.h         |  2 ++
 tools/perf/util/stat-shadow.c         | 17 ++++++++++------
 8 files changed, 79 insertions(+), 27 deletions(-)

diff --git a/tools/perf/arch/powerpc/util/header.c b/tools/perf/arch/powerpc/util/header.c
index 3b4cdfc5efd6..d4870074f14c 100644
--- a/tools/perf/arch/powerpc/util/header.c
+++ b/tools/perf/arch/powerpc/util/header.c
@@ -7,6 +7,8 @@
 #include <string.h>
 #include <linux/stringify.h>
 #include "header.h"
+#include "metricgroup.h"
+#include <api/fs/fs.h>
 
 #define mfspr(rn)       ({unsigned long rval; \
 			 asm volatile("mfspr %0," __stringify(rn) \
@@ -44,3 +46,9 @@ get_cpuid_str(struct perf_pmu *pmu __maybe_unused)
 
 	return bufp;
 }
+
+int arch_get_runtimeparam(void)
+{
+	int count;
+	return sysfs__read_int("/devices/hv_24x7/interface/sockets", &count) < 0 ? 1 : count;
+}
diff --git a/tools/perf/tests/expr.c b/tools/perf/tests/expr.c
index ea10fc4412c4..516504cf0ea5 100644
--- a/tools/perf/tests/expr.c
+++ b/tools/perf/tests/expr.c
@@ -10,7 +10,7 @@ static int test(struct expr_parse_ctx *ctx, const char *e, double val2)
 {
 	double val;
 
-	if (expr__parse(&val, ctx, e))
+	if (expr__parse(&val, ctx, e, 1))
 		TEST_ASSERT_VAL("parse test failed", 0);
 	TEST_ASSERT_VAL("unexpected value", val == val2);
 	return 0;
@@ -44,15 +44,15 @@ int test__expr(struct test *t __maybe_unused, int subtest __maybe_unused)
 		return ret;
 
 	p = "FOO/0";
-	ret = expr__parse(&val, &ctx, p);
+	ret = expr__parse(&val, &ctx, p, 1);
 	TEST_ASSERT_VAL("division by zero", ret == -1);
 
 	p = "BAR/";
-	ret = expr__parse(&val, &ctx, p);
+	ret = expr__parse(&val, &ctx, p, 1);
 	TEST_ASSERT_VAL("missing operand", ret == -1);
 
 	TEST_ASSERT_VAL("find other",
-			expr__find_other("FOO + BAR + BAZ + BOZO", "FOO", &other, &num_other) == 0);
+			expr__find_other("FOO + BAR + BAZ + BOZO", "FOO", &other, &num_other, 1) == 0);
 	TEST_ASSERT_VAL("find other", num_other == 3);
 	TEST_ASSERT_VAL("find other", !strcmp(other[0], "BAR"));
 	TEST_ASSERT_VAL("find other", !strcmp(other[1], "BAZ"));
diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c
index c3382d58cf40..aa631e37ad1e 100644
--- a/tools/perf/util/expr.c
+++ b/tools/perf/util/expr.c
@@ -27,10 +27,11 @@ void expr__ctx_init(struct expr_parse_ctx *ctx)
 
 static int
 __expr__parse(double *val, struct expr_parse_ctx *ctx, const char *expr,
-	      int start)
+	      int start, int runtime)
 {
 	struct expr_scanner_ctx scanner_ctx = {
 		.start_token = start,
+		.runtime = runtime,
 	};
 	YY_BUFFER_STATE buffer;
 	void *scanner;
@@ -54,9 +55,9 @@ __expr__parse(double *val, struct expr_parse_ctx *ctx, const char *expr,
 	return ret;
 }
 
-int expr__parse(double *final_val, struct expr_parse_ctx *ctx, const char *expr)
+int expr__parse(double *final_val, struct expr_parse_ctx *ctx, const char *expr, int runtime)
 {
-	return __expr__parse(final_val, ctx, expr, EXPR_PARSE) ? -1 : 0;
+	return __expr__parse(final_val, ctx, expr, EXPR_PARSE, runtime) ? -1 : 0;
 }
 
 static bool
@@ -74,13 +75,13 @@ already_seen(const char *val, const char *one, const char **other,
 }
 
 int expr__find_other(const char *expr, const char *one, const char ***other,
-		     int *num_other)
+		     int *num_other, int runtime)
 {
 	int err, i = 0, j = 0;
 	struct expr_parse_ctx ctx;
 
 	expr__ctx_init(&ctx);
-	err = __expr__parse(NULL, &ctx, expr, EXPR_OTHER);
+	err = __expr__parse(NULL, &ctx, expr, EXPR_OTHER, runtime);
 	if (err)
 		return -1;
 
diff --git a/tools/perf/util/expr.h b/tools/perf/util/expr.h
index 0938ad166ece..87d627bb699b 100644
--- a/tools/perf/util/expr.h
+++ b/tools/perf/util/expr.h
@@ -17,12 +17,13 @@ struct expr_parse_ctx {
 
 struct expr_scanner_ctx {
 	int start_token;
+	int runtime;
 };
 
 void expr__ctx_init(struct expr_parse_ctx *ctx);
 void expr__add_id(struct expr_parse_ctx *ctx, const char *id, double val);
-int expr__parse(double *final_val, struct expr_parse_ctx *ctx, const char *expr);
+int expr__parse(double *final_val, struct expr_parse_ctx *ctx, const char *expr, int runtime);
 int expr__find_other(const char *expr, const char *one, const char ***other,
-		int *num_other);
+		int *num_other, int runtime);
 
 #endif
diff --git a/tools/perf/util/expr.l b/tools/perf/util/expr.l
index 2582c2464938..74b9b59b1aa5 100644
--- a/tools/perf/util/expr.l
+++ b/tools/perf/util/expr.l
@@ -35,7 +35,7 @@ static int value(yyscan_t scanner, int base)
  * Allow @ instead of / to be able to specify pmu/event/ without
  * conflicts with normal division.
  */
-static char *normalize(char *str)
+static char *normalize(char *str, int runtime)
 {
 	char *ret = str;
 	char *dst = str;
@@ -45,6 +45,19 @@ static char *normalize(char *str)
 			*dst++ = '/';
 		else if (*str == '\\')
 			*dst++ = *++str;
+		 else if (*str == '?') {
+			char *paramval;
+			int i = 0;
+			int size = asprintf(&paramval, "%d", runtime);
+
+			if (size < 0)
+				*dst++ = '0';
+			else {
+				while (i < size)
+					*dst++ = paramval[i++];
+				free(paramval);
+			}
+		}
 		else
 			*dst++ = *str;
 		str++;
@@ -54,16 +67,16 @@ static char *normalize(char *str)
 	return ret;
 }
 
-static int str(yyscan_t scanner, int token)
+static int str(yyscan_t scanner, int token, int runtime)
 {
 	YYSTYPE *yylval = expr_get_lval(scanner);
 	char *text = expr_get_text(scanner);
 
-	yylval->str = normalize(strdup(text));
+	yylval->str = normalize(strdup(text), runtime);
 	if (!yylval->str)
 		return EXPR_ERROR;
 
-	yylval->str = normalize(yylval->str);
+	yylval->str = normalize(yylval->str, runtime);
 	return token;
 }
 %}
@@ -72,8 +85,8 @@ number		[0-9]+
 
 sch		[-,=]
 spec		\\{sch}
-sym		[0-9a-zA-Z_\.:@]+
-symbol		{spec}*{sym}*{spec}*{sym}*
+sym		[0-9a-zA-Z_\.:@?]+
+symbol		{spec}*{sym}*{spec}*{sym}*{spec}*{sym}
 
 %%
 	struct expr_scanner_ctx *sctx = expr_get_extra(yyscanner);
@@ -93,7 +106,7 @@ if		{ return IF; }
 else		{ return ELSE; }
 #smt_on		{ return SMT_ON; }
 {number}	{ return value(yyscanner, 10); }
-{symbol}	{ return str(yyscanner, ID); }
+{symbol}	{ return str(yyscanner, ID, sctx->runtime); }
 "|"		{ return '|'; }
 "^"		{ return '^'; }
 "&"		{ return '&'; }
diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index 7ad81c8177ea..b071df373f8b 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -90,6 +90,7 @@ struct egroup {
 	const char *metric_name;
 	const char *metric_expr;
 	const char *metric_unit;
+	int runtime;
 };
 
 static struct evsel *find_evsel_group(struct evlist *perf_evlist,
@@ -202,6 +203,7 @@ static int metricgroup__setup_events(struct list_head *groups,
 		expr->metric_name = eg->metric_name;
 		expr->metric_unit = eg->metric_unit;
 		expr->metric_events = metric_events;
+		expr->runtime = eg->runtime;
 		list_add(&expr->nd, &me->head);
 	}
 
@@ -485,15 +487,20 @@ static bool metricgroup__has_constraint(struct pmu_event *pe)
 	return false;
 }
 
+int __weak arch_get_runtimeparam(void)
+{
+	return 1;
+}
+
 static int __metricgroup__add_metric(struct strbuf *events,
-			struct list_head *group_list, struct pmu_event *pe)
+		struct list_head *group_list, struct pmu_event *pe, int runtime)
 {
 
 	const char **ids;
 	int idnum;
 	struct egroup *eg;
 
-	if (expr__find_other(pe->metric_expr, NULL, &ids, &idnum) < 0)
+	if (expr__find_other(pe->metric_expr, NULL, &ids, &idnum, runtime) < 0)
 		return -EINVAL;
 
 	if (events->len > 0)
@@ -513,6 +520,7 @@ static int __metricgroup__add_metric(struct strbuf *events,
 	eg->metric_name = pe->metric_name;
 	eg->metric_expr = pe->metric_expr;
 	eg->metric_unit = pe->unit;
+	eg->runtime = runtime;
 	list_add_tail(&eg->nd, group_list);
 
 	return 0;
@@ -540,7 +548,21 @@ static int metricgroup__add_metric(const char *metric, struct strbuf *events,
 
 			pr_debug("metric expr %s for %s\n", pe->metric_expr, pe->metric_name);
 
-			ret = __metricgroup__add_metric(events,	group_list, pe);
+			if (!strstr(pe->metric_expr, "?")) {
+				ret = __metricgroup__add_metric(events, group_list, pe, 1);
+			} else {
+				int j, count;
+
+				count = arch_get_runtimeparam();
+
+				/* This loop is added to create multiple
+				 * events depend on count value and add
+				 * those events to group_list.
+				 */
+
+				for (j = 0; j < count; j++)
+					ret = __metricgroup__add_metric(events, group_list, pe, j);
+			}
 			if (ret == -ENOMEM)
 				break;
 		}
diff --git a/tools/perf/util/metricgroup.h b/tools/perf/util/metricgroup.h
index 475c7f912864..6b09eb30b4ec 100644
--- a/tools/perf/util/metricgroup.h
+++ b/tools/perf/util/metricgroup.h
@@ -22,6 +22,7 @@ struct metric_expr {
 	const char *metric_name;
 	const char *metric_unit;
 	struct evsel **metric_events;
+	int runtime;
 };
 
 struct metric_event *metricgroup__lookup(struct rblist *metric_events,
@@ -34,4 +35,5 @@ int metricgroup__parse_groups(const struct option *opt,
 void metricgroup__print(bool metrics, bool groups, char *filter,
 			bool raw, bool details);
 bool metricgroup__has_metric(const char *metric);
+int arch_get_runtimeparam(void);
 #endif
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index 1ad5c5be7e97..518fbb3a6269 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -336,7 +336,7 @@ void perf_stat__collect_metric_expr(struct evlist *evsel_list)
 		metric_events = counter->metric_events;
 		if (!metric_events) {
 			if (expr__find_other(counter->metric_expr, counter->name,
-						&metric_names, &num_metric_names) < 0)
+						&metric_names, &num_metric_names, 1) < 0)
 				continue;
 
 			metric_events = calloc(sizeof(struct evsel *),
@@ -723,6 +723,7 @@ static void generic_metric(struct perf_stat_config *config,
 			   char *name,
 			   const char *metric_name,
 			   const char *metric_unit,
+			   int runtime,
 			   double avg,
 			   int cpu,
 			   struct perf_stat_output_ctx *out,
@@ -777,7 +778,7 @@ static void generic_metric(struct perf_stat_config *config,
 	}
 
 	if (!metric_events[i]) {
-		if (expr__parse(&ratio, &pctx, metric_expr) == 0) {
+		if (expr__parse(&ratio, &pctx, metric_expr, runtime) == 0) {
 			char *unit;
 			char metric_bf[64];
 
@@ -786,9 +787,13 @@ static void generic_metric(struct perf_stat_config *config,
 					&unit, &scale) >= 0) {
 					ratio *= scale;
 				}
-
-				scnprintf(metric_bf, sizeof(metric_bf),
+				if (strstr(metric_expr, "?"))
+					scnprintf(metric_bf, sizeof(metric_bf),
+					  "%s  %s_%d", unit, metric_name, runtime);
+				else
+					scnprintf(metric_bf, sizeof(metric_bf),
 					  "%s  %s", unit, metric_name);
+
 				print_metric(config, ctxp, NULL, "%8.1f",
 					     metric_bf, ratio);
 			} else {
@@ -1022,7 +1027,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
 			print_metric(config, ctxp, NULL, NULL, name, 0);
 	} else if (evsel->metric_expr) {
 		generic_metric(config, evsel->metric_expr, evsel->metric_events, evsel->name,
-				evsel->metric_name, NULL, avg, cpu, out, st);
+				evsel->metric_name, NULL, 1, avg, cpu, out, st);
 	} else if (runtime_stat_n(st, STAT_NSECS, 0, cpu) != 0) {
 		char unit = 'M';
 		char unit_buf[10];
@@ -1051,7 +1056,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
 				out->new_line(config, ctxp);
 			generic_metric(config, mexp->metric_expr, mexp->metric_events,
 					evsel->name, mexp->metric_name,
-					mexp->metric_unit, avg, cpu, out, st);
+					mexp->metric_unit, mexp->runtime, avg, cpu, out, st);
 		}
 	}
 	if (num == 0)
-- 
2.21.1


^ permalink raw reply related

* Re: [PATCH v4 1/2] powerpc/eeh: fix pseries_eeh_configure_bridge()
From: Nathan Lynch @ 2020-05-06 14:55 UTC (permalink / raw)
  To: Sam Bobroff; +Cc: linuxppc-dev, Oliver O'Halloran
In-Reply-To: <1b0a6010a647dc915816e44845b64d72066676a7.1588045502.git.sbobroff@linux.ibm.com>

Sam Bobroff <sbobroff@linux.ibm.com> writes:
> If a device is hot unplgged during EEH recovery, it's possible for the
> RTAS call to ibm,configure-pe in pseries_eeh_configure() to return
> parameter error (-3), however negative return values are not checked
> for and this leads to an infinite loop.
>
> Fix this by correctly bailing out on negative values.

Reviewed-by: Nathan Lynch <nathanl@linux.ibm.com>

Thanks.

^ permalink raw reply

* [PATCH v8 0/5] powerpc/hv-24x7: Expose chip/sockets info to add json file metric support for the hv_24x7 socket/chip level events
From: Kajol Jain @ 2020-05-06 11:07 UTC (permalink / raw)
  To: acme, linuxppc-dev, mpe, suka
  Cc: ravi.bangoria, maddy, jmario, peterz, gregkh, mpetlan,
	alexander.shishkin, ak, yao.jin, anju, mamatha4, jolsa, namhyung,
	mingo, kan.liang

Patchset fixes the inconsistent results we are getting when
we run multiple 24x7 events.

"hv_24x7" pmu interface events needs system dependent parameter
like socket/chip/core. For example, hv_24x7 chip level events needs
specific chip-id to which the data is requested should be added as part
of pmu events.

So to enable JSON file support to "hv_24x7" interface, patchset expose
total number of sockets and chips per-socket details in sysfs
files (sockets, chips) under "/sys/devices/hv_24x7/interface/".

To get number of sockets, chips per sockets and cores per chip patchset adds a
rtas call with token "PROCESSOR_MODULE_INFO" to get these details. Patchset
also handles partition migration case to re-init these system depended
parameters by adding proper calls in post_mobility_fixup() (mobility.c).

v7: http://patchwork.ozlabs.org/project/linuxppc-dev/list/?series=167076

Changelog:
v7 -> v8
- Add support for exposing cores per details as well.
  Suggested by: Madhavan Srinivasan.
- Remove config check for 'CONFIG_PPC_RTAS' in previous
  implementation and address other comments by Michael Ellerman.

v6 -> v7
- Split patchset into two patch series, one with kernel changes
  and another with perf tool side changes. This pachset contain
  all kernel side changes.

Kajol Jain (5):
  powerpc/perf/hv-24x7: Fix inconsistent output values incase multiple
    hv-24x7 events run
  powerpc/hv-24x7: Add rtas call in hv-24x7 driver to get processor
    details
  powerpc/hv-24x7: Add sysfs files inside hv-24x7 device to show
    processor details
  Documentation/ABI: Add ABI documentation for chips and sockets
  powerpc/hv-24x7: Update post_mobility_fixup() to handle migration

 .../sysfs-bus-event_source-devices-hv_24x7    |  21 ++++
 arch/powerpc/include/asm/rtas.h               |   1 +
 arch/powerpc/perf/hv-24x7.c                   | 106 ++++++++++++++++--
 arch/powerpc/platforms/pseries/mobility.c     |  16 +++
 4 files changed, 134 insertions(+), 10 deletions(-)

-- 
2.18.2


^ permalink raw reply

* [PATCH v8 5/5] powerpc/hv-24x7: Update post_mobility_fixup() to handle migration
From: Kajol Jain @ 2020-05-06 11:07 UTC (permalink / raw)
  To: acme, linuxppc-dev, mpe, suka
  Cc: ravi.bangoria, maddy, jmario, peterz, gregkh, mpetlan,
	alexander.shishkin, ak, yao.jin, anju, mamatha4, jolsa, namhyung,
	mingo, kan.liang
In-Reply-To: <20200506110737.14904-1-kjain@linux.ibm.com>

Function 'read_sys_info_pseries()' is added to get system parameter
values like number of sockets and chips per socket.
and it gets these details via rtas_call with token
"PROCESSOR_MODULE_INFO".

Incase lpar migrate from one system to another, system
parameter details like chips per sockets or number of sockets might
change. So, it needs to be re-initialized otherwise, these values
corresponds to previous system values.
This patch adds a call to 'read_sys_info_pseries()' from
'post-mobility_fixup()' to re-init the physsockets and physchips values

Signed-off-by: Kajol Jain <kjain@linux.ibm.com>
---
 arch/powerpc/platforms/pseries/mobility.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c
index b571285f6c14..0fb8f1e6e9d2 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -42,6 +42,12 @@ struct update_props_workarea {
 #define MIGRATION_SCOPE	(1)
 #define PRRN_SCOPE -2
 
+#ifdef CONFIG_HV_PERF_CTRS
+void read_sys_info_pseries(void);
+#else
+static inline void read_sys_info_pseries(void) { }
+#endif
+
 static int mobility_rtas_call(int token, char *buf, s32 scope)
 {
 	int rc;
@@ -371,6 +377,16 @@ void post_mobility_fixup(void)
 	/* Possibly switch to a new RFI flush type */
 	pseries_setup_rfi_flush();
 
+	/*
+	 * In case an Lpar migrates from one system to another, system
+	 * parameter details like chips per sockets, cores per chip and
+	 * number of sockets details might change.
+	 * So, they needs to be re-initialized otherwise the
+	 * values will correspond to the previous system.
+	 * Call read_sys_info_pseries() to reinitialise the values.
+	 */
+	read_sys_info_pseries();
+
 	return;
 }
 
-- 
2.18.2


^ permalink raw reply related

* [PATCH v8 4/5] Documentation/ABI: Add ABI documentation for chips and sockets
From: Kajol Jain @ 2020-05-06 11:07 UTC (permalink / raw)
  To: acme, linuxppc-dev, mpe, suka
  Cc: ravi.bangoria, maddy, jmario, peterz, gregkh, mpetlan,
	alexander.shishkin, ak, yao.jin, anju, mamatha4, jolsa, namhyung,
	mingo, kan.liang
In-Reply-To: <20200506110737.14904-1-kjain@linux.ibm.com>

Add documentation for the following sysfs files:
/sys/devices/hv_24x7/interface/chipspersocket,
/sys/devices/hv_24x7/interface/sockets,
/sys/devices/hv_24x7/interface/coresperchip

Signed-off-by: Kajol Jain <kjain@linux.ibm.com>
---
 .../sysfs-bus-event_source-devices-hv_24x7    | 21 +++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_24x7 b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_24x7
index ec27c6c9e737..e8698afcd952 100644
--- a/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_24x7
+++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_24x7
@@ -22,6 +22,27 @@ Description:
 		Exposes the "version" field of the 24x7 catalog. This is also
 		extractable from the provided binary "catalog" sysfs entry.
 
+What:		/sys/devices/hv_24x7/interface/sockets
+Date:		May 2020
+Contact:	Linux on PowerPC Developer List <linuxppc-dev@lists.ozlabs.org>
+Description:	read only
+		This sysfs interface exposes the number of sockets present in the
+		system.
+
+What:		/sys/devices/hv_24x7/interface/chipspersocket
+Date:		May 2020
+Contact:	Linux on PowerPC Developer List <linuxppc-dev@lists.ozlabs.org>
+Description:	read only
+		This sysfs interface exposes the number of chips per socket
+		present in the system.
+
+What:		/sys/devices/hv_24x7/interface/coresperchip
+Date:		May 2020
+Contact:	Linux on PowerPC Developer List <linuxppc-dev@lists.ozlabs.org>
+Description:	read only
+		This sysfs interface exposes the number of cores per chip
+		present in the system.
+
 What:		/sys/bus/event_source/devices/hv_24x7/event_descs/<event-name>
 Date:		February 2014
 Contact:	Linux on PowerPC Developer List <linuxppc-dev@lists.ozlabs.org>
-- 
2.18.2


^ permalink raw reply related

* [PATCH v8 3/5] powerpc/hv-24x7: Add sysfs files inside hv-24x7 device to show processor details
From: Kajol Jain @ 2020-05-06 11:07 UTC (permalink / raw)
  To: acme, linuxppc-dev, mpe, suka
  Cc: ravi.bangoria, maddy, jmario, peterz, gregkh, mpetlan,
	alexander.shishkin, ak, yao.jin, anju, mamatha4, jolsa, namhyung,
	mingo, kan.liang
In-Reply-To: <20200506110737.14904-1-kjain@linux.ibm.com>

To expose the system dependent parameter like total number of
sockets and numbers of chips per socket, patch adds two sysfs files.
"sockets" and "chips" are added to /sys/devices/hv_24x7/interface/
of the "hv_24x7" pmu.

Signed-off-by: Kajol Jain <kjain@linux.ibm.com>
---
 arch/powerpc/perf/hv-24x7.c | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c
index 8cf242aad98f..f24dee2a660a 100644
--- a/arch/powerpc/perf/hv-24x7.c
+++ b/arch/powerpc/perf/hv-24x7.c
@@ -456,6 +456,24 @@ static ssize_t device_show_string(struct device *dev,
 	return sprintf(buf, "%s\n", (char *)d->var);
 }
 
+static ssize_t sockets_show(struct device *dev,
+			    struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", phys_sockets);
+}
+
+static ssize_t chipspersocket_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", phys_chipspersocket);
+}
+
+static ssize_t coresperchip_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", phys_coresperchip);
+}
+
 static struct attribute *device_str_attr_create_(char *name, char *str)
 {
 	struct dev_ext_attribute *attr = kzalloc(sizeof(*attr), GFP_KERNEL);
@@ -1102,6 +1120,9 @@ PAGE_0_ATTR(catalog_len, "%lld\n",
 		(unsigned long long)be32_to_cpu(page_0->length) * 4096);
 static BIN_ATTR_RO(catalog, 0/* real length varies */);
 static DEVICE_ATTR_RO(domains);
+static DEVICE_ATTR_RO(sockets);
+static DEVICE_ATTR_RO(chipspersocket);
+static DEVICE_ATTR_RO(coresperchip);
 
 static struct bin_attribute *if_bin_attrs[] = {
 	&bin_attr_catalog,
@@ -1112,6 +1133,9 @@ static struct attribute *if_attrs[] = {
 	&dev_attr_catalog_len.attr,
 	&dev_attr_catalog_version.attr,
 	&dev_attr_domains.attr,
+	&dev_attr_sockets.attr,
+	&dev_attr_chipspersocket.attr,
+	&dev_attr_coresperchip.attr,
 	NULL,
 };
 
-- 
2.18.2


^ permalink raw reply related

* [PATCH v8 2/5] powerpc/hv-24x7: Add rtas call in hv-24x7 driver to get processor details
From: Kajol Jain @ 2020-05-06 11:07 UTC (permalink / raw)
  To: acme, linuxppc-dev, mpe, suka
  Cc: ravi.bangoria, maddy, jmario, peterz, gregkh, mpetlan,
	alexander.shishkin, ak, yao.jin, anju, mamatha4, jolsa, namhyung,
	mingo, kan.liang
In-Reply-To: <20200506110737.14904-1-kjain@linux.ibm.com>

For hv_24x7 socket/chip level events, specific chip-id to which
the data requested should be added as part of pmu events.
But number of chips/socket in the system details are not exposed.

Patch implements read_sys_info_pseries() to get system
parameter values like number of sockets and chips per socket.
Rtas_call with token "PROCESSOR_MODULE_INFO"
is used to get these values.

Sub-sequent patch exports these values via sysfs.

Patch also make these parameters default to 1.

Signed-off-by: Kajol Jain <kjain@linux.ibm.com>
---
 arch/powerpc/include/asm/rtas.h |  1 +
 arch/powerpc/perf/hv-24x7.c     | 72 +++++++++++++++++++++++++++++++++
 2 files changed, 73 insertions(+)

diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index 3c1887351c71..1c11f814932d 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -482,6 +482,7 @@ static inline void rtas_initialize(void) { };
 #endif
 
 extern int call_rtas(const char *, int, int, unsigned long *, ...);
+extern void read_sys_info_pseries(void);
 
 #endif /* __KERNEL__ */
 #endif /* _POWERPC_RTAS_H */
diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c
index 48e8f4b17b91..8cf242aad98f 100644
--- a/arch/powerpc/perf/hv-24x7.c
+++ b/arch/powerpc/perf/hv-24x7.c
@@ -20,6 +20,7 @@
 #include <asm/io.h>
 #include <linux/byteorder/generic.h>
 
+#include <asm/rtas.h>
 #include "hv-24x7.h"
 #include "hv-24x7-catalog.h"
 #include "hv-common.h"
@@ -57,6 +58,75 @@ static bool is_physical_domain(unsigned domain)
 	}
 }
 
+/*
+ * The Processor Module Information system parameter allows transferring
+ * of certain processor module information from the platform to the OS.
+ * Refer PAPR+ document to get parameter token value as '43'.
+ */
+
+#define PROCESSOR_MODULE_INFO   43
+#define PROCESSOR_MAX_LENGTH	(8 * 1024)
+
+DEFINE_SPINLOCK(rtas_local_data_buf_lock);
+EXPORT_SYMBOL(rtas_local_data_buf_lock);
+
+static u32 phys_sockets;	/* Physical sockets */
+static u32 phys_chipspersocket;	/* Physical chips per socket*/
+static u32 phys_coresperchip; /* Physical cores per chip */
+
+/*
+ * Function read_sys_info_pseries() make a rtas_call which require
+ * data buffer of size 8K. As standard 'rtas_data_buf' is of size
+ * 4K, we are adding new local buffer 'rtas_local_data_buf'.
+ */
+static __be16 rtas_local_data_buf[PROCESSOR_MAX_LENGTH] __cacheline_aligned;
+
+/*
+ * read_sys_info_pseries()
+ * Retrieve the number of sockets and chips per socket and cores per
+ * chip details through the get-system-parameter rtas call.
+ */
+void read_sys_info_pseries(void)
+{
+	int call_status, len, ntypes;
+
+	/*
+	 * Making system parameter: chips and sockets and cores per chip
+	 * default to 1.
+	 */
+	phys_sockets = 1;
+	phys_chipspersocket = 1;
+	phys_coresperchip = 1;
+	memset(rtas_local_data_buf, 0, PROCESSOR_MAX_LENGTH * sizeof(__be16));
+	spin_lock(&rtas_local_data_buf_lock);
+
+	call_status = rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1,
+				NULL,
+				PROCESSOR_MODULE_INFO,
+				__pa(rtas_local_data_buf),
+				PROCESSOR_MAX_LENGTH);
+
+	spin_unlock(&rtas_local_data_buf_lock);
+
+	if (call_status != 0) {
+		pr_info("Error calling get-system-parameter (0x%x)\n",
+			call_status);
+	} else {
+		rtas_local_data_buf[PROCESSOR_MAX_LENGTH - 1] = '\0';
+		len = be16_to_cpup((__be16 *)&rtas_local_data_buf[0]);
+		if (len < 4)
+			return;
+
+		ntypes = be16_to_cpup(&rtas_local_data_buf[1]);
+
+		if (!ntypes)
+			return;
+		phys_sockets = be16_to_cpup(&rtas_local_data_buf[2]);
+		phys_chipspersocket = be16_to_cpup(&rtas_local_data_buf[3]);
+		phys_coresperchip = be16_to_cpup(&rtas_local_data_buf[4]);
+	}
+}
+
 /* Domains for which more than one result element are returned for each event. */
 static bool domain_needs_aggregation(unsigned int domain)
 {
@@ -1605,6 +1675,8 @@ static int hv_24x7_init(void)
 	if (r)
 		return r;
 
+	read_sys_info_pseries();
+
 	return 0;
 }
 
-- 
2.18.2


^ permalink raw reply related

* [PATCH v8 1/5] powerpc/perf/hv-24x7: Fix inconsistent output values incase multiple hv-24x7 events run
From: Kajol Jain @ 2020-05-06 11:07 UTC (permalink / raw)
  To: acme, linuxppc-dev, mpe, suka
  Cc: ravi.bangoria, maddy, jmario, peterz, gregkh, mpetlan,
	alexander.shishkin, ak, yao.jin, anju, mamatha4, jolsa, namhyung,
	Sukadev Bhattiprolu, mingo, kan.liang
In-Reply-To: <20200506110737.14904-1-kjain@linux.ibm.com>

Commit 2b206ee6b0df ("powerpc/perf/hv-24x7: Display change in counter
values")' added to print _change_ in the counter value rather then raw
value for 24x7 counters. Incase of transactions, the event count
is set to 0 at the beginning of the transaction. It also sets
the event's prev_count to the raw value at the time of initialization.
Because of setting event count to 0, we are seeing some weird behaviour,
whenever we run multiple 24x7 events at a time.

For example:

command#: ./perf stat -e "{hv_24x7/PM_MCS01_128B_RD_DISP_PORT01,chip=0/,
			   hv_24x7/PM_MCS01_128B_RD_DISP_PORT01,chip=1/}"
	  		   -C 0 -I 1000 sleep 100

     1.000121704                120 hv_24x7/PM_MCS01_128B_RD_DISP_PORT01,chip=0/
     1.000121704                  5 hv_24x7/PM_MCS01_128B_RD_DISP_PORT01,chip=1/
     2.000357733                  8 hv_24x7/PM_MCS01_128B_RD_DISP_PORT01,chip=0/
     2.000357733                 10 hv_24x7/PM_MCS01_128B_RD_DISP_PORT01,chip=1/
     3.000495215 18,446,744,073,709,551,616 hv_24x7/PM_MCS01_128B_RD_DISP_PORT01,chip=0/
     3.000495215 18,446,744,073,709,551,616 hv_24x7/PM_MCS01_128B_RD_DISP_PORT01,chip=1/
     4.000641884                 56 hv_24x7/PM_MCS01_128B_RD_DISP_PORT01,chip=0/
     4.000641884 18,446,744,073,709,551,616 hv_24x7/PM_MCS01_128B_RD_DISP_PORT01,chip=1/
     5.000791887 18,446,744,073,709,551,616 hv_24x7/PM_MCS01_128B_RD_DISP_PORT01,chip=0/

Getting these large values in case we do -I.

As we are setting event_count to 0, for interval case, overall event_count is not
coming in incremental order. As we may can get new delta lesser then previous count.
Because of which when we print intervals, we are getting negative value which create
these large values.

This patch removes part where we set event_count to 0 in function
'h_24x7_event_read'. There won't be much impact as we do set event->hw.prev_count
to the raw value at the time of initialization to print change value.

With this patch
In power9 platform

command#: ./perf stat -e "{hv_24x7/PM_MCS01_128B_RD_DISP_PORT01,chip=0/,
		           hv_24x7/PM_MCS01_128B_RD_DISP_PORT01,chip=1/}"
			   -C 0 -I 1000 sleep 100

     1.000117685                 93 hv_24x7/PM_MCS01_128B_RD_DISP_PORT01,chip=0/
     1.000117685                  1 hv_24x7/PM_MCS01_128B_RD_DISP_PORT01,chip=1/
     2.000349331                 98 hv_24x7/PM_MCS01_128B_RD_DISP_PORT01,chip=0/
     2.000349331                  2 hv_24x7/PM_MCS01_128B_RD_DISP_PORT01,chip=1/
     3.000495900                131 hv_24x7/PM_MCS01_128B_RD_DISP_PORT01,chip=0/
     3.000495900                  4 hv_24x7/PM_MCS01_128B_RD_DISP_PORT01,chip=1/
     4.000645920                204 hv_24x7/PM_MCS01_128B_RD_DISP_PORT01,chip=0/
     4.000645920                 61 hv_24x7/PM_MCS01_128B_RD_DISP_PORT01,chip=1/
     4.284169997                 22 hv_24x7/PM_MCS01_128B_RD_DISP_PORT01,chip=0/

Signed-off-by: Kajol Jain <kjain@linux.ibm.com>
Suggested-by: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
---
 arch/powerpc/perf/hv-24x7.c | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c
index 573e0b309c0c..48e8f4b17b91 100644
--- a/arch/powerpc/perf/hv-24x7.c
+++ b/arch/powerpc/perf/hv-24x7.c
@@ -1400,16 +1400,6 @@ static void h_24x7_event_read(struct perf_event *event)
 			h24x7hw = &get_cpu_var(hv_24x7_hw);
 			h24x7hw->events[i] = event;
 			put_cpu_var(h24x7hw);
-			/*
-			 * Clear the event count so we can compute the _change_
-			 * in the 24x7 raw counter value at the end of the txn.
-			 *
-			 * Note that we could alternatively read the 24x7 value
-			 * now and save its value in event->hw.prev_count. But
-			 * that would require issuing a hcall, which would then
-			 * defeat the purpose of using the txn interface.
-			 */
-			local64_set(&event->count, 0);
 		}
 
 		put_cpu_var(hv_24x7_reqb);
-- 
2.18.2


^ permalink raw reply related

* Re: [PATCH v7 2/5] powerpc/hv-24x7: Add rtas call in hv-24x7 driver to get processor details
From: kajoljain @ 2020-05-06  9:53 UTC (permalink / raw)
  To: Michael Ellerman, acme, linuxppc-dev, sukadev
  Cc: mark.rutland, ravi.bangoria, maddy, tglx, jmario, mpetlan, peterz,
	gregkh, linux-kernel, alexander.shishkin, linux-perf-users, ak,
	yao.jin, anju, mamatha4, jolsa, namhyung, mingo, kan.liang
In-Reply-To: <87ftcmfryt.fsf@mpe.ellerman.id.au>



On 4/29/20 5:01 PM, Michael Ellerman wrote:
> Hi Kajol,
> 
> Some comments inline ...
> 
> Kajol Jain <kjain@linux.ibm.com> writes:
>> For hv_24x7 socket/chip level events, specific chip-id to which
>> the data requested should be added as part of pmu events.
>> But number of chips/socket in the system details are not exposed.
>>
>> Patch implements read_sys_info_pseries() to get system
>> parameter values like number of sockets and chips per socket.
>> Rtas_call with token "PROCESSOR_MODULE_INFO"
>> is used to get these values.
>>
>> Sub-sequent patch exports these values via sysfs.
>>
>> Patch also make these parameters default to 1.
>>
>> Signed-off-by: Kajol Jain <kjain@linux.ibm.com>
>> ---
>>  arch/powerpc/perf/hv-24x7.c              | 72 ++++++++++++++++++++++++
>>  arch/powerpc/platforms/pseries/pseries.h |  3 +
>>  2 files changed, 75 insertions(+)
>>
>> diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c
>> index 48e8f4b17b91..9ae00f29bd21 100644
>> --- a/arch/powerpc/perf/hv-24x7.c
>> +++ b/arch/powerpc/perf/hv-24x7.c
>> @@ -20,6 +20,11 @@
>>  #include <asm/io.h>
>>  #include <linux/byteorder/generic.h>
>>  
>> +#ifdef CONFIG_PPC_RTAS
> 
> This driver can only be build on pseries, and pseries always selects
> RTAS. So the ifdef is unncessary.

Hi Michael,
	Thanks for review, I will remove this check.

> 
>> +#include <asm/rtas.h>
>> +#include <../../platforms/pseries/pseries.h>
>> +#endif
> 
> That's not really what the platform header is intended for.
> 
> You should put the extern in arch/powerpc/include/asm somewhere.
> 
> Maybe rtas.h
> 
>> @@ -57,6 +62,69 @@ static bool is_physical_domain(unsigned domain)
>>  	}
>>  }
>>  
>> +#ifdef CONFIG_PPC_RTAS
> 
> Not needed.
> 
>> +#define PROCESSOR_MODULE_INFO   43
> 
> Please document where these come from, presumably LoPAPR somewhere?
> 

Sure, will add the details.

>> +#define PROCESSOR_MAX_LENGTH	(8 * 1024)
>> +
>> +static int strbe16toh(const char *buf, int offset)
>> +{
>> +	return (buf[offset] << 8) + buf[offset + 1];
>> +}
> 
> I'm confused by this. "str" implies string, a string is an array of
> bytes and has no endian. But then be16 implies it's an array of __be16,
> in which case buf should be a __be16 *.
> 

Yes right, actually I was following implementation in util-linux. But what you
suggested make more sense, will update accordingly.

>> +
>> +static u32		physsockets;	/* Physical sockets */
>> +static u32		physchips;	/* Physical chips */
> 
> No tabs there please.

Sure will update.

> 
>> +
>> +/*
>> + * Function read_sys_info_pseries() make a rtas_call which require
>> + * data buffer of size 8K. As standard 'rtas_data_buf' is of size
>> + * 4K, we are adding new local buffer 'rtas_local_data_buf'.
>> + */
>> +char rtas_local_data_buf[PROCESSOR_MAX_LENGTH] __cacheline_aligned;
> 
> static?
> 
>> +/*
>> + * read_sys_info_pseries()
>> + * Retrieve the number of sockets and chips per socket details
>> + * through the get-system-parameter rtas call.
>> + */
>> +void read_sys_info_pseries(void)
>> +{
>> +	int call_status, len, ntypes;
>> +
>> +	/*
>> +	 * Making system parameter: chips and sockets default to 1.
>> +	 */
>> +	physsockets = 1;
>> +	physchips = 1;
>> +	memset(rtas_local_data_buf, 0, PROCESSOR_MAX_LENGTH);
>> +	spin_lock(&rtas_data_buf_lock);
> 
> You're not using the rtas_data_buf, so why are you taking the
> rtas_data_buf_lock?

Sure, I will add new lock specific for rtas_local_data_buf

> 
>> +	call_status = rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1,
>> +				NULL,
>> +				PROCESSOR_MODULE_INFO,
>> +				__pa(rtas_local_data_buf),
>> +				PROCESSOR_MAX_LENGTH);
>> +
>> +	spin_unlock(&rtas_data_buf_lock);
>> +
>> +	if (call_status != 0) {
>> +		pr_info("%s %s Error calling get-system-parameter (0x%x)\n",
>> +			__FILE__, __func__, call_status);
> 
> pr_err(), don't use __FILE__, this file already uses pr_fmt(). Not sure
> __func__ is really necessary either
> 
> 		return;
> 
> Then you can deindent the next block.
> 
>> +	} else {
>> +		rtas_local_data_buf[PROCESSOR_MAX_LENGTH - 1] = '\0';
>> +		len = strbe16toh(rtas_local_data_buf, 0);
> 
> Why isn't the buffer a __be16 array, and then you just use be16_to_cpu() ?
> 
>> +		if (len < 6)
>> +			return;
>> +
>> +		ntypes = strbe16toh(rtas_local_data_buf, 2);
>> +
>> +		if (!ntypes)
>> +			return;
> 
> What is ntype
ntype specify processor module type

> 
>> +		physsockets = strbe16toh(rtas_local_data_buf, 4);
>> +		physchips = strbe16toh(rtas_local_data_buf, 6);
>> +	}
>> +}
>> +#endif /* CONFIG_PPC_RTAS */
>> +
>>  /* Domains for which more than one result element are returned for each event. */
>>  static bool domain_needs_aggregation(unsigned int domain)
>>  {
>> @@ -1605,6 +1673,10 @@ static int hv_24x7_init(void)
>>  	if (r)
>>  		return r;
>>  
>> +#ifdef CONFIG_PPC_RTAS
>> +	read_sys_info_pseries();
>> +#endif
>

Will remove this config checks.

Thanks,
Kajol Jain

 
>> +
>>  	return 0;
>>  }
>>  
>> diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h
>> index 13fa370a87e4..1727559ce304 100644
>> --- a/arch/powerpc/platforms/pseries/pseries.h
>> +++ b/arch/powerpc/platforms/pseries/pseries.h
>> @@ -19,6 +19,9 @@ extern void request_event_sources_irqs(struct device_node *np,
>>  struct pt_regs;
>>  
>>  extern int pSeries_system_reset_exception(struct pt_regs *regs);
>> +#ifdef CONFIG_PPC_RTAS
>> +extern void read_sys_info_pseries(void);
>> +#endif
>>  extern int pSeries_machine_check_exception(struct pt_regs *regs);
>>  extern long pseries_machine_check_realmode(struct pt_regs *regs);
> > 
> cheers
> 

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox