* [RFC 1/2] powerpc/mm: Add trace points for various types of hash faults
@ 2016-02-17 7:00 Anshuman Khandual
2016-02-17 7:00 ` [RFC 2/2] selftests/powerpc: Add tests for various hash page fault paths Anshuman Khandual
0 siblings, 1 reply; 2+ messages in thread
From: Anshuman Khandual @ 2016-02-17 7:00 UTC (permalink / raw)
To: linuxppc-dev; +Cc: aneesh.kumar, mpe
Add tracepoint definitions and invocations for each type of hash
fault: THP, HugeTLB, 64K and 4K mappings. They are intended to be
consumed from user space for performance and functional evaluation
of the various memory management paths.
Signed-off-by: Anshuman Khandual <khandual@linux.vnet.ibm.com>
---
arch/powerpc/include/asm/trace.h | 82 ++++++++++++++++++++++++++++++++++++
arch/powerpc/mm/hash64_64k.c | 3 ++
arch/powerpc/mm/hugepage-hash64.c | 2 +
arch/powerpc/mm/hugetlbpage-hash64.c | 3 ++
4 files changed, 90 insertions(+)
diff --git a/arch/powerpc/include/asm/trace.h b/arch/powerpc/include/asm/trace.h
index 8e86b48..4f0a829 100644
--- a/arch/powerpc/include/asm/trace.h
+++ b/arch/powerpc/include/asm/trace.h
@@ -164,6 +164,88 @@ TRACE_EVENT(hash_fault,
__entry->addr, __entry->access, __entry->trap)
);
+/*
+ * Page-size specific hash fault events.
+ *
+ * Every event below records the same three values handed in by the caller
+ * (addr, access and trap), so they share a single event class and differ
+ * only in their name and in the prefix of their output format.
+ */
+DECLARE_EVENT_CLASS(hash_fault_psize,
+
+	TP_PROTO(unsigned long addr, unsigned long access, unsigned long trap),
+	TP_ARGS(addr, access, trap),
+	TP_STRUCT__entry(
+		__field(unsigned long, addr)
+		__field(unsigned long, access)
+		__field(unsigned long, trap)
+	),
+
+	TP_fast_assign(
+		__entry->addr = addr;
+		__entry->access = access;
+		__entry->trap = trap;
+	),
+
+	/* Default format; each event below overrides it with its own prefix */
+	TP_printk("hash fault with addr 0x%lx and access = 0x%lx trap = 0x%lx",
+		  __entry->addr, __entry->access, __entry->trap)
+);
+
+/* Fired on entry to __hash_page_huge() */
+DEFINE_EVENT_PRINT(hash_fault_psize, hash_fault_hugetlb,
+
+	TP_PROTO(unsigned long addr, unsigned long access, unsigned long trap),
+	TP_ARGS(addr, access, trap),
+
+	TP_printk("HugeTLB hash fault with addr 0x%lx and access = 0x%lx trap = 0x%lx",
+		  __entry->addr, __entry->access, __entry->trap)
+);
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+/* Fired on entry to __hash_page_thp() */
+DEFINE_EVENT_PRINT(hash_fault_psize, hash_fault_thp,
+
+	TP_PROTO(unsigned long addr, unsigned long access, unsigned long trap),
+	TP_ARGS(addr, access, trap),
+
+	TP_printk("THP hash fault with addr 0x%lx and access = 0x%lx trap = 0x%lx",
+		  __entry->addr, __entry->access, __entry->trap)
+);
+#endif
+
+/* Fired on entry to __hash_page_64K() */
+DEFINE_EVENT_PRINT(hash_fault_psize, hash_fault_64K,
+
+	TP_PROTO(unsigned long addr, unsigned long access, unsigned long trap),
+	TP_ARGS(addr, access, trap),
+
+	TP_printk("64K hash fault with addr 0x%lx and access = 0x%lx trap = 0x%lx",
+		  __entry->addr, __entry->access, __entry->trap)
+);
+
+/* Fired on entry to __hash_page_4K() */
+DEFINE_EVENT_PRINT(hash_fault_psize, hash_fault_4K,
+
+	TP_PROTO(unsigned long addr, unsigned long access, unsigned long trap),
+	TP_ARGS(addr, access, trap),
+
+	TP_printk("4K hash fault with addr 0x%lx and access = 0x%lx trap = 0x%lx",
+		  __entry->addr, __entry->access, __entry->trap)
+);
+
#endif /* _TRACE_POWERPC_H */
#undef TRACE_INCLUDE_PATH
diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/hash64_64k.c
index 0762c1e..7966fee 100644
--- a/arch/powerpc/mm/hash64_64k.c
+++ b/arch/powerpc/mm/hash64_64k.c
@@ -15,6 +15,7 @@
#include <linux/mm.h>
#include <asm/machdep.h>
#include <asm/mmu.h>
+#include <asm/trace.h>
/*
* index from 0 - 15
*/
@@ -58,6 +59,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
unsigned long vpn, hash, slot;
unsigned long shift = mmu_psize_defs[MMU_PAGE_4K].shift;
+ trace_hash_fault_4K(ea, access, trap);
/*
* atomically mark the linux large page PTE busy and dirty
*/
@@ -221,6 +223,7 @@ int __hash_page_64K(unsigned long ea, unsigned long access,
unsigned long vpn, hash, slot;
unsigned long shift = mmu_psize_defs[MMU_PAGE_64K].shift;
+ trace_hash_fault_64K(ea, access, trap);
/*
* atomically mark the linux large page PTE busy and dirty
*/
diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c
index 49b152b..daa588c 100644
--- a/arch/powerpc/mm/hugepage-hash64.c
+++ b/arch/powerpc/mm/hugepage-hash64.c
@@ -17,6 +17,7 @@
*/
#include <linux/mm.h>
#include <asm/machdep.h>
+#include <asm/trace.h>
int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid,
pmd_t *pmdp, unsigned long trap, unsigned long flags,
@@ -29,6 +30,7 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid,
int ret, lpsize = MMU_PAGE_16M;
unsigned long vpn, hash, shift, slot;
+ trace_hash_fault_thp(ea, access, trap);
/*
* atomically mark the linux large page PMD busy and dirty
*/
diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c
index e2138c7..250c0a1 100644
--- a/arch/powerpc/mm/hugetlbpage-hash64.c
+++ b/arch/powerpc/mm/hugetlbpage-hash64.c
@@ -13,6 +13,7 @@
#include <asm/pgalloc.h>
#include <asm/cacheflush.h>
#include <asm/machdep.h>
+#include <asm/trace.h>
extern long hpte_insert_repeating(unsigned long hash, unsigned long vpn,
unsigned long pa, unsigned long rlags,
@@ -29,6 +30,8 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
BUG_ON(shift != mmu_psize_defs[mmu_psize].shift);
+ trace_hash_fault_hugetlb(ea, access, trap);
+
/* Search the Linux page table for a match with va */
vpn = hpt_vpn(ea, vsid, ssize);
--
2.1.0
^ permalink raw reply related [flat|nested] 2+ messages in thread
* [RFC 2/2] selftests/powerpc: Add tests for various hash page fault paths
2016-02-17 7:00 [RFC 1/2] powerpc/mm: Add trace points for various types of hash faults Anshuman Khandual
@ 2016-02-17 7:00 ` Anshuman Khandual
0 siblings, 0 replies; 2+ messages in thread
From: Anshuman Khandual @ 2016-02-17 7:00 UTC (permalink / raw)
To: linuxppc-dev; +Cc: aneesh.kumar, mpe
This new test case creates virtual memory scenarios that drive the
different types of hash page faults. It also uses the perf API to
count the number of times each intended hash fault path was
taken.
Signed-off-by: Anshuman Khandual <khandual@linux.vnet.ibm.com>
---
The test results currently look like this. The objective is to
automatically verify the counts of these traces for the various
buffer sizes and scenarios created.
vm.nr_hugepages = 10
HugeTLB allocation::::::::
[ faults]: 1
[ major-faults]: 0
[ minor-faults]: 1
[ hash_faults]: 2
[ hash_faults_thp]: 0
[ hash_faults_64K]: 0
[ hash_faults_4K]: 0
[ hash_faults_hugetlb]: 1
THP allocation::::::::
[ faults]: 256
[ major-faults]: 0
[ minor-faults]: 256
[ hash_faults]: 256
[ hash_faults_thp]: 0
[ hash_faults_64K]: 256
[ hash_faults_4K]: 0
[ hash_faults_hugetlb]: 0
SUBPAGE protection::::
[ faults]: 0
[ major-faults]: 0
[ minor-faults]: 0
[ hash_faults]: 4096
[ hash_faults_thp]: 0
[ hash_faults_64K]: 0
[ hash_faults_4K]: 4096
[ hash_faults_hugetlb]: 0
PFN flush::::::::::::
[ faults]: 256
[ major-faults]: 0
[ minor-faults]: 256
[ hash_faults]: 4352
[ hash_faults_thp]: 0
[ hash_faults_64K]: 0
[ hash_faults_4K]: 4096
[ hash_faults_hugetlb]: 0
vm.nr_hugepages = 0
THP allocation on a free system is not happening even after a call to
madvise(MADV_HUGEPAGE). The problem seems to be related to the NUMA memory
configuration, which I will debug further.
tools/testing/selftests/powerpc/mm/Makefile | 2 +-
tools/testing/selftests/powerpc/mm/mem_perf.c | 198 ++++++++++++++++++++++++
tools/testing/selftests/powerpc/mm/run_mem_perf | 3 +
3 files changed, 202 insertions(+), 1 deletion(-)
create mode 100644 tools/testing/selftests/powerpc/mm/mem_perf.c
create mode 100755 tools/testing/selftests/powerpc/mm/run_mem_perf
diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile
index ee179e2..13bc5c3 100644
--- a/tools/testing/selftests/powerpc/mm/Makefile
+++ b/tools/testing/selftests/powerpc/mm/Makefile
@@ -1,7 +1,7 @@
noarg:
$(MAKE) -C ../
-TEST_PROGS := hugetlb_vs_thp_test subpage_prot
+TEST_PROGS := hugetlb_vs_thp_test subpage_prot mem_perf
TEST_FILES := tempfile
all: $(TEST_PROGS) $(TEST_FILES)
diff --git a/tools/testing/selftests/powerpc/mm/mem_perf.c b/tools/testing/selftests/powerpc/mm/mem_perf.c
new file mode 100644
index 0000000..f5d8348
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/mem_perf.c
@@ -0,0 +1,198 @@
+/*
+ * Copyright 2016, Anshuman Khandual, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+
+#include "../pmu/event.c"
+
+#define ADDR_INPUT 0xa0000000000UL
+#define HPAGE_SIZE 0x1000000
+#define PSIZE_64K 0x10000
+#define PSIZE_4K 0x1000
+
+#define MAX_MM_EVENTS 8
+
+struct event mm_events[MAX_MM_EVENTS];
+
+/*
+ * Initialise @e as the PERF_TYPE_SOFTWARE event selected by @config and
+ * label it @name. The event is left disabled and is configured to exclude
+ * kernel, hypervisor and idle activity.
+ */
+static void setup_event(struct event *e, u64 config, char *name)
+{
+	event_init_opts(e, config, PERF_TYPE_SOFTWARE, name);
+
+	/* Exclusion filters */
+	e->attr.exclude_idle = 1;
+	e->attr.exclude_hv = 1;
+	e->attr.exclude_kernel = 1;
+
+	/* Stays off until explicitly enabled */
+	e->attr.disabled = 1;
+}
+
+/*
+ * Initialise @e as a PERF_TYPE_TRACEPOINT event.
+ *
+ * @e:      event to fill in; it is zeroed first
+ * @config: id of the tracepoint to count
+ * @name:   label stored in the event
+ *
+ * The event is left disabled, is inherited by children and excludes guest
+ * activity.
+ */
+static void setup_event_tr(struct event *e, u64 config, char *name)
+{
+	memset(e, 0, sizeof(*e));
+
+	e->name = name;
+	e->attr.type = PERF_TYPE_TRACEPOINT;
+	e->attr.config = config;
+	e->attr.size = sizeof(e->attr);
+	/*
+	 * PERF_SAMPLE_IDENTIFIER is a sample_type flag, not a period, so it
+	 * belongs in sample_type; sample_period stays 0.
+	 */
+	e->attr.sample_type = PERF_SAMPLE_IDENTIFIER;
+	e->attr.inherit = 1;
+	e->attr.enable_on_exec = 1;
+	e->attr.exclude_guest = 1;
+
+	/* This has to match the structure layout in the header */
+	e->attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
+			      PERF_FORMAT_TOTAL_TIME_RUNNING;
+	e->attr.disabled = 1;
+}
+
+
+/*
+ * Call event_reset() on every entry of mm_events[] and, once all of them
+ * have been reset, call event_enable() on every entry.
+ */
+static void prepare_events(void)
+{
+	struct event *const last = mm_events + MAX_MM_EVENTS;
+	struct event *ev;
+
+	/* First pass: reset the whole set */
+	for (ev = mm_events; ev != last; ev++)
+		event_reset(ev);
+
+	/* Second pass: enable the whole set */
+	for (ev = mm_events; ev != last; ev++)
+		event_enable(ev);
+}
+
+/* Call event_close() on every entry of mm_events[], in index order. */
+static void close_events(void)
+{
+	int idx = 0;
+
+	while (idx < MAX_MM_EVENTS) {
+		event_close(&mm_events[idx]);
+		idx++;
+	}
+}
+
+/*
+ * Disable every event in mm_events[], then read every event, then print
+ * one "[name]: value" line per event on stdout.
+ */
+static void display_events(void)
+{
+	struct event *const last = mm_events + MAX_MM_EVENTS;
+	struct event *ev;
+
+	/* Disable them all before reading any of them */
+	for (ev = mm_events; ev != last; ev++)
+		event_disable(ev);
+
+	for (ev = mm_events; ev != last; ev++)
+		event_read(ev);
+
+	for (ev = mm_events; ev != last; ev++)
+		printf("[%20s]: \t %llu\n", ev->name, ev->result.value);
+}
+
+/* Location of the id file of a powerpc tracepoint; %s is the event name */
+#define TRACEPOINT_ID_FMT "/sys/kernel/debug/tracing/events/powerpc/%s/id"
+
+/*
+ * Return the id of the powerpc tracepoint @tp, read from its "id" file.
+ *
+ * Tracepoint ids are assigned by the running kernel, so they must be looked
+ * up at run time rather than hard-coded. Prints a diagnostic and exits if
+ * the file cannot be opened or parsed.
+ */
+static u64 tracepoint_id(const char *tp)
+{
+	unsigned long long id;
+	char path[128];
+	FILE *f;
+	int len;
+
+	len = snprintf(path, sizeof(path), TRACEPOINT_ID_FMT, tp);
+	if (len < 0 || (size_t)len >= sizeof(path)) {
+		fprintf(stderr, "tracepoint name too long: %s\n", tp);
+		exit(-1);
+	}
+
+	f = fopen(path, "r");
+	if (!f) {
+		perror(path);
+		exit(-1);
+	}
+
+	if (fscanf(f, "%llu", &id) != 1) {
+		fprintf(stderr, "%s: could not parse tracepoint id\n", path);
+		fclose(f);
+		exit(-1);
+	}
+	fclose(f);
+
+	return id;
+}
+
+/*
+ * Fill in mm_events[]: slots 0-2 are the software page fault counters,
+ * slots 3-7 are the hash fault tracepoints.
+ */
+static void setup_events(void)
+{
+	setup_event(&mm_events[0], PERF_COUNT_SW_PAGE_FAULTS, "faults");
+	setup_event(&mm_events[1], PERF_COUNT_SW_PAGE_FAULTS_MAJ, "major-faults");
+	setup_event(&mm_events[2], PERF_COUNT_SW_PAGE_FAULTS_MIN, "minor-faults");
+
+	setup_event_tr(&mm_events[3], tracepoint_id("hash_fault"), "hash_faults");
+	setup_event_tr(&mm_events[4], tracepoint_id("hash_fault_thp"), "hash_faults_thp");
+	setup_event_tr(&mm_events[5], tracepoint_id("hash_fault_64K"), "hash_faults_64K");
+	setup_event_tr(&mm_events[6], tracepoint_id("hash_fault_4K"), "hash_faults_4K");
+	setup_event_tr(&mm_events[7], tracepoint_id("hash_fault_hugetlb"), "hash_faults_hugetlb");
+}
+
+/*
+ * Call event_open() on every entry of mm_events[]. A failure is reported
+ * with perror() and the remaining events are still opened.
+ */
+static void open_events(void)
+{
+	int i;
+
+	for (i = 0; i < MAX_MM_EVENTS; i++) {
+		if (event_open(&mm_events[i]))
+			perror("event_open() failed");
+	}
+}
+
+/*
+ * Issue the subpage_prot system call on [@ptr, @ptr + @size) with an
+ * all-zero protection map holding one unsigned int per 64K page.
+ *
+ * Exits the program if the map cannot be allocated or the call fails.
+ */
+static void subpage_prot_change(char *ptr, unsigned long size)
+{
+	unsigned long npages = size / PSIZE_64K;
+	unsigned int *map;
+	long err;
+
+	/* calloc() gives the zeroed map directly and checks for overflow */
+	map = calloc(npages, sizeof(*map));
+	if (!map) {
+		perror("calloc() failed");
+		exit(-1);
+	}
+
+	err = syscall(__NR_subpage_prot, ptr, size, map);
+	if (err) {
+		perror("subpage() protection failed");
+		exit(-1);
+	}
+
+	/* The map is only an input to the call above; do not leak it */
+	free(map);
+}
+
+/*
+ * Apply madvise(MADV_DONTNEED) to [@ptr, @ptr + @size); report the error
+ * and exit if the call fails.
+ */
+static void dont_need_request(char *ptr, unsigned long size)
+{
+	int rc = madvise(ptr, size, MADV_DONTNEED);
+
+	if (rc == 0)
+		return;
+
+	perror("madvise");
+	exit(-1);
+}
+
+/*
+ * Apply madvise(MADV_HUGEPAGE) to [@ptr, @ptr + @size); report the error
+ * and exit if the call fails.
+ */
+static void thp_request(char *ptr, unsigned long size)
+{
+	int rc = madvise(ptr, size, MADV_HUGEPAGE);
+
+	if (rc == 0)
+		return;
+
+	perror("madvise");
+	exit(-1);
+}
+
+/*
+ * Usage: mem_perf <nr_hugepages>
+ *
+ * Maps <nr_hugepages> * HPAGE_SIZE bytes in several ways (HugeTLB, THP
+ * request, after a subpage_prot call, after MADV_DONTNEED), touches each
+ * mapping with memset() and prints the event counts collected around each
+ * memset().
+ *
+ * Exits with a diagnostic on a missing or invalid argument and on any
+ * mapping failure; returns 0 otherwise.
+ */
+int main(int argc, char *argv[])
+{
+	unsigned long nr_hp;
+	size_t length;
+	char *ptr, *htlb, *end;
+
+	if (argc < 2) {
+		fprintf(stderr, "Usage: mem_perf <nr_hugepages>\n");
+		exit(-1);
+	}
+
+	/* Reject junk, zero, and counts whose byte size would overflow */
+	nr_hp = strtoul(argv[1], &end, 0);
+	if (end == argv[1] || *end != '\0' ||
+	    nr_hp == 0 || nr_hp > (size_t)-1 / HPAGE_SIZE) {
+		fprintf(stderr, "Invalid huge page count: %s\n", argv[1]);
+		exit(-1);
+	}
+	length = nr_hp * HPAGE_SIZE;
+
+	setup_events();
+	open_events();
+
+	printf("HugeTLB allocation::::::::\n");
+	htlb = mmap(NULL, length, PROT_READ | PROT_WRITE,
+		    MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE | MAP_HUGETLB,
+		    -1, 0);
+	/* mmap() reports failure with MAP_FAILED, never with NULL */
+	if (htlb == MAP_FAILED) {
+		perror("mmap");
+		exit(-1);
+	}
+
+	prepare_events();
+	memset(htlb, 0, length);
+	display_events();
+
+	printf("THP allocation::::::::\n");
+	ptr = mmap((void *) ADDR_INPUT, length, PROT_READ | PROT_WRITE,
+		   MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
+	if (ptr == MAP_FAILED) {
+		perror("mmap");
+		exit(-1);
+	}
+	/* ADDR_INPUT is only a hint; errno is meaningless if it was ignored */
+	if (ptr != (void *) ADDR_INPUT) {
+		fprintf(stderr, "mmap: mapping not placed at requested address\n");
+		exit(-1);
+	}
+
+	thp_request(ptr, length);
+	prepare_events();
+	memset(ptr, 0, length);
+	display_events();
+
+	printf("SUBPAGE protection::::\n");
+	subpage_prot_change(ptr, length);
+
+	prepare_events();
+	memset(ptr, 0, length);
+	display_events();
+
+	printf("PFN flush::::::::::::\n");
+	dont_need_request(ptr, length);
+
+	prepare_events();
+	memset(ptr, 0, length);
+	display_events();
+
+	munmap(ptr, length);
+	munmap(htlb, length);
+
+	close_events();
+	return 0;
+}
diff --git a/tools/testing/selftests/powerpc/mm/run_mem_perf b/tools/testing/selftests/powerpc/mm/run_mem_perf
new file mode 100755
index 0000000..ee510f5
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/run_mem_perf
@@ -0,0 +1,3 @@
+#!/bin/sh
+# Run mem_perf with one huge page, using a temporary pool of 10 huge pages.
+
+# Remember the current pool size so it can be put back afterwards
+orig_nr_hugepages=$(sysctl -n vm.nr_hugepages)
+
+sysctl vm.nr_hugepages=10
+./mem_perf 1
+ret=$?
+sysctl vm.nr_hugepages="$orig_nr_hugepages"
+
+# Report the test's own status, not that of the final sysctl
+exit $ret
--
2.1.0
^ permalink raw reply related [flat|nested] 2+ messages in thread
end of thread, other threads:[~2016-02-17 7:01 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2016-02-17 7:00 [RFC 1/2] powerpc/mm: Add trace points for various types of hash faults Anshuman Khandual
2016-02-17 7:00 ` [RFC 2/2] selftests/powerpc: Add tests for various hash page fault paths Anshuman Khandual
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).