* [PATCH 1/5] powerpc, perf: Add new BHRB related instructions on POWER8
2013-04-16 12:56 [PATCH 0/5] powerpc, perf: BHRB based branch stack enablement on POWER8 Anshuman Khandual
@ 2013-04-16 12:56 ` Anshuman Khandual
2013-04-16 12:56 ` [PATCH 2/5] powerpc, perf: Add basic assembly code to read BHRB entries " Anshuman Khandual
` (3 subsequent siblings)
4 siblings, 0 replies; 6+ messages in thread
From: Anshuman Khandual @ 2013-04-16 12:56 UTC (permalink / raw)
To: linuxppc-dev, linux-kernel; +Cc: mikey
This patch adds new instructions support for reading various
BHRB entries and also clearing the BHRB buffer.
Signed-off-by: Anshuman Khandual <khandual@linux.vnet.ibm.com>
---
arch/powerpc/include/asm/ppc-opcode.h | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index 8752bc8..93ae5a1 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -82,6 +82,7 @@
#define __REGA0_R31 31
/* sorted alphabetically */
+#define PPC_INST_BHRBE 0x7c00025c
#define PPC_INST_DCBA 0x7c0005ec
#define PPC_INST_DCBA_MASK 0xfc0007fe
#define PPC_INST_DCBAL 0x7c2005ec
@@ -297,6 +298,12 @@
#define PPC_NAP stringify_in_c(.long PPC_INST_NAP)
#define PPC_SLEEP stringify_in_c(.long PPC_INST_SLEEP)
+/* BHRB instructions */
+#define PPC_CLRBHRB stringify_in_c(.long 0x7c00035c)
+#define PPC_MFBHRBE(r, n) stringify_in_c(.long PPC_INST_BHRBE | \
+ __PPC_RS(r) | \
+ (((n) & 0x1f) << 11))
+
/* Transactional memory instructions */
#define TRECHKPT stringify_in_c(.long PPC_INST_TRECHKPT)
#define TRECLAIM(r) stringify_in_c(.long PPC_INST_TRECLAIM \
--
1.7.11.7
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH 2/5] powerpc, perf: Add basic assembly code to read BHRB entries on POWER8
2013-04-16 12:56 [PATCH 0/5] powerpc, perf: BHRB based branch stack enablement on POWER8 Anshuman Khandual
2013-04-16 12:56 ` [PATCH 1/5] powerpc, perf: Add new BHRB related instructions " Anshuman Khandual
@ 2013-04-16 12:56 ` Anshuman Khandual
2013-04-16 12:56 ` [PATCH 3/5] powerpc, perf: Add new BHRB related generic functions, data and flags Anshuman Khandual
` (2 subsequent siblings)
4 siblings, 0 replies; 6+ messages in thread
From: Anshuman Khandual @ 2013-04-16 12:56 UTC (permalink / raw)
To: linuxppc-dev, linux-kernel; +Cc: mikey
Signed-off-by: Anshuman Khandual <khandual@linux.vnet.ibm.com>
---
arch/powerpc/perf/Makefile | 2 +-
arch/powerpc/perf/bhrb.S | 34 ++++++++++++++++++++++++++++++++++
2 files changed, 35 insertions(+), 1 deletion(-)
create mode 100644 arch/powerpc/perf/bhrb.S
diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile
index 472db18..510fae1 100644
--- a/arch/powerpc/perf/Makefile
+++ b/arch/powerpc/perf/Makefile
@@ -2,7 +2,7 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
obj-$(CONFIG_PERF_EVENTS) += callchain.o
-obj-$(CONFIG_PPC_PERF_CTRS) += core-book3s.o
+obj-$(CONFIG_PPC_PERF_CTRS) += core-book3s.o bhrb.o
obj64-$(CONFIG_PPC_PERF_CTRS) += power4-pmu.o ppc970-pmu.o power5-pmu.o \
power5+-pmu.o power6-pmu.o power7-pmu.o \
power8-pmu.o
diff --git a/arch/powerpc/perf/bhrb.S b/arch/powerpc/perf/bhrb.S
new file mode 100644
index 0000000..cffb815
--- /dev/null
+++ b/arch/powerpc/perf/bhrb.S
@@ -0,0 +1,34 @@
+#include <asm/ppc_asm.h>
+#include <asm/ppc-opcode.h>
+
+ .text
+
+.balign 8
+
+/* r3 = n (where n = [0-1023])
+ * The maximum number of BHRB entries supported with PPC_MFBHRBE instruction
+ * is 1024. We have limited number of table entries here as POWER8 implements
+ * 32 BHRB entries.
+ */
+
+/* .global read_bhrb */
+_GLOBAL(read_bhrb)
+ cmpldi r3,1023
+ bgt 1f
+ ld r4,bhrb_table@got(r2)
+ sldi r3,r3,3
+ add r3,r4,r3
+ mtctr r3
+ bctr
+1: li r3,0
+ blr
+
+#define MFBHRB_TABLE1(n) PPC_MFBHRBE(R3,n); blr
+#define MFBHRB_TABLE2(n) MFBHRB_TABLE1(n); MFBHRB_TABLE1(n+1)
+#define MFBHRB_TABLE4(n) MFBHRB_TABLE2(n); MFBHRB_TABLE2(n+2)
+#define MFBHRB_TABLE8(n) MFBHRB_TABLE4(n); MFBHRB_TABLE4(n+4)
+#define MFBHRB_TABLE16(n) MFBHRB_TABLE8(n); MFBHRB_TABLE8(n+8)
+#define MFBHRB_TABLE32(n) MFBHRB_TABLE16(n); MFBHRB_TABLE16(n+16)
+
+bhrb_table:
+ MFBHRB_TABLE32(0)
--
1.7.11.7
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH 3/5] powerpc, perf: Add new BHRB related generic functions, data and flags
2013-04-16 12:56 [PATCH 0/5] powerpc, perf: BHRB based branch stack enablement on POWER8 Anshuman Khandual
2013-04-16 12:56 ` [PATCH 1/5] powerpc, perf: Add new BHRB related instructions " Anshuman Khandual
2013-04-16 12:56 ` [PATCH 2/5] powerpc, perf: Add basic assembly code to read BHRB entries " Anshuman Khandual
@ 2013-04-16 12:56 ` Anshuman Khandual
2013-04-16 12:56 ` [PATCH 4/5] powerpc, perf: Define BHRB generic functions, data and flags for POWER8 Anshuman Khandual
2013-04-16 12:56 ` [PATCH 5/5] powerpc, perf: Enable branch stack sampling framework support with BHRB Anshuman Khandual
4 siblings, 0 replies; 6+ messages in thread
From: Anshuman Khandual @ 2013-04-16 12:56 UTC (permalink / raw)
To: linuxppc-dev, linux-kernel; +Cc: mikey
Signed-off-by: Anshuman Khandual <khandual@linux.vnet.ibm.com>
---
arch/powerpc/include/asm/perf_event_server.h | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h
index 57b42da..3f0c15c 100644
--- a/arch/powerpc/include/asm/perf_event_server.h
+++ b/arch/powerpc/include/asm/perf_event_server.h
@@ -33,6 +33,8 @@ struct power_pmu {
unsigned long *valp);
int (*get_alternatives)(u64 event_id, unsigned int flags,
u64 alt[]);
+ u64 (*bhrb_filter_map)(u64 branch_sample_type);
+ void (*config_bhrb)(u64 pmu_bhrb_filter);
void (*disable_pmc)(unsigned int pmc, unsigned long mmcr[]);
int (*limited_pmc_event)(u64 event_id);
u32 flags;
@@ -42,6 +44,9 @@ struct power_pmu {
int (*cache_events)[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX];
+
+ /* BHRB entries in the PMU */
+ int bhrb_nr;
};
/*
@@ -54,6 +59,7 @@ struct power_pmu {
#define PPMU_SIAR_VALID 0x00000010 /* Processor has SIAR Valid bit */
#define PPMU_HAS_SSLOT 0x00000020 /* Has sampled slot in MMCRA */
#define PPMU_HAS_SIER 0x00000040 /* Has SIER */
+#define PPMU_BHRB 0x00000080 /* has BHRB feature enabled */
/*
* Values for flags to get_alternatives()
--
1.7.11.7
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH 4/5] powerpc, perf: Define BHRB generic functions, data and flags for POWER8
2013-04-16 12:56 [PATCH 0/5] powerpc, perf: BHRB based branch stack enablement on POWER8 Anshuman Khandual
` (2 preceding siblings ...)
2013-04-16 12:56 ` [PATCH 3/5] powerpc, perf: Add new BHRB related generic functions, data and flags Anshuman Khandual
@ 2013-04-16 12:56 ` Anshuman Khandual
2013-04-16 12:56 ` [PATCH 5/5] powerpc, perf: Enable branch stack sampling framework support with BHRB Anshuman Khandual
4 siblings, 0 replies; 6+ messages in thread
From: Anshuman Khandual @ 2013-04-16 12:56 UTC (permalink / raw)
To: linuxppc-dev, linux-kernel; +Cc: mikey
Signed-off-by: Anshuman Khandual <khandual@linux.vnet.ibm.com>
---
arch/powerpc/perf/power8-pmu.c | 57 +++++++++++++++++++++++++++++++++++++++++-
1 file changed, 56 insertions(+), 1 deletion(-)
diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c
index 106ae0b..153408c 100644
--- a/arch/powerpc/perf/power8-pmu.c
+++ b/arch/powerpc/perf/power8-pmu.c
@@ -109,6 +109,16 @@
#define EVENT_IS_MARKED (EVENT_MARKED_MASK << EVENT_MARKED_SHIFT)
#define EVENT_PSEL_MASK 0xff /* PMCxSEL value */
+/* MMCRA IFM bits - POWER8 */
+#define POWER8_MMCRA_IFM1 0x0000000040000000UL
+#define POWER8_MMCRA_IFM2 0x0000000080000000UL
+#define POWER8_MMCRA_IFM3 0x00000000C0000000UL
+
+#define ONLY_PLM \
+ (PERF_SAMPLE_BRANCH_USER |\
+ PERF_SAMPLE_BRANCH_KERNEL |\
+ PERF_SAMPLE_BRANCH_HV)
+
/*
* Layout of constraint bits:
*
@@ -428,6 +438,48 @@ static int power8_generic_events[] = {
[PERF_COUNT_HW_BRANCH_MISSES] = PM_BR_MPRED_CMPL,
};
+static u64 power8_bhrb_filter_map(u64 branch_sample_type)
+{
+ u64 pmu_bhrb_filter = 0;
+ u64 br_privilege = branch_sample_type & ONLY_PLM;
+
+ /* BHRB and regular PMU events share the same prvillege state
+ * filter configuration. BHRB is always recorded along with a
+ * regular PMU event. So privilege state filter criteria for BHRB
+ * and the companion PMU events has to be the same. As a default
+ * "perf record" tool sets all privillege bits ON when no filter
+ * criteria is provided in the command line. So as along as all
+ * privillege bits are ON or they are OFF, we are good to go.
+ */
+ if ((br_privilege != 7) && (br_privilege != 0))
+ return -1;
+
+ /* No branch filter requested */
+ if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY)
+ return pmu_bhrb_filter;
+
+ /* Invalid branch filter options - HW does not support */
+ if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
+ return -1;
+
+ if (branch_sample_type & PERF_SAMPLE_BRANCH_IND_CALL)
+ return -1;
+
+ if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY_CALL) {
+ pmu_bhrb_filter |= POWER8_MMCRA_IFM1;
+ return pmu_bhrb_filter;
+ }
+
+ /* Every thing else is unsupported */
+ return -1;
+}
+
+static void power8_config_bhrb(u64 pmu_bhrb_filter)
+{
+ /* Enable BHRB filter in PMU */
+ mtspr(SPRN_MMCRA, (mfspr(SPRN_MMCRA) | pmu_bhrb_filter));
+}
+
static struct power_pmu power8_pmu = {
.name = "POWER8",
.n_counter = 6,
@@ -435,12 +487,15 @@ static struct power_pmu power8_pmu = {
.add_fields = POWER8_ADD_FIELDS,
.test_adder = POWER8_TEST_ADDER,
.compute_mmcr = power8_compute_mmcr,
+ .config_bhrb = power8_config_bhrb,
+ .bhrb_filter_map = power8_bhrb_filter_map,
.get_constraint = power8_get_constraint,
.disable_pmc = power8_disable_pmc,
- .flags = PPMU_HAS_SSLOT | PPMU_HAS_SIER,
+ .flags = PPMU_HAS_SSLOT | PPMU_HAS_SIER | PPMU_BHRB,
.n_generic = ARRAY_SIZE(power8_generic_events),
.generic_events = power8_generic_events,
.attr_groups = power8_pmu_attr_groups,
+ .bhrb_nr = 32,
};
static int __init init_power8_pmu(void)
--
1.7.11.7
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH 5/5] powerpc, perf: Enable branch stack sampling framework support with BHRB
2013-04-16 12:56 [PATCH 0/5] powerpc, perf: BHRB based branch stack enablement on POWER8 Anshuman Khandual
` (3 preceding siblings ...)
2013-04-16 12:56 ` [PATCH 4/5] powerpc, perf: Define BHRB generic functions, data and flags for POWER8 Anshuman Khandual
@ 2013-04-16 12:56 ` Anshuman Khandual
4 siblings, 0 replies; 6+ messages in thread
From: Anshuman Khandual @ 2013-04-16 12:56 UTC (permalink / raw)
To: linuxppc-dev, linux-kernel; +Cc: mikey
This patch provides basic enablement for perf branch stack sampling framework
on POWER8 processor with a new PMU feature called BHRB (Branch History Rolling
Buffer).
Signed-off-by: Anshuman Khandual <khandual@linux.vnet.ibm.com>
---
arch/powerpc/perf/core-book3s.c | 96 +++++++++++++++++++++++++++++++++++--
arch/powerpc/perf/perf_event_bhrb.c | 75 +++++++++++++++++++++++++++++
2 files changed, 168 insertions(+), 3 deletions(-)
create mode 100644 arch/powerpc/perf/perf_event_bhrb.c
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 4ac6e64..f4d1347 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -19,6 +19,8 @@
#include <asm/firmware.h>
#include <asm/ptrace.h>
+#define BHRB_MAX_ENTRIES 32
+
struct cpu_hw_events {
int n_events;
int n_percpu;
@@ -38,11 +40,21 @@ struct cpu_hw_events {
unsigned int group_flag;
int n_txn_start;
+
+ /* BHRB bits */
+ u64 bhrb_filter; /* BHRB HW branch filter */
+ int bhrb_users;
+ void *bhrb_context;
+ struct perf_branch_stack bhrb_stack;
+ struct perf_branch_entry bhrb_entries[BHRB_MAX_ENTRIES];
};
+
DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
struct power_pmu *ppmu;
+#include "perf_event_bhrb.c"
+
/*
* Normally, to ignore kernel events we set the FCS (freeze counters
* in supervisor mode) bit in MMCR0, but if the kernel runs with the
@@ -858,6 +870,9 @@ static void power_pmu_enable(struct pmu *pmu)
}
out:
+ if (cpuhw->bhrb_users)
+ ppmu->config_bhrb(cpuhw->bhrb_filter);
+
local_irq_restore(flags);
}
@@ -888,6 +903,47 @@ static int collect_events(struct perf_event *group, int max_count,
return n;
}
+/* Reset all possible BHRB entries */
+static void power_pmu_bhrb_reset(void)
+{
+ asm volatile(PPC_CLRBHRB);
+}
+
+void power_pmu_bhrb_enable(struct perf_event *event)
+{
+ struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
+
+ if (!ppmu->bhrb_nr)
+ return;
+
+ /* Clear BHRB if we changed task context to avoid data leaks */
+ if (event->ctx->task && cpuhw->bhrb_context != event->ctx) {
+ power_pmu_bhrb_reset();
+ cpuhw->bhrb_context = event->ctx;
+ }
+ cpuhw->bhrb_users++;
+}
+
+void power_pmu_bhrb_disable(struct perf_event *event)
+{
+ struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
+
+ if (!ppmu->bhrb_nr)
+ return;
+
+ cpuhw->bhrb_users--;
+ WARN_ON_ONCE(cpuhw->bhrb_users < 0);
+
+ if (!cpuhw->disabled && !cpuhw->bhrb_users) {
+ /* BHRB cannot be turned off when other
+ * events are active on the PMU.
+ */
+
+ /* avoid stale pointer */
+ cpuhw->bhrb_context = NULL;
+ }
+}
+
/*
* Add a event to the PMU.
* If all events are not already frozen, then we disable and
@@ -947,6 +1003,9 @@ nocheck:
ret = 0;
out:
+ if (has_branch_stack(event))
+ power_pmu_bhrb_enable(event);
+
perf_pmu_enable(event->pmu);
local_irq_restore(flags);
return ret;
@@ -999,6 +1058,9 @@ static void power_pmu_del(struct perf_event *event, int ef_flags)
cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE);
}
+ if (has_branch_stack(event))
+ power_pmu_bhrb_disable(event);
+
perf_pmu_enable(event->pmu);
local_irq_restore(flags);
}
@@ -1117,6 +1179,15 @@ int power_pmu_commit_txn(struct pmu *pmu)
return 0;
}
+/* Called from ctxsw to prevent one process's branch entries to
+ * mingle with the other process's entries during context switch.
+ */
+void power_pmu_flush_branch_stack(void)
+{
+ if (ppmu->bhrb_nr)
+ power_pmu_bhrb_reset();
+}
+
/*
* Return 1 if we might be able to put event on a limited PMC,
* or 0 if not.
@@ -1231,9 +1302,11 @@ static int power_pmu_event_init(struct perf_event *event)
if (!ppmu)
return -ENOENT;
- /* does not support taken branch sampling */
- if (has_branch_stack(event))
- return -EOPNOTSUPP;
+ if (has_branch_stack(event)) {
+ /* PMU has BHRB enabled */
+ if (!(ppmu->flags & PPMU_BHRB))
+ return -EOPNOTSUPP;
+ }
switch (event->attr.type) {
case PERF_TYPE_HARDWARE:
@@ -1314,6 +1387,15 @@ static int power_pmu_event_init(struct perf_event *event)
cpuhw = &get_cpu_var(cpu_hw_events);
err = power_check_constraints(cpuhw, events, cflags, n + 1);
+
+ if (has_branch_stack(event)) {
+ cpuhw->bhrb_filter = ppmu->bhrb_filter_map(
+ event->attr.branch_sample_type);
+
+ if(cpuhw->bhrb_filter == -1)
+ return -EOPNOTSUPP;
+ }
+
put_cpu_var(cpu_hw_events);
if (err)
return -EINVAL;
@@ -1372,6 +1454,7 @@ struct pmu power_pmu = {
.cancel_txn = power_pmu_cancel_txn,
.commit_txn = power_pmu_commit_txn,
.event_idx = power_pmu_event_idx,
+ .flush_branch_stack = power_pmu_flush_branch_stack,
};
@@ -1433,6 +1516,13 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
if (event->attr.sample_type & PERF_SAMPLE_ADDR)
perf_get_data_addr(regs, &data.addr);
+ if (event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK) {
+ struct cpu_hw_events *cpuhw;
+ cpuhw = &__get_cpu_var(cpu_hw_events);
+ power_pmu_bhrb_read(cpuhw);
+ data.br_stack = &cpuhw->bhrb_stack;
+ }
+
if (perf_event_overflow(event, &data, regs))
power_pmu_stop(event, 0);
}
diff --git a/arch/powerpc/perf/perf_event_bhrb.c b/arch/powerpc/perf/perf_event_bhrb.c
new file mode 100644
index 0000000..5beddef
--- /dev/null
+++ b/arch/powerpc/perf/perf_event_bhrb.c
@@ -0,0 +1,75 @@
+#define BHRB_TARGET 0x0000000000000002
+#define BHRB_PREDICTION 0x0000000000000001
+#define BHRB_EA 0xFFFFFFFFFFFFFFFC
+
+extern unsigned long int read_bhrb(int n);
+
+void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw)
+{
+ u64 val;
+ u64 addr;
+ int r_index, u_index, target, pred;
+
+ r_index = 0;
+ u_index = 0;
+ while (r_index < ppmu->bhrb_nr) {
+ /* Assembly read function */
+ val = read_bhrb(r_index);
+
+ /* Terminal marker: End of valid BHRB entries */
+ if (val == 0) {
+ break;
+ } else {
+ /* BHRB field break up */
+ addr = val & BHRB_EA;
+ pred = val & BHRB_PREDICTION;
+ target = val & BHRB_TARGET;
+
+ /* Probable Missed entry: Not applicable for POWER8 */
+ if ((addr == 0) && (target == 0) && (pred == 1)) {
+ r_index++;
+ continue;
+ }
+
+ /* Real Missed entry: Power8 based missed entry */
+ if ((addr == 0) && (target == 1) && (pred == 1)) {
+ r_index++;
+ continue;
+ }
+
+ /* Reserved condition: Not a valid entry */
+ if ((addr == 0) && (target == 1) && (pred == 0)) {
+ r_index++;
+ continue;
+ }
+
+ /* Is a target address */
+ if (val & BHRB_TARGET) {
+ /* First address cannot be a target address */
+ if (r_index == 0) {
+ r_index++;
+ continue;
+ }
+
+ /* Update target address for the previous entry */
+ cpuhw->bhrb_entries[u_index - 1].to = addr;
+ cpuhw->bhrb_entries[u_index - 1].mispred = pred;
+ cpuhw->bhrb_entries[u_index - 1].predicted = ~pred;
+
+ /* Dont increment u_index */
+ r_index++;
+ } else {
+ /* Update address, flags for current entry */
+ cpuhw->bhrb_entries[u_index].from = addr;
+ cpuhw->bhrb_entries[u_index].mispred = pred;
+ cpuhw->bhrb_entries[u_index].predicted = ~pred;
+
+ /* Successfully popullated one entry */
+ u_index++;
+ r_index++;
+ }
+ }
+ }
+ cpuhw->bhrb_stack.nr = u_index;
+ return;
+}
--
1.7.11.7
^ permalink raw reply related [flat|nested] 6+ messages in thread