* [PATCH 3/7] perf, x86: Implement IBS event configuration
2011-07-28 13:46 [PATCH 0/7] perf, x86: Implement " Robert Richter
@ 2011-07-28 13:46 ` Robert Richter
2011-08-02 11:35 ` Peter Zijlstra
0 siblings, 1 reply; 14+ messages in thread
From: Robert Richter @ 2011-07-28 13:46 UTC
To: Peter Zijlstra
Cc: Ingo Molnar, Arnaldo Carvalho de Melo, LKML, Robert Richter
This patch implements perf configuration for AMD IBS. The IBS pmu is
selected using the type attribute in sysfs. There are two types of ibs
pmus, one for instruction fetch (IBS_FETCH) and one for instruction
execution (IBS_OP):
/sys/bus/event_source/devices/ibs_fetch/type
/sys/bus/event_source/devices/ibs_op/type
Except for the sample period, IBS can only be set up with raw config
values and raw data samples. The event attributes for the syscall
should be programmed like this (IBS_FETCH):
type = get_pmu_type("/sys/bus/event_source/devices/ibs_fetch/type");
memset(&attr, 0, sizeof(attr));
attr.type = type;
attr.sample_type = PERF_SAMPLE_CPU | PERF_SAMPLE_RAW;
attr.config = IBS_FETCH_CONFIG_DEFAULT;
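Note: get_pmu_type() is not spelled out here. A minimal sketch that
parses the dynamic pmu type from the sysfs file (needs <stdio.h>)
could look like this:

static int get_pmu_type(const char *path)
{
	FILE *file = fopen(path, "r");
	int type = -1;

	if (!file)
		return -1;
	if (fscanf(file, "%d", &type) != 1)
		type = -1;
	fclose(file);
	return type;
}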
This implementation does not yet support 64 bit counters. It is
limited to the hardware counter bit width, which is 20 bits. 64 bit
support can be added later.
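For reference, the 20 bit limit is the 16 bit max-count field
(IBS_FETCH_MAX_CNT/IBS_OP_MAX_CNT, 0xFFFF) combined with the implicit
shift of 4 when it is programmed:

	max period = 0xFFFF << 4 = 0xFFFF0 = 1048560 events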
Signed-off-by: Robert Richter <robert.richter@amd.com>
---
arch/x86/kernel/cpu/perf_event_amd_ibs.c | 99 ++++++++++++++++++++++++++----
1 files changed, 87 insertions(+), 12 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
index cae9528..bd77209 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c
+++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
@@ -12,34 +12,108 @@
#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
-static struct pmu perf_ibs;
+#define IBS_FETCH_CONFIG_MASK (IBS_FETCH_RAND_EN | IBS_FETCH_MAX_CNT)
+#define IBS_OP_CONFIG_MASK IBS_OP_MAX_CNT
+
+struct perf_ibs {
+ struct pmu pmu;
+ unsigned int msr;
+ u64 config_mask;
+ u64 cnt_mask;
+ u64 enable_mask;
+};
+
+static struct perf_ibs perf_ibs_fetch;
+static struct perf_ibs perf_ibs_op;
+
+static struct perf_ibs *get_ibs_pmu(int type)
+{
+ if (perf_ibs_fetch.pmu.type == type)
+ return &perf_ibs_fetch;
+ if (perf_ibs_op.pmu.type == type)
+ return &perf_ibs_op;
+ return NULL;
+}
static int perf_ibs_init(struct perf_event *event)
{
- if (perf_ibs.type != event->attr.type)
+ struct hw_perf_event *hwc = &event->hw;
+ struct perf_ibs *perf_ibs;
+ u64 max_cnt, config;
+
+ perf_ibs = get_ibs_pmu(event->attr.type);
+ if (!perf_ibs)
return -ENOENT;
+
+ config = event->attr.config;
+ if (config & ~perf_ibs->config_mask)
+ return -EINVAL;
+
+ if (hwc->sample_period) {
+ if (config & perf_ibs->cnt_mask)
+ /* raw max_cnt may not be set */
+ return -EINVAL;
+ if (hwc->sample_period & 0x0f)
+ /* lower 4 bits can not be set in ibs max cnt */
+ return -EINVAL;
+ max_cnt = hwc->sample_period >> 4;
+ if (max_cnt & ~perf_ibs->cnt_mask)
+ /* out of range */
+ return -EINVAL;
+ config |= max_cnt;
+ } else {
+ max_cnt = config & perf_ibs->cnt_mask;
+ event->attr.sample_period = max_cnt << 4;
+ hwc->sample_period = event->attr.sample_period;
+ }
+
+ if (!max_cnt)
+ return -EINVAL;
+
+ hwc->config_base = perf_ibs->msr;
+ hwc->config = config;
+
pr_info("Found event %p (config=%016llx) for pmu %s (type=%d) on cpu %d\n",
- event, event->attr.config, perf_ibs.name, event->attr.type, event->oncpu);
+ event, event->attr.config, event->pmu->name, event->attr.type, event->oncpu);
+
return 0;
}
static int perf_ibs_add(struct perf_event *event, int flags)
{
- pr_info("Adding event %p (config=%016llx) to pmu %s (type=%d) on cpu %d\n",
- event, event->attr.config, perf_ibs.name, event->attr.type, event->oncpu);
+ pr_info("Adding event %p (config=%016llx) for pmu %p (name='%s', type=%d) on cpu %d\n",
+ event, event->attr.config, event->pmu, event->pmu->name, event->attr.type, event->oncpu);
return 0;
}
static void perf_ibs_del(struct perf_event *event, int flags)
{
- pr_info("Removing event %p (config=%016llx) to pmu %s (type=%d) on cpu %d\n",
- event, event->attr.config, perf_ibs.name, event->attr.type, event->oncpu);
+ pr_info("Removing event %p (config=%016llx) for pmu %p (name='%s', type=%d) on cpu %d\n",
+ event, event->attr.config, event->pmu, event->pmu->name, event->attr.type, event->oncpu);
}
-static struct pmu perf_ibs = {
- .event_init= perf_ibs_init,
- .add= perf_ibs_add,
- .del= perf_ibs_del,
+static struct perf_ibs perf_ibs_fetch = {
+ .pmu = {
+ .event_init = perf_ibs_init,
+ .add = perf_ibs_add,
+ .del = perf_ibs_del,
+ },
+ .msr = MSR_AMD64_IBSFETCHCTL,
+ .config_mask = IBS_FETCH_CONFIG_MASK,
+ .cnt_mask = IBS_FETCH_MAX_CNT,
+ .enable_mask = IBS_FETCH_ENABLE,
+};
+
+static struct perf_ibs perf_ibs_op = {
+ .pmu = {
+ .event_init = perf_ibs_init,
+ .add = perf_ibs_add,
+ .del = perf_ibs_del,
+ },
+ .msr = MSR_AMD64_IBSOPCTL,
+ .config_mask = IBS_OP_CONFIG_MASK,
+ .cnt_mask = IBS_OP_MAX_CNT,
+ .enable_mask = IBS_OP_ENABLE,
};
static __init int perf_event_ibs_init(void)
@@ -50,7 +124,8 @@ static __init int perf_event_ibs_init(void)
if (!caps)
return -ENODEV; /* ibs not supported by the cpu */
- perf_pmu_register(&perf_ibs, "ibs", -1);
+ perf_pmu_register(&perf_ibs_fetch.pmu, "ibs_fetch", -1);
+ perf_pmu_register(&perf_ibs_op.pmu, "ibs_op", -1);
printk(KERN_INFO "perf: AMD IBS detected (0x%08x)\n", caps);
return 0;
--
1.7.5.3
* Re: [PATCH 3/7] perf, x86: Implement IBS event configuration
2011-07-28 13:46 ` [PATCH 3/7] perf, x86: Implement IBS event configuration Robert Richter
@ 2011-08-02 11:35 ` Peter Zijlstra
2011-08-12 19:51 ` Robert Richter
0 siblings, 1 reply; 14+ messages in thread
From: Peter Zijlstra @ 2011-08-02 11:35 UTC
To: Robert Richter; +Cc: Ingo Molnar, Arnaldo Carvalho de Melo, LKML
On Thu, 2011-07-28 at 15:46 +0200, Robert Richter wrote:
> +static struct perf_ibs perf_ibs_fetch = {
> + .pmu = {
> + .event_init = perf_ibs_init,
> + .add = perf_ibs_add,
> + .del = perf_ibs_del,
> + },
> + .msr = MSR_AMD64_IBSFETCHCTL,
> + .config_mask = IBS_FETCH_CONFIG_MASK,
> + .cnt_mask = IBS_FETCH_MAX_CNT,
> + .enable_mask = IBS_FETCH_ENABLE,
> +};
> +
> +static struct perf_ibs perf_ibs_op = {
> + .pmu = {
> + .event_init = perf_ibs_init,
> + .add = perf_ibs_add,
> + .del = perf_ibs_del,
> + },
> + .msr = MSR_AMD64_IBSOPCTL,
> + .config_mask = IBS_OP_CONFIG_MASK,
> + .cnt_mask = IBS_OP_MAX_CNT,
> + .enable_mask = IBS_OP_ENABLE,
> };
>
Is it intentional that you map the IBS things to the hw task_context?
* Re: [PATCH 3/7] perf, x86: Implement IBS event configuration
2011-08-02 11:35 ` Peter Zijlstra
@ 2011-08-12 19:51 ` Robert Richter
0 siblings, 0 replies; 14+ messages in thread
From: Robert Richter @ 2011-08-12 19:51 UTC
To: Peter Zijlstra; +Cc: Ingo Molnar, Arnaldo Carvalho de Melo, LKML
On 02.08.11 07:35:33, Peter Zijlstra wrote:
> On Thu, 2011-07-28 at 15:46 +0200, Robert Richter wrote:
> > +static struct perf_ibs perf_ibs_fetch = {
> > + .pmu = {
> > + .event_init = perf_ibs_init,
> > + .add = perf_ibs_add,
> > + .del = perf_ibs_del,
> > + },
> > + .msr = MSR_AMD64_IBSFETCHCTL,
> > + .config_mask = IBS_FETCH_CONFIG_MASK,
> > + .cnt_mask = IBS_FETCH_MAX_CNT,
> > + .enable_mask = IBS_FETCH_ENABLE,
> > +};
> > +
> > +static struct perf_ibs perf_ibs_op = {
> > + .pmu = {
> > + .event_init = perf_ibs_init,
> > + .add = perf_ibs_add,
> > + .del = perf_ibs_del,
> > + },
> > + .msr = MSR_AMD64_IBSOPCTL,
> > + .config_mask = IBS_OP_CONFIG_MASK,
> > + .cnt_mask = IBS_OP_MAX_CNT,
> > + .enable_mask = IBS_OP_ENABLE,
> > };
> >
>
> Is it intentional that you map the IBS things to the hw task_context?
I didn't get you here.
-Robert
--
Advanced Micro Devices, Inc.
Operating System Research Center
* [PATCH 0/7 -v2] perf, x86: Implement AMD IBS
@ 2011-09-07 16:36 Robert Richter
2011-09-07 16:36 ` [PATCH 1/7] perf, x86: share IBS macros between perf and oprofile Robert Richter
` (6 more replies)
0 siblings, 7 replies; 14+ messages in thread
From: Robert Richter @ 2011-09-07 16:36 UTC
To: Peter Zijlstra; +Cc: Ingo Molnar, Stephane Eranian, LKML
This patch set adds support for AMD IBS to perf. It is a new
implementation and unrelated to my previous postings last year. The
main differences are:
* separate and independent from x86 perfctrs, IBS could be used
without the x86 pmu,
* using dynamic pmu allocation, userspace uses sysfs to select the pmu,
* support for 64 bit counters,
* libperf based example code,
* shared IBS initialization code for perf and oprofile.
The approach is still to collect raw sample data, which should be the
most important use case for application developers. The data format is
the same as described in the IBS register specification.
Future work could be:
* better integration into the perf tool, use IBS for generic events
where possible,
* support of the precise event sampling perf i/f,
* implementation of extended IBS features (e.g. ext. counter width),
* support of counting (perf stat),
* in-kernel IBS event parsing,
* IBS tracepoint support.
Changes for V2:
* Remove printks in pmu function stubs.
* Modify perf_event_ibs_init() to use ibs_caps directly.
* Added bit mask for msr offsets.
* Added caps field to raw sample format.
* Added caps check for IBS_OP_CUR_CNT emulation.
* Updated include header files to fix build errors on some distros.
* Note: I kept the example code for reference; the patch must not be
applied. I will come up with a solution that integrates IBS into
perf-report.
-Robert
* [PATCH 1/7] perf, x86: share IBS macros between perf and oprofile
2011-09-07 16:36 [PATCH 0/7 -v2] perf, x86: Implement AMD IBS Robert Richter
@ 2011-09-07 16:36 ` Robert Richter
2011-09-07 16:36 ` [PATCH 2/7] perf, x86: Implement IBS initialization Robert Richter
` (5 subsequent siblings)
6 siblings, 0 replies; 14+ messages in thread
From: Robert Richter @ 2011-09-07 16:36 UTC
To: Peter Zijlstra; +Cc: Ingo Molnar, Stephane Eranian, LKML, Robert Richter
Move the IBS macros from oprofile to <asm/perf_event.h> to make them
available to perf. No additional changes.
Signed-off-by: Robert Richter <robert.richter@amd.com>
---
arch/x86/include/asm/perf_event.h | 38 +++++++++++++++++++++++++++++++--
arch/x86/kernel/cpu/perf_event_amd.c | 4 +-
arch/x86/oprofile/op_model_amd.c | 37 ++------------------------------
3 files changed, 40 insertions(+), 39 deletions(-)
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 094fb30..bc801ac 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -43,14 +43,17 @@
#define AMD64_RAW_EVENT_MASK \
(X86_RAW_EVENT_MASK | \
AMD64_EVENTSEL_EVENT)
+#define AMD64_NUM_COUNTERS 4
+#define AMD64_NUM_COUNTERS_F15H 6
+#define AMD64_NUM_COUNTERS_MAX AMD64_NUM_COUNTERS_F15H
-#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c
+#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c
#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8)
-#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX 0
+#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX 0
#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT \
(1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX))
-#define ARCH_PERFMON_BRANCH_MISSES_RETIRED 6
+#define ARCH_PERFMON_BRANCH_MISSES_RETIRED 6
/*
* Intel "Architectural Performance Monitoring" CPUID
@@ -110,6 +113,35 @@ union cpuid10_edx {
*/
#define X86_PMC_IDX_FIXED_BTS (X86_PMC_IDX_FIXED + 16)
+/*
+ * IBS cpuid feature detection
+ */
+
+#define IBS_CPUID_FEATURES 0x8000001b
+
+/*
+ * Same bit mask as for IBS cpuid feature flags (Fn8000_001B_EAX), but
+ * bit 0 is used to indicate the existence of IBS.
+ */
+#define IBS_CAPS_AVAIL (1U<<0)
+#define IBS_CAPS_FETCHSAM (1U<<1)
+#define IBS_CAPS_OPSAM (1U<<2)
+#define IBS_CAPS_RDWROPCNT (1U<<3)
+#define IBS_CAPS_OPCNT (1U<<4)
+#define IBS_CAPS_BRNTRGT (1U<<5)
+#define IBS_CAPS_OPCNTEXT (1U<<6)
+
+#define IBS_CAPS_DEFAULT (IBS_CAPS_AVAIL \
+ | IBS_CAPS_FETCHSAM \
+ | IBS_CAPS_OPSAM)
+
+/*
+ * IBS APIC setup
+ */
+#define IBSCTL 0x1cc
+#define IBSCTL_LVT_OFFSET_VALID (1ULL<<8)
+#define IBSCTL_LVT_OFFSET_MASK 0x0F
+
/* IbsFetchCtl bits/masks */
#define IBS_FETCH_RAND_EN (1ULL<<57)
#define IBS_FETCH_VAL (1ULL<<49)
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index ee9436c..21c7028 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -392,7 +392,7 @@ static __initconst const struct x86_pmu amd_pmu = {
.perfctr = MSR_K7_PERFCTR0,
.event_map = amd_pmu_event_map,
.max_events = ARRAY_SIZE(amd_perfmon_event_map),
- .num_counters = 4,
+ .num_counters = AMD64_NUM_COUNTERS,
.cntval_bits = 48,
.cntval_mask = (1ULL << 48) - 1,
.apic = 1,
@@ -556,7 +556,7 @@ static __initconst const struct x86_pmu amd_pmu_f15h = {
.perfctr = MSR_F15H_PERF_CTR,
.event_map = amd_pmu_event_map,
.max_events = ARRAY_SIZE(amd_perfmon_event_map),
- .num_counters = 6,
+ .num_counters = AMD64_NUM_COUNTERS_F15H,
.cntval_bits = 48,
.cntval_mask = (1ULL << 48) - 1,
.apic = 1,
diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index 9cbb710..e947e5c 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -29,8 +29,6 @@
#include "op_x86_model.h"
#include "op_counter.h"
-#define NUM_COUNTERS 4
-#define NUM_COUNTERS_F15H 6
#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
#define NUM_VIRT_COUNTERS 32
#else
@@ -70,35 +68,6 @@ static struct ibs_config ibs_config;
static struct ibs_state ibs_state;
/*
- * IBS cpuid feature detection
- */
-
-#define IBS_CPUID_FEATURES 0x8000001b
-
-/*
- * Same bit mask as for IBS cpuid feature flags (Fn8000_001B_EAX), but
- * bit 0 is used to indicate the existence of IBS.
- */
-#define IBS_CAPS_AVAIL (1U<<0)
-#define IBS_CAPS_FETCHSAM (1U<<1)
-#define IBS_CAPS_OPSAM (1U<<2)
-#define IBS_CAPS_RDWROPCNT (1U<<3)
-#define IBS_CAPS_OPCNT (1U<<4)
-#define IBS_CAPS_BRNTRGT (1U<<5)
-#define IBS_CAPS_OPCNTEXT (1U<<6)
-
-#define IBS_CAPS_DEFAULT (IBS_CAPS_AVAIL \
- | IBS_CAPS_FETCHSAM \
- | IBS_CAPS_OPSAM)
-
-/*
- * IBS APIC setup
- */
-#define IBSCTL 0x1cc
-#define IBSCTL_LVT_OFFSET_VALID (1ULL<<8)
-#define IBSCTL_LVT_OFFSET_MASK 0x0F
-
-/*
* IBS randomization macros
*/
#define IBS_RANDOM_BITS 12
@@ -439,7 +408,7 @@ static int op_amd_fill_in_addresses(struct op_msrs * const msrs)
goto fail;
}
/* both registers must be reserved */
- if (num_counters == NUM_COUNTERS_F15H) {
+ if (num_counters == AMD64_NUM_COUNTERS_F15H) {
msrs->counters[i].addr = MSR_F15H_PERF_CTR + (i << 1);
msrs->controls[i].addr = MSR_F15H_PERF_CTL + (i << 1);
} else {
@@ -741,9 +710,9 @@ static int op_amd_init(struct oprofile_operations *ops)
ops->create_files = setup_ibs_files;
if (boot_cpu_data.x86 == 0x15) {
- num_counters = NUM_COUNTERS_F15H;
+ num_counters = AMD64_NUM_COUNTERS_F15H;
} else {
- num_counters = NUM_COUNTERS;
+ num_counters = AMD64_NUM_COUNTERS;
}
op_amd_spec.num_counters = num_counters;
--
1.7.6.1
* [PATCH 2/7] perf, x86: Implement IBS initialization
2011-09-07 16:36 [PATCH 0/7 -v2] perf, x86: Implement AMD IBS Robert Richter
2011-09-07 16:36 ` [PATCH 1/7] perf, x86: share IBS macros between perf and oprofile Robert Richter
@ 2011-09-07 16:36 ` Robert Richter
2011-09-07 16:36 ` [PATCH 3/7] perf, x86: Implement IBS event configuration Robert Richter
` (4 subsequent siblings)
6 siblings, 0 replies; 14+ messages in thread
From: Robert Richter @ 2011-09-07 16:36 UTC
To: Peter Zijlstra; +Cc: Ingo Molnar, Stephane Eranian, LKML, Robert Richter
This patch implements IBS feature detection and initialization. The
code is shared between perf and oprofile. If ibs is available on the
system, a pmu for perf is set up.
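For illustration, the same availability check can be done from user
space with the cpuid instruction. A sketch using gcc's <cpuid.h>;
unlike the kernel code it does not fall back to IBS_CAPS_DEFAULT on
cpus without the feature leaf:

#include <stdio.h>
#include <cpuid.h>

#define IBS_CPUID_FEATURES	0x8000001b
#define IBS_CAPS_AVAIL		(1U << 0)

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	/* __get_cpuid() verifies the max supported leaf first */
	if (!__get_cpuid(IBS_CPUID_FEATURES, &eax, &ebx, &ecx, &edx) ||
	    !(eax & IBS_CAPS_AVAIL)) {
		printf("IBS not available\n");
		return 1;
	}
	printf("IBS caps: 0x%08x\n", eax);
	return 0;
}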
V2:
* Remove printks in pmu function stubs.
* Modify perf_event_ibs_init() to use ibs_caps directly.
Signed-off-by: Robert Richter <robert.richter@amd.com>
---
arch/x86/include/asm/perf_event.h | 2 +
arch/x86/kernel/cpu/Makefile | 2 +-
arch/x86/kernel/cpu/perf_event_amd_ibs.c | 292 ++++++++++++++++++++++++++++++
arch/x86/oprofile/nmi_int.c | 2 -
arch/x86/oprofile/op_model_amd.c | 197 --------------------
arch/x86/oprofile/op_x86_model.h | 1 -
6 files changed, 295 insertions(+), 201 deletions(-)
create mode 100644 arch/x86/kernel/cpu/perf_event_amd_ibs.c
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index bc801ac..e7d2f15 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -156,6 +156,8 @@ union cpuid10_edx {
#define IBS_OP_MAX_CNT 0x0000FFFFULL
#define IBS_OP_MAX_CNT_EXT 0x007FFFFFULL /* not a register bit mask */
+extern u32 get_ibs_caps(void);
+
#ifdef CONFIG_PERF_EVENTS
extern void perf_events_lapic_init(void);
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 6042981..ab70cd1 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -31,7 +31,7 @@ obj-$(CONFIG_PERF_EVENTS) += perf_event.o
obj-$(CONFIG_X86_MCE) += mcheck/
obj-$(CONFIG_MTRR) += mtrr/
-obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o
+obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o perf_event_amd_ibs.o
quiet_cmd_mkcapflags = MKCAP $@
cmd_mkcapflags = $(PERL) $(srctree)/$(src)/mkcapflags.pl $< $@
diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
new file mode 100644
index 0000000..1ae5154
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
@@ -0,0 +1,292 @@
+/*
+ * Performance events - AMD IBS
+ *
+ * Copyright (C) 2011 Advanced Micro Devices, Inc., Robert Richter
+ *
+ * For licencing details see kernel-base/COPYING
+ */
+
+#include <linux/perf_event.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
+static u32 ibs_caps;
+
+#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
+
+static struct pmu perf_ibs;
+
+static int perf_ibs_init(struct perf_event *event)
+{
+ if (perf_ibs.type != event->attr.type)
+ return -ENOENT;
+ return 0;
+}
+
+static int perf_ibs_add(struct perf_event *event, int flags)
+{
+ return 0;
+}
+
+static void perf_ibs_del(struct perf_event *event, int flags)
+{
+}
+
+static struct pmu perf_ibs = {
+ .event_init= perf_ibs_init,
+ .add= perf_ibs_add,
+ .del= perf_ibs_del,
+};
+
+static __init int perf_event_ibs_init(void)
+{
+ if (!ibs_caps)
+ return -ENODEV; /* ibs not supported by the cpu */
+
+ perf_pmu_register(&perf_ibs, "ibs", -1);
+ printk(KERN_INFO "perf: AMD IBS detected (0x%08x)\n", ibs_caps);
+
+ return 0;
+}
+
+#else /* defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) */
+
+static __init int perf_event_ibs_init(void) { return 0; }
+
+#endif
+
+/* IBS - apic initialization, for perf and oprofile */
+
+static __init u32 __get_ibs_caps(void)
+{
+ u32 caps;
+ unsigned int max_level;
+
+ if (!boot_cpu_has(X86_FEATURE_IBS))
+ return 0;
+
+ /* check IBS cpuid feature flags */
+ max_level = cpuid_eax(0x80000000);
+ if (max_level < IBS_CPUID_FEATURES)
+ return IBS_CAPS_DEFAULT;
+
+ caps = cpuid_eax(IBS_CPUID_FEATURES);
+ if (!(caps & IBS_CAPS_AVAIL))
+ /* cpuid flags not valid */
+ return IBS_CAPS_DEFAULT;
+
+ return caps;
+}
+
+u32 get_ibs_caps(void)
+{
+ return ibs_caps;
+}
+
+EXPORT_SYMBOL(get_ibs_caps);
+
+static inline int get_eilvt(int offset)
+{
+ return !setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 1);
+}
+
+static inline int put_eilvt(int offset)
+{
+ return !setup_APIC_eilvt(offset, 0, 0, 1);
+}
+
+/*
+ * Check and reserve APIC extended interrupt LVT offset for IBS if available.
+ */
+static inline int ibs_eilvt_valid(void)
+{
+ int offset;
+ u64 val;
+ int valid = 0;
+
+ preempt_disable();
+
+ rdmsrl(MSR_AMD64_IBSCTL, val);
+ offset = val & IBSCTL_LVT_OFFSET_MASK;
+
+ if (!(val & IBSCTL_LVT_OFFSET_VALID)) {
+ pr_err(FW_BUG "cpu %d, invalid IBS interrupt offset %d (MSR%08X=0x%016llx)\n",
+ smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);
+ goto out;
+ }
+
+ if (!get_eilvt(offset)) {
+ pr_err(FW_BUG "cpu %d, IBS interrupt offset %d not available (MSR%08X=0x%016llx)\n",
+ smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);
+ goto out;
+ }
+
+ valid = 1;
+out:
+ preempt_enable();
+
+ return valid;
+}
+
+static int setup_ibs_ctl(int ibs_eilvt_off)
+{
+ struct pci_dev *cpu_cfg;
+ int nodes;
+ u32 value = 0;
+
+ nodes = 0;
+ cpu_cfg = NULL;
+ do {
+ cpu_cfg = pci_get_device(PCI_VENDOR_ID_AMD,
+ PCI_DEVICE_ID_AMD_10H_NB_MISC,
+ cpu_cfg);
+ if (!cpu_cfg)
+ break;
+ ++nodes;
+ pci_write_config_dword(cpu_cfg, IBSCTL, ibs_eilvt_off
+ | IBSCTL_LVT_OFFSET_VALID);
+ pci_read_config_dword(cpu_cfg, IBSCTL, &value);
+ if (value != (ibs_eilvt_off | IBSCTL_LVT_OFFSET_VALID)) {
+ pci_dev_put(cpu_cfg);
+ printk(KERN_DEBUG "Failed to setup IBS LVT offset, "
+ "IBSCTL = 0x%08x\n", value);
+ return -EINVAL;
+ }
+ } while (1);
+
+ if (!nodes) {
+ printk(KERN_DEBUG "No CPU node configured for IBS\n");
+ return -ENODEV;
+ }
+
+ return 0;
+}
+
+/*
+ * This runs only on the current cpu. We try to find an LVT offset and
+ * setup the local APIC. For this we must disable preemption. On
+ * success we initialize all nodes with this offset. This then updates
+ * the offset in the IBS_CTL per-node msr. The per-core APIC setup of
+ * the IBS interrupt vector is handled by perf_ibs_cpu_notifier that
+ * is using the new offset.
+ */
+static int force_ibs_eilvt_setup(void)
+{
+ int offset;
+ int ret;
+
+ preempt_disable();
+ /* find the next free available EILVT entry, skip offset 0 */
+ for (offset = 1; offset < APIC_EILVT_NR_MAX; offset++) {
+ if (get_eilvt(offset))
+ break;
+ }
+ preempt_enable();
+
+ if (offset == APIC_EILVT_NR_MAX) {
+ printk(KERN_DEBUG "No EILVT entry available\n");
+ return -EBUSY;
+ }
+
+ ret = setup_ibs_ctl(offset);
+ if (ret)
+ goto out;
+
+ if (!ibs_eilvt_valid()) {
+ ret = -EFAULT;
+ goto out;
+ }
+
+ pr_err(FW_BUG "using offset %d for IBS interrupts\n", offset);
+ pr_err(FW_BUG "workaround enabled for IBS LVT offset\n");
+
+ return 0;
+out:
+ preempt_disable();
+ put_eilvt(offset);
+ preempt_enable();
+ return ret;
+}
+
+static inline int get_ibs_lvt_offset(void)
+{
+ u64 val;
+
+ rdmsrl(MSR_AMD64_IBSCTL, val);
+ if (!(val & IBSCTL_LVT_OFFSET_VALID))
+ return -EINVAL;
+
+ return val & IBSCTL_LVT_OFFSET_MASK;
+}
+
+static void setup_APIC_ibs(void *dummy)
+{
+ int offset;
+
+ offset = get_ibs_lvt_offset();
+ if (offset < 0)
+ goto failed;
+
+ if (!setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 0))
+ return;
+failed:
+ pr_warn("perf: IBS APIC setup failed on cpu #%d\n",
+ smp_processor_id());
+}
+
+static void clear_APIC_ibs(void *dummy)
+{
+ int offset;
+
+ offset = get_ibs_lvt_offset();
+ if (offset >= 0)
+ setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_FIX, 1);
+}
+
+static int __cpuinit
+perf_ibs_cpu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
+{
+ switch (action & ~CPU_TASKS_FROZEN) {
+ case CPU_STARTING:
+ setup_APIC_ibs(NULL);
+ break;
+ case CPU_DYING:
+ clear_APIC_ibs(NULL);
+ break;
+ default:
+ break;
+ }
+
+ return NOTIFY_OK;
+}
+
+static __init int amd_ibs_init(void)
+{
+ u32 caps;
+ int ret;
+
+ caps = __get_ibs_caps();
+ if (!caps)
+ return -ENODEV; /* ibs not supported by the cpu */
+
+ if (!ibs_eilvt_valid()) {
+ ret = force_ibs_eilvt_setup();
+ if (ret) {
+ pr_err("Failed to setup IBS, %d\n", ret);
+ return ret;
+ }
+ }
+
+ get_online_cpus();
+ ibs_caps = caps;
+ /* make ibs_caps visible to other cpus: */
+ smp_mb();
+ perf_cpu_notifier(perf_ibs_cpu_notifier);
+ smp_call_function(setup_APIC_ibs, NULL, 1);
+ put_online_cpus();
+
+ return perf_event_ibs_init();
+}
+
+/* Since we need the pci subsystem to init ibs we can't do this earlier: */
+device_initcall(amd_ibs_init);
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index adf8fb3..c04dc14 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -385,8 +385,6 @@ static void nmi_cpu_shutdown(void *dummy)
apic_write(APIC_LVTPC, per_cpu(saved_lvtpc, cpu));
apic_write(APIC_LVTERR, v);
nmi_cpu_restore_registers(msrs);
- if (model->cpu_down)
- model->cpu_down();
}
static void nmi_cpu_up(void *dummy)
diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index e947e5c..303f086 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -74,27 +74,6 @@ static struct ibs_state ibs_state;
#define IBS_RANDOM_MASK ((1ULL << IBS_RANDOM_BITS) - 1)
#define IBS_RANDOM_MAXCNT_OFFSET (1ULL << (IBS_RANDOM_BITS - 5))
-static u32 get_ibs_caps(void)
-{
- u32 ibs_caps;
- unsigned int max_level;
-
- if (!boot_cpu_has(X86_FEATURE_IBS))
- return 0;
-
- /* check IBS cpuid feature flags */
- max_level = cpuid_eax(0x80000000);
- if (max_level < IBS_CPUID_FEATURES)
- return IBS_CAPS_DEFAULT;
-
- ibs_caps = cpuid_eax(IBS_CPUID_FEATURES);
- if (!(ibs_caps & IBS_CAPS_AVAIL))
- /* cpuid flags not valid */
- return IBS_CAPS_DEFAULT;
-
- return ibs_caps;
-}
-
/*
* 16-bit Linear Feedback Shift Register (LFSR)
*
@@ -285,81 +264,6 @@ static void op_amd_stop_ibs(void)
wrmsrl(MSR_AMD64_IBSOPCTL, 0);
}
-static inline int get_eilvt(int offset)
-{
- return !setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 1);
-}
-
-static inline int put_eilvt(int offset)
-{
- return !setup_APIC_eilvt(offset, 0, 0, 1);
-}
-
-static inline int ibs_eilvt_valid(void)
-{
- int offset;
- u64 val;
- int valid = 0;
-
- preempt_disable();
-
- rdmsrl(MSR_AMD64_IBSCTL, val);
- offset = val & IBSCTL_LVT_OFFSET_MASK;
-
- if (!(val & IBSCTL_LVT_OFFSET_VALID)) {
- pr_err(FW_BUG "cpu %d, invalid IBS interrupt offset %d (MSR%08X=0x%016llx)\n",
- smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);
- goto out;
- }
-
- if (!get_eilvt(offset)) {
- pr_err(FW_BUG "cpu %d, IBS interrupt offset %d not available (MSR%08X=0x%016llx)\n",
- smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);
- goto out;
- }
-
- valid = 1;
-out:
- preempt_enable();
-
- return valid;
-}
-
-static inline int get_ibs_offset(void)
-{
- u64 val;
-
- rdmsrl(MSR_AMD64_IBSCTL, val);
- if (!(val & IBSCTL_LVT_OFFSET_VALID))
- return -EINVAL;
-
- return val & IBSCTL_LVT_OFFSET_MASK;
-}
-
-static void setup_APIC_ibs(void)
-{
- int offset;
-
- offset = get_ibs_offset();
- if (offset < 0)
- goto failed;
-
- if (!setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 0))
- return;
-failed:
- pr_warn("oprofile: IBS APIC setup failed on cpu #%d\n",
- smp_processor_id());
-}
-
-static void clear_APIC_ibs(void)
-{
- int offset;
-
- offset = get_ibs_offset();
- if (offset >= 0)
- setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_FIX, 1);
-}
-
#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
static void op_mux_switch_ctrl(struct op_x86_model_spec const *model,
@@ -473,15 +377,6 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
val |= op_x86_get_ctrl(model, &counter_config[virt]);
wrmsrl(msrs->controls[i].addr, val);
}
-
- if (ibs_caps)
- setup_APIC_ibs();
-}
-
-static void op_amd_cpu_shutdown(void)
-{
- if (ibs_caps)
- clear_APIC_ibs();
}
static int op_amd_check_ctrs(struct pt_regs * const regs,
@@ -544,86 +439,6 @@ static void op_amd_stop(struct op_msrs const * const msrs)
op_amd_stop_ibs();
}
-static int setup_ibs_ctl(int ibs_eilvt_off)
-{
- struct pci_dev *cpu_cfg;
- int nodes;
- u32 value = 0;
-
- nodes = 0;
- cpu_cfg = NULL;
- do {
- cpu_cfg = pci_get_device(PCI_VENDOR_ID_AMD,
- PCI_DEVICE_ID_AMD_10H_NB_MISC,
- cpu_cfg);
- if (!cpu_cfg)
- break;
- ++nodes;
- pci_write_config_dword(cpu_cfg, IBSCTL, ibs_eilvt_off
- | IBSCTL_LVT_OFFSET_VALID);
- pci_read_config_dword(cpu_cfg, IBSCTL, &value);
- if (value != (ibs_eilvt_off | IBSCTL_LVT_OFFSET_VALID)) {
- pci_dev_put(cpu_cfg);
- printk(KERN_DEBUG "Failed to setup IBS LVT offset, "
- "IBSCTL = 0x%08x\n", value);
- return -EINVAL;
- }
- } while (1);
-
- if (!nodes) {
- printk(KERN_DEBUG "No CPU node configured for IBS\n");
- return -ENODEV;
- }
-
- return 0;
-}
-
-/*
- * This runs only on the current cpu. We try to find an LVT offset and
- * setup the local APIC. For this we must disable preemption. On
- * success we initialize all nodes with this offset. This updates then
- * the offset in the IBS_CTL per-node msr. The per-core APIC setup of
- * the IBS interrupt vector is called from op_amd_setup_ctrs()/op_-
- * amd_cpu_shutdown() using the new offset.
- */
-static int force_ibs_eilvt_setup(void)
-{
- int offset;
- int ret;
-
- preempt_disable();
- /* find the next free available EILVT entry, skip offset 0 */
- for (offset = 1; offset < APIC_EILVT_NR_MAX; offset++) {
- if (get_eilvt(offset))
- break;
- }
- preempt_enable();
-
- if (offset == APIC_EILVT_NR_MAX) {
- printk(KERN_DEBUG "No EILVT entry available\n");
- return -EBUSY;
- }
-
- ret = setup_ibs_ctl(offset);
- if (ret)
- goto out;
-
- if (!ibs_eilvt_valid()) {
- ret = -EFAULT;
- goto out;
- }
-
- pr_err(FW_BUG "using offset %d for IBS interrupts\n", offset);
- pr_err(FW_BUG "workaround enabled for IBS LVT offset\n");
-
- return 0;
-out:
- preempt_disable();
- put_eilvt(offset);
- preempt_enable();
- return ret;
-}
-
/*
* check and reserve APIC extended interrupt LVT offset for IBS if
* available
@@ -636,17 +451,6 @@ static void init_ibs(void)
if (!ibs_caps)
return;
- if (ibs_eilvt_valid())
- goto out;
-
- if (!force_ibs_eilvt_setup())
- goto out;
-
- /* Failed to setup ibs */
- ibs_caps = 0;
- return;
-
-out:
printk(KERN_INFO "oprofile: AMD IBS detected (0x%08x)\n", ibs_caps);
}
@@ -729,7 +533,6 @@ struct op_x86_model_spec op_amd_spec = {
.init = op_amd_init,
.fill_in_addresses = &op_amd_fill_in_addresses,
.setup_ctrs = &op_amd_setup_ctrs,
- .cpu_down = &op_amd_cpu_shutdown,
.check_ctrs = &op_amd_check_ctrs,
.start = &op_amd_start,
.stop = &op_amd_stop,
diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h
index 89017fa..71e8a67 100644
--- a/arch/x86/oprofile/op_x86_model.h
+++ b/arch/x86/oprofile/op_x86_model.h
@@ -43,7 +43,6 @@ struct op_x86_model_spec {
int (*fill_in_addresses)(struct op_msrs * const msrs);
void (*setup_ctrs)(struct op_x86_model_spec const *model,
struct op_msrs const * const msrs);
- void (*cpu_down)(void);
int (*check_ctrs)(struct pt_regs * const regs,
struct op_msrs const * const msrs);
void (*start)(struct op_msrs const * const msrs);
--
1.7.6.1
* [PATCH 3/7] perf, x86: Implement IBS event configuration
2011-09-07 16:36 [PATCH 0/7 -v2] perf, x86: Implement AMD IBS Robert Richter
2011-09-07 16:36 ` [PATCH 1/7] perf, x86: share IBS macros between perf and oprofile Robert Richter
2011-09-07 16:36 ` [PATCH 2/7] perf, x86: Implement IBS initialization Robert Richter
@ 2011-09-07 16:36 ` Robert Richter
2011-09-14 15:35 ` Peter Zijlstra
2011-09-07 16:36 ` [PATCH 4/7] perf, x86: Implement IBS interrupt handler Robert Richter
` (3 subsequent siblings)
6 siblings, 1 reply; 14+ messages in thread
From: Robert Richter @ 2011-09-07 16:36 UTC
To: Peter Zijlstra; +Cc: Ingo Molnar, Stephane Eranian, LKML, Robert Richter
This patch implements perf configuration for AMD IBS. The IBS pmu is
selected using the type attribute in sysfs. There are two types of ibs
pmus, one for instruction fetch (IBS_FETCH) and one for instruction
execution (IBS_OP):
/sys/bus/event_source/devices/ibs_fetch/type
/sys/bus/event_source/devices/ibs_op/type
Except for the sample period, IBS can only be set up with raw config
values and raw data samples. The event attributes for the syscall
should be programmed like this (IBS_FETCH):
type = get_pmu_type("/sys/bus/event_source/devices/ibs_fetch/type");
memset(&attr, 0, sizeof(attr));
attr.type = type;
attr.sample_type = PERF_SAMPLE_CPU | PERF_SAMPLE_RAW;
attr.config = IBS_FETCH_CONFIG_DEFAULT;
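The event fd is then obtained with perf_event_open(2), which has no
glibc wrapper. A sketch for system-wide sampling on one cpu (needs
<unistd.h> and <sys/syscall.h>; the period value is just an example):

attr.sample_period = 100000;	/* instead of a max_cnt in the config */

fd = syscall(__NR_perf_event_open, &attr,
	     -1,	/* pid: no single task */
	      0,	/* cpu 0 */
	     -1,	/* group_fd: no group */
	      0);	/* flags */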
This implementation does not yet support 64 bit counters. It is
limited to the hardware counter bit width, which is 20 bits. 64 bit
support can be added later.
Signed-off-by: Robert Richter <robert.richter@amd.com>
---
arch/x86/kernel/cpu/perf_event_amd_ibs.c | 88 +++++++++++++++++++++++++++---
1 files changed, 81 insertions(+), 7 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
index 1ae5154..3b477ad 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c
+++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
@@ -14,12 +14,67 @@ static u32 ibs_caps;
#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
-static struct pmu perf_ibs;
+#define IBS_FETCH_CONFIG_MASK (IBS_FETCH_RAND_EN | IBS_FETCH_MAX_CNT)
+#define IBS_OP_CONFIG_MASK IBS_OP_MAX_CNT
+
+struct perf_ibs {
+ struct pmu pmu;
+ unsigned int msr;
+ u64 config_mask;
+ u64 cnt_mask;
+ u64 enable_mask;
+};
+
+static struct perf_ibs perf_ibs_fetch;
+static struct perf_ibs perf_ibs_op;
+
+static struct perf_ibs *get_ibs_pmu(int type)
+{
+ if (perf_ibs_fetch.pmu.type == type)
+ return &perf_ibs_fetch;
+ if (perf_ibs_op.pmu.type == type)
+ return &perf_ibs_op;
+ return NULL;
+}
static int perf_ibs_init(struct perf_event *event)
{
- if (perf_ibs.type != event->attr.type)
+ struct hw_perf_event *hwc = &event->hw;
+ struct perf_ibs *perf_ibs;
+ u64 max_cnt, config;
+
+ perf_ibs = get_ibs_pmu(event->attr.type);
+ if (!perf_ibs)
return -ENOENT;
+
+ config = event->attr.config;
+ if (config & ~perf_ibs->config_mask)
+ return -EINVAL;
+
+ if (hwc->sample_period) {
+ if (config & perf_ibs->cnt_mask)
+ /* raw max_cnt may not be set */
+ return -EINVAL;
+ if (hwc->sample_period & 0x0f)
+ /* lower 4 bits can not be set in ibs max cnt */
+ return -EINVAL;
+ max_cnt = hwc->sample_period >> 4;
+ if (max_cnt & ~perf_ibs->cnt_mask)
+ /* out of range */
+ return -EINVAL;
+ config |= max_cnt;
+ } else {
+ max_cnt = config & perf_ibs->cnt_mask;
+ event->attr.sample_period = max_cnt << 4;
+ hwc->sample_period = event->attr.sample_period;
+ }
+
+ if (!max_cnt)
+ return -EINVAL;
+
+ hwc->config_base = perf_ibs->msr;
+ hwc->config = config;
+
return 0;
}
@@ -32,10 +87,28 @@ static void perf_ibs_del(struct perf_event *event, int flags)
{
}
-static struct pmu perf_ibs = {
- .event_init= perf_ibs_init,
- .add= perf_ibs_add,
- .del= perf_ibs_del,
+static struct perf_ibs perf_ibs_fetch = {
+ .pmu = {
+ .event_init = perf_ibs_init,
+ .add = perf_ibs_add,
+ .del = perf_ibs_del,
+ },
+ .msr = MSR_AMD64_IBSFETCHCTL,
+ .config_mask = IBS_FETCH_CONFIG_MASK,
+ .cnt_mask = IBS_FETCH_MAX_CNT,
+ .enable_mask = IBS_FETCH_ENABLE,
+};
+
+static struct perf_ibs perf_ibs_op = {
+ .pmu = {
+ .event_init = perf_ibs_init,
+ .add = perf_ibs_add,
+ .del = perf_ibs_del,
+ },
+ .msr = MSR_AMD64_IBSOPCTL,
+ .config_mask = IBS_OP_CONFIG_MASK,
+ .cnt_mask = IBS_OP_MAX_CNT,
+ .enable_mask = IBS_OP_ENABLE,
};
static __init int perf_event_ibs_init(void)
@@ -43,7 +116,8 @@ static __init int perf_event_ibs_init(void)
if (!ibs_caps)
return -ENODEV; /* ibs not supported by the cpu */
- perf_pmu_register(&perf_ibs, "ibs", -1);
+ perf_pmu_register(&perf_ibs_fetch.pmu, "ibs_fetch", -1);
+ perf_pmu_register(&perf_ibs_op.pmu, "ibs_op", -1);
printk(KERN_INFO "perf: AMD IBS detected (0x%08x)\n", ibs_caps);
return 0;
--
1.7.6.1
* [PATCH 4/7] perf, x86: Implement IBS interrupt handler
2011-09-07 16:36 [PATCH 0/7 -v2] perf, x86: Implement AMD IBS Robert Richter
` (2 preceding siblings ...)
2011-09-07 16:36 ` [PATCH 3/7] perf, x86: Implement IBS event configuration Robert Richter
@ 2011-09-07 16:36 ` Robert Richter
2011-09-14 16:13 ` Peter Zijlstra
2011-09-07 16:36 ` [PATCH 5/7] perf, x86: Implement IBS pmu control ops Robert Richter
` (2 subsequent siblings)
6 siblings, 1 reply; 14+ messages in thread
From: Robert Richter @ 2011-09-07 16:36 UTC
To: Peter Zijlstra; +Cc: Ingo Molnar, Stephane Eranian, LKML, Robert Richter
This patch implements code to handle ibs interrupts. If ibs data is
available, a raw perf_event data sample is created and sent back to
userland. This patch only implements the storage of ibs data in the
raw sample; it could be extended in a later patch to generate generic
event data, such as the rip, from the ibs sampling data.
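On the user side the raw sample body then starts with the caps word,
followed by the sampled IBS MSRs in ascending offset order, the
control register (IbsFetchCtl or IbsOpCtl) first. A hypothetical
decoder matching the struct perf_ibs_data layout below:

static void decode_ibs_raw(const void *raw, uint32_t raw_size)
{
	/* needs <inttypes.h>, <stdio.h>, <string.h> */
	const uint8_t *p = raw;
	uint32_t caps;
	uint64_t reg;
	unsigned int i, nregs;

	memcpy(&caps, p, sizeof(caps));
	p += sizeof(caps);
	nregs = (raw_size - sizeof(caps)) / sizeof(reg);

	printf("caps: 0x%08" PRIx32 "\n", caps);
	for (i = 0; i < nregs; i++) {
		memcpy(&reg, p, sizeof(reg));
		p += sizeof(reg);
		printf("reg %u: 0x%016" PRIx64 "\n", i, reg);
	}
}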
V2:
* Added bit mask for msr offsets.
* Added caps field to raw sample format.
* Rebase on Don's NMI patches that introduce register_nmi_handler().
Signed-off-by: Robert Richter <robert.richter@amd.com>
---
arch/x86/include/asm/msr-index.h | 5 ++
arch/x86/kernel/cpu/perf_event_amd_ibs.c | 84 ++++++++++++++++++++++++++++++
2 files changed, 89 insertions(+), 0 deletions(-)
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index d52609a..eca3363 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -127,6 +127,8 @@
#define MSR_AMD64_IBSFETCHCTL 0xc0011030
#define MSR_AMD64_IBSFETCHLINAD 0xc0011031
#define MSR_AMD64_IBSFETCHPHYSAD 0xc0011032
+#define MSR_AMD64_IBSFETCH_REG_COUNT 3
+#define MSR_AMD64_IBSFETCH_REG_MASK ((1UL<<MSR_AMD64_IBSFETCH_REG_COUNT)-1)
#define MSR_AMD64_IBSOPCTL 0xc0011033
#define MSR_AMD64_IBSOPRIP 0xc0011034
#define MSR_AMD64_IBSOPDATA 0xc0011035
@@ -134,8 +136,11 @@
#define MSR_AMD64_IBSOPDATA3 0xc0011037
#define MSR_AMD64_IBSDCLINAD 0xc0011038
#define MSR_AMD64_IBSDCPHYSAD 0xc0011039
+#define MSR_AMD64_IBSOP_REG_COUNT 7
+#define MSR_AMD64_IBSOP_REG_MASK ((1UL<<MSR_AMD64_IBSOP_REG_COUNT)-1)
#define MSR_AMD64_IBSCTL 0xc001103a
#define MSR_AMD64_IBSBRTARGET 0xc001103b
+#define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */
/* Fam 15h MSRs */
#define MSR_F15H_PERF_CTL 0xc0010200
diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
index 3b477ad..ffa53c9 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c
+++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
@@ -14,6 +14,11 @@ static u32 ibs_caps;
#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
+#include <linux/kprobes.h>
+#include <linux/hardirq.h>
+
+#include <asm/nmi.h>
+
#define IBS_FETCH_CONFIG_MASK (IBS_FETCH_RAND_EN | IBS_FETCH_MAX_CNT)
#define IBS_OP_CONFIG_MASK IBS_OP_MAX_CNT
@@ -23,6 +28,18 @@ struct perf_ibs {
u64 config_mask;
u64 cnt_mask;
u64 enable_mask;
+ u64 valid_mask;
+ unsigned long offset_mask[1];
+ int offset_max;
+};
+
+struct perf_ibs_data {
+ u32 size;
+ union {
+ u32 data[0]; /* data buffer starts here */
+ u32 caps;
+ };
+ u64 regs[MSR_AMD64_IBS_REG_COUNT_MAX];
};
static struct perf_ibs perf_ibs_fetch;
@@ -97,6 +114,9 @@ static struct perf_ibs perf_ibs_fetch = {
.config_mask = IBS_FETCH_CONFIG_MASK,
.cnt_mask = IBS_FETCH_MAX_CNT,
.enable_mask = IBS_FETCH_ENABLE,
+ .valid_mask = IBS_FETCH_VAL,
+ .offset_mask = { MSR_AMD64_IBSFETCH_REG_MASK },
+ .offset_max = MSR_AMD64_IBSFETCH_REG_COUNT,
};
static struct perf_ibs perf_ibs_op = {
@@ -109,8 +129,71 @@ static struct perf_ibs perf_ibs_op = {
.config_mask = IBS_OP_CONFIG_MASK,
.cnt_mask = IBS_OP_MAX_CNT,
.enable_mask = IBS_OP_ENABLE,
+ .valid_mask = IBS_OP_VAL,
+ .offset_mask = { MSR_AMD64_IBSOP_REG_MASK },
+ .offset_max = MSR_AMD64_IBSOP_REG_COUNT,
};
+static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
+{
+ struct perf_event *event = NULL;
+ struct hw_perf_event *hwc = &event->hw;
+ struct perf_sample_data data;
+ struct perf_raw_record raw;
+ struct pt_regs regs;
+ struct perf_ibs_data ibs_data;
+ int offset, size;
+ unsigned int msr;
+ u64 *buf;
+
+ msr = hwc->config_base;
+ buf = ibs_data.regs;
+ rdmsrl(msr, *buf);
+ if (!(*buf++ & perf_ibs->valid_mask))
+ return 0;
+
+ perf_sample_data_init(&data, 0);
+ if (event->attr.sample_type & PERF_SAMPLE_RAW) {
+ ibs_data.caps = ibs_caps;
+ size = 1;
+ offset = 1;
+ do {
+ rdmsrl(msr + offset, *buf++);
+ size++;
+ offset = find_next_bit(perf_ibs->offset_mask,
+ perf_ibs->offset_max,
+ offset + 1);
+ } while (offset < perf_ibs->offset_max);
+ raw.size = sizeof(u32) + sizeof(u64) * size;
+ raw.data = ibs_data.data;
+ data.raw = &raw;
+ }
+
+ regs = *iregs; /* XXX: update ip from ibs sample */
+
+ if (perf_event_overflow(event, &data, &regs))
+ ; /* stop */
+ else
+ /* reenable */
+ wrmsrl(hwc->config_base, hwc->config | perf_ibs->enable_mask);
+
+ return 1;
+}
+
+static int __kprobes
+perf_ibs_nmi_handler(unsigned int cmd, struct pt_regs *regs)
+{
+ int handled = 0;
+
+ handled += perf_ibs_handle_irq(&perf_ibs_fetch, regs);
+ handled += perf_ibs_handle_irq(&perf_ibs_op, regs);
+
+ if (handled)
+ inc_irq_stat(apic_perf_irqs);
+
+ return handled;
+}
+
static __init int perf_event_ibs_init(void)
{
if (!ibs_caps)
@@ -118,6 +201,7 @@ static __init int perf_event_ibs_init(void)
perf_pmu_register(&perf_ibs_fetch.pmu, "ibs_fetch", -1);
perf_pmu_register(&perf_ibs_op.pmu, "ibs_op", -1);
+ register_nmi_handler(NMI_LOCAL, &perf_ibs_nmi_handler, 0, "perf_ibs");
printk(KERN_INFO "perf: AMD IBS detected (0x%08x)\n", ibs_caps);
return 0;
--
1.7.6.1
* [PATCH 5/7] perf, x86: Implement IBS pmu control ops
2011-09-07 16:36 [PATCH 0/7 -v2] perf, x86: Implement AMD IBS Robert Richter
` (3 preceding siblings ...)
2011-09-07 16:36 ` [PATCH 4/7] perf, x86: Implement IBS interrupt handler Robert Richter
@ 2011-09-07 16:36 ` Robert Richter
2011-09-07 16:36 ` [PATCH 6/7] perf, x86: Implement 64 bit counter support for IBS Robert Richter
2011-09-07 16:36 ` [PATCH 7/7] perf, x86: Example code for AMD IBS Robert Richter
6 siblings, 0 replies; 14+ messages in thread
From: Robert Richter @ 2011-09-07 16:36 UTC
To: Peter Zijlstra; +Cc: Ingo Molnar, Stephane Eranian, LKML, Robert Richter
Add code to control the IBS pmu. We need to maintain per-cpu
state. Since some of this state is used and changed by the nmi
handler, access to it must be atomic.
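Roughly, the window these bits guard looks like this; an NMI may
still be in flight after perf_ibs_stop() cleared IBS_STARTED but
before the hardware is disabled:

/*
 * pmu->stop() on cpu X             IBS NMI in flight on cpu X
 * ---------------------------      ---------------------------
 * test_and_clear(IBS_STARTED)
 * set_bit(IBS_STOPPING)
 * clear enable bit in the msr  <-- raised before this point:
 *                                  sees !IBS_STARTED, then
 *                                  test_and_clear(IBS_STOPPING)
 *                                  and swallows the spurious sample
 */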
Signed-off-by: Robert Richter <robert.richter@amd.com>
---
arch/x86/kernel/cpu/perf_event_amd_ibs.c | 106 +++++++++++++++++++++++++++++-
1 files changed, 103 insertions(+), 3 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
index ffa53c9..f31f868 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c
+++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
@@ -22,6 +22,19 @@ static u32 ibs_caps;
#define IBS_FETCH_CONFIG_MASK (IBS_FETCH_RAND_EN | IBS_FETCH_MAX_CNT)
#define IBS_OP_CONFIG_MASK IBS_OP_MAX_CNT
+enum ibs_states {
+ IBS_ENABLED = 0,
+ IBS_STARTED = 1,
+ IBS_STOPPING = 2,
+
+ IBS_MAX_STATES,
+};
+
+struct cpu_perf_ibs {
+ struct perf_event *event;
+ unsigned long state[BITS_TO_LONGS(IBS_MAX_STATES)];
+};
+
struct perf_ibs {
struct pmu pmu;
unsigned int msr;
@@ -31,6 +44,7 @@ struct perf_ibs {
u64 valid_mask;
unsigned long offset_mask[1];
int offset_max;
+ struct cpu_perf_ibs __percpu *pcpu;
};
struct perf_ibs_data {
@@ -95,20 +109,74 @@ static int perf_ibs_init(struct perf_event *event)
return 0;
}
+static void perf_ibs_start(struct perf_event *event, int flags)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
+ struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
+
+ if (test_and_set_bit(IBS_STARTED, pcpu->state))
+ return;
+
+ wrmsrl(hwc->config_base, hwc->config | perf_ibs->enable_mask);
+}
+
+static void perf_ibs_stop(struct perf_event *event, int flags)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
+ struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
+ u64 val;
+
+ if (!test_and_clear_bit(IBS_STARTED, pcpu->state))
+ return;
+
+ set_bit(IBS_STOPPING, pcpu->state);
+
+ rdmsrl(hwc->config_base, val);
+ val &= ~perf_ibs->enable_mask;
+ wrmsrl(hwc->config_base, val);
+}
+
static int perf_ibs_add(struct perf_event *event, int flags)
{
+ struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
+ struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
+
+ if (test_and_set_bit(IBS_ENABLED, pcpu->state))
+ return -ENOSPC;
+
+ pcpu->event = event;
+
+ if (flags & PERF_EF_START)
+ perf_ibs_start(event, PERF_EF_RELOAD);
+
return 0;
}
static void perf_ibs_del(struct perf_event *event, int flags)
{
+ struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
+ struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
+
+ if (!test_and_clear_bit(IBS_ENABLED, pcpu->state))
+ return;
+
+ perf_ibs_stop(event, 0);
+
+ pcpu->event = NULL;
}
+static void perf_ibs_read(struct perf_event *event) { }
+
static struct perf_ibs perf_ibs_fetch = {
.pmu = {
.event_init = perf_ibs_init,
.add = perf_ibs_add,
.del = perf_ibs_del,
+ .start = perf_ibs_start,
+ .stop = perf_ibs_stop,
+ .read = perf_ibs_read,
},
.msr = MSR_AMD64_IBSFETCHCTL,
.config_mask = IBS_FETCH_CONFIG_MASK,
@@ -124,6 +192,9 @@ static struct perf_ibs perf_ibs_op = {
.event_init = perf_ibs_init,
.add = perf_ibs_add,
.del = perf_ibs_del,
+ .start = perf_ibs_start,
+ .stop = perf_ibs_stop,
+ .read = perf_ibs_read,
},
.msr = MSR_AMD64_IBSOPCTL,
.config_mask = IBS_OP_CONFIG_MASK,
@@ -136,7 +207,8 @@ static struct perf_ibs perf_ibs_op = {
static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
{
- struct perf_event *event = NULL;
+ struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
+ struct perf_event *event = pcpu->event;
struct hw_perf_event *hwc = &event->hw;
struct perf_sample_data data;
struct perf_raw_record raw;
@@ -146,6 +218,14 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
unsigned int msr;
u64 *buf;
+ if (!test_bit(IBS_STARTED, pcpu->state)) {
+ /* Catch spurious interrupts after stopping IBS: */
+ if (!test_and_clear_bit(IBS_STOPPING, pcpu->state))
+ return 0;
+ rdmsrl(perf_ibs->msr, *ibs_data.regs);
+ return (*ibs_data.regs & perf_ibs->valid_mask);
+ }
+
msr = hwc->config_base;
buf = ibs_data.regs;
rdmsrl(msr, *buf);
@@ -194,13 +274,33 @@ perf_ibs_nmi_handler(unsigned int cmd, struct pt_regs *regs)
return handled;
}
+static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name)
+{
+ struct cpu_perf_ibs __percpu *pcpu;
+ int ret;
+
+ pcpu = alloc_percpu(struct cpu_perf_ibs);
+ if (!pcpu)
+ return -ENOMEM;
+
+ perf_ibs->pcpu = pcpu;
+
+ ret = perf_pmu_register(&perf_ibs->pmu, name, -1);
+ if (ret) {
+ perf_ibs->pcpu = NULL;
+ free_percpu(pcpu);
+ }
+
+ return ret;
+}
+
static __init int perf_event_ibs_init(void)
{
if (!ibs_caps)
return -ENODEV; /* ibs not supported by the cpu */
- perf_pmu_register(&perf_ibs_fetch.pmu, "ibs_fetch", -1);
- perf_pmu_register(&perf_ibs_op.pmu, "ibs_op", -1);
+ perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch");
+ perf_ibs_pmu_init(&perf_ibs_op, "ibs_op");
register_nmi_handler(NMI_LOCAL, &perf_ibs_nmi_handler, 0, "perf_ibs");
printk(KERN_INFO "perf: AMD IBS detected (0x%08x)\n", ibs_caps);
--
1.7.6.1
* [PATCH 6/7] perf, x86: Implement 64 bit counter support for IBS
2011-09-07 16:36 [PATCH 0/7 -v2] perf, x86: Implement AMD IBS Robert Richter
` (4 preceding siblings ...)
2011-09-07 16:36 ` [PATCH 5/7] perf, x86: Implement IBS pmu control ops Robert Richter
@ 2011-09-07 16:36 ` Robert Richter
2011-09-07 16:36 ` [PATCH 7/7] perf, x86: Example code for AMD IBS Robert Richter
6 siblings, 0 replies; 14+ messages in thread
From: Robert Richter @ 2011-09-07 16:36 UTC
To: Peter Zijlstra; +Cc: Ingo Molnar, Stephane Eranian, LKML, Robert Richter
This patch implements 64 bit counter support for IBS. The sampling
period is no longer limited to the hw counter width.
The functions perf_event_set_period() and perf_event_try_update() can
be used as generic functions. They can replace similar code that is
duplicated across architectures.
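A rough user-space simulation of what this enables (a requested
period larger than the 20 bit hw width is served in max_period sized
slices; a sample is emitted only once period_left is used up):

#include <stdio.h>

#define IBS_MAX_PERIOD	(0xFFFFULL << 4)	/* 20 bit hw limit */

int main(void)
{
	long long left = 1LL << 24;	/* requested sample period */
	int nmis = 0;

	while (left > 0) {
		/* perf_event_set_period(): clamp the next hw count */
		long long count = left > (long long)IBS_MAX_PERIOD ?
				  (long long)IBS_MAX_PERIOD : left;
		/* the hw counts down and raises an NMI;
		 * perf_event_try_update() accounts the delta */
		left -= count;
		nmis++;
	}
	/* only the final NMI overflows and emits a sample */
	printf("%d NMIs per sample\n", nmis);	/* prints 17 */
	return 0;
}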
V2: Added caps check for IBS_OP_CUR_CNT emulation.
Signed-off-by: Robert Richter <robert.richter@amd.com>
---
arch/x86/include/asm/perf_event.h | 2 +
arch/x86/kernel/cpu/perf_event_amd_ibs.c | 204 +++++++++++++++++++++++++++---
2 files changed, 185 insertions(+), 21 deletions(-)
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index e7d2f15..cc44a1a 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -150,6 +150,8 @@ union cpuid10_edx {
#define IBS_FETCH_MAX_CNT 0x0000FFFFULL
/* IbsOpCtl bits */
+/* lower 4 bits of the current count are ignored: */
+#define IBS_OP_CUR_CNT (0xFFFF0ULL<<32)
#define IBS_OP_CNT_CTL (1ULL<<19)
#define IBS_OP_VAL (1ULL<<18)
#define IBS_OP_ENABLE (1ULL<<17)
diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
index f31f868..11da65b 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c
+++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
@@ -42,9 +42,11 @@ struct perf_ibs {
u64 cnt_mask;
u64 enable_mask;
u64 valid_mask;
+ u64 max_period;
unsigned long offset_mask[1];
int offset_max;
struct cpu_perf_ibs __percpu *pcpu;
+ u64 (*get_count)(u64 config);
};
struct perf_ibs_data {
@@ -56,6 +58,78 @@ struct perf_ibs_data {
u64 regs[MSR_AMD64_IBS_REG_COUNT_MAX];
};
+static int
+perf_event_set_period(struct hw_perf_event *hwc, u64 min, u64 max, u64 *count)
+{
+ s64 left = local64_read(&hwc->period_left);
+ s64 period = hwc->sample_period;
+ int overflow = 0;
+
+ /*
+ * If we are way outside a reasonable range then just skip forward:
+ */
+ if (unlikely(left <= -period)) {
+ left = period;
+ local64_set(&hwc->period_left, left);
+ hwc->last_period = period;
+ overflow = 1;
+ }
+
+ if (unlikely(left <= 0)) {
+ left += period;
+ local64_set(&hwc->period_left, left);
+ hwc->last_period = period;
+ overflow = 1;
+ }
+
+ if (unlikely(left < min))
+ left = min;
+
+ if (left > max)
+ left = max;
+
+ *count = (u64)left;
+
+ return overflow;
+}
+
+static int
+perf_event_try_update(struct perf_event *event, u64 new_raw_count, int width)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ int shift = 64 - width;
+ u64 prev_raw_count;
+ u64 delta;
+
+ /*
+ * Careful: an NMI might modify the previous event value.
+ *
+ * Our tactic to handle this is to first atomically read and
+ * exchange a new raw count - then add that new-prev delta
+ * count to the generic event atomically:
+ */
+ prev_raw_count = local64_read(&hwc->prev_count);
+ if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+ new_raw_count) != prev_raw_count)
+ return 0;
+
+ /*
+ * Now we have the new raw value and have updated the prev
+ * timestamp already. We can now calculate the elapsed delta
+ * (event-)time and add that to the generic event.
+ *
+ * Careful, not all hw sign-extends above the physical width
+ * of the count.
+ */
+ delta = (new_raw_count << shift) - (prev_raw_count << shift);
+ delta >>= shift;
+
+ local64_add(delta, &event->count);
+ local64_sub(delta, &hwc->period_left);
+
+ return 1;
+}
+
static struct perf_ibs perf_ibs_fetch;
static struct perf_ibs perf_ibs_op;
@@ -89,18 +163,14 @@ static int perf_ibs_init(struct perf_event *event)
if (hwc->sample_period & 0x0f)
/* lower 4 bits can not be set in ibs max cnt */
return -EINVAL;
- max_cnt = hwc->sample_period >> 4;
- if (max_cnt & ~perf_ibs->cnt_mask)
- /* out of range */
- return -EINVAL;
- config |= max_cnt;
} else {
max_cnt = config & perf_ibs->cnt_mask;
+ config &= ~perf_ibs->cnt_mask;
event->attr.sample_period = max_cnt << 4;
hwc->sample_period = event->attr.sample_period;
}
- if (!max_cnt)
+ if (!hwc->sample_period)
return -EINVAL;
hwc->config_base = perf_ibs->msr;
@@ -109,16 +179,71 @@ static int perf_ibs_init(struct perf_event *event)
return 0;
}
+static int perf_ibs_set_period(struct perf_ibs *perf_ibs,
+ struct hw_perf_event *hwc, u64 *period)
+{
+ int ret;
+
+ /* ignore lower 4 bits in min count: */
+ ret = perf_event_set_period(hwc, 1<<4, perf_ibs->max_period, period);
+ local64_set(&hwc->prev_count, 0);
+
+ return ret;
+}
+
+static u64 get_ibs_fetch_count(u64 config)
+{
+ return (config & IBS_FETCH_CNT) >> 12;
+}
+
+static u64 get_ibs_op_count(u64 config)
+{
+ return (config & IBS_OP_CUR_CNT) >> 32;
+}
+
+static void
+perf_ibs_event_update(struct perf_ibs *perf_ibs, struct perf_event *event,
+ u64 config)
+{
+ u64 count = perf_ibs->get_count(config);
+
+ while (!perf_event_try_update(event, count, 20)) {
+ rdmsrl(event->hw.config_base, config);
+ count = perf_ibs->get_count(config);
+ }
+}
+
+/* Note: The enable mask must be encoded in the config argument. */
+static inline void perf_ibs_enable_event(struct hw_perf_event *hwc, u64 config)
+{
+ wrmsrl(hwc->config_base, hwc->config | config);
+}
+
+/*
+ * We cannot restore the ibs pmu state, so we always need to update
+ * the event while stopping it and then reset the state when starting
+ * again. Thus, we ignore the PERF_EF_RELOAD and PERF_EF_UPDATE flags
+ * in perf_ibs_start()/perf_ibs_stop() and always do both.
+ */
static void perf_ibs_start(struct perf_event *event, int flags)
{
struct hw_perf_event *hwc = &event->hw;
struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
+ u64 config;
- if (test_and_set_bit(IBS_STARTED, pcpu->state))
+ if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
return;
- wrmsrl(hwc->config_base, hwc->config | perf_ibs->enable_mask);
+ WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
+ hwc->state = 0;
+
+ perf_ibs_set_period(perf_ibs, hwc, &config);
+ config = (config >> 4) | perf_ibs->enable_mask;
+ set_bit(IBS_STARTED, pcpu->state);
+ perf_ibs_enable_event(hwc, config);
+
+ perf_event_update_userpage(event);
}
static void perf_ibs_stop(struct perf_event *event, int flags)
@@ -127,15 +252,28 @@ static void perf_ibs_stop(struct perf_event *event, int flags)
struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
u64 val;
+ int stopping;
- if (!test_and_clear_bit(IBS_STARTED, pcpu->state))
- return;
+ stopping = test_and_clear_bit(IBS_STARTED, pcpu->state);
- set_bit(IBS_STOPPING, pcpu->state);
+ if (!stopping && (hwc->state & PERF_HES_UPTODATE))
+ return;
rdmsrl(hwc->config_base, val);
- val &= ~perf_ibs->enable_mask;
- wrmsrl(hwc->config_base, val);
+
+ if (stopping) {
+ set_bit(IBS_STOPPING, pcpu->state);
+ val &= ~perf_ibs->enable_mask;
+ wrmsrl(hwc->config_base, val);
+ WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
+ hwc->state |= PERF_HES_STOPPED;
+ }
+
+ if (hwc->state & PERF_HES_UPTODATE)
+ return;
+
+ perf_ibs_event_update(perf_ibs, event, val);
+ hwc->state |= PERF_HES_UPTODATE;
}
static int perf_ibs_add(struct perf_event *event, int flags)
@@ -146,6 +284,8 @@ static int perf_ibs_add(struct perf_event *event, int flags)
if (test_and_set_bit(IBS_ENABLED, pcpu->state))
return -ENOSPC;
+ event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+
pcpu->event = event;
if (flags & PERF_EF_START)
@@ -162,9 +302,11 @@ static void perf_ibs_del(struct perf_event *event, int flags)
if (!test_and_clear_bit(IBS_ENABLED, pcpu->state))
return;
- perf_ibs_stop(event, 0);
+ perf_ibs_stop(event, PERF_EF_UPDATE);
pcpu->event = NULL;
+
+ perf_event_update_userpage(event);
}
static void perf_ibs_read(struct perf_event *event) { }
@@ -183,8 +325,11 @@ static struct perf_ibs perf_ibs_fetch = {
.cnt_mask = IBS_FETCH_MAX_CNT,
.enable_mask = IBS_FETCH_ENABLE,
.valid_mask = IBS_FETCH_VAL,
+ .max_period = IBS_FETCH_MAX_CNT << 4,
.offset_mask = { MSR_AMD64_IBSFETCH_REG_MASK },
.offset_max = MSR_AMD64_IBSFETCH_REG_COUNT,
+
+ .get_count = get_ibs_fetch_count,
};
static struct perf_ibs perf_ibs_op = {
@@ -201,8 +346,11 @@ static struct perf_ibs perf_ibs_op = {
.cnt_mask = IBS_OP_MAX_CNT,
.enable_mask = IBS_OP_ENABLE,
.valid_mask = IBS_OP_VAL,
+ .max_period = IBS_OP_MAX_CNT << 4,
.offset_mask = { MSR_AMD64_IBSOP_REG_MASK },
.offset_max = MSR_AMD64_IBSOP_REG_COUNT,
+
+ .get_count = get_ibs_op_count,
};
static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
@@ -214,9 +362,9 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
struct perf_raw_record raw;
struct pt_regs regs;
struct perf_ibs_data ibs_data;
- int offset, size;
+ int offset, size, overflow, reenable;
unsigned int msr;
- u64 *buf;
+ u64 *buf, config;
if (!test_bit(IBS_STARTED, pcpu->state)) {
/* Catch spurious interrupts after stopping IBS: */
@@ -251,11 +399,25 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
regs = *iregs; /* XXX: update ip from ibs sample */
- if (perf_event_overflow(event, &data, &regs))
- ; /* stop */
- else
- /* reenable */
- wrmsrl(hwc->config_base, hwc->config | perf_ibs->enable_mask);
+ /*
+ * Emulate IbsOpCurCnt in MSRC001_1033 (IbsOpCtl), which is not
+ * supported on all cpus. As this triggered an interrupt, we
+ * set the current count to the max count.
+ */
+ config = ibs_data.regs[0];
+ if (perf_ibs == &perf_ibs_op && !(ibs_caps & IBS_CAPS_RDWROPCNT)) {
+ config &= ~IBS_OP_CUR_CNT;
+ config |= (config & IBS_OP_MAX_CNT) << 36;
+ }
+
+ perf_ibs_event_update(perf_ibs, event, config);
+
+ overflow = perf_ibs_set_period(perf_ibs, hwc, &config);
+ reenable = !(overflow && perf_event_overflow(event, &data, &regs));
+ config = (config >> 4) | (reenable ? perf_ibs->enable_mask : 0);
+ perf_ibs_enable_event(hwc, config);
+
+ perf_event_update_userpage(event);
return 1;
}
--
1.7.6.1
* [PATCH 7/7] perf, x86: Example code for AMD IBS
2011-09-07 16:36 [PATCH 0/7 -v2] perf, x86: Implement AMD IBS Robert Richter
` (5 preceding siblings ...)
2011-09-07 16:36 ` [PATCH 6/7] perf, x86: Implement 64 bit counter support for IBS Robert Richter
@ 2011-09-07 16:36 ` Robert Richter
6 siblings, 0 replies; 14+ messages in thread
From: Robert Richter @ 2011-09-07 16:36 UTC (permalink / raw)
To: Peter Zijlstra; +Cc: Ingo Molnar, Stephane Eranian, LKML, Robert Richter
This patch includes an example of how to use IBS via perf_event.
usage: ibs [-h]
ibs ibs_fetch | ibs_op [-e CONFIG] [-c COUNT] [-s] [-C CPU] [-m BUFFERPAGES] <command>
<command>
Command to execute.
-e CONFIG
64-bit configuration value; refers to MSRs
IbsFetchCtl (0xC0011030) or IbsOpCtl (0xC0011033).
The default sample period is set to 100000.
-c COUNT
Event period to sample (default: 100000).
-h
Print help.
-s
system-wide profiling (set by default)
-C CPU
profile on CPU (not yet implemented)
-m BUFFERPAGES
Per-cpu buffer pages to allocate.
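An illustrative invocation, sampling instruction execution with a
period of 50000 and a 64-page per-cpu buffer (values chosen as an
example only):
$ ./ibs ibs_op -c 50000 -m 64 <command>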
V2:
* Updated include header files to fix build errors on some distros.
* Caps field added to sampling format.
* Note: I kept the example code for reference; the patch must not be
applied. I will come up with a solution that integrates IBS into
perf-report.
Signed-off-by: Robert Richter <robert.richter@amd.com>
---
tools/perf/Documentation/examples/Makefile | 44 +++
tools/perf/Documentation/examples/ibs.c | 445 ++++++++++++++++++++++++++++
2 files changed, 489 insertions(+), 0 deletions(-)
create mode 100644 tools/perf/Documentation/examples/Makefile
create mode 100644 tools/perf/Documentation/examples/ibs.c
diff --git a/tools/perf/Documentation/examples/Makefile b/tools/perf/Documentation/examples/Makefile
new file mode 100644
index 0000000..cfc9647
--- /dev/null
+++ b/tools/perf/Documentation/examples/Makefile
@@ -0,0 +1,44 @@
+all: ibs
+
+CFLAGS += -I../..
+CFLAGS += -I../../util/include
+CFLAGS += -DNO_NEWT_SUPPORT
+
+LIB_FILE=../../libperf.a
+
+INSTALL = install
+
+ifeq ("$(origin O)", "command line")
+ OUTPUT := $(O)/
+endif
+
+ifneq ($(OUTPUT),)
+# check that the output directory actually exists
+OUTDIR := $(shell cd $(OUTPUT) && /bin/pwd)
+$(if $(OUTDIR),, $(error output directory "$(OUTPUT)" does not exist))
+endif
+
+ifndef DESTDIR
+prefix = $(HOME)
+endif
+bindir_relative = bin
+bindir = $(prefix)/$(bindir_relative)
+
+DESTDIR_SQ = $(subst ','\'',$(DESTDIR))
+bindir_SQ = $(subst ','\'',$(bindir))
+
+../../libperf.a:
+ $(MAKE) CFLAGS="-DNO_NEWT_SUPPORT" -C ../.. libperf.a
+
+$(OUTPUT)ibs: ibs.c $(LIB_FILE)
+ $(CC) $(CFLAGS) $^ -o $@
+
+clean:
+ $(MAKE) -C ../.. clean
+ $(RM) ibs
+
+install: all
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(bindir_SQ)'
+ $(INSTALL) $(OUTPUT)ibs '$(DESTDIR_SQ)$(bindir_SQ)'
+
+.PHONY: all clean install
diff --git a/tools/perf/Documentation/examples/ibs.c b/tools/perf/Documentation/examples/ibs.c
new file mode 100644
index 0000000..e4ad012
--- /dev/null
+++ b/tools/perf/Documentation/examples/ibs.c
@@ -0,0 +1,445 @@
+/*
+ * IBS sampling example
+ *
+ * Copyright (C) 2011 Advanced Micro Devices, Inc., Robert Richter
+ *
+ * Sample code that attaches an event to a specified PMU.
+ *
+ * Compiling:
+ *
+ * $ cd linux # Linux kernel source dir
+ * $ make -C tools/perf/Documentation/examples ibs
+ *
+ * Running:
+ *
+ * $ ./ibs ibs_fetch -s -m 256 <command>
+ *
+ */
+
+#include <err.h>	/* err(), errx() */
+#include <errno.h>
+#include <fcntl.h>
+#include <string.h>
+#include <stdio.h>
+#include <poll.h>
+#include <stdlib.h>
+#include <unistd.h>	/* getopt(), read(), fork(), execvp() */
+#include <sys/ptrace.h>
+#include <sys/ioctl.h>	/* ioctl() */
+#include <signal.h>
+#include <setjmp.h>
+#include <inttypes.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+#include "util/evsel.h"
+#include "util/evlist.h"
+#include "util/cpumap.h"
+#include "util/thread_map.h"
+
+struct perf_config {
+ uint64_t config;
+ uint64_t sample_period;
+ char *sysfs;
+ int pid;
+ int cpu;
+ int mmap_pages;
+ char **argv;
+};
+
+static uint64_t collected_samples, lost_samples, sum_period;
+
+static void usage(void)
+{
+ printf(
+"usage: ibs [-h]\n"
+" ibs ibs_fetch | ibs_op [-s] [-C CPU] [-m BUFFERPAGES] <command>\n"
+"\n"
+" <command>\n"
+" Command to execute.\n"
+"\n"
+" -e CONFIG\n"
+" 64 bit configuration value, refers to msrs\n"
+" IbsFetchCtl (0xC0011030) or IbsOpCtl (0xC0011033).\n"
+" The default sample period is set to 100000.\n"
+"\n"
+" -c COUNT\n"
+" Event period to sample (default: 100000).\n"
+"\n"
+" -h\n"
+" Print help.\n"
+"\n"
+" -s\n"
+" system wide profiling (set per default)\n"
+"\n"
+" -C CPU\n"
+" profile on CPU (not yet implemented)\n"
+"\n"
+" -m BUFFERPAGES\n"
+" Per-cpu buffer pages to allocate.\n"
+);
+ exit(0);
+}
+
+/* IbsRandEn (bit 57) set, IbsFetchMaxCnt = 100000 >> 4 */
+#define IBS_FETCH_DEFAULT ((1ULL<<57)|(100000ULL>>4))
+/* IbsOpCntCtl (bit 19) clear (count cycles), IbsOpMaxCnt = 100000 >> 4 */
+#define IBS_OP_DEFAULT ((0ULL<<19)|(100000ULL>>4))
+
+#define IBS_MAX_CNT 0x0000FFFFULL
+
+#define IBS_FETCH_SYSFS "/sys/bus/event_source/devices/ibs_fetch/type"
+#define IBS_OP_SYSFS "/sys/bus/event_source/devices/ibs_op/type"
+
+static int ibs_config(struct perf_config *config, int argc, char **argv)
+{
+ int c;
+
+ memset(config, 0, sizeof(*config));
+ config->pid = -1; /* support for system wide profiling only */
+ config->cpu = -1;
+ config->mmap_pages = 1; /* need buffer for ibs */
+
+ c = getopt(argc, argv, "+h");
+ if (c != -1 || !argv[optind]) {
+ usage();
+ exit(0);
+ }
+
+ if (!strcmp(argv[optind], "ibs_fetch")) {
+ config->sysfs = IBS_FETCH_SYSFS;
+ config->config = IBS_FETCH_DEFAULT;
+ } else if (!strcmp(argv[optind], "ibs_op")) {
+ config->sysfs = IBS_OP_SYSFS;
+ config->config = IBS_OP_DEFAULT;
+ } else {
+ errx(1, "specify ibs_fetch or ibs_op\n");
+ }
+
+ optind++;
+
+ while (1) {
+ c = getopt(argc, argv, "+he:c:sC:m:v");
+ if (c == -1)
+ break;
+ switch (c) {
+ case 'h':
+ usage();
+ exit(0);
+ case 'e':
+ /* event configuration */
+ config->config = atoll(optarg);
+ break;
+ case 'c':
+ /* sample period */
+ config->sample_period = atoll(optarg);
+ config->config &= ~IBS_MAX_CNT;
+ if (!config->sample_period)
+ errx(1, "invalid sample period");
+ break;
+ case 's':
+ /* system wide profiling */
+ if (config->pid)
+ break;
+ config->pid = -1;
+ config->cpu = -1;
+ break;
+ case 'C':
+ /* profile cpu */
+ config->pid = -1;
+ config->cpu = atoi(optarg);
+ break;
+ case 'm':
+ config->mmap_pages = atoi(optarg);
+ break;
+ default:
+ errx(1, "unknown option");
+ }
+ }
+
+ if (!argv[optind])
+ errx(1, "you must specify a command to execute\n");
+
+ config->argv = argv + optind;
+
+ if (config->mmap_pages > 1 &&
+     (config->mmap_pages & (config->mmap_pages - 1)))
+ errx(1, "number of pages must be a power of 2");
+
+ return 0;
+}
+
+#define BUFSIZ_ATOI 32
+
+static int get_pmu_type(char *sysfs)
+{
+ int pmu, ret = 0;
+ char buf[BUFSIZ_ATOI];
+ ssize_t size;
+
+ pmu = open(sysfs, O_RDONLY);
+ if (pmu == -1)
+ return -errno;
+ size = read(pmu, buf, BUFSIZ_ATOI - 1);
+ if (size < 0)
+ ret = -errno;
+ close(pmu);
+
+ if (ret)
+ return ret;
+
+ buf[size] = '\0'; /* NUL-terminate before parsing */
+
+ return atoi(buf);
+}
+
+static volatile int done = 0;
+
+static void cld_handler(int n)
+{
+ done = 1;
+}
+
+static int child(char **arg)
+{
+ ptrace(PTRACE_TRACEME, 0, NULL, NULL);
+ execvp(arg[0], arg);
+ return -1;
+}
+
+struct ibs_data {
+ uint32_t caps;
+ uint64_t regs[0];
+} __attribute__ ((packed));
+
+static void print_ibs_fetch(int cpu, struct ibs_data *__ibs)
+{
+ uint64_t *ibs = __ibs->regs;
+ printf("IBS_fetch sample on cpu%d\tIBS0: 0x%016"PRIx64" IBS1: 0x%016"PRIx64" IBS2:0x%016"PRIx64"\n",
+ cpu, ibs[0], ibs[1], ibs[2]);
+}
+
+static void print_ibs_op(int cpu, struct ibs_data *__ibs)
+{
+ uint64_t *ibs = __ibs->regs;
+ printf("IBS_OP sample on cpu%d\t"
+ "\t IBS0: 0x%016"PRIx64" IBS1: 0x%016"PRIx64" IBS2: 0x%016"PRIx64"\n"
+ "\tIBS3: 0x%016"PRIx64" IBS4: 0x%016"PRIx64" IBS5: 0x%016"PRIx64" IBS6: 0x%016"PRIx64"\n",
+ cpu, ibs[0], ibs[1], ibs[2], ibs[3], ibs[4], ibs[5], ibs[6]);
+}
+
+#define MSR_AMD64_IBSFETCH_SIZE 3
+#define MSR_AMD64_IBSOP_SIZE 7
+
+static int print_ibs(struct perf_sample *sample)
+{
+ switch (sample->raw_size >> 3) {
+ case MSR_AMD64_IBSFETCH_SIZE:
+ print_ibs_fetch(sample->cpu, sample->raw_data);
+ return 0;
+ case MSR_AMD64_IBSOP_SIZE:
+ print_ibs_op(sample->cpu, sample->raw_data);
+ return 0;
+ default:
+ printf("invalid: raw_size = %d, p = %p\n",
+ sample->raw_size, (u64*)sample->raw_data);
+ return -EINVAL;
+ }
+}
+
+static void print_event(union perf_event *event)
+{
+ int idx, size = event->sample.header.size;
+ u64 *val = event->sample.array;
+
+ printf("unrecognized event, type = %d, size = %d, header = 0x%016"PRIx64":\n",
+ event->sample.header.type, size, *(u64*)&event->sample.header);
+
+ for (idx = 1; size > 0; idx++, size -= 8) {
+ printf(" 0x%016"PRIx64, *val++);
+ if (!(idx % 8))
+ printf("\n");
+ }
+ printf("\n");
+}
+
+static int ibs_run(struct perf_config *config)
+{
+ struct perf_event_attr attr;
+ struct perf_sample sample;
+ struct perf_evsel *evsel = NULL;
+ struct perf_evlist *evlist = NULL;
+ struct cpu_map *cpus = NULL;
+ struct thread_map *threads = NULL;
+ struct perf_evsel *pos, *n;
+ union perf_event *event;
+ pid_t pid = config->pid;
+ char cpu_list[8];
+ int type, idx, status, ready = 0;
+ int ret = -ENOMEM;
+ static uint64_t ovfl_count; /* static to avoid setjmp issue */
+
+ type = get_pmu_type(config->sysfs);
+ if (type < 0) {
+ fprintf(stderr, "Failed to get pmu type: %d\n", type);
+ return type;
+ }
+
+ memset(&attr, 0, sizeof(attr));
+ attr.type = type;
+ attr.sample_type = PERF_SAMPLE_CPU | PERF_SAMPLE_RAW;
+ attr.sample_period = config->sample_period;
+ attr.config = config->config;
+
+ evsel = perf_evsel__new(&attr, 0);
+
+ if (config->cpu == -1) {
+ cpus = cpu_map__new(NULL);
+ } else {
+ snprintf(cpu_list, sizeof(cpu_list), "%d", config->cpu);
+ cpus = cpu_map__new(cpu_list);
+ }
+
+ threads = thread_map__new(pid, pid);
+
+ evlist = perf_evlist__new(cpus, threads);
+
+ if (!evsel || !evlist || !cpus || !threads)
+ goto out;
+
+ ret = perf_evsel__alloc_counts(evsel, cpus->nr);
+ if (ret < 0)
+ goto out;
+
+ perf_evlist__add(evlist, evsel);
+
+ list_for_each_entry(pos, &evlist->entries, node) {
+ if (perf_evsel__open(pos, evlist->cpus, evlist->threads, 0) < 0) {
+ ret = -errno;
+ fprintf(stderr, "cannot open events, %d\n", ret);
+ goto out;
+ }
+ }
+
+ if (perf_evlist__mmap(evlist, config->mmap_pages, false) < 0) {
+ ret = -errno;
+ fprintf(stderr, "failed to mmap with %d (%s)\n",
+ ret, strerror(ret));
+ goto out;
+ }
+
+ /*
+ * Create the child task
+ */
+ if ((pid = fork()) == -1) {
+ ret = -errno;
+ fprintf(stderr, "cannot fork process\n");
+ goto out;
+ }
+
+ if (pid == 0)
+ exit(child(config->argv));
+
+ /*
+ * wait for the child to exec
+ */
+ ret = waitpid(pid, &status, WUNTRACED);
+ if (ret == -1)
+ err(1, "waitpid failed");
+
+ if (WIFEXITED(status))
+ errx(1, "task %s [%d] exited already status %d\n",
+ config->argv[0], pid, WEXITSTATUS(status));
+
+ /*
+ * effectively activate monitoring
+ */
+ ptrace(PTRACE_DETACH, pid, NULL, 0);
+
+ signal(SIGCHLD, cld_handler);
+
+ /*
+ * core loop
+ */
+ for (ret = 0; !ret; ) {
+ if (done && ready)
+ break;
+ ready = done;
+
+ ret = poll(evlist->pollfd, evlist->nr_fds, done ? 0 : -1);
+
+ if (ret > 0) {
+ ovfl_count += ret;
+ } else if (ret < 0) {
+ ret = -errno;
+ if (ret != -EINTR)
+ break;
+ ret = 0;
+ }
+
+ list_for_each_entry(pos, &evlist->entries, node) {
+ if (ret < 0)
+ break;
+ ret = __perf_evsel__read(pos, evlist->cpus->nr,
+ evlist->threads->nr, false);
+ }
+
+ for (idx = 0; !ret && idx < evlist->nr_fds; idx++) {
+ if (done)
+ ioctl(evlist->pollfd[idx].fd,
+ PERF_EVENT_IOC_DISABLE);
+ while ((event = perf_evlist__mmap_read(evlist, idx))) {
+ ready = 0;
+ ret = perf_event__parse_sample(event,
+ evsel->attr.sample_type,
+ perf_evsel__sample_size(evsel),
+ false, &sample);
+ if (ret)
+ break;
+ collected_samples++;
+ if (print_ibs(&sample))
+ print_event(event);
+ }
+ }
+ }
+
+ /*
+ * cleanup child
+ */
+ waitpid(pid, &status, 0);
+
+ printf("%"PRIu64" samples collected in %"PRIu64" poll events, %"PRIu64" lost samples\n",
+ collected_samples, ovfl_count, lost_samples);
+ if (collected_samples)
+ printf("avg period=%"PRIu64"\n", sum_period / collected_samples);
+out:
+ if (evlist) {
+ perf_evlist__munmap(evlist);
+ list_for_each_entry_safe(pos, n, &evlist->entries, node) {
+ perf_evsel__close_fd(pos, evlist->cpus->nr,
+ evlist->threads->nr);
+ list_del(&pos->node);
+ }
+ free(evsel->counts);
+ evsel->counts = NULL;
+ perf_evlist__delete_maps(evlist);
+ cpus = NULL;
+ threads = NULL;
+ }
+ free(evsel);
+ free(evlist);
+ free(cpus);
+ free(threads);
+
+ return ret;
+}
+
+int main(int argc, char **argv)
+{
+ struct perf_config config;
+ int ret;
+
+ ret = ibs_config(&config, argc, argv);
+ if (ret)
+ goto fail;
+ ret = ibs_run(&config);
+ if (ret)
+ goto fail;
+ return 0;
+fail:
+ printf("An error occurred: %d (%s)\n", -ret, strerror(-ret));
+ return -1;
+}
--
1.7.6.1
* Re: [PATCH 3/7] perf, x86: Implement IBS event configuration
2011-09-07 16:36 ` [PATCH 3/7] perf, x86: Implement IBS event configuration Robert Richter
@ 2011-09-14 15:35 ` Peter Zijlstra
0 siblings, 0 replies; 14+ messages in thread
From: Peter Zijlstra @ 2011-09-14 15:35 UTC (permalink / raw)
To: Robert Richter; +Cc: Ingo Molnar, Stephane Eranian, LKML
On Wed, 2011-09-07 at 18:36 +0200, Robert Richter wrote:
> +static struct perf_ibs perf_ibs_fetch = {
> + .pmu = {
> + .event_init = perf_ibs_init,
> + .add = perf_ibs_add,
> + .del = perf_ibs_del,
> + },
> + .msr = MSR_AMD64_IBSFETCHCTL,
> + .config_mask = IBS_FETCH_CONFIG_MASK,
> + .cnt_mask = IBS_FETCH_MAX_CNT,
> + .enable_mask = IBS_FETCH_ENABLE,
> +};
> +
> +static struct perf_ibs perf_ibs_op = {
> + .pmu = {
> + .event_init = perf_ibs_init,
> + .add = perf_ibs_add,
> + .del = perf_ibs_del,
> + },
> + .msr = MSR_AMD64_IBSOPCTL,
> + .config_mask = IBS_OP_CONFIG_MASK,
> + .cnt_mask = IBS_OP_MAX_CNT,
> + .enable_mask = IBS_OP_ENABLE,
> };
Both those .pmu things want:
.task_ctx_nr = perf_invalid_context,
disabling per-task events, or you need to create a new context and
somehow fix per-task scheduling for these two things.
Adding two task contexts (one for each type) really isn't nice.
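For illustration, a minimal sketch of the change being asked for
(shown for the fetch pmu only; ibs_op would be analogous):
	static struct perf_ibs perf_ibs_fetch = {
		.pmu = {
			.task_ctx_nr	= perf_invalid_context, /* no per-task events */
			.event_init	= perf_ibs_init,
			.add		= perf_ibs_add,
			.del		= perf_ibs_del,
		},
		.msr		= MSR_AMD64_IBSFETCHCTL,
		.config_mask	= IBS_FETCH_CONFIG_MASK,
		.cnt_mask	= IBS_FETCH_MAX_CNT,
		.enable_mask	= IBS_FETCH_ENABLE,
	};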
* Re: [PATCH 4/7] perf, x86: Implement IBS interrupt handler
2011-09-07 16:36 ` [PATCH 4/7] perf, x86: Implement IBS interrupt handler Robert Richter
@ 2011-09-14 16:13 ` Peter Zijlstra
2011-09-21 8:39 ` Robert Richter
0 siblings, 1 reply; 14+ messages in thread
From: Peter Zijlstra @ 2011-09-14 16:13 UTC (permalink / raw)
To: Robert Richter; +Cc: Ingo Molnar, Stephane Eranian, LKML
On Wed, 2011-09-07 at 18:36 +0200, Robert Richter wrote:
> + if (event->attr.sample_type & PERF_SAMPLE_RAW) {
> + ibs_data.caps = ibs_caps;
> + size = 1;
> + offset = 1;
> + do {
> + rdmsrl(msr + offset, *buf++);
> + size++;
> + offset = find_next_bit(perf_ibs->offset_mask,
> + perf_ibs->offset_max,
> + offset + 1);
> + } while (offset < perf_ibs->offset_max);
> + raw.size = sizeof(u32) + sizeof(u64) * size;
> + raw.data = ibs_data.data;
> + data.raw = &raw;
> + }
Another application of for_each_set_bit_continue() ?
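For reference, a minimal sketch of that loop, assuming a
for_each_set_bit_continue(bit, addr, size) helper that simply wraps
find_next_bit(addr, size, bit + 1) in a for loop (no such helper
exists in the tree at this point):
	offset = 1;
	rdmsrl(msr + offset, *buf++);	/* offset 1 is always read */
	size = 2;			/* reg 0 plus the offset-1 read */
	for_each_set_bit_continue(offset, perf_ibs->offset_mask,
				  perf_ibs->offset_max) {
		rdmsrl(msr + offset, *buf++);
		size++;
	}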
* Re: [PATCH 4/7] perf, x86: Implement IBS interrupt handler
2011-09-14 16:13 ` Peter Zijlstra
@ 2011-09-21 8:39 ` Robert Richter
0 siblings, 0 replies; 14+ messages in thread
From: Robert Richter @ 2011-09-21 8:39 UTC (permalink / raw)
To: Peter Zijlstra; +Cc: Ingo Molnar, Stephane Eranian, LKML
On 14.09.11 12:13:25, Peter Zijlstra wrote:
> On Wed, 2011-09-07 at 18:36 +0200, Robert Richter wrote:
> > + if (event->attr.sample_type & PERF_SAMPLE_RAW) {
> > + ibs_data.caps = ibs_caps;
> > + size = 1;
> > + offset = 1;
> > + do {
> > + rdmsrl(msr + offset, *buf++);
> > + size++;
> > + offset = find_next_bit(perf_ibs->offset_mask,
> > + perf_ibs->offset_max,
> > + offset + 1);
> > + } while (offset < perf_ibs->offset_max);
> > + raw.size = sizeof(u32) + sizeof(u64) * size;
> > + raw.data = ibs_data.data;
> > + data.raw = &raw;
> > + }
>
> Another application of for_each_set_bit_continue() ?
Peter, I will make this a separate patch set to decouple it from this
one, and will then update this code too. So in my next IBS patch set
I won't change this here.
-Robert
--
Advanced Micro Devices, Inc.
Operating System Research Center
end of thread (newest: 2011-09-21 8:40 UTC)
Thread overview: 14+ messages
2011-09-07 16:36 [PATCH 0/7 -v2] perf, x86: Implement AMD IBS Robert Richter
2011-09-07 16:36 ` [PATCH 1/7] perf, x86: share IBS macros between perf and oprofile Robert Richter
2011-09-07 16:36 ` [PATCH 2/7] perf, x86: Implement IBS initialization Robert Richter
2011-09-07 16:36 ` [PATCH 3/7] perf, x86: Implement IBS event configuration Robert Richter
2011-09-14 15:35 ` Peter Zijlstra
2011-09-07 16:36 ` [PATCH 4/7] perf, x86: Implement IBS interrupt handler Robert Richter
2011-09-14 16:13 ` Peter Zijlstra
2011-09-21 8:39 ` Robert Richter
2011-09-07 16:36 ` [PATCH 5/7] perf, x86: Implement IBS pmu control ops Robert Richter
2011-09-07 16:36 ` [PATCH 6/7] perf, x86: Implement 64 bit counter support for IBS Robert Richter
2011-09-07 16:36 ` [PATCH 7/7] perf, x86: Example code for AMD IBS Robert Richter
-- strict thread matches above, loose matches on Subject: below --
2011-07-28 13:46 [PATCH 0/7] perf, x86: Implement " Robert Richter
2011-07-28 13:46 ` [PATCH 3/7] perf, x86: Implement IBS event configuration Robert Richter
2011-08-02 11:35 ` Peter Zijlstra
2011-08-12 19:51 ` Robert Richter