[PATCH 4/6] ARM: oprofile: use perf-events framework as backend

All of lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH 4/6] ARM: oprofile: use perf-events framework as backend
  2010-04-12 15:04     ` [PATCH 3/6] perf-events: allow modules to query the maximum number of perf events Will Deacon
@ 2010-04-12 15:04       ` Will Deacon
  0 siblings, 0 replies; 4+ messages in thread
From: Will Deacon @ 2010-04-12 15:04 UTC (permalink / raw)
  To: linux-arm-kernel

There are currently two hardware performance monitoring subsystems in the
kernel for ARM: OProfile and perf-events. This creates the following problems:

1.) Duplicate PMU accessor code. Inevitable code drift may lead to bugs in one
framework that are fixed in the other.

2.) Locking issues. OProfile doesn't reprogram hardware counters between
profiling runs if the events to be monitored have not been changed. This means
that other profiling frameworks cannot use the counters if OProfile is in use.

3.) Due to differences in the two frameworks, it may not be possible to
compare the results obtained by OProfile with those obtained by perf.

This patch removes the OProfile PMU driver code and replaces it with calls
to perf, therefore solving the issues mentioned above.

The only userspace-visible change is the lack of SCU counter support for
11MPCore. This is currently unsupported by OProfile userspace tools anyway and
therefore shouldn't cause any problems.

Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Robert Richter <robert.richter@amd.com>
Cc: Jamie Iles <jamie.iles@picochip.com>
Cc: Jean Pihet <jpihet@mvista.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm/oprofile/common.c |  333 +++++++++++++++++++++++++++++++++++++-------
 1 files changed, 281 insertions(+), 52 deletions(-)

diff --git a/arch/arm/oprofile/common.c b/arch/arm/oprofile/common.c
index 3fcd752..aa36307 100644
--- a/arch/arm/oprofile/common.c
+++ b/arch/arm/oprofile/common.c
@@ -2,32 +2,183 @@
  * @file common.c
  *
  * @remark Copyright 2004 Oprofile Authors
+ * @remark Copyright 2010 ARM Ltd.
  * @remark Read the file COPYING
  *
  * @author Zwane Mwaikambo
+ * @author Will Deacon [move to perf]
  */
 
+#include <linux/cpumask.h>
+#include <linux/errno.h>
 #include <linux/init.h>
+#include <linux/mutex.h>
 #include <linux/oprofile.h>
-#include <linux/errno.h>
+#include <linux/perf_event.h>
 #include <linux/slab.h>
 #include <linux/sysdev.h>
-#include <linux/mutex.h>
+#include <asm/stacktrace.h>
+#include <linux/uaccess.h>
+
+#include <asm/perf_event.h>
+#include <asm/ptrace.h>
 
-#include "op_counter.h"
-#include "op_arm_model.h"
+#ifdef CONFIG_HW_PERF_EVENTS
+/*
+ * Per performance monitor configuration as set via oprofilefs.
+ */
+struct op_counter_config {
+	unsigned long count;
+	unsigned long enabled;
+	unsigned long event;
+	unsigned long unit_mask;
+	unsigned long kernel;
+	unsigned long user;
+	struct perf_event_attr attr;
+};
 
-static struct op_arm_model_spec *op_arm_model;
 static int op_arm_enabled;
 static DEFINE_MUTEX(op_arm_mutex);
 
-struct op_counter_config *counter_config;
+static struct op_counter_config *counter_config;
+static struct perf_event **perf_events[nr_cpumask_bits];
+static int perf_num_counters;
+
+/*
+ * Overflow callback for oprofile.
+ */
+static void op_overflow_handler(struct perf_event *event, int unused,
+			struct perf_sample_data *data, struct pt_regs *regs)
+{
+	int id;
+	u32 cpu = smp_processor_id();
+
+	for (id = 0; id < perf_num_counters; ++id)
+		if (perf_events[cpu][id] == event)
+			break;
+
+	if (id != perf_num_counters)
+		oprofile_add_sample(regs, id);
+	else
+		pr_warning("oprofile: ignoring spurious overflow "
+				"on cpu %u\n", cpu);
+}
+
+/*
+ * Called by op_arm_setup to create perf attributes to mirror the oprofile
+ * settings in counter_config. Attributes are created as `pinned' events and
+ * so are permanently scheduled on the PMU.
+ */
+static void op_perf_setup(void)
+{
+	int i;
+	u32 size = sizeof(struct perf_event_attr);
+	struct perf_event_attr *attr;
+
+	for (i = 0; i < perf_num_counters; ++i) {
+		attr = &counter_config[i].attr;
+		memset(attr, 0, size);
+		attr->type		= PERF_TYPE_RAW;
+		attr->size		= size;
+		attr->config		= counter_config[i].event;
+		attr->sample_period	= counter_config[i].count;
+		attr->pinned		= 1;
+	}
+}
+
+static int op_create_counter(int cpu, int event)
+{
+	int ret = 0;
+	struct perf_event *pevent;
+
+	if (!counter_config[event].enabled || (perf_events[cpu][event] != NULL))
+		return ret;
+
+	pevent = perf_event_create_kernel_counter(&counter_config[event].attr,
+						  cpu, -1,
+						  op_overflow_handler);
+
+	if (IS_ERR(pevent)) {
+		ret = PTR_ERR(pevent);
+	} else if (pevent->state != PERF_EVENT_STATE_ACTIVE) {
+		pr_warning("oprofile: failed to enable event %d "
+				"on CPU %d\n", event, cpu);
+		ret = -EBUSY;
+	} else {
+		perf_events[cpu][event] = pevent;
+	}
+
+	return ret;
+}
+
+static void op_destroy_counter(int cpu, int event)
+{
+	struct perf_event *pevent = perf_events[cpu][event];
+
+	if (pevent) {
+		perf_event_release_kernel(pevent);
+		perf_events[cpu][event] = NULL;
+	}
+}
+
+/*
+ * Called by op_arm_start to create active perf events based on the
+ * perviously configured attributes.
+ */
+static int op_perf_start(void)
+{
+	int cpu, event, ret = 0;
+
+	for_each_online_cpu(cpu) {
+		for (event = 0; event < perf_num_counters; ++event) {
+			ret = op_create_counter(cpu, event);
+			if (ret)
+				goto out;
+		}
+	}
+
+out:
+	return ret;
+}
+
+/*
+ * Called by op_arm_stop@the end of a profiling run.
+ */
+static void op_perf_stop(void)
+{
+	int cpu, event;
+
+	for_each_online_cpu(cpu)
+		for (event = 0; event < perf_num_counters; ++event)
+			op_destroy_counter(cpu, event);
+}
+
+
+static char *op_name_from_perf_id(enum arm_perf_pmu_ids id)
+{
+	switch (id) {
+	case ARM_PERF_PMU_ID_XSCALE1:
+		return "arm/xscale1";
+	case ARM_PERF_PMU_ID_XSCALE2:
+		return "arm/xscale2";
+	case ARM_PERF_PMU_ID_V6:
+		return "arm/armv6";
+	case ARM_PERF_PMU_ID_V6MP:
+		return "arm/mpcore";
+	case ARM_PERF_PMU_ID_CA8:
+		return "arm/armv7";
+	case ARM_PERF_PMU_ID_CA9:
+		return "arm/armv7-ca9";
+	default:
+		return NULL;
+	}
+}
 
 static int op_arm_create_files(struct super_block *sb, struct dentry *root)
 {
 	unsigned int i;
 
-	for (i = 0; i < op_arm_model->num_counters; i++) {
+	for (i = 0; i < perf_num_counters; i++) {
 		struct dentry *dir;
 		char buf[4];
 
@@ -46,12 +197,10 @@ static int op_arm_create_files(struct super_block *sb, struct dentry *root)
 
 static int op_arm_setup(void)
 {
-	int ret;
-
 	spin_lock(&oprofilefs_lock);
-	ret = op_arm_model->setup_ctrs();
+	op_perf_setup();
 	spin_unlock(&oprofilefs_lock);
-	return ret;
+	return 0;
 }
 
 static int op_arm_start(void)
@@ -60,8 +209,9 @@ static int op_arm_start(void)
 
 	mutex_lock(&op_arm_mutex);
 	if (!op_arm_enabled) {
-		ret = op_arm_model->start();
-		op_arm_enabled = !ret;
+		ret = 0;
+		op_perf_start();
+		op_arm_enabled = 1;
 	}
 	mutex_unlock(&op_arm_mutex);
 	return ret;
@@ -71,7 +221,7 @@ static void op_arm_stop(void)
 {
 	mutex_lock(&op_arm_mutex);
 	if (op_arm_enabled)
-		op_arm_model->stop();
+		op_perf_stop();
 	op_arm_enabled = 0;
 	mutex_unlock(&op_arm_mutex);
 }
@@ -81,7 +231,7 @@ static int op_arm_suspend(struct sys_device *dev, pm_message_t state)
 {
 	mutex_lock(&op_arm_mutex);
 	if (op_arm_enabled)
-		op_arm_model->stop();
+		op_perf_stop();
 	mutex_unlock(&op_arm_mutex);
 	return 0;
 }
@@ -89,7 +239,7 @@ static int op_arm_suspend(struct sys_device *dev, pm_message_t state)
 static int op_arm_resume(struct sys_device *dev)
 {
 	mutex_lock(&op_arm_mutex);
-	if (op_arm_enabled && op_arm_model->start())
+	if (op_arm_enabled && op_perf_start())
 		op_arm_enabled = 0;
 	mutex_unlock(&op_arm_mutex);
 	return 0;
@@ -126,58 +276,137 @@ static void  exit_driverfs(void)
 #define exit_driverfs() do { } while (0)
 #endif /* CONFIG_PM */
 
-int __init oprofile_arch_init(struct oprofile_operations *ops)
+static int report_trace(struct stackframe *frame, void *d)
 {
-	struct op_arm_model_spec *spec = NULL;
-	int ret = -ENODEV;
+	unsigned int *depth = d;
 
-	ops->backtrace = arm_backtrace;
+	if (*depth) {
+		oprofile_add_trace(frame->pc);
+		(*depth)--;
+	}
 
-#ifdef CONFIG_CPU_XSCALE
-	spec = &op_xscale_spec;
-#endif
+	return *depth == 0;
+}
 
-#ifdef CONFIG_OPROFILE_ARMV6
-	spec = &op_armv6_spec;
-#endif
+/*
+ * The registers we're interested in are at the end of the variable
+ * length saved register structure. The fp points at the end of this
+ * structure so the address of this struct is:
+ * (struct frame_tail *)(xxx->fp)-1
+ */
+struct frame_tail {
+	struct frame_tail *fp;
+	unsigned long sp;
+	unsigned long lr;
+} __attribute__((packed));
 
-#ifdef CONFIG_OPROFILE_MPCORE
-	spec = &op_mpcore_spec;
-#endif
+static struct frame_tail* user_backtrace(struct frame_tail *tail)
+{
+	struct frame_tail buftail[2];
 
-#ifdef CONFIG_OPROFILE_ARMV7
-	spec = &op_armv7_spec;
-#endif
+	/* Also check accessibility of one struct frame_tail beyond */
+	if (!access_ok(VERIFY_READ, tail, sizeof(buftail)))
+		return NULL;
+	if (__copy_from_user_inatomic(buftail, tail, sizeof(buftail)))
+		return NULL;
 
-	if (spec) {
-		ret = spec->init();
-		if (ret < 0)
-			return ret;
+	oprofile_add_trace(buftail[0].lr);
 
-		counter_config = kcalloc(spec->num_counters, sizeof(struct op_counter_config),
-					 GFP_KERNEL);
-		if (!counter_config)
-			return -ENOMEM;
+	/* frame pointers should strictly progress back up the stack
+	 * (towards higher addresses) */
+	if (tail >= buftail[0].fp)
+		return NULL;
+
+	return buftail[0].fp-1;
+}
+
+static void arm_backtrace(struct pt_regs * const regs, unsigned int depth)
+{
+	struct frame_tail *tail = ((struct frame_tail *) regs->ARM_fp) - 1;
+
+	if (!user_mode(regs)) {
+		struct stackframe frame;
+		frame.fp = regs->ARM_fp;
+		frame.sp = regs->ARM_sp;
+		frame.lr = regs->ARM_lr;
+		frame.pc = regs->ARM_pc;
+		walk_stackframe(&frame, report_trace, &depth);
+		return;
+	}
+
+	while (depth-- && tail && !((unsigned long) tail & 3))
+		tail = user_backtrace(tail);
+}
+
+int __init oprofile_arch_init(struct oprofile_operations *ops)
+{
+	int cpu, ret = 0;
+
+	perf_num_counters = perf_get_max_events();
+
+	counter_config = kcalloc(perf_num_counters,
+			sizeof(struct op_counter_config), GFP_KERNEL);
 
-		op_arm_model = spec;
-		init_driverfs();
-		ops->create_files = op_arm_create_files;
-		ops->setup = op_arm_setup;
-		ops->shutdown = op_arm_stop;
-		ops->start = op_arm_start;
-		ops->stop = op_arm_stop;
-		ops->cpu_type = op_arm_model->name;
-		printk(KERN_INFO "oprofile: using %s\n", spec->name);
+	if (!counter_config) {
+		pr_info("oprofile: failed to allocate %d "
+				"counters\n", perf_num_counters);
+		return -ENOMEM;
 	}
 
+	for_each_possible_cpu(cpu) {
+		perf_events[cpu] = kcalloc(perf_num_counters,
+				sizeof(struct perf_event *), GFP_KERNEL);
+		if (!perf_events[cpu]) {
+			pr_info("oprofile: failed to allocate %d perf events "
+					"for cpu %d\n", perf_num_counters, cpu);
+			while (--cpu >= 0)
+				kfree(perf_events[cpu]);
+			return -ENOMEM;
+		}
+	}
+
+	init_driverfs();
+	ops->backtrace		= arm_backtrace;
+	ops->create_files	= op_arm_create_files;
+	ops->setup		= op_arm_setup;
+	ops->start		= op_arm_start;
+	ops->stop		= op_arm_stop;
+	ops->shutdown		= op_arm_stop;
+	ops->cpu_type		= op_name_from_perf_id(armpmu_get_pmu_id());
+
+	if (!ops->cpu_type)
+		ret = -ENODEV;
+	else
+		pr_info("oprofile: using %s\n", ops->cpu_type);
+
 	return ret;
 }
 
 void oprofile_arch_exit(void)
 {
-	if (op_arm_model) {
+	int cpu, id;
+	struct perf_event *event;
+
+	if (*perf_events) {
 		exit_driverfs();
-		op_arm_model = NULL;
+		for_each_possible_cpu(cpu) {
+			for (id = 0; id < perf_num_counters; ++id) {
+				event = perf_events[cpu][id];
+				if (event != NULL)
+					perf_event_release_kernel(event);
+			}
+			kfree(perf_events[cpu]);
+		}
 	}
-	kfree(counter_config);
+
+	if (counter_config)
+		kfree(counter_config);
+}
+#else
+int __init oprofile_arch_init(struct oprofile_operations *ops)
+{
+	pr_info("oprofile: hardware counters not available\n");
+	return -ENODEV;
 }
+void oprofile_arch_exit(void) {}
+#endif /* CONFIG_HW_PERF_EVENTS */
-- 
1.6.3.3

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH 4/6] ARM: oprofile: use perf-events framework as backend
  2010-04-19 16:42     ` [PATCH 3/6] ARM: perf-events: allow modules to query the number of hardware counters Will Deacon
@ 2010-04-19 16:42       ` Will Deacon
  0 siblings, 0 replies; 4+ messages in thread
From: Will Deacon @ 2010-04-19 16:42 UTC (permalink / raw)
  To: linux-arm-kernel

There are currently two hardware performance monitoring subsystems in the
kernel for ARM: OProfile and perf-events. This creates the following problems:

1.) Duplicate PMU accessor code. Inevitable code drift may lead to bugs in one
framework that are fixed in the other.

2.) Locking issues. OProfile doesn't reprogram hardware counters between
profiling runs if the events to be monitored have not been changed. This means
that other profiling frameworks cannot use the counters if OProfile is in use.

3.) Due to differences in the two frameworks, it may not be possible to
compare the results obtained by OProfile with those obtained by perf.

This patch removes the OProfile PMU driver code and replaces it with calls
to perf, therefore solving the issues mentioned above.

The only userspace-visible change is the lack of SCU counter support for
11MPCore. This is currently unsupported by OProfile userspace tools anyway and
therefore shouldn't cause any problems.

Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Robert Richter <robert.richter@amd.com>
Cc: Jamie Iles <jamie.iles@picochip.com>
Cc: Jean Pihet <jpihet@mvista.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm/oprofile/common.c |  333 +++++++++++++++++++++++++++++++++++++-------
 1 files changed, 281 insertions(+), 52 deletions(-)

diff --git a/arch/arm/oprofile/common.c b/arch/arm/oprofile/common.c
index 3fcd752..aad83df 100644
--- a/arch/arm/oprofile/common.c
+++ b/arch/arm/oprofile/common.c
@@ -2,32 +2,183 @@
  * @file common.c
  *
  * @remark Copyright 2004 Oprofile Authors
+ * @remark Copyright 2010 ARM Ltd.
  * @remark Read the file COPYING
  *
  * @author Zwane Mwaikambo
+ * @author Will Deacon [move to perf]
  */
 
+#include <linux/cpumask.h>
+#include <linux/errno.h>
 #include <linux/init.h>
+#include <linux/mutex.h>
 #include <linux/oprofile.h>
-#include <linux/errno.h>
+#include <linux/perf_event.h>
 #include <linux/slab.h>
 #include <linux/sysdev.h>
-#include <linux/mutex.h>
+#include <asm/stacktrace.h>
+#include <linux/uaccess.h>
+
+#include <asm/perf_event.h>
+#include <asm/ptrace.h>
 
-#include "op_counter.h"
-#include "op_arm_model.h"
+#ifdef CONFIG_HW_PERF_EVENTS
+/*
+ * Per performance monitor configuration as set via oprofilefs.
+ */
+struct op_counter_config {
+	unsigned long count;
+	unsigned long enabled;
+	unsigned long event;
+	unsigned long unit_mask;
+	unsigned long kernel;
+	unsigned long user;
+	struct perf_event_attr attr;
+};
 
-static struct op_arm_model_spec *op_arm_model;
 static int op_arm_enabled;
 static DEFINE_MUTEX(op_arm_mutex);
 
-struct op_counter_config *counter_config;
+static struct op_counter_config *counter_config;
+static struct perf_event **perf_events[nr_cpumask_bits];
+static int perf_num_counters;
+
+/*
+ * Overflow callback for oprofile.
+ */
+static void op_overflow_handler(struct perf_event *event, int unused,
+			struct perf_sample_data *data, struct pt_regs *regs)
+{
+	int id;
+	u32 cpu = smp_processor_id();
+
+	for (id = 0; id < perf_num_counters; ++id)
+		if (perf_events[cpu][id] == event)
+			break;
+
+	if (id != perf_num_counters)
+		oprofile_add_sample(regs, id);
+	else
+		pr_warning("oprofile: ignoring spurious overflow "
+				"on cpu %u\n", cpu);
+}
+
+/*
+ * Called by op_arm_setup to create perf attributes to mirror the oprofile
+ * settings in counter_config. Attributes are created as `pinned' events and
+ * so are permanently scheduled on the PMU.
+ */
+static void op_perf_setup(void)
+{
+	int i;
+	u32 size = sizeof(struct perf_event_attr);
+	struct perf_event_attr *attr;
+
+	for (i = 0; i < perf_num_counters; ++i) {
+		attr = &counter_config[i].attr;
+		memset(attr, 0, size);
+		attr->type		= PERF_TYPE_RAW;
+		attr->size		= size;
+		attr->config		= counter_config[i].event;
+		attr->sample_period	= counter_config[i].count;
+		attr->pinned		= 1;
+	}
+}
+
+static int op_create_counter(int cpu, int event)
+{
+	int ret = 0;
+	struct perf_event *pevent;
+
+	if (!counter_config[event].enabled || (perf_events[cpu][event] != NULL))
+		return ret;
+
+	pevent = perf_event_create_kernel_counter(&counter_config[event].attr,
+						  cpu, -1,
+						  op_overflow_handler);
+
+	if (IS_ERR(pevent)) {
+		ret = PTR_ERR(pevent);
+	} else if (pevent->state != PERF_EVENT_STATE_ACTIVE) {
+		pr_warning("oprofile: failed to enable event %d "
+				"on CPU %d\n", event, cpu);
+		ret = -EBUSY;
+	} else {
+		perf_events[cpu][event] = pevent;
+	}
+
+	return ret;
+}
+
+static void op_destroy_counter(int cpu, int event)
+{
+	struct perf_event *pevent = perf_events[cpu][event];
+
+	if (pevent) {
+		perf_event_release_kernel(pevent);
+		perf_events[cpu][event] = NULL;
+	}
+}
+
+/*
+ * Called by op_arm_start to create active perf events based on the
+ * perviously configured attributes.
+ */
+static int op_perf_start(void)
+{
+	int cpu, event, ret = 0;
+
+	for_each_online_cpu(cpu) {
+		for (event = 0; event < perf_num_counters; ++event) {
+			ret = op_create_counter(cpu, event);
+			if (ret)
+				goto out;
+		}
+	}
+
+out:
+	return ret;
+}
+
+/*
+ * Called by op_arm_stop@the end of a profiling run.
+ */
+static void op_perf_stop(void)
+{
+	int cpu, event;
+
+	for_each_online_cpu(cpu)
+		for (event = 0; event < perf_num_counters; ++event)
+			op_destroy_counter(cpu, event);
+}
+
+
+static char *op_name_from_perf_id(enum arm_perf_pmu_ids id)
+{
+	switch (id) {
+	case ARM_PERF_PMU_ID_XSCALE1:
+		return "arm/xscale1";
+	case ARM_PERF_PMU_ID_XSCALE2:
+		return "arm/xscale2";
+	case ARM_PERF_PMU_ID_V6:
+		return "arm/armv6";
+	case ARM_PERF_PMU_ID_V6MP:
+		return "arm/mpcore";
+	case ARM_PERF_PMU_ID_CA8:
+		return "arm/armv7";
+	case ARM_PERF_PMU_ID_CA9:
+		return "arm/armv7-ca9";
+	default:
+		return NULL;
+	}
+}
 
 static int op_arm_create_files(struct super_block *sb, struct dentry *root)
 {
 	unsigned int i;
 
-	for (i = 0; i < op_arm_model->num_counters; i++) {
+	for (i = 0; i < perf_num_counters; i++) {
 		struct dentry *dir;
 		char buf[4];
 
@@ -46,12 +197,10 @@ static int op_arm_create_files(struct super_block *sb, struct dentry *root)
 
 static int op_arm_setup(void)
 {
-	int ret;
-
 	spin_lock(&oprofilefs_lock);
-	ret = op_arm_model->setup_ctrs();
+	op_perf_setup();
 	spin_unlock(&oprofilefs_lock);
-	return ret;
+	return 0;
 }
 
 static int op_arm_start(void)
@@ -60,8 +209,9 @@ static int op_arm_start(void)
 
 	mutex_lock(&op_arm_mutex);
 	if (!op_arm_enabled) {
-		ret = op_arm_model->start();
-		op_arm_enabled = !ret;
+		ret = 0;
+		op_perf_start();
+		op_arm_enabled = 1;
 	}
 	mutex_unlock(&op_arm_mutex);
 	return ret;
@@ -71,7 +221,7 @@ static void op_arm_stop(void)
 {
 	mutex_lock(&op_arm_mutex);
 	if (op_arm_enabled)
-		op_arm_model->stop();
+		op_perf_stop();
 	op_arm_enabled = 0;
 	mutex_unlock(&op_arm_mutex);
 }
@@ -81,7 +231,7 @@ static int op_arm_suspend(struct sys_device *dev, pm_message_t state)
 {
 	mutex_lock(&op_arm_mutex);
 	if (op_arm_enabled)
-		op_arm_model->stop();
+		op_perf_stop();
 	mutex_unlock(&op_arm_mutex);
 	return 0;
 }
@@ -89,7 +239,7 @@ static int op_arm_suspend(struct sys_device *dev, pm_message_t state)
 static int op_arm_resume(struct sys_device *dev)
 {
 	mutex_lock(&op_arm_mutex);
-	if (op_arm_enabled && op_arm_model->start())
+	if (op_arm_enabled && op_perf_start())
 		op_arm_enabled = 0;
 	mutex_unlock(&op_arm_mutex);
 	return 0;
@@ -126,58 +276,137 @@ static void  exit_driverfs(void)
 #define exit_driverfs() do { } while (0)
 #endif /* CONFIG_PM */
 
-int __init oprofile_arch_init(struct oprofile_operations *ops)
+static int report_trace(struct stackframe *frame, void *d)
 {
-	struct op_arm_model_spec *spec = NULL;
-	int ret = -ENODEV;
+	unsigned int *depth = d;
 
-	ops->backtrace = arm_backtrace;
+	if (*depth) {
+		oprofile_add_trace(frame->pc);
+		(*depth)--;
+	}
 
-#ifdef CONFIG_CPU_XSCALE
-	spec = &op_xscale_spec;
-#endif
+	return *depth == 0;
+}
 
-#ifdef CONFIG_OPROFILE_ARMV6
-	spec = &op_armv6_spec;
-#endif
+/*
+ * The registers we're interested in are at the end of the variable
+ * length saved register structure. The fp points at the end of this
+ * structure so the address of this struct is:
+ * (struct frame_tail *)(xxx->fp)-1
+ */
+struct frame_tail {
+	struct frame_tail *fp;
+	unsigned long sp;
+	unsigned long lr;
+} __attribute__((packed));
 
-#ifdef CONFIG_OPROFILE_MPCORE
-	spec = &op_mpcore_spec;
-#endif
+static struct frame_tail* user_backtrace(struct frame_tail *tail)
+{
+	struct frame_tail buftail[2];
 
-#ifdef CONFIG_OPROFILE_ARMV7
-	spec = &op_armv7_spec;
-#endif
+	/* Also check accessibility of one struct frame_tail beyond */
+	if (!access_ok(VERIFY_READ, tail, sizeof(buftail)))
+		return NULL;
+	if (__copy_from_user_inatomic(buftail, tail, sizeof(buftail)))
+		return NULL;
 
-	if (spec) {
-		ret = spec->init();
-		if (ret < 0)
-			return ret;
+	oprofile_add_trace(buftail[0].lr);
 
-		counter_config = kcalloc(spec->num_counters, sizeof(struct op_counter_config),
-					 GFP_KERNEL);
-		if (!counter_config)
-			return -ENOMEM;
+	/* frame pointers should strictly progress back up the stack
+	 * (towards higher addresses) */
+	if (tail >= buftail[0].fp)
+		return NULL;
+
+	return buftail[0].fp-1;
+}
+
+static void arm_backtrace(struct pt_regs * const regs, unsigned int depth)
+{
+	struct frame_tail *tail = ((struct frame_tail *) regs->ARM_fp) - 1;
+
+	if (!user_mode(regs)) {
+		struct stackframe frame;
+		frame.fp = regs->ARM_fp;
+		frame.sp = regs->ARM_sp;
+		frame.lr = regs->ARM_lr;
+		frame.pc = regs->ARM_pc;
+		walk_stackframe(&frame, report_trace, &depth);
+		return;
+	}
+
+	while (depth-- && tail && !((unsigned long) tail & 3))
+		tail = user_backtrace(tail);
+}
+
+int __init oprofile_arch_init(struct oprofile_operations *ops)
+{
+	int cpu, ret = 0;
+
+	perf_num_counters = armpmu_get_max_events();
+
+	counter_config = kcalloc(perf_num_counters,
+			sizeof(struct op_counter_config), GFP_KERNEL);
 
-		op_arm_model = spec;
-		init_driverfs();
-		ops->create_files = op_arm_create_files;
-		ops->setup = op_arm_setup;
-		ops->shutdown = op_arm_stop;
-		ops->start = op_arm_start;
-		ops->stop = op_arm_stop;
-		ops->cpu_type = op_arm_model->name;
-		printk(KERN_INFO "oprofile: using %s\n", spec->name);
+	if (!counter_config) {
+		pr_info("oprofile: failed to allocate %d "
+				"counters\n", perf_num_counters);
+		return -ENOMEM;
 	}
 
+	for_each_possible_cpu(cpu) {
+		perf_events[cpu] = kcalloc(perf_num_counters,
+				sizeof(struct perf_event *), GFP_KERNEL);
+		if (!perf_events[cpu]) {
+			pr_info("oprofile: failed to allocate %d perf events "
+					"for cpu %d\n", perf_num_counters, cpu);
+			while (--cpu >= 0)
+				kfree(perf_events[cpu]);
+			return -ENOMEM;
+		}
+	}
+
+	init_driverfs();
+	ops->backtrace		= arm_backtrace;
+	ops->create_files	= op_arm_create_files;
+	ops->setup		= op_arm_setup;
+	ops->start		= op_arm_start;
+	ops->stop		= op_arm_stop;
+	ops->shutdown		= op_arm_stop;
+	ops->cpu_type		= op_name_from_perf_id(armpmu_get_pmu_id());
+
+	if (!ops->cpu_type)
+		ret = -ENODEV;
+	else
+		pr_info("oprofile: using %s\n", ops->cpu_type);
+
 	return ret;
 }
 
 void oprofile_arch_exit(void)
 {
-	if (op_arm_model) {
+	int cpu, id;
+	struct perf_event *event;
+
+	if (*perf_events) {
 		exit_driverfs();
-		op_arm_model = NULL;
+		for_each_possible_cpu(cpu) {
+			for (id = 0; id < perf_num_counters; ++id) {
+				event = perf_events[cpu][id];
+				if (event != NULL)
+					perf_event_release_kernel(event);
+			}
+			kfree(perf_events[cpu]);
+		}
 	}
-	kfree(counter_config);
+
+	if (counter_config)
+		kfree(counter_config);
+}
+#else
+int __init oprofile_arch_init(struct oprofile_operations *ops)
+{
+	pr_info("oprofile: hardware counters not available\n");
+	return -ENODEV;
 }
+void oprofile_arch_exit(void) {}
+#endif /* CONFIG_HW_PERF_EVENTS */
-- 
1.6.3.3

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH 4/6] ARM: oprofile: use perf-events framework as backend
@ 2010-05-12  4:35 Deng-Cheng Zhu
  2010-05-12  9:32 ` Will Deacon
  0 siblings, 1 reply; 4+ messages in thread
From: Deng-Cheng Zhu @ 2010-05-12  4:35 UTC (permalink / raw)
  To: linux-arm-kernel

Hi, Will


This is a great idea.

Here's one comment on irq handling (Maybe you or somebody else has already
addressed in other threads - I'm new to this list):

In original Oprofile code, the irq handler mainly does 3 things - clearing
the flag, resetting the counter, and calling oprofile_add_sample(). By
using perf events as the backend of Oprofile, they are now scattered into
a relatively long code path in armpmu->handle_irq(). Could this be
something improvable?

In addition, the counterpart of resetting the counter in perf events is
the armpmu->write_counter() in armpmu_event_set_period(). How could you
make sure the value written into the counter is the same between the
original Oprofile and the "new" Oprofile?


Deng-Cheng


On 2010-04-19 16:42, Will Deacon wrote:
 > There are currently two hardware performance monitoring subsystems in the
 > kernel for ARM: OProfile and perf-events. This creates the following 
problems:
 >
 > 1.) Duplicate PMU accessor code. Inevitable code drift may lead to 
bugs in one
 > framework that are fixed in the other.
 >
 > 2.) Locking issues. OProfile doesn't reprogram hardware counters between
 > profiling runs if the events to be monitored have not been changed. 
This means
 > that other profiling frameworks cannot use the counters if OProfile 
is in use.
 >
 > 3.) Due to differences in the two frameworks, it may not be possible to
 > compare the results obtained by OProfile with those obtained by perf.
 >
 > This patch removes the OProfile PMU driver code and replaces it with 
calls
 > to perf, therefore solving the issues mentioned above.
 >

 > +/*
 > + * Overflow callback for oprofile.
 > + */
 > +static void op_overflow_handler(struct perf_event *event, int unused,
 > + struct perf_sample_data *data, struct pt_regs *regs)
 > +{
 > + int id;
 > + u32 cpu = smp_processor_id();
 > +
 > + for (id = 0; id < perf_num_counters; ++id)
 > + if (perf_events[cpu][id] == event)
 > + break;
 > +
 > + if (id != perf_num_counters)
 > + oprofile_add_sample(regs, id);
 > + else
 > + pr_warning("oprofile: ignoring spurious overflow "
 > + "on cpu %u\n", cpu);
 > +}

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH 4/6] ARM: oprofile: use perf-events framework as backend
  2010-05-12  4:35 [PATCH 4/6] ARM: oprofile: use perf-events framework as backend Deng-Cheng Zhu
@ 2010-05-12  9:32 ` Will Deacon
  0 siblings, 0 replies; 4+ messages in thread
From: Will Deacon @ 2010-05-12  9:32 UTC (permalink / raw)
  To: linux-arm-kernel

On Wed, 2010-05-12 at 05:35 +0100, Deng-Cheng Zhu wrote:
> Hi, Will
> 
Hi Deng-Cheng,
> 
> This is a great idea.
> 
I'm pleased to hear that :)

> Here's one comment on irq handling (Maybe you or somebody else has already
> addressed in other threads - I'm new to this list):
> 
This code is pretty new, so comments and concerns are more than welcome.

> In original Oprofile code, the irq handler mainly does 3 things - clearing
> the flag, resetting the counter, and calling oprofile_add_sample(). By
> using perf events as the backend of Oprofile, they are now scattered into
> a relatively long code path in armpmu->handle_irq(). Could this be
> something improvable?
> 
The IRQ handling path is indeed longer, but this shouldn't affect the
profiling data you obtain provided that you aren't sampling with an
unrealistic period [and essentially profiling the profiling framework].

The PMU interrupt handler for ARM runs with interrupts disabled and
passes the IRQ registers to the overflow handler, so although the
execution path is longer than before, you're still dealing with the same
data.

I guess the benefit of reducing this code path is that you minimise the
overhead of profiling, but unless you're seeing big problems with the
new framework, I don't think this is an issue.

> In addition, the counterpart of resetting the counter in perf events is
> the armpmu->write_counter() in armpmu_event_set_period(). How could you
> make sure the value written into the counter is the same between the
> original Oprofile and the "new" Oprofile?
> 
For Oprofile, we only deal with the sample_period attribute field and
leave the frequency stuff alone. This means that the only time that a
different value is written to the hardware is when we are compensating
for changes to the counter that have occurred during the handling path
[or more importantly, during a section of code running with interrupts
disabled]. I don't believe OProfile on ARM used to do this, but it
really should have.

One thing that might be worth communicating to OProfile is if an event
is throttled by perf. In this case, the sample period would need
adjusting by userspace. Currently I don't think we can detect if this
happens.

I hope that helps,

Will

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2010-05-12  9:32 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-05-12  4:35 [PATCH 4/6] ARM: oprofile: use perf-events framework as backend Deng-Cheng Zhu
2010-05-12  9:32 ` Will Deacon
  -- strict thread matches above, loose matches on Subject: below --
2010-04-19 16:42 [PATCH 0/6] ARM: oprofile: use perf-events framework as backend [v4] Will Deacon
2010-04-19 16:42 ` [PATCH 1/6] ARM: perf-events: use numeric ID to identify PMU Will Deacon
2010-04-19 16:42   ` [PATCH 2/6] ARM: perf-events: add support for xscale PMUs Will Deacon
2010-04-19 16:42     ` [PATCH 3/6] ARM: perf-events: allow modules to query the number of hardware counters Will Deacon
2010-04-19 16:42       ` [PATCH 4/6] ARM: oprofile: use perf-events framework as backend Will Deacon
2010-04-12 15:04 [PATCH 0/6] ARM: oprofile: use perf-events framework as backend [v3] Will Deacon
2010-04-12 15:04 ` [PATCH 1/6] ARM: perf-events: use numeric ID to identify PMU Will Deacon
2010-04-12 15:04   ` [PATCH 2/6] ARM: perf-events: add support for xscale PMUs Will Deacon
2010-04-12 15:04     ` [PATCH 3/6] perf-events: allow modules to query the maximum number of perf events Will Deacon
2010-04-12 15:04       ` [PATCH 4/6] ARM: oprofile: use perf-events framework as backend Will Deacon

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.