* [PATCH 2/5] arm/oprofile: reserve the PMU when starting
2009-12-14 14:04 ` [PATCH 1/5] arm: provide a mechanism to reserve performance counters Jamie Iles
@ 2009-12-14 14:04 ` Jamie Iles
2009-12-14 16:01 ` Jean Pihet
2009-12-14 16:04 ` Will Deacon
From: Jamie Iles @ 2009-12-14 14:04 UTC
To: linux-arm-kernel
Make sure that we have access to the performance counters and
that they aren't being used by perf events or anything else.
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Jamie Iles <jamie.iles@picochip.com>
---
arch/arm/oprofile/op_model_arm11_core.c | 4 +-
arch/arm/oprofile/op_model_arm11_core.h | 4 +-
arch/arm/oprofile/op_model_mpcore.c | 42 ++++++++++++++++--------------
arch/arm/oprofile/op_model_v6.c | 33 ++++++++++++++----------
arch/arm/oprofile/op_model_v7.c | 30 ++++++++++++++--------
arch/arm/oprofile/op_model_v7.h | 4 +-
arch/arm/oprofile/op_model_xscale.c | 35 ++++++++++++++-----------
7 files changed, 85 insertions(+), 67 deletions(-)
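
Every model backend in this patch follows the same shape: reserve the PMU,
request the IRQs it hands back, and unwind in reverse order on any failure.
A minimal sketch of that pattern, assuming only the reserve_pmu()/release_pmu()
interface from patch 1/5 (example_request_irqs() stands in for the per-model
request routine and is not a real function):

    #include <linux/err.h>
    #include <asm/pmu.h>

    static const struct pmu_irqs *pmu_irqs;

    static int example_pmu_start(void)
    {
            int ret;

            /* Take exclusive ownership of the PMU and its IRQ list. */
            pmu_irqs = reserve_pmu();
            if (IS_ERR(pmu_irqs))
                    return PTR_ERR(pmu_irqs);

            ret = example_request_irqs(pmu_irqs->irqs, pmu_irqs->num_irqs);
            if (ret < 0) {
                    /* Hand the PMU back so perf events etc. can claim it. */
                    release_pmu(pmu_irqs);
                    pmu_irqs = NULL;
            }
            return ret;
    }

The stop path runs in the opposite order: free the IRQs first, then
release_pmu().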
diff --git a/arch/arm/oprofile/op_model_arm11_core.c b/arch/arm/oprofile/op_model_arm11_core.c
index ad80752..ef3e265 100644
--- a/arch/arm/oprofile/op_model_arm11_core.c
+++ b/arch/arm/oprofile/op_model_arm11_core.c
@@ -132,7 +132,7 @@ static irqreturn_t arm11_pmu_interrupt(int irq, void *arg)
return IRQ_HANDLED;
}
-int arm11_request_interrupts(int *irqs, int nr)
+int arm11_request_interrupts(const int *irqs, int nr)
{
unsigned int i;
int ret = 0;
@@ -153,7 +153,7 @@ int arm11_request_interrupts(int *irqs, int nr)
return ret;
}
-void arm11_release_interrupts(int *irqs, int nr)
+void arm11_release_interrupts(const int *irqs, int nr)
{
unsigned int i;
diff --git a/arch/arm/oprofile/op_model_arm11_core.h b/arch/arm/oprofile/op_model_arm11_core.h
index 6f8538e..1902b99 100644
--- a/arch/arm/oprofile/op_model_arm11_core.h
+++ b/arch/arm/oprofile/op_model_arm11_core.h
@@ -39,7 +39,7 @@
int arm11_setup_pmu(void);
int arm11_start_pmu(void);
int arm11_stop_pmu(void);
-int arm11_request_interrupts(int *, int);
-void arm11_release_interrupts(int *, int);
+int arm11_request_interrupts(const int *, int);
+void arm11_release_interrupts(const int *, int);
#endif
diff --git a/arch/arm/oprofile/op_model_mpcore.c b/arch/arm/oprofile/op_model_mpcore.c
index 4ce0f98..f73ce87 100644
--- a/arch/arm/oprofile/op_model_mpcore.c
+++ b/arch/arm/oprofile/op_model_mpcore.c
@@ -32,6 +32,7 @@
/* #define DEBUG */
#include <linux/types.h>
#include <linux/errno.h>
+#include <linux/err.h>
#include <linux/sched.h>
#include <linux/oprofile.h>
#include <linux/interrupt.h>
@@ -43,6 +44,7 @@
#include <mach/hardware.h>
#include <mach/board-eb.h>
#include <asm/system.h>
+#include <asm/pmu.h>
#include "op_counter.h"
#include "op_arm_model.h"
@@ -58,6 +60,7 @@
* Bitmask of used SCU counters
*/
static unsigned int scu_em_used;
+static const struct pmu_irqs *pmu_irqs;
/*
* 2 helper fns take a counter number from 0-7 (not the userspace-visible counter number)
@@ -225,33 +228,40 @@ static int em_setup_ctrs(void)
return 0;
}
-static int arm11_irqs[] = {
- [0] = IRQ_EB11MP_PMU_CPU0,
- [1] = IRQ_EB11MP_PMU_CPU1,
- [2] = IRQ_EB11MP_PMU_CPU2,
- [3] = IRQ_EB11MP_PMU_CPU3
-};
-
static int em_start(void)
{
int ret;
- ret = arm11_request_interrupts(arm11_irqs, ARRAY_SIZE(arm11_irqs));
+ pmu_irqs = reserve_pmu();
+ if (IS_ERR(pmu_irqs)) {
+ ret = PTR_ERR(pmu_irqs);
+ goto out;
+ }
+
+ ret = arm11_request_interrupts(pmu_irqs->irqs, pmu_irqs->num_irqs);
if (ret == 0) {
em_call_function(arm11_start_pmu);
ret = scu_start();
- if (ret)
- arm11_release_interrupts(arm11_irqs, ARRAY_SIZE(arm11_irqs));
+ if (ret) {
+ arm11_release_interrupts(pmu_irqs->irqs,
+ pmu_irqs->num_irqs);
+ } else {
+ release_pmu(pmu_irqs);
+ pmu_irqs = NULL;
+ }
}
+
+out:
return ret;
}
static void em_stop(void)
{
em_call_function(arm11_stop_pmu);
- arm11_release_interrupts(arm11_irqs, ARRAY_SIZE(arm11_irqs));
+ arm11_release_interrupts(pmu_irqs->irqs, pmu_irqs->num_irqs);
scu_stop();
+ release_pmu(pmu_irqs);
}
/*
@@ -283,15 +293,7 @@ static int em_setup(void)
em_route_irq(IRQ_EB11MP_PMU_SCU6, 3);
em_route_irq(IRQ_EB11MP_PMU_SCU7, 3);
- /*
- * Send CP15 PMU interrupts to the owner CPU.
- */
- em_route_irq(IRQ_EB11MP_PMU_CPU0, 0);
- em_route_irq(IRQ_EB11MP_PMU_CPU1, 1);
- em_route_irq(IRQ_EB11MP_PMU_CPU2, 2);
- em_route_irq(IRQ_EB11MP_PMU_CPU3, 3);
-
- return 0;
+ return init_pmu();
}
struct op_arm_model_spec op_mpcore_spec = {
diff --git a/arch/arm/oprofile/op_model_v6.c b/arch/arm/oprofile/op_model_v6.c
index e468017..a22357a 100644
--- a/arch/arm/oprofile/op_model_v6.c
+++ b/arch/arm/oprofile/op_model_v6.c
@@ -19,42 +19,47 @@
/* #define DEBUG */
#include <linux/types.h>
#include <linux/errno.h>
+#include <linux/err.h>
#include <linux/sched.h>
#include <linux/oprofile.h>
#include <linux/interrupt.h>
#include <asm/irq.h>
#include <asm/system.h>
+#include <asm/pmu.h>
#include "op_counter.h"
#include "op_arm_model.h"
#include "op_model_arm11_core.h"
-static int irqs[] = {
-#ifdef CONFIG_ARCH_OMAP2
- 3,
-#endif
-#ifdef CONFIG_ARCH_BCMRING
- IRQ_PMUIRQ, /* for BCMRING, ARM PMU interrupt is 43 */
-#endif
-#ifdef CONFIG_ARCH_PC3XX
- IRQ_NPMUIRQ,
-#endif
-};
+static const struct pmu_irqs *pmu_irqs;
static void armv6_pmu_stop(void)
{
arm11_stop_pmu();
- arm11_release_interrupts(irqs, ARRAY_SIZE(irqs));
+ arm11_release_interrupts(pmu_irqs->irqs, pmu_irqs->num_irqs);
+ release_pmu(pmu_irqs);
+ pmu_irqs = NULL;
}
static int armv6_pmu_start(void)
{
int ret;
- ret = arm11_request_interrupts(irqs, ARRAY_SIZE(irqs));
- if (ret >= 0)
+ pmu_irqs = reserve_pmu();
+ if (IS_ERR(pmu_irqs)) {
+ ret = PTR_ERR(pmu_irqs);
+ goto out;
+ }
+
+ ret = arm11_request_interrupts(pmu_irqs->irqs, pmu_irqs->num_irqs);
+ if (ret >= 0) {
ret = arm11_start_pmu();
+ } else {
+ release_pmu(pmu_irqs);
+ pmu_irqs = NULL;
+ }
+out:
return ret;
}
diff --git a/arch/arm/oprofile/op_model_v7.c b/arch/arm/oprofile/op_model_v7.c
index f20295f..9258fca 100644
--- a/arch/arm/oprofile/op_model_v7.c
+++ b/arch/arm/oprofile/op_model_v7.c
@@ -11,11 +11,14 @@
*/
#include <linux/types.h>
#include <linux/errno.h>
+#include <linux/err.h>
#include <linux/oprofile.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/smp.h>
+#include <asm/pmu.h>
+
#include "op_counter.h"
#include "op_arm_model.h"
#include "op_model_v7.h"
@@ -299,7 +302,7 @@ static irqreturn_t armv7_pmnc_interrupt(int irq, void *arg)
return IRQ_HANDLED;
}
-int armv7_request_interrupts(int *irqs, int nr)
+int armv7_request_interrupts(const int *irqs, int nr)
{
unsigned int i;
int ret = 0;
@@ -322,7 +325,7 @@ int armv7_request_interrupts(int *irqs, int nr)
return ret;
}
-void armv7_release_interrupts(int *irqs, int nr)
+void armv7_release_interrupts(const int *irqs, int nr)
{
unsigned int i;
@@ -366,12 +369,7 @@ static void armv7_pmnc_dump_regs(void)
}
#endif
-
-static int irqs[] = {
-#ifdef CONFIG_ARCH_OMAP3
- INT_34XX_BENCH_MPU_EMUL,
-#endif
-};
+static const struct pmu_irqs *pmu_irqs;
static void armv7_pmnc_stop(void)
{
@@ -379,19 +377,29 @@ static void armv7_pmnc_stop(void)
armv7_pmnc_dump_regs();
#endif
armv7_stop_pmnc();
- armv7_release_interrupts(irqs, ARRAY_SIZE(irqs));
+ armv7_release_interrupts(pmu_irqs->irqs, pmu_irqs->num_irqs);
+ release_pmu(pmu_irqs);
+ pmu_irqs = NULL;
}
static int armv7_pmnc_start(void)
{
int ret;
+ pmu_irqs = reserve_pmu();
+ if (IS_ERR(pmu_irqs))
+ return PTR_ERR(pmu_irqs);
+
#ifdef DEBUG
armv7_pmnc_dump_regs();
#endif
- ret = armv7_request_interrupts(irqs, ARRAY_SIZE(irqs));
- if (ret >= 0)
+ ret = armv7_request_interrupts(pmu_irqs->irqs, pmu_irqs->num_irqs);
+ if (ret >= 0) {
armv7_start_pmnc();
+ } else {
+ release_pmu(pmu_irqs);
+ pmu_irqs = NULL;
+ }
return ret;
}
diff --git a/arch/arm/oprofile/op_model_v7.h b/arch/arm/oprofile/op_model_v7.h
index 0e19bcc..9ca334b 100644
--- a/arch/arm/oprofile/op_model_v7.h
+++ b/arch/arm/oprofile/op_model_v7.h
@@ -97,7 +97,7 @@
int armv7_setup_pmu(void);
int armv7_start_pmu(void);
int armv7_stop_pmu(void);
-int armv7_request_interrupts(int *, int);
-void armv7_release_interrupts(int *, int);
+int armv7_request_interrupts(const int *, int);
+void armv7_release_interrupts(const int *, int);
#endif
diff --git a/arch/arm/oprofile/op_model_xscale.c b/arch/arm/oprofile/op_model_xscale.c
index 724ab9c..1d34a02 100644
--- a/arch/arm/oprofile/op_model_xscale.c
+++ b/arch/arm/oprofile/op_model_xscale.c
@@ -17,12 +17,14 @@
/* #define DEBUG */
#include <linux/types.h>
#include <linux/errno.h>
+#include <linux/err.h>
#include <linux/sched.h>
#include <linux/oprofile.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <asm/cputype.h>
+#include <asm/pmu.h>
#include "op_counter.h"
#include "op_arm_model.h"
@@ -33,17 +35,6 @@
#define PMU_RESET (CCNT_RESET | PMN_RESET)
#define PMU_CNT64 0x008 /* Make CCNT count every 64th cycle */
-/* TODO do runtime detection */
-#ifdef CONFIG_ARCH_IOP32X
-#define XSCALE_PMU_IRQ IRQ_IOP32X_CORE_PMU
-#endif
-#ifdef CONFIG_ARCH_IOP33X
-#define XSCALE_PMU_IRQ IRQ_IOP33X_CORE_PMU
-#endif
-#ifdef CONFIG_ARCH_PXA
-#define XSCALE_PMU_IRQ IRQ_PMU
-#endif
-
/*
* Different types of events that can be counted by the XScale PMU
* as used by Oprofile userspace. Here primarily for documentation
@@ -367,6 +358,8 @@ static irqreturn_t xscale_pmu_interrupt(int irq, void *arg)
return IRQ_HANDLED;
}
+static const struct pmu_irqs *pmu_irqs;
+
static void xscale_pmu_stop(void)
{
u32 pmnc = read_pmnc();
@@ -374,20 +367,30 @@ static void xscale_pmu_stop(void)
pmnc &= ~PMU_ENABLE;
write_pmnc(pmnc);
- free_irq(XSCALE_PMU_IRQ, results);
+ free_irq(pmu_irqs->irqs[0], results);
+ release_pmu(pmu_irqs);
+ pmu_irqs = NULL;
}
static int xscale_pmu_start(void)
{
int ret;
- u32 pmnc = read_pmnc();
+ u32 pmnc;
+
+ pmu_irqs = reserve_pmu();
+ if (IS_ERR(pmu_irqs))
+ return PTR_ERR(pmu_irqs);
+
+ pmnc = read_pmnc();
- ret = request_irq(XSCALE_PMU_IRQ, xscale_pmu_interrupt, IRQF_DISABLED,
- "XScale PMU", (void *)results);
+ ret = request_irq(pmu_irqs->irqs[0], xscale_pmu_interrupt,
+ IRQF_DISABLED, "XScale PMU", (void *)results);
if (ret < 0) {
printk(KERN_ERR "oprofile: unable to request IRQ%d for XScale PMU\n",
- XSCALE_PMU_IRQ);
+ pmu_irqs->irqs[0]);
+ release_pmu(pmu_irqs);
+ pmu_irqs = NULL;
return ret;
}
--
1.6.5.4
* [PATCH 2/5] arm/oprofile: reserve the PMU when starting
2009-12-14 14:04 ` [PATCH 2/5] arm/oprofile: reserve the PMU when starting Jamie Iles
@ 2009-12-14 16:01 ` Jean Pihet
2009-12-14 16:04 ` Will Deacon
From: Jean Pihet @ 2009-12-14 16:01 UTC
To: linux-arm-kernel
Hi,
On Mon, 2009-12-14 at 14:04 +0000, Jamie Iles wrote:
> Make sure that we have access to the performance counters and
> that they aren't being used by perf events or anything else.
>
> Cc: Will Deacon <will.deacon@arm.com>
> Signed-off-by: Jamie Iles <jamie.iles@picochip.com>
> ---
> arch/arm/oprofile/op_model_arm11_core.c | 4 +-
> arch/arm/oprofile/op_model_arm11_core.h | 4 +-
> arch/arm/oprofile/op_model_mpcore.c | 42 ++++++++++++++++--------------
> arch/arm/oprofile/op_model_v6.c | 33 ++++++++++++++----------
> arch/arm/oprofile/op_model_v7.c | 30 ++++++++++++++--------
> arch/arm/oprofile/op_model_v7.h | 4 +-
I am OK with the changes for ARMv7.
Regards,
Jean
<snip>
* [PATCH 2/5] arm/oprofile: reserve the PMU when starting
2009-12-14 14:04 ` [PATCH 2/5] arm/oprofile: reserve the PMU when starting Jamie Iles
2009-12-14 16:01 ` Jean Pihet
@ 2009-12-14 16:04 ` Will Deacon
2009-12-14 16:10 ` Jamie Iles
From: Will Deacon @ 2009-12-14 16:04 UTC
To: linux-arm-kernel
* Jamie Iles wrote:
> Make sure that we have access to the performance counters and
> that they aren't being used by perf events or anything else.
>
> diff --git a/arch/arm/oprofile/op_model_v6.c b/arch/arm/oprofile/op_model_v6.c
> index e468017..a22357a 100644
> --- a/arch/arm/oprofile/op_model_v6.c
> +++ b/arch/arm/oprofile/op_model_v6.c
> @@ -19,42 +19,47 @@
> /* #define DEBUG */
> #include <linux/types.h>
> #include <linux/errno.h>
> +#include <linux/err.h>
> #include <linux/sched.h>
> #include <linux/oprofile.h>
> #include <linux/interrupt.h>
> #include <asm/irq.h>
> #include <asm/system.h>
> +#include <asm/pmu.h>
>
> #include "op_counter.h"
> #include "op_arm_model.h"
> #include "op_model_arm11_core.h"
>
> -static int irqs[] = {
> -#ifdef CONFIG_ARCH_OMAP2
> - 3,
> -#endif
> -#ifdef CONFIG_ARCH_BCMRING
> - IRQ_PMUIRQ, /* for BCMRING, ARM PMU interrupt is 43 */
> -#endif
> -#ifdef CONFIG_ARCH_PC3XX
> - IRQ_NPMUIRQ,
> -#endif
<snip>
These last three lines don't apply cleanly.
I think you've based this patch on top of your previous one.
Will
* [PATCH 2/5] arm/oprofile: reserve the PMU when starting
2009-12-14 16:04 ` Will Deacon
@ 2009-12-14 16:10 ` Jamie Iles
From: Jamie Iles @ 2009-12-14 16:10 UTC
To: linux-arm-kernel
On Mon, Dec 14, 2009 at 04:04:25PM -0000, Will Deacon wrote:
> > -static int irqs[] = {
> > -#ifdef CONFIG_ARCH_OMAP2
> > - 3,
> > -#endif
> > -#ifdef CONFIG_ARCH_BCMRING
> > - IRQ_PMUIRQ, /* for BCMRING, ARM PMU interrupt is 43 */
> > -#endif
> > -#ifdef CONFIG_ARCH_PC3XX
> > - IRQ_NPMUIRQ,
> > -#endif
> <snip>
>
> These last three lines don't apply cleanly.
> I think you've based this patch on top of your previous one.
Apologies, this is from a platform (pc3xx) that isn't in mainline. I've
been using this platform to test the perf events code. I'll submit the next
lot directly off of tip/master.
Jamie
* [PATCH 2/5] arm/oprofile: reserve the PMU when starting
2009-12-15 11:15 ` [PATCH 1/5] arm: provide a mechanism to reserve performance counters Jamie Iles
@ 2009-12-15 11:15 ` Jamie Iles
From: Jamie Iles @ 2009-12-15 11:15 UTC
To: linux-arm-kernel
Make sure that we have access to the performance counters and
that they aren't being used by perf events or anything else.
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Jamie Iles <jamie.iles@picochip.com>
---
arch/arm/oprofile/op_model_arm11_core.c | 4 +-
arch/arm/oprofile/op_model_arm11_core.h | 4 +-
arch/arm/oprofile/op_model_mpcore.c | 42 ++++++++++++++++--------------
arch/arm/oprofile/op_model_v6.c | 30 ++++++++++++++--------
arch/arm/oprofile/op_model_v7.c | 30 ++++++++++++++--------
arch/arm/oprofile/op_model_v7.h | 4 +-
arch/arm/oprofile/op_model_xscale.c | 35 ++++++++++++++-----------
7 files changed, 85 insertions(+), 64 deletions(-)
diff --git a/arch/arm/oprofile/op_model_arm11_core.c b/arch/arm/oprofile/op_model_arm11_core.c
index ad80752..ef3e265 100644
--- a/arch/arm/oprofile/op_model_arm11_core.c
+++ b/arch/arm/oprofile/op_model_arm11_core.c
@@ -132,7 +132,7 @@ static irqreturn_t arm11_pmu_interrupt(int irq, void *arg)
return IRQ_HANDLED;
}
-int arm11_request_interrupts(int *irqs, int nr)
+int arm11_request_interrupts(const int *irqs, int nr)
{
unsigned int i;
int ret = 0;
@@ -153,7 +153,7 @@ int arm11_request_interrupts(int *irqs, int nr)
return ret;
}
-void arm11_release_interrupts(int *irqs, int nr)
+void arm11_release_interrupts(const int *irqs, int nr)
{
unsigned int i;
diff --git a/arch/arm/oprofile/op_model_arm11_core.h b/arch/arm/oprofile/op_model_arm11_core.h
index 6f8538e..1902b99 100644
--- a/arch/arm/oprofile/op_model_arm11_core.h
+++ b/arch/arm/oprofile/op_model_arm11_core.h
@@ -39,7 +39,7 @@
int arm11_setup_pmu(void);
int arm11_start_pmu(void);
int arm11_stop_pmu(void);
-int arm11_request_interrupts(int *, int);
-void arm11_release_interrupts(int *, int);
+int arm11_request_interrupts(const int *, int);
+void arm11_release_interrupts(const int *, int);
#endif
diff --git a/arch/arm/oprofile/op_model_mpcore.c b/arch/arm/oprofile/op_model_mpcore.c
index 4ce0f98..f73ce87 100644
--- a/arch/arm/oprofile/op_model_mpcore.c
+++ b/arch/arm/oprofile/op_model_mpcore.c
@@ -32,6 +32,7 @@
/* #define DEBUG */
#include <linux/types.h>
#include <linux/errno.h>
+#include <linux/err.h>
#include <linux/sched.h>
#include <linux/oprofile.h>
#include <linux/interrupt.h>
@@ -43,6 +44,7 @@
#include <mach/hardware.h>
#include <mach/board-eb.h>
#include <asm/system.h>
+#include <asm/pmu.h>
#include "op_counter.h"
#include "op_arm_model.h"
@@ -58,6 +60,7 @@
* Bitmask of used SCU counters
*/
static unsigned int scu_em_used;
+static const struct pmu_irqs *pmu_irqs;
/*
* 2 helper fns take a counter number from 0-7 (not the userspace-visible counter number)
@@ -225,33 +228,40 @@ static int em_setup_ctrs(void)
return 0;
}
-static int arm11_irqs[] = {
- [0] = IRQ_EB11MP_PMU_CPU0,
- [1] = IRQ_EB11MP_PMU_CPU1,
- [2] = IRQ_EB11MP_PMU_CPU2,
- [3] = IRQ_EB11MP_PMU_CPU3
-};
-
static int em_start(void)
{
int ret;
- ret = arm11_request_interrupts(arm11_irqs, ARRAY_SIZE(arm11_irqs));
+ pmu_irqs = reserve_pmu();
+ if (IS_ERR(pmu_irqs)) {
+ ret = PTR_ERR(pmu_irqs);
+ goto out;
+ }
+
+ ret = arm11_request_interrupts(pmu_irqs->irqs, pmu_irqs->num_irqs);
if (ret == 0) {
em_call_function(arm11_start_pmu);
ret = scu_start();
- if (ret)
- arm11_release_interrupts(arm11_irqs, ARRAY_SIZE(arm11_irqs));
+ if (ret) {
+ arm11_release_interrupts(pmu_irqs->irqs,
+ pmu_irqs->num_irqs);
+ } else {
+ release_pmu(pmu_irqs);
+ pmu_irqs = NULL;
+ }
}
+
+out:
return ret;
}
static void em_stop(void)
{
em_call_function(arm11_stop_pmu);
- arm11_release_interrupts(arm11_irqs, ARRAY_SIZE(arm11_irqs));
+ arm11_release_interrupts(pmu_irqs->irqs, pmu_irqs->num_irqs);
scu_stop();
+ release_pmu(pmu_irqs);
}
/*
@@ -283,15 +293,7 @@ static int em_setup(void)
em_route_irq(IRQ_EB11MP_PMU_SCU6, 3);
em_route_irq(IRQ_EB11MP_PMU_SCU7, 3);
- /*
- * Send CP15 PMU interrupts to the owner CPU.
- */
- em_route_irq(IRQ_EB11MP_PMU_CPU0, 0);
- em_route_irq(IRQ_EB11MP_PMU_CPU1, 1);
- em_route_irq(IRQ_EB11MP_PMU_CPU2, 2);
- em_route_irq(IRQ_EB11MP_PMU_CPU3, 3);
-
- return 0;
+ return init_pmu();
}
struct op_arm_model_spec op_mpcore_spec = {
diff --git a/arch/arm/oprofile/op_model_v6.c b/arch/arm/oprofile/op_model_v6.c
index f7d2ec5..a22357a 100644
--- a/arch/arm/oprofile/op_model_v6.c
+++ b/arch/arm/oprofile/op_model_v6.c
@@ -19,39 +19,47 @@
/* #define DEBUG */
#include <linux/types.h>
#include <linux/errno.h>
+#include <linux/err.h>
#include <linux/sched.h>
#include <linux/oprofile.h>
#include <linux/interrupt.h>
#include <asm/irq.h>
#include <asm/system.h>
+#include <asm/pmu.h>
#include "op_counter.h"
#include "op_arm_model.h"
#include "op_model_arm11_core.h"
-static int irqs[] = {
-#ifdef CONFIG_ARCH_OMAP2
- 3,
-#endif
-#ifdef CONFIG_ARCH_BCMRING
- IRQ_PMUIRQ, /* for BCMRING, ARM PMU interrupt is 43 */
-#endif
-};
+static const struct pmu_irqs *pmu_irqs;
static void armv6_pmu_stop(void)
{
arm11_stop_pmu();
- arm11_release_interrupts(irqs, ARRAY_SIZE(irqs));
+ arm11_release_interrupts(pmu_irqs->irqs, pmu_irqs->num_irqs);
+ release_pmu(pmu_irqs);
+ pmu_irqs = NULL;
}
static int armv6_pmu_start(void)
{
int ret;
- ret = arm11_request_interrupts(irqs, ARRAY_SIZE(irqs));
- if (ret >= 0)
+ pmu_irqs = reserve_pmu();
+ if (IS_ERR(pmu_irqs)) {
+ ret = PTR_ERR(pmu_irqs);
+ goto out;
+ }
+
+ ret = arm11_request_interrupts(pmu_irqs->irqs, pmu_irqs->num_irqs);
+ if (ret >= 0) {
ret = arm11_start_pmu();
+ } else {
+ release_pmu(pmu_irqs);
+ pmu_irqs = NULL;
+ }
+out:
return ret;
}
diff --git a/arch/arm/oprofile/op_model_v7.c b/arch/arm/oprofile/op_model_v7.c
index f20295f..9258fca 100644
--- a/arch/arm/oprofile/op_model_v7.c
+++ b/arch/arm/oprofile/op_model_v7.c
@@ -11,11 +11,14 @@
*/
#include <linux/types.h>
#include <linux/errno.h>
+#include <linux/err.h>
#include <linux/oprofile.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/smp.h>
+#include <asm/pmu.h>
+
#include "op_counter.h"
#include "op_arm_model.h"
#include "op_model_v7.h"
@@ -299,7 +302,7 @@ static irqreturn_t armv7_pmnc_interrupt(int irq, void *arg)
return IRQ_HANDLED;
}
-int armv7_request_interrupts(int *irqs, int nr)
+int armv7_request_interrupts(const int *irqs, int nr)
{
unsigned int i;
int ret = 0;
@@ -322,7 +325,7 @@ int armv7_request_interrupts(int *irqs, int nr)
return ret;
}
-void armv7_release_interrupts(int *irqs, int nr)
+void armv7_release_interrupts(const int *irqs, int nr)
{
unsigned int i;
@@ -366,12 +369,7 @@ static void armv7_pmnc_dump_regs(void)
}
#endif
-
-static int irqs[] = {
-#ifdef CONFIG_ARCH_OMAP3
- INT_34XX_BENCH_MPU_EMUL,
-#endif
-};
+static const struct pmu_irqs *pmu_irqs;
static void armv7_pmnc_stop(void)
{
@@ -379,19 +377,29 @@ static void armv7_pmnc_stop(void)
armv7_pmnc_dump_regs();
#endif
armv7_stop_pmnc();
- armv7_release_interrupts(irqs, ARRAY_SIZE(irqs));
+ armv7_release_interrupts(pmu_irqs->irqs, pmu_irqs->num_irqs);
+ release_pmu(pmu_irqs);
+ pmu_irqs = NULL;
}
static int armv7_pmnc_start(void)
{
int ret;
+ pmu_irqs = reserve_pmu();
+ if (IS_ERR(pmu_irqs))
+ return PTR_ERR(pmu_irqs);
+
#ifdef DEBUG
armv7_pmnc_dump_regs();
#endif
- ret = armv7_request_interrupts(irqs, ARRAY_SIZE(irqs));
- if (ret >= 0)
+ ret = armv7_request_interrupts(pmu_irqs->irqs, pmu_irqs->num_irqs);
+ if (ret >= 0) {
armv7_start_pmnc();
+ } else {
+ release_pmu(pmu_irqs);
+ pmu_irqs = NULL;
+ }
return ret;
}
diff --git a/arch/arm/oprofile/op_model_v7.h b/arch/arm/oprofile/op_model_v7.h
index 0e19bcc..9ca334b 100644
--- a/arch/arm/oprofile/op_model_v7.h
+++ b/arch/arm/oprofile/op_model_v7.h
@@ -97,7 +97,7 @@
int armv7_setup_pmu(void);
int armv7_start_pmu(void);
int armv7_stop_pmu(void);
-int armv7_request_interrupts(int *, int);
-void armv7_release_interrupts(int *, int);
+int armv7_request_interrupts(const int *, int);
+void armv7_release_interrupts(const int *, int);
#endif
diff --git a/arch/arm/oprofile/op_model_xscale.c b/arch/arm/oprofile/op_model_xscale.c
index 724ab9c..1d34a02 100644
--- a/arch/arm/oprofile/op_model_xscale.c
+++ b/arch/arm/oprofile/op_model_xscale.c
@@ -17,12 +17,14 @@
/* #define DEBUG */
#include <linux/types.h>
#include <linux/errno.h>
+#include <linux/err.h>
#include <linux/sched.h>
#include <linux/oprofile.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <asm/cputype.h>
+#include <asm/pmu.h>
#include "op_counter.h"
#include "op_arm_model.h"
@@ -33,17 +35,6 @@
#define PMU_RESET (CCNT_RESET | PMN_RESET)
#define PMU_CNT64 0x008 /* Make CCNT count every 64th cycle */
-/* TODO do runtime detection */
-#ifdef CONFIG_ARCH_IOP32X
-#define XSCALE_PMU_IRQ IRQ_IOP32X_CORE_PMU
-#endif
-#ifdef CONFIG_ARCH_IOP33X
-#define XSCALE_PMU_IRQ IRQ_IOP33X_CORE_PMU
-#endif
-#ifdef CONFIG_ARCH_PXA
-#define XSCALE_PMU_IRQ IRQ_PMU
-#endif
-
/*
* Different types of events that can be counted by the XScale PMU
* as used by Oprofile userspace. Here primarily for documentation
@@ -367,6 +358,8 @@ static irqreturn_t xscale_pmu_interrupt(int irq, void *arg)
return IRQ_HANDLED;
}
+static const struct pmu_irqs *pmu_irqs;
+
static void xscale_pmu_stop(void)
{
u32 pmnc = read_pmnc();
@@ -374,20 +367,30 @@ static void xscale_pmu_stop(void)
pmnc &= ~PMU_ENABLE;
write_pmnc(pmnc);
- free_irq(XSCALE_PMU_IRQ, results);
+ free_irq(pmu_irqs->irqs[0], results);
+ release_pmu(pmu_irqs);
+ pmu_irqs = NULL;
}
static int xscale_pmu_start(void)
{
int ret;
- u32 pmnc = read_pmnc();
+ u32 pmnc;
+
+ pmu_irqs = reserve_pmu();
+ if (IS_ERR(pmu_irqs))
+ return PTR_ERR(pmu_irqs);
+
+ pmnc = read_pmnc();
- ret = request_irq(XSCALE_PMU_IRQ, xscale_pmu_interrupt, IRQF_DISABLED,
- "XScale PMU", (void *)results);
+ ret = request_irq(pmu_irqs->irqs[0], xscale_pmu_interrupt,
+ IRQF_DISABLED, "XScale PMU", (void *)results);
if (ret < 0) {
printk(KERN_ERR "oprofile: unable to request IRQ%d for XScale PMU\n",
- XSCALE_PMU_IRQ);
+ pmu_irqs->irqs[0]);
+ release_pmu(pmu_irqs);
+ pmu_irqs = NULL;
return ret;
}
--
1.6.5.4
* ARM perf events support v4
@ 2010-01-04 10:48 Jamie Iles
2010-01-04 10:48 ` [PATCH 1/5] arm: provide a mechanism to reserve performance counters Jamie Iles
From: Jamie Iles @ 2010-01-04 10:48 UTC
To: linux-arm-kernel
This is the fourth revision of the ARM performance counter support. It adds
support for software performance events on all ARM platforms and hardware
counter support for v6 processors. A generic PMU framework allows v7 and
later to be added in the future.
Jamie Iles (5):
arm: provide a mechanism to reserve performance counters
arm/oprofile: reserve the PMU when starting
arm: use the spinlocked, generic atomic64 support
arm: enable support for software perf events
arm/perfevents: implement perf event support for ARMv6
arch/arm/Kconfig | 16 +
arch/arm/include/asm/atomic.h | 4 +
arch/arm/include/asm/perf_event.h | 38 +
arch/arm/include/asm/pmu.h | 74 ++
arch/arm/kernel/Makefile | 2 +
arch/arm/kernel/perf_event.c | 1338 +++++++++++++++++++++++++++++++
arch/arm/kernel/pmu.c | 107 +++
arch/arm/mm/fault.c | 7 +
arch/arm/oprofile/op_model_arm11_core.c | 4 +-
arch/arm/oprofile/op_model_arm11_core.h | 4 +-
arch/arm/oprofile/op_model_mpcore.c | 42 +-
arch/arm/oprofile/op_model_v6.c | 30 +-
arch/arm/oprofile/op_model_v7.c | 30 +-
arch/arm/oprofile/op_model_v7.h | 4 +-
arch/arm/oprofile/op_model_xscale.c | 35 +-
15 files changed, 1671 insertions(+), 64 deletions(-)
Jamie
* [PATCH 1/5] arm: provide a mechanism to reserve performance counters
2010-01-04 10:48 ARM perf events support v4 Jamie Iles
@ 2010-01-04 10:48 ` Jamie Iles
2010-01-04 10:48 ` [PATCH 2/5] arm/oprofile: reserve the PMU when starting Jamie Iles
2010-01-06 12:00 ` [PATCH 1/5] arm: provide a mechanism to reserve performance counters Michał Nazarewicz
From: Jamie Iles @ 2010-01-04 10:48 UTC
To: linux-arm-kernel
To add support for perf events and to allow the hardware
counters to be shared with oprofile, we need a way to reserve
access to the PMU (performance monitor unit).
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Jamie Iles <jamie.iles@picochip.com>
---
arch/arm/Kconfig | 5 ++
arch/arm/include/asm/pmu.h | 74 ++++++++++++++++++++++++++++++
arch/arm/kernel/Makefile | 1 +
arch/arm/kernel/pmu.c | 107 ++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 187 insertions(+), 0 deletions(-)
create mode 100644 arch/arm/include/asm/pmu.h
create mode 100644 arch/arm/kernel/pmu.c
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 233a222..9e08891 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -863,6 +863,11 @@ config XSCALE_PMU
depends on CPU_XSCALE && !XSCALE_PMU_TIMER
default y
+config CPU_HAS_PMU
+ depends on CPU_V6 || CPU_V7 || XSCALE_PMU
+ default y
+ bool
+
if !MMU
source "arch/arm/Kconfig-nommu"
endif
diff --git a/arch/arm/include/asm/pmu.h b/arch/arm/include/asm/pmu.h
new file mode 100644
index 0000000..5840d2d
--- /dev/null
+++ b/arch/arm/include/asm/pmu.h
@@ -0,0 +1,74 @@
+/*
+ * linux/arch/arm/include/asm/pmu.h
+ *
+ * Copyright (C) 2009 picoChip Designs Ltd, Jamie Iles
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#ifndef __ARM_PMU_H__
+#define __ARM_PMU_H__
+
+#ifdef CONFIG_CPU_HAS_PMU
+
+struct pmu_irqs {
+ const int *irqs;
+ int num_irqs;
+};
+
+/**
+ * reserve_pmu() - reserve the hardware performance counters
+ *
+ * Reserve the hardware performance counters in the system for exclusive use.
+ * The 'struct pmu_irqs' for the system is returned on success, ERR_PTR()
+ * encoded error on failure.
+ */
+extern const struct pmu_irqs *
+reserve_pmu(void);
+
+/**
+ * release_pmu() - Relinquish control of the performance counters
+ *
+ * Release the performance counters and allow someone else to use them.
+ * Callers must have disabled the counters and released IRQs before calling
+ * this. The 'struct pmu_irqs' returned from reserve_pmu() must be passed as
+ * a cookie.
+ */
+extern int
+release_pmu(const struct pmu_irqs *irqs);
+
+/**
+ * init_pmu() - Initialise the PMU.
+ *
+ * Initialise the system ready for PMU enabling. This should typically set the
+ * IRQ affinity and nothing else. The users (oprofile/perf events etc) will do
+ * the actual hardware initialisation.
+ */
+extern int
+init_pmu(void);
+
+#else /* CONFIG_CPU_HAS_PMU */
+
+static inline const struct pmu_irqs *
+reserve_pmu(void)
+{
+ return ERR_PTR(-ENODEV);
+}
+
+static inline int
+release_pmu(const struct pmu_irqs *irqs)
+{
+ return -ENODEV;
+}
+
+static inline int
+init_pmu(void)
+{
+ return -ENODEV;
+}
+
+#endif /* CONFIG_CPU_HAS_PMU */
+
+#endif /* __ARM_PMU_H__ */
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index dd00f74..216890d 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -46,6 +46,7 @@ obj-$(CONFIG_CPU_XSCALE) += xscale-cp0.o
obj-$(CONFIG_CPU_XSC3) += xscale-cp0.o
obj-$(CONFIG_CPU_MOHAWK) += xscale-cp0.o
obj-$(CONFIG_IWMMXT) += iwmmxt.o
+obj-$(CONFIG_CPU_HAS_PMU) += pmu.o
AFLAGS_iwmmxt.o := -Wa,-mcpu=iwmmxt
ifneq ($(CONFIG_ARCH_EBSA110),y)
diff --git a/arch/arm/kernel/pmu.c b/arch/arm/kernel/pmu.c
new file mode 100644
index 0000000..a8c015d
--- /dev/null
+++ b/arch/arm/kernel/pmu.c
@@ -0,0 +1,107 @@
+/*
+ * linux/arch/arm/kernel/pmu.c
+ *
+ * Copyright (C) 2009 picoChip Designs Ltd, Jamie Iles
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/cpumask.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/semaphore.h>
+
+#include <asm/pmu.h>
+
+/*
+ * Define the IRQs for the system. We could use something like a platform
+ * device but that seems fairly heavyweight for this. Also, the performance
+ * counters can't be removed or hotplugged.
+ *
+ * Ordering is important: init_pmu() will use the ordering to set the affinity
+ * to the corresponding core. e.g. the first interrupt will go to cpu 0, the
+ * second goes to cpu 1 etc.
+ */
+static const int irqs[] = {
+#ifdef CONFIG_ARCH_PC3XX
+ IRQ_NPMUIRQ,
+#elif defined(CONFIG_ARCH_OMAP2)
+ 3,
+#elif defined(CONFIG_ARCH_BCMRING)
+ IRQ_PMUIRQ,
+#elif defined(CONFIG_MACH_REALVIEW_EB)
+ IRQ_EB11MP_PMU_CPU0,
+ IRQ_EB11MP_PMU_CPU1,
+ IRQ_EB11MP_PMU_CPU2,
+ IRQ_EB11MP_PMU_CPU3,
+#elif defined(CONFIG_ARCH_OMAP3)
+ INT_34XX_BENCH_MPU_EMUL,
+#elif defined(CONFIG_ARCH_IOP32X)
+ IRQ_IOP32X_CORE_PMU,
+#elif defined(CONFIG_ARCH_IOP33X)
+ IRQ_IOP33X_CORE_PMU,
+#elif defined(CONFIG_ARCH_PXA)
+ IRQ_PMU,
+#endif
+};
+
+static const struct pmu_irqs pmu_irqs = {
+ .irqs = irqs,
+ .num_irqs = ARRAY_SIZE(irqs),
+};
+
+static DECLARE_MUTEX(pmu_mutex);
+
+const struct pmu_irqs *
+reserve_pmu(void)
+{
+ int ret = down_trylock(&pmu_mutex) ? -EBUSY : 0;
+
+ return ret ? ERR_PTR(ret) : &pmu_irqs;
+}
+EXPORT_SYMBOL_GPL(reserve_pmu);
+
+int
+release_pmu(const struct pmu_irqs *irqs)
+{
+ if (WARN_ON(irqs != &pmu_irqs))
+ return -EINVAL;
+ up(&pmu_mutex);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(release_pmu);
+
+static int
+set_irq_affinity(int irq,
+ unsigned int cpu)
+{
+#ifdef CONFIG_SMP
+ int err = irq_set_affinity(irq, cpumask_of(cpu));
+ if (err)
+ pr_warning("unable to set irq affinity (irq=%d, cpu=%u)\n",
+ irq, cpu);
+ return err;
+#else
+ return 0;
+#endif
+}
+
+int
+init_pmu(void)
+{
+ int i, err = 0;
+
+ for (i = 0; i < pmu_irqs.num_irqs; ++i) {
+ err = set_irq_affinity(pmu_irqs.irqs[i], i);
+ if (err)
+ break;
+ }
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(init_pmu);
--
1.6.5.4
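
Since reserve_pmu() is backed by a trylock, a second claimant fails
immediately with -EBUSY rather than blocking. Illustratively (not part of
the patch):

    const struct pmu_irqs *a, *b;

    a = reserve_pmu();      /* succeeds: a == &pmu_irqs */
    b = reserve_pmu();      /* fails: IS_ERR(b) && PTR_ERR(b) == -EBUSY */

    release_pmu(a);         /* the PMU is free to be claimed again */

This is what lets oprofile and perf events coexist in one kernel: whichever
subsystem starts first owns the counters until it stops.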
* [PATCH 2/5] arm/oprofile: reserve the PMU when starting
2010-01-04 10:48 ` [PATCH 1/5] arm: provide a mechanism to reserve performance counters Jamie Iles
@ 2010-01-04 10:48 ` Jamie Iles
2010-01-04 10:48 ` [PATCH 3/5] arm: use the spinlocked, generic atomic64 support Jamie Iles
2010-01-06 12:00 ` [PATCH 1/5] arm: provide a mechanism to reserve performance counters Michał Nazarewicz
From: Jamie Iles @ 2010-01-04 10:48 UTC
To: linux-arm-kernel
Make sure that we have access to the performance counters and
that they aren't being used by perf events or anything else.
Cc: Will Deacon <will.deacon@arm.com>
Cc: Jean Pihet <jpihet@mvista.com>
Signed-off-by: Jamie Iles <jamie.iles@picochip.com>
---
arch/arm/oprofile/op_model_arm11_core.c | 4 +-
arch/arm/oprofile/op_model_arm11_core.h | 4 +-
arch/arm/oprofile/op_model_mpcore.c | 42 ++++++++++++++++--------------
arch/arm/oprofile/op_model_v6.c | 30 ++++++++++++++--------
arch/arm/oprofile/op_model_v7.c | 30 ++++++++++++++--------
arch/arm/oprofile/op_model_v7.h | 4 +-
arch/arm/oprofile/op_model_xscale.c | 35 ++++++++++++++-----------
7 files changed, 85 insertions(+), 64 deletions(-)
diff --git a/arch/arm/oprofile/op_model_arm11_core.c b/arch/arm/oprofile/op_model_arm11_core.c
index ad80752..ef3e265 100644
--- a/arch/arm/oprofile/op_model_arm11_core.c
+++ b/arch/arm/oprofile/op_model_arm11_core.c
@@ -132,7 +132,7 @@ static irqreturn_t arm11_pmu_interrupt(int irq, void *arg)
return IRQ_HANDLED;
}
-int arm11_request_interrupts(int *irqs, int nr)
+int arm11_request_interrupts(const int *irqs, int nr)
{
unsigned int i;
int ret = 0;
@@ -153,7 +153,7 @@ int arm11_request_interrupts(int *irqs, int nr)
return ret;
}
-void arm11_release_interrupts(int *irqs, int nr)
+void arm11_release_interrupts(const int *irqs, int nr)
{
unsigned int i;
diff --git a/arch/arm/oprofile/op_model_arm11_core.h b/arch/arm/oprofile/op_model_arm11_core.h
index 6f8538e..1902b99 100644
--- a/arch/arm/oprofile/op_model_arm11_core.h
+++ b/arch/arm/oprofile/op_model_arm11_core.h
@@ -39,7 +39,7 @@
int arm11_setup_pmu(void);
int arm11_start_pmu(void);
int arm11_stop_pmu(void);
-int arm11_request_interrupts(int *, int);
-void arm11_release_interrupts(int *, int);
+int arm11_request_interrupts(const int *, int);
+void arm11_release_interrupts(const int *, int);
#endif
diff --git a/arch/arm/oprofile/op_model_mpcore.c b/arch/arm/oprofile/op_model_mpcore.c
index 4ce0f98..f73ce87 100644
--- a/arch/arm/oprofile/op_model_mpcore.c
+++ b/arch/arm/oprofile/op_model_mpcore.c
@@ -32,6 +32,7 @@
/* #define DEBUG */
#include <linux/types.h>
#include <linux/errno.h>
+#include <linux/err.h>
#include <linux/sched.h>
#include <linux/oprofile.h>
#include <linux/interrupt.h>
@@ -43,6 +44,7 @@
#include <mach/hardware.h>
#include <mach/board-eb.h>
#include <asm/system.h>
+#include <asm/pmu.h>
#include "op_counter.h"
#include "op_arm_model.h"
@@ -58,6 +60,7 @@
* Bitmask of used SCU counters
*/
static unsigned int scu_em_used;
+static const struct pmu_irqs *pmu_irqs;
/*
* 2 helper fns take a counter number from 0-7 (not the userspace-visible counter number)
@@ -225,33 +228,40 @@ static int em_setup_ctrs(void)
return 0;
}
-static int arm11_irqs[] = {
- [0] = IRQ_EB11MP_PMU_CPU0,
- [1] = IRQ_EB11MP_PMU_CPU1,
- [2] = IRQ_EB11MP_PMU_CPU2,
- [3] = IRQ_EB11MP_PMU_CPU3
-};
-
static int em_start(void)
{
int ret;
- ret = arm11_request_interrupts(arm11_irqs, ARRAY_SIZE(arm11_irqs));
+ pmu_irqs = reserve_pmu();
+ if (IS_ERR(pmu_irqs)) {
+ ret = PTR_ERR(pmu_irqs);
+ goto out;
+ }
+
+ ret = arm11_request_interrupts(pmu_irqs->irqs, pmu_irqs->num_irqs);
if (ret == 0) {
em_call_function(arm11_start_pmu);
ret = scu_start();
- if (ret)
- arm11_release_interrupts(arm11_irqs, ARRAY_SIZE(arm11_irqs));
+ if (ret) {
+ arm11_release_interrupts(pmu_irqs->irqs,
+ pmu_irqs->num_irqs);
+ } else {
+ release_pmu(pmu_irqs);
+ pmu_irqs = NULL;
+ }
}
+
+out:
return ret;
}
static void em_stop(void)
{
em_call_function(arm11_stop_pmu);
- arm11_release_interrupts(arm11_irqs, ARRAY_SIZE(arm11_irqs));
+ arm11_release_interrupts(pmu_irqs->irqs, pmu_irqs->num_irqs);
scu_stop();
+ release_pmu(pmu_irqs);
}
/*
@@ -283,15 +293,7 @@ static int em_setup(void)
em_route_irq(IRQ_EB11MP_PMU_SCU6, 3);
em_route_irq(IRQ_EB11MP_PMU_SCU7, 3);
- /*
- * Send CP15 PMU interrupts to the owner CPU.
- */
- em_route_irq(IRQ_EB11MP_PMU_CPU0, 0);
- em_route_irq(IRQ_EB11MP_PMU_CPU1, 1);
- em_route_irq(IRQ_EB11MP_PMU_CPU2, 2);
- em_route_irq(IRQ_EB11MP_PMU_CPU3, 3);
-
- return 0;
+ return init_pmu();
}
struct op_arm_model_spec op_mpcore_spec = {
diff --git a/arch/arm/oprofile/op_model_v6.c b/arch/arm/oprofile/op_model_v6.c
index f7d2ec5..a22357a 100644
--- a/arch/arm/oprofile/op_model_v6.c
+++ b/arch/arm/oprofile/op_model_v6.c
@@ -19,39 +19,47 @@
/* #define DEBUG */
#include <linux/types.h>
#include <linux/errno.h>
+#include <linux/err.h>
#include <linux/sched.h>
#include <linux/oprofile.h>
#include <linux/interrupt.h>
#include <asm/irq.h>
#include <asm/system.h>
+#include <asm/pmu.h>
#include "op_counter.h"
#include "op_arm_model.h"
#include "op_model_arm11_core.h"
-static int irqs[] = {
-#ifdef CONFIG_ARCH_OMAP2
- 3,
-#endif
-#ifdef CONFIG_ARCH_BCMRING
- IRQ_PMUIRQ, /* for BCMRING, ARM PMU interrupt is 43 */
-#endif
-};
+static const struct pmu_irqs *pmu_irqs;
static void armv6_pmu_stop(void)
{
arm11_stop_pmu();
- arm11_release_interrupts(irqs, ARRAY_SIZE(irqs));
+ arm11_release_interrupts(pmu_irqs->irqs, pmu_irqs->num_irqs);
+ release_pmu(pmu_irqs);
+ pmu_irqs = NULL;
}
static int armv6_pmu_start(void)
{
int ret;
- ret = arm11_request_interrupts(irqs, ARRAY_SIZE(irqs));
- if (ret >= 0)
+ pmu_irqs = reserve_pmu();
+ if (IS_ERR(pmu_irqs)) {
+ ret = PTR_ERR(pmu_irqs);
+ goto out;
+ }
+
+ ret = arm11_request_interrupts(pmu_irqs->irqs, pmu_irqs->num_irqs);
+ if (ret >= 0) {
ret = arm11_start_pmu();
+ } else {
+ release_pmu(pmu_irqs);
+ pmu_irqs = NULL;
+ }
+out:
return ret;
}
diff --git a/arch/arm/oprofile/op_model_v7.c b/arch/arm/oprofile/op_model_v7.c
index 2088a6c..8642d08 100644
--- a/arch/arm/oprofile/op_model_v7.c
+++ b/arch/arm/oprofile/op_model_v7.c
@@ -11,11 +11,14 @@
*/
#include <linux/types.h>
#include <linux/errno.h>
+#include <linux/err.h>
#include <linux/oprofile.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/smp.h>
+#include <asm/pmu.h>
+
#include "op_counter.h"
#include "op_arm_model.h"
#include "op_model_v7.h"
@@ -295,7 +298,7 @@ static irqreturn_t armv7_pmnc_interrupt(int irq, void *arg)
return IRQ_HANDLED;
}
-int armv7_request_interrupts(int *irqs, int nr)
+int armv7_request_interrupts(const int *irqs, int nr)
{
unsigned int i;
int ret = 0;
@@ -318,7 +321,7 @@ int armv7_request_interrupts(int *irqs, int nr)
return ret;
}
-void armv7_release_interrupts(int *irqs, int nr)
+void armv7_release_interrupts(const int *irqs, int nr)
{
unsigned int i;
@@ -362,12 +365,7 @@ static void armv7_pmnc_dump_regs(void)
}
#endif
-
-static int irqs[] = {
-#ifdef CONFIG_ARCH_OMAP3
- INT_34XX_BENCH_MPU_EMUL,
-#endif
-};
+static const struct pmu_irqs *pmu_irqs;
static void armv7_pmnc_stop(void)
{
@@ -375,19 +373,29 @@ static void armv7_pmnc_stop(void)
armv7_pmnc_dump_regs();
#endif
armv7_stop_pmnc();
- armv7_release_interrupts(irqs, ARRAY_SIZE(irqs));
+ armv7_release_interrupts(pmu_irqs->irqs, pmu_irqs->num_irqs);
+ release_pmu(pmu_irqs);
+ pmu_irqs = NULL;
}
static int armv7_pmnc_start(void)
{
int ret;
+ pmu_irqs = reserve_pmu();
+ if (IS_ERR(pmu_irqs))
+ return PTR_ERR(pmu_irqs);
+
#ifdef DEBUG
armv7_pmnc_dump_regs();
#endif
- ret = armv7_request_interrupts(irqs, ARRAY_SIZE(irqs));
- if (ret >= 0)
+ ret = armv7_request_interrupts(pmu_irqs->irqs, pmu_irqs->num_irqs);
+ if (ret >= 0) {
armv7_start_pmnc();
+ } else {
+ release_pmu(pmu_irqs);
+ pmu_irqs = NULL;
+ }
return ret;
}
diff --git a/arch/arm/oprofile/op_model_v7.h b/arch/arm/oprofile/op_model_v7.h
index 0e19bcc..9ca334b 100644
--- a/arch/arm/oprofile/op_model_v7.h
+++ b/arch/arm/oprofile/op_model_v7.h
@@ -97,7 +97,7 @@
int armv7_setup_pmu(void);
int armv7_start_pmu(void);
int armv7_stop_pmu(void);
-int armv7_request_interrupts(int *, int);
-void armv7_release_interrupts(int *, int);
+int armv7_request_interrupts(const int *, int);
+void armv7_release_interrupts(const int *, int);
#endif
diff --git a/arch/arm/oprofile/op_model_xscale.c b/arch/arm/oprofile/op_model_xscale.c
index 724ab9c..1d34a02 100644
--- a/arch/arm/oprofile/op_model_xscale.c
+++ b/arch/arm/oprofile/op_model_xscale.c
@@ -17,12 +17,14 @@
/* #define DEBUG */
#include <linux/types.h>
#include <linux/errno.h>
+#include <linux/err.h>
#include <linux/sched.h>
#include <linux/oprofile.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <asm/cputype.h>
+#include <asm/pmu.h>
#include "op_counter.h"
#include "op_arm_model.h"
@@ -33,17 +35,6 @@
#define PMU_RESET (CCNT_RESET | PMN_RESET)
#define PMU_CNT64 0x008 /* Make CCNT count every 64th cycle */
-/* TODO do runtime detection */
-#ifdef CONFIG_ARCH_IOP32X
-#define XSCALE_PMU_IRQ IRQ_IOP32X_CORE_PMU
-#endif
-#ifdef CONFIG_ARCH_IOP33X
-#define XSCALE_PMU_IRQ IRQ_IOP33X_CORE_PMU
-#endif
-#ifdef CONFIG_ARCH_PXA
-#define XSCALE_PMU_IRQ IRQ_PMU
-#endif
-
/*
* Different types of events that can be counted by the XScale PMU
* as used by Oprofile userspace. Here primarily for documentation
@@ -367,6 +358,8 @@ static irqreturn_t xscale_pmu_interrupt(int irq, void *arg)
return IRQ_HANDLED;
}
+static const struct pmu_irqs *pmu_irqs;
+
static void xscale_pmu_stop(void)
{
u32 pmnc = read_pmnc();
@@ -374,20 +367,30 @@ static void xscale_pmu_stop(void)
pmnc &= ~PMU_ENABLE;
write_pmnc(pmnc);
- free_irq(XSCALE_PMU_IRQ, results);
+ free_irq(pmu_irqs->irqs[0], results);
+ release_pmu(pmu_irqs);
+ pmu_irqs = NULL;
}
static int xscale_pmu_start(void)
{
int ret;
- u32 pmnc = read_pmnc();
+ u32 pmnc;
+
+ pmu_irqs = reserve_pmu();
+ if (IS_ERR(pmu_irqs))
+ return PTR_ERR(pmu_irqs);
+
+ pmnc = read_pmnc();
- ret = request_irq(XSCALE_PMU_IRQ, xscale_pmu_interrupt, IRQF_DISABLED,
- "XScale PMU", (void *)results);
+ ret = request_irq(pmu_irqs->irqs[0], xscale_pmu_interrupt,
+ IRQF_DISABLED, "XScale PMU", (void *)results);
if (ret < 0) {
printk(KERN_ERR "oprofile: unable to request IRQ%d for XScale PMU\n",
- XSCALE_PMU_IRQ);
+ pmu_irqs->irqs[0]);
+ release_pmu(pmu_irqs);
+ pmu_irqs = NULL;
return ret;
}
--
1.6.5.4
* [PATCH 3/5] arm: use the spinlocked, generic atomic64 support
2010-01-04 10:48 ` [PATCH 2/5] arm/oprofile: reserve the PMU when starting Jamie Iles
@ 2010-01-04 10:48 ` Jamie Iles
2010-01-04 10:48 ` [PATCH 4/5] arm: enable support for software perf events Jamie Iles
2010-01-05 18:57 ` [PATCH 3/5] arm: use the spinlocked, generic atomic64 support Jamie Lokier
From: Jamie Iles @ 2010-01-04 10:48 UTC
To: linux-arm-kernel
perf events require atomic64 operations. There is a generic, spinlocked
implementation that we can use until we have proper hardware support.
Signed-off-by: Jamie Iles <jamie.iles@picochip.com>
---
arch/arm/Kconfig | 1 +
arch/arm/include/asm/atomic.h | 4 ++++
2 files changed, 5 insertions(+), 0 deletions(-)
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 9e08891..0dbfd9b 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -18,6 +18,7 @@ config ARM
select HAVE_KRETPROBES if (HAVE_KPROBES)
select HAVE_FUNCTION_TRACER if (!XIP_KERNEL)
select HAVE_GENERIC_DMA_COHERENT
+ select GENERIC_ATOMIC64
help
The ARM series is a line of low-power-consumption RISC chip designs
licensed by ARM Ltd and targeted at embedded applications and
diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h
index d0daeab..ff286a8 100644
--- a/arch/arm/include/asm/atomic.h
+++ b/arch/arm/include/asm/atomic.h
@@ -15,6 +15,10 @@
#include <linux/types.h>
#include <asm/system.h>
+#ifdef CONFIG_GENERIC_ATOMIC64
+#include <asm-generic/atomic64.h>
+#endif
+
#define ATOMIC_INIT(i) { (i) }
#ifdef __KERNEL__
--
1.6.5.4
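
For reference, the kind of use perf events makes of this is a 64-bit event
count touched from both interrupt and task context. A sketch (not from the
patch) of the operations the generic implementation must provide:

    static atomic64_t event_count = ATOMIC64_INIT(0);

    /* IRQ path: accumulate the counter delta */
    atomic64_add(delta, &event_count);

    /* read side: a consistent 64-bit snapshot, even on 32-bit ARM */
    u64 total = atomic64_read(&event_count);

The generic version wraps each operation in one of a small hashed array of
spinlocks, which is slower than native 64-bit atomics but correct on any
SMP configuration.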
* [PATCH 4/5] arm: enable support for software perf events
2010-01-04 10:48 ` [PATCH 3/5] arm: use the spinlocked, generic atomic64 support Jamie Iles
@ 2010-01-04 10:48 ` Jamie Iles
2010-01-04 10:48 ` [PATCH 5/5] arm/perfevents: implement perf event support for ARMv6 Jamie Iles
2010-01-04 11:11 ` [PATCH 4/5] arm: enable support for software perf events Russell King - ARM Linux
2010-01-05 18:57 ` [PATCH 3/5] arm: use the spinlocked, generic atomic64 support Jamie Lokier
From: Jamie Iles @ 2010-01-04 10:48 UTC
To: linux-arm-kernel
The perf events subsystem allows counting of both hardware and
software events. This patch implements the bare minimum for software
performance events.
Signed-off-by: Jamie Iles <jamie.iles@picochip.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@elte.hu>
---
arch/arm/Kconfig | 2 +
arch/arm/include/asm/perf_event.h | 38 +++++++++++++++++++++++++++++++++++++
arch/arm/mm/fault.c | 7 ++++++
3 files changed, 47 insertions(+), 0 deletions(-)
create mode 100644 arch/arm/include/asm/perf_event.h
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 0dbfd9b..337eec4 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -19,6 +19,8 @@ config ARM
select HAVE_FUNCTION_TRACER if (!XIP_KERNEL)
select HAVE_GENERIC_DMA_COHERENT
select GENERIC_ATOMIC64
+ select HAVE_PERF_EVENTS
+ select PERF_USE_VMALLOC
help
The ARM series is a line of low-power-consumption RISC chip designs
licensed by ARM Ltd and targeted at embedded applications and
diff --git a/arch/arm/include/asm/perf_event.h b/arch/arm/include/asm/perf_event.h
new file mode 100644
index 0000000..32a66ac
--- /dev/null
+++ b/arch/arm/include/asm/perf_event.h
@@ -0,0 +1,38 @@
+/*
+ * linux/arch/arm/include/asm/perf_event.h
+ *
+ * Copyright (C) 2009 picoChip Designs Ltd, Jamie Iles
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#ifndef __ARM_PERF_EVENT_H__
+#define __ARM_PERF_EVENT_H__
+
+/*
+ * NOP: on *most* (read: all supported) ARM platforms, the performance
+ * counter interrupts are regular interrupts and not an NMI. This
+ * means that when we receive the interrupt we can call
+ * perf_event_do_pending() that handles all of the work with
+ * interrupts enabled.
+ */
+static inline void
+set_perf_event_pending(void)
+{
+}
+
+/* Get the PC. Make sure that we have a 64bit value with the upper 32 cleared.
+ */
+#define perf_instruction_pointer(_regs) \
+ ((u64)instruction_pointer(_regs) & 0xFFFFFFFFLU)
+#define perf_misc_flags(regs) (user_mode(regs) ? PERF_RECORD_MISC_USER : \
+ PERF_RECORD_MISC_KERNEL)
+
+/* ARM performance counters start from 1 (in the cp15 accesses) so use the
+ * same indexes here for consistency. */
+#define PERF_EVENT_INDEX_OFFSET 1
+
+#endif /* __ARM_PERF_EVENT_H__ */
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index 10e0680..9d40c34 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -18,6 +18,7 @@
#include <linux/page-flags.h>
#include <linux/sched.h>
#include <linux/highmem.h>
+#include <linux/perf_event.h>
#include <asm/system.h>
#include <asm/pgtable.h>
@@ -302,6 +303,12 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
fault = __do_page_fault(mm, addr, fsr, tsk);
up_read(&mm->mmap_sem);
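+ /*
+ * Record the fault for the software perf events: one count for
+ * every fault, plus a major or minor count as appropriate.
+ */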
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, addr);
+ if (fault & VM_FAULT_MAJOR)
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, regs, addr);
+ else if (fault & VM_FAULT_MINOR)
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, regs, addr);
+
/*
* Handle the "normal" case first - VM_FAULT_MAJOR / VM_FAULT_MINOR
*/
--
1.6.5.4
* [PATCH 5/5] arm/perfevents: implement perf event support for ARMv6
2010-01-04 10:48 ` [PATCH 4/5] arm: enable support for software perf events Jamie Iles
@ 2010-01-04 10:48 ` Jamie Iles
2010-01-04 11:17 ` Russell King - ARM Linux
2010-01-05 18:07 ` Will Deacon
2010-01-04 11:11 ` [PATCH 4/5] arm: enable support for software perf events Russell King - ARM Linux
1 sibling, 2 replies; 30+ messages in thread
From: Jamie Iles @ 2010-01-04 10:48 UTC (permalink / raw)
To: linux-arm-kernel
This patch implements support for ARMv6 performance counters in the
Linux performance events subsystem. ARMv6 architectures that have the
performance counters should enable HW_PERF_EVENTS and define the
interrupts for the counters in arch/arm/kernel/perf_event.c
This implementation also provides an ARM PMU abstraction layer to allow
ARMv7 and others to be supported in the future by adding a new
'struct arm_pmu'.
Signed-off-by: Jamie Iles <jamie.iles@picochip.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Jean Pihet <jpihet@mvista.com>
Cc: Will Deacon <will.deacon@arm.com>
---
arch/arm/Kconfig | 8 +
arch/arm/kernel/Makefile | 1 +
arch/arm/kernel/perf_event.c | 1338 ++++++++++++++++++++++++++++++++++++++++++
3 files changed, 1347 insertions(+), 0 deletions(-)
create mode 100644 arch/arm/kernel/perf_event.c
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 337eec4..53c3fde 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1175,6 +1175,14 @@ config HIGHPTE
depends on HIGHMEM
depends on !OUTER_CACHE
+config HW_PERF_EVENTS
+ bool "Enable hardware performance counter support for perf events"
+ depends on PERF_EVENTS && CPU_HAS_PMU && CPU_V6
+ default y
+ help
+ Enable hardware performance counter support for perf events. If
+ disabled, perf events will use software events only.
+
source "mm/Kconfig"
config LEDS
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index 216890d..c76e6d2 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -47,6 +47,7 @@ obj-$(CONFIG_CPU_XSC3) += xscale-cp0.o
obj-$(CONFIG_CPU_MOHAWK) += xscale-cp0.o
obj-$(CONFIG_IWMMXT) += iwmmxt.o
obj-$(CONFIG_CPU_HAS_PMU) += pmu.o
+obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o
AFLAGS_iwmmxt.o := -Wa,-mcpu=iwmmxt
ifneq ($(CONFIG_ARCH_EBSA110),y)
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
new file mode 100644
index 0000000..b721968
--- /dev/null
+++ b/arch/arm/kernel/perf_event.c
@@ -0,0 +1,1338 @@
+#undef DEBUG
+
+/*
+ * ARM performance counter support.
+ *
+ * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles
+ *
+ * This code is based on the sparc64 perf event code, which is in turn based
+ * on the x86 code. Callchain code is based on the ARM OProfile backtrace
+ * code.
+ */
+#define pr_fmt(fmt) "hw perfevents: " fmt
+
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/perf_event.h>
+#include <linux/spinlock.h>
+#include <linux/uaccess.h>
+
+#include <asm/cputype.h>
+#include <asm/irq.h>
+#include <asm/irq_regs.h>
+#include <asm/pmu.h>
+#include <asm/stacktrace.h>
+
+static const struct pmu_irqs *pmu_irqs;
+
+/*
+ * Hardware lock to serialize accesses to PMU registers. Needed for the
+ * read/modify/write sequences.
+ */
+DEFINE_SPINLOCK(pmu_lock);
+
+/*
+ * ARMv6 supports a maximum of 3 events, starting from index 1. If we add
+ * another platform that supports more, we need to increase this to be the
+ * largest of all platforms.
+ */
+#define ARMPMU_MAX_HWEVENTS 4
+
+/* The events for a given CPU. */
+struct cpu_hw_events {
+ /*
+ * The events that are active on the CPU for the given index. Index 0
+ * is reserved.
+ */
+ struct perf_event *events[ARMPMU_MAX_HWEVENTS];
+
+ /*
+ * A 1 bit for an index indicates that the counter is being used for
+ * an event. A 0 means that the counter can be used.
+ */
+ unsigned long used_mask[BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)];
+
+ /*
+ * A 1 bit for an index indicates that the counter is actively being
+ * used.
+ */
+ unsigned long active_mask[BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)];
+};
+DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
+
+struct arm_pmu {
+ const char *name;
+ irqreturn_t (*handle_irq)(int irq_num, void *dev);
+ void (*enable)(struct hw_perf_event *evt, int idx);
+ void (*disable)(struct hw_perf_event *evt, int idx);
+ int (*event_map)(int evt);
+ u64 (*raw_event)(u64);
+ int (*get_event_idx)(struct cpu_hw_events *cpuc,
+ struct hw_perf_event *hwc);
+ u32 (*read_counter)(int idx);
+ void (*write_counter)(int idx, u32 val);
+ void (*start)(void);
+ void (*stop)(void);
+ int num_events;
+ u64 max_period;
+};
+
+/* Set at runtime when we know what CPU type we are. */
+static const struct arm_pmu *armpmu;
+
+#define HW_OP_UNSUPPORTED 0xFFFF
+
+#define C(_x) \
+ PERF_COUNT_HW_CACHE_##_x
+
+#define CACHE_OP_UNSUPPORTED 0xFFFF
+
+static unsigned armpmu_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX];
+
+static int
+armpmu_map_cache_event(u64 config)
+{
+ unsigned int cache_type, cache_op, cache_result, ret;
+
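+ /*
+ * The config value follows the generic perf ABI encoding: byte 0
+ * is the cache type, byte 1 the operation and byte 2 the result
+ * (access or miss).
+ */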
+ cache_type = (config >> 0) & 0xff;
+ if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
+ return -EINVAL;
+
+ cache_op = (config >> 8) & 0xff;
+ if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
+ return -EINVAL;
+
+ cache_result = (config >> 16) & 0xff;
+ if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
+ return -EINVAL;
+
+ ret = (int)armpmu_perf_cache_map[cache_type][cache_op][cache_result];
+
+ if (ret == CACHE_OP_UNSUPPORTED)
+ return -ENOENT;
+
+ return ret;
+}
+
+static int
+armpmu_event_set_period(struct perf_event *event,
+ struct hw_perf_event *hwc,
+ int idx)
+{
+ s64 left = atomic64_read(&hwc->period_left);
+ s64 period = hwc->sample_period;
+ int ret = 0;
+
+ if (unlikely(left <= -period)) {
+ left = period;
+ atomic64_set(&hwc->period_left, left);
+ hwc->last_period = period;
+ ret = 1;
+ }
+
+ if (unlikely(left <= 0)) {
+ left += period;
+ atomic64_set(&hwc->period_left, left);
+ hwc->last_period = period;
+ ret = 1;
+ }
+
+ if (left > (s64)armpmu->max_period)
+ left = armpmu->max_period;
+
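+ /*
+ * The counter interrupts on overflow, so program it with the two's
+ * complement of the remaining period: it then wraps (raising the
+ * interrupt) after 'left' more events.
+ */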
+ atomic64_set(&hwc->prev_count, (u64)-left);
+
+ armpmu->write_counter(idx, (u64)(-left) & 0xffffffff);
+
+ perf_event_update_userpage(event);
+
+ return ret;
+}
+
+static u64
+armpmu_event_update(struct perf_event *event,
+ struct hw_perf_event *hwc,
+ int idx)
+{
+ int shift = 64 - 32;
+ s64 prev_raw_count, new_raw_count;
+ s64 delta;
+
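+ /*
+ * The hardware counters are 32 bits wide. Shifting both raw values
+ * up by 32 and the difference back down again yields a delta that
+ * is correct modulo 2^32, even across a counter wrap. The cmpxchg
+ * loop retries if an interrupt updates prev_count underneath us.
+ */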
+again:
+ prev_raw_count = atomic64_read(&hwc->prev_count);
+ new_raw_count = armpmu->read_counter(idx);
+
+ if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
+ new_raw_count) != prev_raw_count)
+ goto again;
+
+ delta = (new_raw_count << shift) - (prev_raw_count << shift);
+ delta >>= shift;
+
+ atomic64_add(delta, &event->count);
+ atomic64_sub(delta, &hwc->period_left);
+
+ return new_raw_count;
+}
+
+static void
+armpmu_disable(struct perf_event *event)
+{
+ struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct hw_perf_event *hwc = &event->hw;
+ int idx = hwc->idx;
+
+ WARN_ON(idx < 0);
+
+ clear_bit(idx, cpuc->active_mask);
+ armpmu->disable(hwc, idx);
+
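+ /*
+ * Compiler barrier: don't let the final counter update below be
+ * reordered before the disable above.
+ */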
+ barrier();
+
+ armpmu_event_update(event, hwc, idx);
+ cpuc->events[idx] = NULL;
+ clear_bit(idx, cpuc->used_mask);
+
+ perf_event_update_userpage(event);
+}
+
+static void
+armpmu_read(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ /* Don't read disabled counters! */
+ if (hwc->idx < 0)
+ return;
+
+ armpmu_event_update(event, hwc, hwc->idx);
+}
+
+static void
+armpmu_unthrottle(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ /*
+ * Set the period again. Some counters can't be stopped, so when we
+ * were throttled we simply disabled the IRQ source and the counter
+ * may have been left counting. If we don't do this step then we may
+ * get an interrupt too soon or *way* too late if the overflow has
+ * happened since disabling.
+ */
+ armpmu_event_set_period(event, hwc, hwc->idx);
+ armpmu->enable(hwc, hwc->idx);
+}
+
+static int
+armpmu_enable(struct perf_event *event)
+{
+ struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct hw_perf_event *hwc = &event->hw;
+ int idx;
+ int err = 0;
+
+ /* If we don't have space for the counter then finish early. */
+ idx = armpmu->get_event_idx(cpuc, hwc);
+ if (idx < 0) {
+ err = idx;
+ goto out;
+ }
+
+ /*
+ * If there is an event in the counter we are going to use then make
+ * sure it is disabled.
+ */
+ event->hw.idx = idx;
+ armpmu->disable(hwc, idx);
+ cpuc->events[idx] = event;
+ set_bit(idx, cpuc->active_mask);
+
+ /* Set the period for the event. */
+ armpmu_event_set_period(event, hwc, idx);
+
+ /* Enable the event. */
+ armpmu->enable(hwc, idx);
+
+ /* Propagate our changes to the userspace mapping. */
+ perf_event_update_userpage(event);
+
+out:
+ return err;
+}
+
+static struct pmu pmu = {
+ .enable = armpmu_enable,
+ .disable = armpmu_disable,
+ .unthrottle = armpmu_unthrottle,
+ .read = armpmu_read,
+};
+
+static int
+validate_event(struct cpu_hw_events *cpuc,
+ struct perf_event *event)
+{
+ struct hw_perf_event fake_event = event->hw;
+
+ if (event->pmu && event->pmu != &pmu)
+ return 0;
+
+ return armpmu->get_event_idx(cpuc, &fake_event) >= 0;
+}
+
+static int
+validate_group(struct perf_event *event)
+{
+ struct perf_event *sibling, *leader = event->group_leader;
+ struct cpu_hw_events fake_pmu;
+
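+ /*
+ * Simulate scheduling the whole group on an empty PMU: if the
+ * leader, its siblings and the new event can't all be given a
+ * counter index then the group can never be scheduled together.
+ */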
+ memset(&fake_pmu, 0, sizeof(fake_pmu));
+
+ if (!validate_event(&fake_pmu, leader))
+ return -ENOSPC;
+
+ list_for_each_entry(sibling, &leader->sibling_list, group_entry) {
+ if (!validate_event(&fake_pmu, sibling))
+ return -ENOSPC;
+ }
+
+ if (!validate_event(&fake_pmu, event))
+ return -ENOSPC;
+
+ return 0;
+}
+
+static int
+armpmu_reserve_hardware(void)
+{
+ int i;
+ int err;
+
+ pmu_irqs = reserve_pmu();
+ if (IS_ERR(pmu_irqs)) {
+ pr_warning("unable to reserve pmu\n");
+ return PTR_ERR(pmu_irqs);
+ }
+
+ init_pmu();
+
+ if (pmu_irqs->num_irqs < 1) {
+ pr_err("no irqs for PMUs defined\n");
+ release_pmu(pmu_irqs);
+ pmu_irqs = NULL;
+ return -ENODEV;
+ }
+
+ for (i = 0; i < pmu_irqs->num_irqs; ++i) {
+ err = request_irq(pmu_irqs->irqs[i], armpmu->handle_irq,
+ IRQF_DISABLED, "armpmu", NULL);
+ if (err) {
+ pr_warning("unable to request IRQ%d for ARM "
+ "perf counters\n", pmu_irqs->irqs[i]);
+ break;
+ }
+ }
+
+ if (err) {
+ for (i = i - 1; i >= 0; --i)
+ free_irq(pmu_irqs->irqs[i], NULL);
+ release_pmu(pmu_irqs);
+ pmu_irqs = NULL;
+ }
+
+ return err;
+}
+
+static void
+armpmu_release_hardware(void)
+{
+ int i;
+
+ for (i = pmu_irqs->num_irqs - 1; i >= 0; --i)
+ free_irq(pmu_irqs->irqs[i], NULL);
+ armpmu->stop();
+
+ release_pmu(pmu_irqs);
+ pmu_irqs = NULL;
+}
+
+static atomic_t active_events = ATOMIC_INIT(0);
+static DEFINE_MUTEX(pmu_reserve_mutex);
+
+static void
+hw_perf_event_destroy(struct perf_event *event)
+{
+ if (atomic_dec_and_mutex_lock(&active_events, &pmu_reserve_mutex)) {
+ armpmu_release_hardware();
+ mutex_unlock(&pmu_reserve_mutex);
+ }
+}
+
+static int
+__hw_perf_event_init(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ int mapping, err;
+
+ /* Decode the generic type into an ARM event identifier. */
+ if (PERF_TYPE_HARDWARE == event->attr.type) {
+ mapping = armpmu->event_map(event->attr.config);
+ } else if (PERF_TYPE_HW_CACHE == event->attr.type) {
+ mapping = armpmu_map_cache_event(event->attr.config);
+ } else if (PERF_TYPE_RAW == event->attr.type) {
+ mapping = armpmu->raw_event(event->attr.config);
+ } else {
+ pr_debug("event type %x not supported\n", event->attr.type);
+ return -EOPNOTSUPP;
+ }
+
+ if (mapping < 0) {
+ pr_debug("event %x:%llx not supported\n", event->attr.type,
+ event->attr.config);
+ return mapping;
+ }
+
+ /*
+ * Check whether we need to exclude the counter from certain modes.
+ * The ARM performance counters are on all of the time so if someone
+ * has asked us for some excludes then we have to fail.
+ */
+ if (event->attr.exclude_kernel || event->attr.exclude_user ||
+ event->attr.exclude_hv || event->attr.exclude_idle) {
+ pr_debug("ARM performance counters do not support "
+ "mode exclusion\n");
+ return -EPERM;
+ }
+
+ /*
+ * We don't assign an index until we actually place the event onto
+ * hardware. Use -1 to signify that we haven't decided where to put it
+ * yet. For SMP systems, each core has its own PMU so we can't do any
+ * clever allocation or constraints checking at this point.
+ */
+ hwc->idx = -1;
+
+ /*
+ * Store the event encoding into the config_base field. config and
+ * event_base are unused as the only 2 things we need to know are
+ * the event mapping and the counter to use. The counter to use is
+ * also the index and the config_base is the event type.
+ */
+ hwc->config_base = (unsigned long)mapping;
+ hwc->config = 0;
+ hwc->event_base = 0;
+
+ if (!hwc->sample_period) {
+ hwc->sample_period = armpmu->max_period;
+ hwc->last_period = hwc->sample_period;
+ atomic64_set(&hwc->period_left, hwc->sample_period);
+ }
+
+ err = 0;
+ if (event->group_leader != event) {
+ err = validate_group(event);
+ if (err)
+ return -EINVAL;
+ }
+
+ return err;
+}
+
+const struct pmu *
+hw_perf_event_init(struct perf_event *event)
+{
+ int err = 0;
+
+ if (!armpmu)
+ return ERR_PTR(-ENODEV);
+
+ event->destroy = hw_perf_event_destroy;
+
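+ /*
+ * If events are already active then the hardware is already
+ * reserved and atomic_inc_not_zero() just takes another reference.
+ * Otherwise reserve the hardware under pmu_reserve_mutex on the
+ * 0 -> 1 transition.
+ */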
+ if (!atomic_inc_not_zero(&active_events)) {
+ if (atomic_read(&active_events) > perf_max_events) {
+ atomic_dec(&active_events);
+ return ERR_PTR(-ENOSPC);
+ }
+
+ mutex_lock(&pmu_reserve_mutex);
+ if (atomic_read(&active_events) == 0) {
+ err = armpmu_reserve_hardware();
+ }
+
+ if (!err)
+ atomic_inc(&active_events);
+ mutex_unlock(&pmu_reserve_mutex);
+ }
+
+ if (err)
+ return ERR_PTR(err);
+
+ err = __hw_perf_event_init(event);
+ if (err)
+ hw_perf_event_destroy(event);
+
+ return err ? ERR_PTR(err) : &pmu;
+}
+
+void
+hw_perf_enable(void)
+{
+ /* Enable all of the perf events on hardware. */
+ int idx;
+ struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+
+ if (!armpmu)
+ return;
+
+ for (idx = 0; idx <= armpmu->num_events; ++idx) {
+ struct perf_event *event = cpuc->events[idx];
+
+ if (!event)
+ continue;
+
+ armpmu->enable(&event->hw, idx);
+ }
+
+ armpmu->start();
+}
+
+void
+hw_perf_disable(void)
+{
+ if (armpmu)
+ armpmu->stop();
+}
+
+/*
+ * ARMv6 Performance counter handling code.
+ *
+ * ARMv6 has 2 configurable performance counters and a single cycle counter.
+ * They all share a single reset bit but can be written to zero so we can use
+ * that for a reset.
+ *
+ * The counters can't be individually enabled or disabled so when we remove
+ * one event and replace it with another we could get spurious counts from the
+ * wrong event. However, we can take advantage of the fact that the
+ * performance counters can export events to the event bus, and the event bus
+ * itself can be monitored. This requires that we *don't* export the events to
+ * the event bus. The procedure for disabling a configurable counter is:
+ * - change the counter to count the ETMEXTOUT[0] signal (0x20). This
+ * effectively stops the counter from counting.
+ * - disable the counter's interrupt generation (each counter has its
+ * own interrupt enable bit).
+ * Once stopped, the counter value can be written as 0 to reset.
+ *
+ * To enable a counter:
+ * - enable the counter's interrupt generation.
+ * - set the new event type.
+ *
+ * Note: the dedicated cycle counter only counts cycles and can't be
+ * enabled/disabled independently of the others. When we want to disable the
+ * cycle counter, we have to just disable the interrupt reporting and start
+ * ignoring that counter. When re-enabling, we have to reset the value and
+ * enable the interrupt.
+ */
+
+enum armv6_perf_types {
+ ARMV6_PERFCTR_ICACHE_MISS = 0x0,
+ ARMV6_PERFCTR_IBUF_STALL = 0x1,
+ ARMV6_PERFCTR_DDEP_STALL = 0x2,
+ ARMV6_PERFCTR_ITLB_MISS = 0x3,
+ ARMV6_PERFCTR_DTLB_MISS = 0x4,
+ ARMV6_PERFCTR_BR_EXEC = 0x5,
+ ARMV6_PERFCTR_BR_MISPREDICT = 0x6,
+ ARMV6_PERFCTR_INSTR_EXEC = 0x7,
+ ARMV6_PERFCTR_DCACHE_HIT = 0x9,
+ ARMV6_PERFCTR_DCACHE_ACCESS = 0xA,
+ ARMV6_PERFCTR_DCACHE_MISS = 0xB,
+ ARMV6_PERFCTR_DCACHE_WBACK = 0xC,
+ ARMV6_PERFCTR_SW_PC_CHANGE = 0xD,
+ ARMV6_PERFCTR_MAIN_TLB_MISS = 0xF,
+ ARMV6_PERFCTR_EXPL_D_ACCESS = 0x10,
+ ARMV6_PERFCTR_LSU_FULL_STALL = 0x11,
+ ARMV6_PERFCTR_WBUF_DRAINED = 0x12,
+ ARMV6_PERFCTR_CPU_CYCLES = 0xFF,
+ ARMV6_PERFCTR_NOP = 0x20,
+};
+
+enum armv6_counters {
+ ARMV6_CYCLE_COUNTER = 1,
+ ARMV6_COUNTER0,
+ ARMV6_COUNTER1,
+};
+
+/*
+ * The hardware events that we support. We do support cache operations but
+ * we have Harvard caches and no way to combine instruction and data
+ * accesses/misses in hardware.
+ */
+static const unsigned armv6_perf_map[PERF_COUNT_HW_MAX] = {
+ [PERF_COUNT_HW_CPU_CYCLES] = ARMV6_PERFCTR_CPU_CYCLES,
+ [PERF_COUNT_HW_INSTRUCTIONS] = ARMV6_PERFCTR_INSTR_EXEC,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED,
+ [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6_PERFCTR_BR_EXEC,
+ [PERF_COUNT_HW_BRANCH_MISSES] = ARMV6_PERFCTR_BR_MISPREDICT,
+ [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED,
+};
+
+static const unsigned armv6_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+ [C(L1D)] = {
+ /*
+ * The performance counters don't differentiate between read
+ * and write accesses/misses so this isn't strictly correct,
+ * but it's the best we can do. Writes and reads get
+ * combined.
+ */
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS,
+ [C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS,
+ [C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ },
+ [C(L1I)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ },
+ [C(LL)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ },
+ [C(DTLB)] = {
+ /*
+ * The ARM performance counters can count micro DTLB misses,
+ * micro ITLB misses and main TLB misses. There isn't an event
+ * for TLB misses, so use the micro misses here and if users
+ * want the main TLB misses they can use a raw counter.
+ */
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ },
+ [C(ITLB)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ },
+ [C(BPU)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ },
+};
+
+enum armv6mpcore_perf_types {
+ ARMV6MPCORE_PERFCTR_ICACHE_MISS = 0x0,
+ ARMV6MPCORE_PERFCTR_IBUF_STALL = 0x1,
+ ARMV6MPCORE_PERFCTR_DDEP_STALL = 0x2,
+ ARMV6MPCORE_PERFCTR_ITLB_MISS = 0x3,
+ ARMV6MPCORE_PERFCTR_DTLB_MISS = 0x4,
+ ARMV6MPCORE_PERFCTR_BR_EXEC = 0x5,
+ ARMV6MPCORE_PERFCTR_BR_NOTPREDICT = 0x6,
+ ARMV6MPCORE_PERFCTR_BR_MISPREDICT = 0x7,
+ ARMV6MPCORE_PERFCTR_INSTR_EXEC = 0x8,
+ ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS = 0xA,
+ ARMV6MPCORE_PERFCTR_DCACHE_RDMISS = 0xB,
+ ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS = 0xC,
+ ARMV6MPCORE_PERFCTR_DCACHE_WRMISS = 0xD,
+ ARMV6MPCORE_PERFCTR_DCACHE_EVICTION = 0xE,
+ ARMV6MPCORE_PERFCTR_SW_PC_CHANGE = 0xF,
+ ARMV6MPCORE_PERFCTR_MAIN_TLB_MISS = 0x10,
+ ARMV6MPCORE_PERFCTR_EXPL_MEM_ACCESS = 0x11,
+ ARMV6MPCORE_PERFCTR_LSU_FULL_STALL = 0x12,
+ ARMV6MPCORE_PERFCTR_WBUF_DRAINED = 0x13,
+ ARMV6MPCORE_PERFCTR_CPU_CYCLES = 0xFF,
+};
+
+/*
+ * The hardware events that we support. We do support cache operations but
+ * we have Harvard caches and no way to combine instruction and data
+ * accesses/misses in hardware.
+ */
+static const unsigned armv6mpcore_perf_map[PERF_COUNT_HW_MAX] = {
+ [PERF_COUNT_HW_CPU_CYCLES] = ARMV6MPCORE_PERFCTR_CPU_CYCLES,
+ [PERF_COUNT_HW_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_INSTR_EXEC,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED,
+ [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_BR_EXEC,
+ [PERF_COUNT_HW_BRANCH_MISSES] = ARMV6MPCORE_PERFCTR_BR_MISPREDICT,
+ [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED,
+};
+
+static const unsigned armv6mpcore_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+ [C(L1D)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] =
+ ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS,
+ [C(RESULT_MISS)] =
+ ARMV6MPCORE_PERFCTR_DCACHE_RDMISS,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] =
+ ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS,
+ [C(RESULT_MISS)] =
+ ARMV6MPCORE_PERFCTR_DCACHE_WRMISS,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ },
+ [C(L1I)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ },
+ [C(LL)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ },
+ [C(DTLB)] = {
+ /*
+ * The ARM performance counters can count micro DTLB misses,
+ * micro ITLB misses and main TLB misses. There isn't an event
+ * for TLB misses, so use the micro misses here and if users
+ * want the main TLB misses they can use a raw counter.
+ */
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ },
+ [C(ITLB)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ },
+ [C(BPU)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ },
+};
+
+static inline unsigned long
+armv6_pmcr_read(void)
+{
+ u32 val;
+ asm volatile("mrc p15, 0, %0, c15, c12, 0" : "=r"(val));
+ return val;
+}
+
+static inline void
+armv6_pmcr_write(unsigned long val)
+{
+ asm volatile("mcr p15, 0, %0, c15, c12, 0" : : "r"(val));
+}
+
+#define ARMV6_PMCR_ENABLE (1 << 0)
+#define ARMV6_PMCR_CTR01_RESET (1 << 1)
+#define ARMV6_PMCR_CCOUNT_RESET (1 << 2)
+#define ARMV6_PMCR_CCOUNT_DIV (1 << 3)
+#define ARMV6_PMCR_COUNT0_IEN (1 << 4)
+#define ARMV6_PMCR_COUNT1_IEN (1 << 5)
+#define ARMV6_PMCR_CCOUNT_IEN (1 << 6)
+#define ARMV6_PMCR_COUNT0_OVERFLOW (1 << 8)
+#define ARMV6_PMCR_COUNT1_OVERFLOW (1 << 9)
+#define ARMV6_PMCR_CCOUNT_OVERFLOW (1 << 10)
+#define ARMV6_PMCR_EVT_COUNT0_SHIFT 20
+#define ARMV6_PMCR_EVT_COUNT0_MASK (0xFF << ARMV6_PMCR_EVT_COUNT0_SHIFT)
+#define ARMV6_PMCR_EVT_COUNT1_SHIFT 12
+#define ARMV6_PMCR_EVT_COUNT1_MASK (0xFF << ARMV6_PMCR_EVT_COUNT1_SHIFT)
+
+#define ARMV6_PMCR_OVERFLOWED_MASK \
+ (ARMV6_PMCR_COUNT0_OVERFLOW | ARMV6_PMCR_COUNT1_OVERFLOW | \
+ ARMV6_PMCR_CCOUNT_OVERFLOW)
+
+static inline int
+armv6_pmcr_has_overflowed(unsigned long pmcr)
+{
+ return (pmcr & ARMV6_PMCR_OVERFLOWED_MASK);
+}
+
+static inline int
+armv6_pmcr_counter_has_overflowed(unsigned long pmcr,
+ enum armv6_counters counter)
+{
+ int ret;
+
+ if (ARMV6_CYCLE_COUNTER == counter)
+ ret = pmcr & ARMV6_PMCR_CCOUNT_OVERFLOW;
+ else if (ARMV6_COUNTER0 == counter)
+ ret = pmcr & ARMV6_PMCR_COUNT0_OVERFLOW;
+ else if (ARMV6_COUNTER1 == counter)
+ ret = pmcr & ARMV6_PMCR_COUNT1_OVERFLOW;
+ else
+ BUG();
+
+ return ret;
+}
+
+static inline u32
+armv6pmu_read_counter(int counter)
+{
+ unsigned long value;
+
+ if (ARMV6_CYCLE_COUNTER == counter)
+ asm volatile("mrc p15, 0, %0, c15, c12, 1" : "=r"(value));
+ else if (ARMV6_COUNTER0 == counter)
+ asm volatile("mrc p15, 0, %0, c15, c12, 2" : "=r"(value));
+ else if (ARMV6_COUNTER1 == counter)
+ asm volatile("mrc p15, 0, %0, c15, c12, 3" : "=r"(value));
+ else
+ BUG();
+
+ return value;
+}
+
+static inline void
+armv6pmu_write_counter(int counter,
+ u32 value)
+{
+ if (ARMV6_CYCLE_COUNTER == counter)
+ asm volatile("mcr p15, 0, %0, c15, c12, 1" : : "r"(value));
+ else if (ARMV6_COUNTER0 == counter)
+ asm volatile("mcr p15, 0, %0, c15, c12, 2" : : "r"(value));
+ else if (ARMV6_COUNTER1 == counter)
+ asm volatile("mcr p15, 0, %0, c15, c12, 3" : : "r"(value));
+ else
+ BUG();
+}
+
+void
+armv6pmu_enable_event(struct hw_perf_event *hwc,
+ int idx)
+{
+ unsigned long val, mask, evt, flags;
+
+ if (ARMV6_CYCLE_COUNTER == idx) {
+ mask = 0;
+ evt = ARMV6_PMCR_CCOUNT_IEN;
+ } else if (ARMV6_COUNTER0 == idx) {
+ mask = ARMV6_PMCR_EVT_COUNT0_MASK;
+ evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT0_SHIFT) |
+ ARMV6_PMCR_COUNT0_IEN;
+ } else if (ARMV6_COUNTER1 == idx) {
+ mask = ARMV6_PMCR_EVT_COUNT1_MASK;
+ evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT1_SHIFT) |
+ ARMV6_PMCR_COUNT1_IEN;
+ } else {
+ BUG();
+ }
+
+ /*
+ * Mask out the current event and set the counter to count the event
+ * that we're interested in.
+ */
+ spin_lock_irqsave(&pmu_lock, flags);
+ val = armv6_pmcr_read();
+ val &= ~mask;
+ val |= evt;
+ armv6_pmcr_write(val);
+ spin_unlock_irqrestore(&pmu_lock, flags);
+}
+
+static irqreturn_t
+armv6pmu_handle_irq(int irq_num,
+ void *dev)
+{
+ unsigned long pmcr = armv6_pmcr_read();
+ struct perf_sample_data data;
+ struct cpu_hw_events *cpuc;
+ struct pt_regs *regs;
+ int idx;
+
+ if (!armv6_pmcr_has_overflowed(pmcr))
+ return IRQ_NONE;
+
+ regs = get_irq_regs();
+
+ /*
+ * The interrupts are cleared by writing the overflow flags back to
+ * the control register. All of the other bits don't have any effect
+ * if they are rewritten, so write the whole value back.
+ */
+ armv6_pmcr_write(pmcr);
+
+ data.addr = 0;
+
+ cpuc = &__get_cpu_var(cpu_hw_events);
+ for (idx = 0; idx <= armpmu->num_events; ++idx) {
+ struct perf_event *event = cpuc->events[idx];
+ struct hw_perf_event *hwc;
+
+ if (!test_bit(idx, cpuc->active_mask))
+ continue;
+
+ /*
+ * We have a single interrupt for all counters. Check that
+ * each counter has overflowed before we process it.
+ */
+ if (!armv6_pmcr_counter_has_overflowed(pmcr, idx))
+ continue;
+
+ hwc = &event->hw;
+ armpmu_event_update(event, hwc, idx);
+ data.period = event->hw.last_period;
+ if (!armpmu_event_set_period(event, hwc, idx))
+ continue;
+
+ if (perf_event_overflow(event, 0, &data, regs))
+ armpmu->disable(hwc, idx);
+ }
+
+ /*
+ * Handle the pending perf events.
+ *
+ * Note: this call *must* be run with interrupts enabled. For
+ * platforms that can have the PMU interrupts raised as a PMI, this
+ * will not work.
+ */
+ perf_event_do_pending();
+
+ return IRQ_HANDLED;
+}
+
+static void
+armv6pmu_start(void)
+{
+ unsigned long flags, val;
+
+ spin_lock_irqsave(&pmu_lock, flags);
+ val = armv6_pmcr_read();
+ val |= ARMV6_PMCR_ENABLE;
+ armv6_pmcr_write(val);
+ spin_unlock_irqrestore(&pmu_lock, flags);
+}
+
+void
+armv6pmu_stop(void)
+{
+ unsigned long flags, val;
+
+ spin_lock_irqsave(&pmu_lock, flags);
+ val = armv6_pmcr_read();
+ val &= ~ARMV6_PMCR_ENABLE;
+ armv6_pmcr_write(val);
+ spin_unlock_irqrestore(&pmu_lock, flags);
+}
+
+static inline int
+armv6pmu_event_map(int config)
+{
+ int mapping = armv6_perf_map[config];
+ if (HW_OP_UNSUPPORTED == mapping)
+ mapping = -EOPNOTSUPP;
+ return mapping;
+}
+
+static inline int
+armv6mpcore_pmu_event_map(int config)
+{
+ int mapping = armv6mpcore_perf_map[config];
+ if (HW_OP_UNSUPPORTED == mapping)
+ mapping = -EOPNOTSUPP;
+ return mapping;
+}
+
+static u64
+armv6pmu_raw_event(u64 config)
+{
+ return config & 0xff;
+}
+
+static int
+armv6pmu_get_event_idx(struct cpu_hw_events *cpuc,
+ struct hw_perf_event *event)
+{
+ /* Always place a cycle counter into the cycle counter. */
+ if (ARMV6_PERFCTR_CPU_CYCLES == event->config_base) {
+ if (test_and_set_bit(ARMV6_CYCLE_COUNTER, cpuc->used_mask))
+ return -EAGAIN;
+
+ return ARMV6_CYCLE_COUNTER;
+ } else {
+ /*
+ * For anything other than a cycle counter, try and use
+ * counter0 and counter1.
+ */
+ if (!test_and_set_bit(ARMV6_COUNTER1, cpuc->used_mask)) {
+ return ARMV6_COUNTER1;
+ }
+
+ if (!test_and_set_bit(ARMV6_COUNTER0, cpuc->used_mask)) {
+ return ARMV6_COUNTER0;
+ }
+
+ /* The counters are all in use. */
+ return -EAGAIN;
+ }
+}
+
+static void
+armv6pmu_disable_event(struct hw_perf_event *hwc,
+ int idx)
+{
+ unsigned long val, mask, evt, flags;
+
+ if (ARMV6_CYCLE_COUNTER == idx) {
+ mask = ARMV6_PMCR_CCOUNT_IEN;
+ evt = 0;
+ } else if (ARMV6_COUNTER0 == idx) {
+ mask = ARMV6_PMCR_COUNT0_IEN | ARMV6_PMCR_EVT_COUNT0_MASK;
+ evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT0_SHIFT;
+ } else if (ARMV6_COUNTER1 == idx) {
+ mask = ARMV6_PMCR_COUNT1_IEN | ARMV6_PMCR_EVT_COUNT1_MASK;
+ evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT1_SHIFT;
+ } else {
+ BUG();
+ }
+
+ /*
+ * Mask out the current event and set the counter to count the number
+ * of ETM bus signal assertion cycles. The external reporting should
+ * be disabled and so this should never increment.
+ */
+ spin_lock_irqsave(&pmu_lock, flags);
+ val = armv6_pmcr_read();
+ val &= ~mask;
+ val |= evt;
+ armv6_pmcr_write(val);
+ spin_unlock_irqrestore(&pmu_lock, flags);
+}
+
+static void
+armv6mpcore_pmu_disable_event(struct hw_perf_event *hwc,
+ int idx)
+{
+ unsigned long val, mask, flags, evt = 0;
+
+ if (ARMV6_CYCLE_COUNTER == idx)
+ mask = ARMV6_PMCR_CCOUNT_IEN;
+ else if (ARMV6_COUNTER0 == idx)
+ mask = ARMV6_PMCR_COUNT0_IEN;
+ else if (ARMV6_COUNTER1 == idx)
+ mask = ARMV6_PMCR_COUNT1_IEN;
+ else
+ BUG();
+
+ /*
+ * Unlike UP ARMv6, we don't have a way of stopping the counters. We
+ * simply disable the interrupt reporting.
+ */
+ spin_lock_irqsave(&pmu_lock, flags);
+ val = armv6_pmcr_read();
+ val &= ~mask;
+ val |= evt;
+ armv6_pmcr_write(val);
+ spin_unlock_irqrestore(&pmu_lock, flags);
+}
+
+static const struct arm_pmu armv6pmu = {
+ .name = "v6",
+ .handle_irq = armv6pmu_handle_irq,
+ .enable = armv6pmu_enable_event,
+ .disable = armv6pmu_disable_event,
+ .event_map = armv6pmu_event_map,
+ .raw_event = armv6pmu_raw_event,
+ .read_counter = armv6pmu_read_counter,
+ .write_counter = armv6pmu_write_counter,
+ .get_event_idx = armv6pmu_get_event_idx,
+ .start = armv6pmu_start,
+ .stop = armv6pmu_stop,
+ .num_events = 3,
+ .max_period = (1LLU << 32) - 1,
+};
+
+/*
+ * ARMv6mpcore is almost identical to single core ARMv6 with the exception
+ * that some of the events have different enumerations and that there is no
+ * *hack* to stop the programmable counters. To stop the counters we simply
+ * disable the interrupt reporting and update the event. When unthrottling we
+ * reset the period and enable the interrupt reporting.
+ */
+static const struct arm_pmu armv6mpcore_pmu = {
+ .name = "v6mpcore",
+ .handle_irq = armv6pmu_handle_irq,
+ .enable = armv6pmu_enable_event,
+ .disable = armv6mpcore_pmu_disable_event,
+ .event_map = armv6mpcore_pmu_event_map,
+ .raw_event = armv6pmu_raw_event,
+ .read_counter = armv6pmu_read_counter,
+ .write_counter = armv6pmu_write_counter,
+ .get_event_idx = armv6pmu_get_event_idx,
+ .start = armv6pmu_start,
+ .stop = armv6pmu_stop,
+ .num_events = 3,
+ .max_period = (1LLU << 32) - 1,
+};
+
+static int __init
+init_hw_perf_events(void)
+{
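+ /* Bits [15:4] of the main ID register hold the part number. */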
+#define CPUID_MASK 0xFFF0
+ unsigned long cpuid = read_cpuid_id() & CPUID_MASK;
+
+ switch (cpuid) {
+ case 0xB360: /* ARM1136 */
+ case 0xB560: /* ARM1156 */
+ case 0xB760: /* ARM1176 */
+ armpmu = &armv6pmu;
+ memcpy(armpmu_perf_cache_map, armv6_perf_cache_map,
+ sizeof(armv6_perf_cache_map));
+ perf_max_events = armv6pmu.num_events;
+ break;
+ case 0xB020: /* ARM11mpcore */
+ armpmu = &armv6mpcore_pmu;
+ memcpy(armpmu_perf_cache_map, armv6mpcore_perf_cache_map,
+ sizeof(armv6mpcore_perf_cache_map));
+ perf_max_events = armv6mpcore_pmu.num_events;
+ break;
+ default:
+ pr_info("no hardware support available\n");
+ perf_max_events = -1;
+ }
+
+ if (armpmu)
+ pr_info("enabled with %s PMU driver\n",
+ armpmu->name);
+
+ return 0;
+}
+arch_initcall(init_hw_perf_events);
+
+/*
+ * Callchain handling code.
+ */
+static inline void
+callchain_store(struct perf_callchain_entry *entry,
+ u64 ip)
+{
+ if (entry->nr < PERF_MAX_STACK_DEPTH)
+ entry->ip[entry->nr++] = ip;
+}
+
+/*
+ * The registers we're interested in are at the end of the variable
+ * length saved register structure. The fp points at the end of this
+ * structure so the address of this struct is:
+ * (struct frame_tail *)(xxx->fp)-1
+ *
+ * This code has been adapted from the ARM OProfile support.
+ */
+struct frame_tail {
+ struct frame_tail *fp;
+ unsigned long sp;
+ unsigned long lr;
+} __attribute__((packed));
+
+/*
+ * Get the return address for a single stackframe and return a pointer to the
+ * next frame tail.
+ */
+static struct frame_tail *
+user_backtrace(struct frame_tail *tail,
+ struct perf_callchain_entry *entry)
+{
+ struct frame_tail buftail;
+
+ /* Also check accessibility of one struct frame_tail beyond */
+ if (!access_ok(VERIFY_READ, tail, sizeof(buftail)))
+ return NULL;
+ if (__copy_from_user_inatomic(&buftail, tail, sizeof(buftail)))
+ return NULL;
+
+ callchain_store(entry, buftail.lr);
+
+ /*
+ * Frame pointers should strictly progress back up the stack
+ * (towards higher addresses).
+ */
+ if (tail >= buftail.fp)
+ return NULL;
+
+ return buftail.fp - 1;
+}
+
+static void
+perf_callchain_user(struct pt_regs *regs,
+ struct perf_callchain_entry *entry)
+{
+ struct frame_tail *tail;
+
+ callchain_store(entry, PERF_CONTEXT_USER);
+
+ if (!user_mode(regs))
+ regs = task_pt_regs(current);
+
+ tail = (struct frame_tail *)regs->ARM_fp - 1;
+
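+ /*
+ * Walk the frames for as long as the frame pointer remains word
+ * aligned; anything else indicates a corrupt frame.
+ */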
+ while (tail && !((unsigned long)tail & 0x3))
+ tail = user_backtrace(tail, entry);
+}
+
+/*
+ * Gets called by walk_stackframe() for every stackframe. This will be called
+ * whilst unwinding the stackframe and is like a subroutine return so we use
+ * the PC.
+ */
+static int
+callchain_trace(struct stackframe *fr,
+ void *data)
+{
+ struct perf_callchain_entry *entry = data;
+ callchain_store(entry, fr->pc);
+ return 0;
+}
+
+static void
+perf_callchain_kernel(struct pt_regs *regs,
+ struct perf_callchain_entry *entry)
+{
+ struct stackframe fr;
+
+ callchain_store(entry, PERF_CONTEXT_KERNEL);
+ fr.fp = regs->ARM_fp;
+ fr.sp = regs->ARM_sp;
+ fr.lr = regs->ARM_lr;
+ fr.pc = regs->ARM_pc;
+ walk_stackframe(&fr, callchain_trace, entry);
+}
+
+static void
+perf_do_callchain(struct pt_regs *regs,
+ struct perf_callchain_entry *entry)
+{
+ int is_user;
+
+ if (!regs)
+ return;
+
+ is_user = user_mode(regs);
+
+ if (!current || !current->pid)
+ return;
+
+ if (is_user && current->state != TASK_RUNNING)
+ return;
+
+ if (!is_user)
+ perf_callchain_kernel(regs, entry);
+
+ if (current->mm)
+ perf_callchain_user(regs, entry);
+}
+
+static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry);
+
+struct perf_callchain_entry *
+perf_callchain(struct pt_regs *regs)
+{
+ struct perf_callchain_entry *entry = &__get_cpu_var(pmc_irq_entry);
+
+ entry->nr = 0;
+ perf_do_callchain(regs, entry);
+ return entry;
+}
--
1.6.5.4
* [PATCH 4/5] arm: enable support for software perf events
2010-01-04 10:48 ` [PATCH 4/5] arm: enable support for software perf events Jamie Iles
2010-01-04 10:48 ` [PATCH 5/5] arm/perfevents: implement perf event support for ARMv6 Jamie Iles
@ 2010-01-04 11:11 ` Russell King - ARM Linux
2010-01-04 12:26 ` Jamie Iles
1 sibling, 1 reply; 30+ messages in thread
From: Russell King - ARM Linux @ 2010-01-04 11:11 UTC (permalink / raw)
To: linux-arm-kernel
On Mon, Jan 04, 2010 at 10:48:41AM +0000, Jamie Iles wrote:
> +/* Get the PC. Make sure that we have a 64bit value with the upper 32 cleared.
> + */
> +#define perf_instruction_pointer(_regs) \
> + ((u64)instruction_pointer(_regs) & 0xFFFFFFFFLU)
Maybe we should make pt_regs entries be unsigned instead of having this?
Nothing should be affected by that change - and it looks like x86 went
through this change, making two pt_regs structures, one for userspace
with signed ints/longs, and one for kernel space with unsigned ints/longs.
I think it would make more sense to do that rather than litter the kernel
with casts like the above.
* [PATCH 5/5] arm/perfevents: implement perf event support for ARMv6
2010-01-04 10:48 ` [PATCH 5/5] arm/perfevents: implement perf event support for ARMv6 Jamie Iles
@ 2010-01-04 11:17 ` Russell King - ARM Linux
2010-01-04 11:46 ` Jamie Iles
2010-01-05 18:07 ` Will Deacon
1 sibling, 1 reply; 30+ messages in thread
From: Russell King - ARM Linux @ 2010-01-04 11:17 UTC (permalink / raw)
To: linux-arm-kernel
On Mon, Jan 04, 2010 at 10:48:42AM +0000, Jamie Iles wrote:
> This patch implements support for ARMv6 performance counters in the
> Linux performance events subsystem. ARMv6 architectures that have the
> performance counters should enable HW_PERF_EVENTS and define the
> interrupts for the counters in arch/arm/kernel/perf_event.c
Only comment is the number of BUG()s in this code - I count 6 of them.
BUG() is not meant to be used as an "assert" for when expectations are
not met - does it really make sense to crash the kernel when any of
these are hit? Are you at risk of losing data if one of your conditions
isn't satisfied?
You may want to consider using WARN_ON() instead - at least that doesn't
take out the kernel.
* [PATCH 5/5] arm/perfevents: implement perf event support for ARMv6
2010-01-04 11:17 ` Russell King - ARM Linux
@ 2010-01-04 11:46 ` Jamie Iles
0 siblings, 0 replies; 30+ messages in thread
From: Jamie Iles @ 2010-01-04 11:46 UTC (permalink / raw)
To: linux-arm-kernel
On Mon, Jan 04, 2010 at 11:17:33AM +0000, Russell King - ARM Linux wrote:
> On Mon, Jan 04, 2010 at 10:48:42AM +0000, Jamie Iles wrote:
> > This patch implements support for ARMv6 performance counters in the
> > Linux performance events subsystem. ARMv6 architectures that have the
> > performance counters should enable HW_PERF_EVENTS and define the
> > interrupts for the counters in arch/arm/kernel/perf_event.c
>
> Only comment is the number of BUG()s in this code - I count 6 of them.
> BUG() is not meant to be used as an "assert" for when expectations are
> not met - does it really make sense to crash the kernel when any of
> these are hit? Are you at risk of losing data if one of your conditions
> isn't satisfied?
>
> You may want to consider using WARN_ON() instead - at least that doesn't
> take out the kernel.
Ok, good point. I've replaced these with WARN_ONCE()'s so we don't get too
much noise with repeated accesses.
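For illustration, the overflow check ends up looking something like this (a
sketch of the reworked helper; the actual follow-up patch may differ
slightly):

static inline int
armv6_pmcr_counter_has_overflowed(unsigned long pmcr,
				  enum armv6_counters counter)
{
	int ret = 0;

	if (ARMV6_CYCLE_COUNTER == counter)
		ret = pmcr & ARMV6_PMCR_CCOUNT_OVERFLOW;
	else if (ARMV6_COUNTER0 == counter)
		ret = pmcr & ARMV6_PMCR_COUNT0_OVERFLOW;
	else if (ARMV6_COUNTER1 == counter)
		ret = pmcr & ARMV6_PMCR_COUNT1_OVERFLOW;
	else
		/* Report the bad index once instead of crashing. */
		WARN_ONCE(1, "invalid counter number (%d)\n", counter);

	return ret;
}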
Jamie
* [PATCH 4/5] arm: enable support for software perf events
2010-01-04 11:11 ` [PATCH 4/5] arm: enable support for software perf events Russell King - ARM Linux
@ 2010-01-04 12:26 ` Jamie Iles
2010-01-04 12:32 ` Russell King - ARM Linux
0 siblings, 1 reply; 30+ messages in thread
From: Jamie Iles @ 2010-01-04 12:26 UTC (permalink / raw)
To: linux-arm-kernel
On Mon, Jan 04, 2010 at 11:11:37AM +0000, Russell King - ARM Linux wrote:
> On Mon, Jan 04, 2010 at 10:48:41AM +0000, Jamie Iles wrote:
> > +/* Get the PC. Make sure that we have a 64bit value with the upper 32 cleared.
> > + */
> > +#define perf_instruction_pointer(_regs) \
> > + ((u64)instruction_pointer(_regs) & 0xFFFFFFFFLU)
>
> Maybe we should make pt_regs entries be unsigned instead of having this?
> Nothing should be affected by that change - and it looks like x86 went
> through this change, making two pt_regs structures, one for userspace
> with signed ints/longs, and one for kernel space with unsigned ints/longs.
>
> I think it would make more sense to do that rather than litter the kernel
> with casts like the above.
Ok, how about this?
Jamie
[PATCH] arm ptrace: use unsigned types for kernel pt_regs
Make registers unsigned for kernel space. This is important for
example in the perf events where the PC is stored into a u64. We
don't want it sign extended so make the regs unsigned to prevent
casts throughout the kernel.
Signed-off-by: Jamie Iles <jamie.iles@picochip.com>
Cc: Russell King - ARM Linux <linux@arm.linux.org.uk>
---
arch/arm/include/asm/ptrace.h | 6 ++++++
1 files changed, 6 insertions(+), 0 deletions(-)
diff --git a/arch/arm/include/asm/ptrace.h b/arch/arm/include/asm/ptrace.h
index bbecccd..eec6e89 100644
--- a/arch/arm/include/asm/ptrace.h
+++ b/arch/arm/include/asm/ptrace.h
@@ -97,9 +97,15 @@
* stack during a system call. Note that sizeof(struct pt_regs)
* has to be a multiple of 8.
*/
+#ifndef __KERNEL__
struct pt_regs {
long uregs[18];
};
+#else /* __KERNEL__ */
+struct pt_regs {
+ unsigned long uregs[18];
+};
+#endif /* __KERNEL__ */
#define ARM_cpsr uregs[16]
#define ARM_pc uregs[15]
--
1.6.5.4
* [PATCH 4/5] arm: enable support for software perf events
2010-01-04 12:26 ` Jamie Iles
@ 2010-01-04 12:32 ` Russell King - ARM Linux
0 siblings, 0 replies; 30+ messages in thread
From: Russell King - ARM Linux @ 2010-01-04 12:32 UTC (permalink / raw)
To: linux-arm-kernel
On Mon, Jan 04, 2010 at 12:26:24PM +0000, Jamie Iles wrote:
> On Mon, Jan 04, 2010 at 11:11:37AM +0000, Russell King - ARM Linux wrote:
> > On Mon, Jan 04, 2010 at 10:48:41AM +0000, Jamie Iles wrote:
> > > +/* Get the PC. Make sure that we have a 64bit value with the upper 32 cleared.
> > > + */
> > > +#define perf_instruction_pointer(_regs) \
> > > + ((u64)instruction_pointer(_regs) & 0xFFFFFFFFLU)
> >
> > Maybe we should make pt_regs entries be unsigned instead of having this?
> > Nothing should be affected by that change - and it looks like x86 went
> > through this change, making two pt_regs structures, one for userspace
> > with signed ints/longs, and one for kernel space with unsigned ints/longs.
> >
> > I think it would make more sense to do that rather than litter the kernel
> > with casts like the above.
> Ok, how about this?
Perfect.
* [PATCH 5/5] arm/perfevents: implement perf event support for ARMv6
2010-01-04 10:48 ` [PATCH 5/5] arm/perfevents: implement perf event support for ARMv6 Jamie Iles
2010-01-04 11:17 ` Russell King - ARM Linux
@ 2010-01-05 18:07 ` Will Deacon
2010-01-05 18:23 ` Jean Pihet
2010-01-05 22:26 ` Jamie Iles
1 sibling, 2 replies; 30+ messages in thread
From: Will Deacon @ 2010-01-05 18:07 UTC (permalink / raw)
To: linux-arm-kernel
Hi Jamie,
I've been trying to test your patches with a quad-core ARM 11MPCore on a
Realview PB11MP board.
Unfortunately, I occasionally experience a complete system hang during some
profiling runs. I don't think it's your fault however, as it can occur even
when monitoring only software events. I've managed to reproduce this on the
tip/master branch and got the following information [I enabled lock debugging]:
=================================
[ INFO: inconsistent lock state ]
2.6.33-rc2-tip+ #5
---------------------------------
inconsistent {IN-HARDIRQ-W} -> {HARDIRQ-ON-W} usage.
perf/1463 [HC0[0]:SC0[0]:HE1:SE1] takes:
(&ctx->lock){?.....}, at: [<c009e538>] __perf_event_sched_in+0x20/0x168
{IN-HARDIRQ-W} state was registered at:
[<c00718b0>] __lock_acquire+0x5c8/0x17b4
[<c0072b70>] lock_acquire+0xd4/0xec
[<c0315700>] _raw_spin_lock+0x2c/0x3c
[<c00a20f8>] perf_ctx_adjust_freq+0xc/0x1dc
[<c00a230c>] perf_event_task_tick+0x44/0xb4
[<c0041f4c>] scheduler_tick+0xec/0x144
[<c0053eb8>] update_process_times+0x40/0x4c
[<c006d91c>] tick_periodic+0xdc/0x108
[<c006d96c>] tick_handle_periodic+0x24/0xf0
[<c0030124>] ipi_timer+0x34/0x44
[<c002a3cc>] do_local_timer+0x50/0x80
[<c002aca4>] __irq_svc+0x44/0xe0
[<c002c388>] default_idle+0x28/0x2c
[<c002c8ac>] cpu_idle+0x8c/0xe4
[<70008080>] 0x70008080
irq event stamp: 454
hardirqs last enabled at (454): [<c0315d30>] _raw_spin_unlock_irq+0x24/0x2c
hardirqs last disabled at (453): [<c0315784>] _raw_spin_lock_irq+0x18/0x50
softirqs last enabled at (0): [<c0045650>] copy_process+0x328/0xf70
softirqs last disabled at (0): [<(null)>] (null)
other info that might help us debug this:
no locks held by perf/1463.
stack backtrace:
[<c0031694>] (unwind_backtrace+0x0/0xd4) from [<c006ff88>] (print_usage_bug+0x16c/0x1ac)
[<c006ff88>] (print_usage_bug+0x16c/0x1ac) from [<c00702f0>] (mark_lock+0x328/0x5f0)
[<c00702f0>] (mark_lock+0x328/0x5f0) from [<c007193c>] (__lock_acquire+0x654/0x17b4)
[<c007193c>] (__lock_acquire+0x654/0x17b4) from [<c0072b70>] (lock_acquire+0xd4/0xec)
[<c0072b70>] (lock_acquire+0xd4/0xec) from [<c0315700>] (_raw_spin_lock+0x2c/0x3c)
[<c0315700>] (_raw_spin_lock+0x2c/0x3c) from [<c009e538>] (__perf_event_sched_in+0x20/0x168)
[<c009e538>] (__perf_event_sched_in+0x20/0x168) from [<c009e6c8>] (perf_event_task_sched_in+0x48/0x58)
[<c009e6c8>] (perf_event_task_sched_in+0x48/0x58) from [<c003f2dc>] (finish_task_switch+0x34/0xb4)
[<c003f2dc>] (finish_task_switch+0x34/0xb4) from [<c03133e0>] (schedule+0x728/0x834)
[<c03133e0>] (schedule+0x728/0x834) from [<c00d5190>] (pipe_wait+0x64/0x84)
[<c00d5190>] (pipe_wait+0x64/0x84) from [<c00d59d8>] (pipe_read+0x3ac/0x428)
[<c00d59d8>] (pipe_read+0x3ac/0x428) from [<c00cdd34>] (do_sync_read+0x94/0xe0)
[<c00cdd34>] (do_sync_read+0x94/0xe0) from [<c00ce868>] (vfs_read+0xa8/0x150)
[<c00ce868>] (vfs_read+0xa8/0x150) from [<c00ce9bc>] (sys_read+0x3c/0x68)
[<c00ce9bc>] (sys_read+0x3c/0x68) from [<c002b140>] (ret_fast_syscall+0x0/0x38)
This occurs more frequently when profiling code that spawns child processes
[e.g. gcc] but I've seen it happen with sleep too. It also happens if I pass
maxcpus=1 as a bootarg.
Have you seen this happen on your board? I've failed to reproduce it on my
x86 machine.
Will
* Jamie Iles wrote:
> This patch implements support for ARMv6 performance counters in the
> Linux performance events subsystem. ARMv6 architectures that have the
> performance counters should enable HW_PERF_EVENTS and define the
> interrupts for the counters in arch/arm/kernel/perf_event.c
>
> This implementation also provides an ARM PMU abstraction layer to allow
> ARMv7 and others to be supported in the future by adding a new
> 'struct arm_pmu'.
>
> Signed-off-by: Jamie Iles <jamie.iles@picochip.com>
> Cc: Peter Zijlstra <peterz@infradead.org>
> Cc: Ingo Molnar <mingo@elte.hu>
> Cc: Jean Pihet <jpihet@mvista.com>
> Cc: Will Deacon <will.deacon@arm.com>
> ---
> arch/arm/Kconfig | 8 +
> arch/arm/kernel/Makefile | 1 +
> arch/arm/kernel/perf_event.c | 1338 ++++++++++++++++++++++++++++++++++++++++++
> 3 files changed, 1347 insertions(+), 0 deletions(-)
> create mode 100644 arch/arm/kernel/perf_event.c
<truncated>
* [PATCH 5/5] arm/perfevents: implement perf event support for ARMv6
2010-01-05 18:07 ` Will Deacon
@ 2010-01-05 18:23 ` Jean Pihet
2010-01-05 22:26 ` Jamie Iles
1 sibling, 0 replies; 30+ messages in thread
From: Jean Pihet @ 2010-01-05 18:23 UTC (permalink / raw)
To: linux-arm-kernel
Hi Will, Jamie,
On Tuesday 05 January 2010 19:07:44 Will Deacon wrote:
> Hi Jamie,
>
> I've been trying to test your patches with a quad-core ARM 11MPCore on a
> Realview PB11MP board.
>
> Unfortunately, I occasionally experience a complete system hang during some
> profiling runs. I don't think it's your fault however, as it can occur even
> when monitoring only software events. I've managed to reproduce this on the
> tip/master branch and got the following information [I enabled lock
> debugging]:
This is exactly the same problem I hit when stressing the system a little.
Jean
>
> =================================
> [ INFO: inconsistent lock state ]
> 2.6.33-rc2-tip+ #5
> ---------------------------------
> inconsistent {IN-HARDIRQ-W} -> {HARDIRQ-ON-W} usage.
> perf/1463 [HC0[0]:SC0[0]:HE1:SE1] takes:
> (&ctx->lock){?.....}, at: [<c009e538>] __perf_event_sched_in+0x20/0x168
> {IN-HARDIRQ-W} state was registered at:
> [<c00718b0>] __lock_acquire+0x5c8/0x17b4
> [<c0072b70>] lock_acquire+0xd4/0xec
> [<c0315700>] _raw_spin_lock+0x2c/0x3c
> [<c00a20f8>] perf_ctx_adjust_freq+0xc/0x1dc
> [<c00a230c>] perf_event_task_tick+0x44/0xb4
> [<c0041f4c>] scheduler_tick+0xec/0x144
> [<c0053eb8>] update_process_times+0x40/0x4c
> [<c006d91c>] tick_periodic+0xdc/0x108
> [<c006d96c>] tick_handle_periodic+0x24/0xf0
> [<c0030124>] ipi_timer+0x34/0x44
> [<c002a3cc>] do_local_timer+0x50/0x80
> [<c002aca4>] __irq_svc+0x44/0xe0
> [<c002c388>] default_idle+0x28/0x2c
> [<c002c8ac>] cpu_idle+0x8c/0xe4
> [<70008080>] 0x70008080
> irq event stamp: 454
> hardirqs last enabled at (454): [<c0315d30>] _raw_spin_unlock_irq+0x24/0x2c
> hardirqs last disabled at (453): [<c0315784>] _raw_spin_lock_irq+0x18/0x50
> softirqs last enabled at (0): [<c0045650>] copy_process+0x328/0xf70
> softirqs last disabled at (0): [<(null)>] (null)
>
> other info that might help us debug this:
> no locks held by perf/1463.
>
> stack backtrace:
> [<c0031694>] (unwind_backtrace+0x0/0xd4) from [<c006ff88>] (print_usage_bug+0x16c/0x1ac)
> [<c006ff88>] (print_usage_bug+0x16c/0x1ac) from [<c00702f0>] (mark_lock+0x328/0x5f0)
> [<c00702f0>] (mark_lock+0x328/0x5f0) from [<c007193c>] (__lock_acquire+0x654/0x17b4)
> [<c007193c>] (__lock_acquire+0x654/0x17b4) from [<c0072b70>] (lock_acquire+0xd4/0xec)
> [<c0072b70>] (lock_acquire+0xd4/0xec) from [<c0315700>] (_raw_spin_lock+0x2c/0x3c)
> [<c0315700>] (_raw_spin_lock+0x2c/0x3c) from [<c009e538>] (__perf_event_sched_in+0x20/0x168)
> [<c009e538>] (__perf_event_sched_in+0x20/0x168) from [<c009e6c8>] (perf_event_task_sched_in+0x48/0x58)
> [<c009e6c8>] (perf_event_task_sched_in+0x48/0x58) from [<c003f2dc>] (finish_task_switch+0x34/0xb4)
> [<c003f2dc>] (finish_task_switch+0x34/0xb4) from [<c03133e0>] (schedule+0x728/0x834)
> [<c03133e0>] (schedule+0x728/0x834) from [<c00d5190>] (pipe_wait+0x64/0x84)
> [<c00d5190>] (pipe_wait+0x64/0x84) from [<c00d59d8>] (pipe_read+0x3ac/0x428)
> [<c00d59d8>] (pipe_read+0x3ac/0x428) from [<c00cdd34>] (do_sync_read+0x94/0xe0)
> [<c00cdd34>] (do_sync_read+0x94/0xe0) from [<c00ce868>] (vfs_read+0xa8/0x150)
> [<c00ce868>] (vfs_read+0xa8/0x150) from [<c00ce9bc>] (sys_read+0x3c/0x68)
> [<c00ce9bc>] (sys_read+0x3c/0x68) from [<c002b140>] (ret_fast_syscall+0x0/0x38)
>
> This occurs more frequently when profiling code that spawns child processes
> [e.g. gcc] but I've seen it happen with sleep too. It also happens if I pass
> maxcpus=1 as a bootarg.
>
> Have you seen this happen on your board? I've failed to reproduce it on my
> x86 machine.
>
> Will
>
> * Jamie Iles wrote:
> > This patch implements support for ARMv6 performance counters in the
> > Linux performance events subsystem. ARMv6 architectures that have the
> > performance counters should enable HW_PERF_EVENTS and define the
> > interrupts for the counters in arch/arm/kernel/perf_event.c
> >
> > This implementation also provides an ARM PMU abstraction layer to allow
> > ARMv7 and others to be supported in the future by adding a new
> > 'struct arm_pmu'.
> >
> > Signed-off-by: Jamie Iles <jamie.iles@picochip.com>
> > Cc: Peter Zijlstra <peterz@infradead.org>
> > Cc: Ingo Molnar <mingo@elte.hu>
> > Cc: Jean Pihet <jpihet@mvista.com>
> > Cc: Will Deacon <will.deacon@arm.com>
> > ---
> > arch/arm/Kconfig | 8 +
> > arch/arm/kernel/Makefile | 1 +
> > arch/arm/kernel/perf_event.c | 1338
> > ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 1347
> > insertions(+), 0 deletions(-)
> > create mode 100644 arch/arm/kernel/perf_event.c
>
> <truncated>
* [PATCH 3/5] arm: use the spinlocked, generic atomic64 support
2010-01-04 10:48 ` [PATCH 3/5] arm: use the spinlocked, generic atomic64 support Jamie Iles
2010-01-04 10:48 ` [PATCH 4/5] arm: enable support for software perf events Jamie Iles
@ 2010-01-05 18:57 ` Jamie Lokier
2010-01-05 19:08 ` Jamie Iles
1 sibling, 1 reply; 30+ messages in thread
From: Jamie Lokier @ 2010-01-05 18:57 UTC (permalink / raw)
To: linux-arm-kernel
Jamie Iles wrote:
> perf events require that we can support atomic64's. There is a generic,
> spinlocked version that we can use until we have proper hardware
> support.
Is that simply waiting for LDREXD/STREXD-based atomic64's, or is it
depending on something subtler?
-- Jamie
* [PATCH 3/5] arm: use the spinlocked, generic atomic64 support
2010-01-05 18:57 ` [PATCH 3/5] arm: use the spinlocked, generic atomic64 support Jamie Lokier
@ 2010-01-05 19:08 ` Jamie Iles
0 siblings, 0 replies; 30+ messages in thread
From: Jamie Iles @ 2010-01-05 19:08 UTC (permalink / raw)
To: linux-arm-kernel
On Tue, Jan 05, 2010 at 06:57:32PM +0000, Jamie Lokier wrote:
> Jamie Iles wrote:
> > perf events require that we can support atomic64's. There is a generic,
> > spinlocked version that we can use until we have proper hardware
> > support.
>
> Is that simply waiting for LDREXD/STREXD-based atomic64's, or is it
> depending on something subtler?
Yes, Will Deacon submitted a patch last month that does this but afaik it
hasn't been merged in yet.
Jamie
* [PATCH 5/5] arm/perfevents: implement perf event support for ARMv6
2010-01-05 18:07 ` Will Deacon
2010-01-05 18:23 ` Jean Pihet
@ 2010-01-05 22:26 ` Jamie Iles
2010-01-05 22:31 ` Russell King - ARM Linux
1 sibling, 1 reply; 30+ messages in thread
From: Jamie Iles @ 2010-01-05 22:26 UTC (permalink / raw)
To: linux-arm-kernel
Hi Will,
On Tue, Jan 05, 2010 at 06:07:44PM -0000, Will Deacon wrote:
> I've been trying to test your patches with a quad-core ARM 11MPCore on a
> Realview PB11MP board.
>
> Unfortunately, I occasionally experience a complete system hang during some
> profiling runs. I don't think it's your fault however, as it can occur even
> when monitoring only software events. I've managed to reproduce this on the
> tip/master branch and got the following information [I enabled lock debugging]:
Could it be to do with the fact that perf_event_task_sched_in() expects
interrupts to be disabled but on ARM we have __ARCH_WANT_INTERRUPTS_ON_CTXSW
defined and therefore run with interrupts enabled? If so, I'm not sure what
the fix is!
At the moment, ARM is the only platform that context switches with interrupts
enabled and has perf event support.
Jamie
* [PATCH 5/5] arm/perfevents: implement perf event support for ARMv6
2010-01-05 22:26 ` Jamie Iles
@ 2010-01-05 22:31 ` Russell King - ARM Linux
2010-01-06 0:18 ` Jamie Iles
0 siblings, 1 reply; 30+ messages in thread
From: Russell King - ARM Linux @ 2010-01-05 22:31 UTC (permalink / raw)
To: linux-arm-kernel
On Tue, Jan 05, 2010 at 10:26:58PM +0000, Jamie Iles wrote:
> Hi Will,
>
> On Tue, Jan 05, 2010 at 06:07:44PM -0000, Will Deacon wrote:
> > I've been trying to test your patches with a quad-core ARM 11MPCore on a
> > Realview PB11MP board.
> >
> > Unfortunately, I occasionally experience a complete system hang during some
> > profiling runs. I don't think it's your fault however, as it can occur even
> > when monitoring only software events. I've managed to reproduce this on the
> > tip/master branch and got the following information [I enabled lock debugging]:
> Could it be to do with the fact that perf_event_task_sched_in() expects
> interrupts to be disabled but on ARM we have __ARCH_WANT_INTERRUPTS_ON_CTXSW
> defined and therefore run with interrupts enabled? If so, I'm not sure what
> the fix is!
>
> At the moment, ARM is the only platform that context switches with interrupts
> enabled and has perf event support.
If perf event support is only safe with interrupts disabled, it should
disable them. Maybe a patch to do that conditional on
__ARCH_WANT_INTERRUPTS_ON_CTXSW would be more acceptable than an
unconditional one - don't know.
We could only define __ARCH_WANT_INTERRUPTS_ON_CTXSW for VIVT supporting
kernels, which is the reason for it existing (the interrupt latency for
VIVT would otherwise be unacceptable.) This approach would mean that
perf events wouldn't be usable on VIVT CPUs (which includes Xscale CPUs.)
* [PATCH 5/5] arm/perfevents: implement perf event support for ARMv6
2010-01-05 22:31 ` Russell King - ARM Linux
@ 2010-01-06 0:18 ` Jamie Iles
2010-01-06 12:09 ` Will Deacon
0 siblings, 1 reply; 30+ messages in thread
From: Jamie Iles @ 2010-01-06 0:18 UTC (permalink / raw)
To: linux-arm-kernel
On Tue, Jan 05, 2010 at 10:31:47PM +0000, Russell King - ARM Linux wrote:
> On Tue, Jan 05, 2010 at 10:26:58PM +0000, Jamie Iles wrote:
> > Hi Will,
> >
> > On Tue, Jan 05, 2010 at 06:07:44PM -0000, Will Deacon wrote:
> > > I've been trying to test your patches with a quad-core ARM 11MPCore on a
> > > Realview PB11MP board.
> > >
> > > Unfortunately, I occasionally experience a complete system hang during some
> > > profiling runs. I don't think it's your fault however, as it can occur even
> > > when monitoring only software events. I've managed to reproduce this on the
> > > tip/master branch and got the following information [I enabled lock debugging]:
> > Could it be to do with the fact that perf_event_task_sched_in() expects
> > interrupts to be disabled but on ARM we have __ARCH_WANT_INTERRUPTS_ON_CTXSW
> > defined and therefore run with interrupts enabled? If so, I'm not sure what
> > the fix is!
> >
> > At the moment, ARM is the only platform that context switches with interrupts
> > enabled and has perf event support.
>
> If perf event support is only safe with interrupts disabled, it should
> disable them. Maybe a patch to do that conditional on
> __ARCH_WANT_INTERRUPTS_ON_CTXSW would be more acceptable than an
> unconditional one - don't know.
>
> We could only define __ARCH_WANT_INTERRUPTS_ON_CTXSW for VIVT supporting
> kernels, which is the reason for it existing (the interrupt latency for
> VIVT would otherwise be unacceptable.) This approach would mean that
> perf events wouldn't be usable on VIVT CPUs (which includes Xscale CPUs.)
Ok, I've tried 2 things:
1. disabling interrupts around perf_event_task_sched_in()
2. undefining __ARCH_WANT_INTERRUPTS_ON_CTXSW
As far as I can tell, both of these solutions work, although with 2, I had to
define __ARCH_WANT_INTERRUPTS_ON_CTXSW.
Will, Jean - could you give the patch below a go and see if it works on your
systems? I don't get any lockdep warnings on my platform with this and it
still runs without the lock debugging.
It's not a nice patch, but at least it lets perf events be used on all ARM
platforms. Also, I guess this could be a local_irq_{disable,enable} pair,
without the need to save the flags, when we know interrupts are enabled (a
sketch of that variant follows the patch below).
Thanks,
Jamie
diff --git a/kernel/sched.c b/kernel/sched.c
index 918f343..f110994 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2767,6 +2767,9 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
{
struct mm_struct *mm = rq->prev_mm;
long prev_state;
+#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
+ unsigned long flags;
+#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
rq->prev_mm = NULL;
@@ -2783,7 +2786,14 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
*/
prev_state = prev->state;
finish_arch_switch(prev);
+#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
+ local_irq_save(flags);
perf_event_task_sched_in(current);
+ local_irq_restore(flags);
+#else /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
+ perf_event_task_sched_in(current);
+#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
+
finish_lock_switch(rq, prev);
fire_sched_in_preempt_notifiers(current);
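For illustration, the local_irq_{disable,enable} variant mentioned above would
change the same hunk to (an untested sketch, valid only if interrupts are
guaranteed to be enabled at this point in finish_task_switch()):

#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
	/* With __ARCH_WANT_INTERRUPTS_ON_CTXSW we know interrupts are
	 * enabled here, so there are no flags worth saving. */
	local_irq_disable();
	perf_event_task_sched_in(current);
	local_irq_enable();
#else /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
	perf_event_task_sched_in(current);
#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */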
* [PATCH 1/5] arm: provide a mechanism to reserve performance counters
2010-01-04 10:48 ` [PATCH 1/5] arm: provide a mechanism to reserve performance counters Jamie Iles
2010-01-04 10:48 ` [PATCH 2/5] arm/oprofile: reserve the PMU when starting Jamie Iles
@ 2010-01-06 12:00 ` Michał Nazarewicz
2010-01-06 12:15 ` Jamie Iles
1 sibling, 1 reply; 30+ messages in thread
From: Michał Nazarewicz @ 2010-01-06 12:00 UTC (permalink / raw)
To: linux-arm-kernel
On Mon, 04 Jan 2010 11:48:38 +0100, Jamie Iles <jamie.iles@picochip.com> wrote:
> To add support for perf events and to allow the hardware
> counters to be shared with oprofile, we need a way to reserve
> access to the pmu (performance monitor unit).
>
> Cc: Will Deacon <will.deacon@arm.com>
> Signed-off-by: Jamie Iles <jamie.iles@picochip.com>
> diff --git a/arch/arm/include/asm/pmu.h b/arch/arm/include/asm/pmu.h
> new file mode 100644
> index 0000000..5840d2d
> --- /dev/null
> +++ b/arch/arm/include/asm/pmu.h
> @@ -0,0 +1,74 @@
[...]
> +#ifndef __ARM_PMU_H__
> +#define __ARM_PMU_H__
> +
> +#ifdef CONFIG_CPU_HAS_PMU
[...]
> +#else /* CONFIG_CPU_HAS_PMU */
> +
> +static inline const struct pmu_irqs *
> +reserve_pmu(void)
> +{
> + ERR_PTR(-ENODEV);
- ERR_PTR(-ENODEV);
+ return ERR_PTR(-ENODEV);
> +}
> +
> +static inline int
> +release_pmu(const struct pmu_irqs *irqs)
> +{
+ return -ENODEV;
> +}
> +
> +static inline int
> +init_pmu(void)
> +{
> + return -ENODEV;
> +}
> +
> +#endif /* CONFIG_CPU_HAS_PMU */
> +
> +#endif /* __ARM_PMU_H__ */
> diff --git a/arch/arm/kernel/pmu.c b/arch/arm/kernel/pmu.c
> new file mode 100644
> index 0000000..a8c015d
> --- /dev/null
> +++ b/arch/arm/kernel/pmu.c
> @@ -0,0 +1,107 @@
[...]
> +static const int irqs[] = {
[...]
> +};
> +
> +static const struct pmu_irqs pmu_irqs = {
> + .irqs = irqs,
> + .num_irqs = ARRAY_SIZE(irqs),
> +};
> +
> +static DECLARE_MUTEX(pmu_mutex);
Isn't a mutex overkill? A bit field would be enough:
-static DECLARE_MUTEX(pmu_mutex);
+static volatile long pmu_mutex;
> +
> +const struct pmu_irqs *
> +reserve_pmu(void)
> +{
> + int ret = down_trylock(&pmu_mutex) ? -EBUSY : 0;
> +
> + return ret ? ERR_PTR(ret) : &pmu_irqs;
- int ret = down_trylock(&pmu_mutex) ? -EBUSY : 0;
-
- return ret ? ERR_PTR(ret) : &pmu_irqs;
+ return test_and_set_bit_lock(0, &pmu_mutex) ? ERR_PTR(-EBUSY) : &pmu_irqs;
> +}
> +EXPORT_SYMBOL_GPL(reserve_pmu);
> +
> +int
> +release_pmu(const struct pmu_irqs *irqs)
> +{
> + if (WARN_ON(irqs != &pmu_irqs))
> + return -EINVAL;
> + up(&pmu_mutex);
- up(&pmu_mutex);
+ clear_bit_unlock(0, &pmu_mutex);
> + return 0;
> +}
> +EXPORT_SYMBOL_GPL(release_pmu);
[...]
--
Best regards, _ _
.o. | Liege of Serenely Enlightened Majesty of o' \,=./ `o
..o | Computer Science, Michał "mina86" Nazarewicz (o o)
ooo +---<mina86@mina86.com>---<mina86@jabber.org>---ooO--(_)--Ooo--
* [PATCH 5/5] arm/perfevents: implement perf event support for ARMv6
2010-01-06 0:18 ` Jamie Iles
@ 2010-01-06 12:09 ` Will Deacon
2010-01-06 12:14 ` Jamie Iles
0 siblings, 1 reply; 30+ messages in thread
From: Will Deacon @ 2010-01-06 12:09 UTC (permalink / raw)
To: linux-arm-kernel
Hi Jamie,
* Jamie Iles wrote:
> Ok, I've tried 2 things:
> 1. disabling interrupts around perf_event_task_sched_in()
> 2. undefining __ARCH_WANT_INTERRUPTS_ON_CTXSW
>
> As far as I can tell, both of these solutions work, although with 2, I had to
> define __ARCH_WANT_INTERRUPTS_ON_CTXSW.
I don't follow what you mean for point (2) when you say you have to define
__ARCH_WANT_INTERRUPTS_ON_CTXSW. I tried defining __ARCH_WANT_INTERRUPTS_ON_CTXSW
only when VIVT caches are present [as Russell mentioned], but I encountered
further locking problems with __new_context [see below].
> Will, Jean - could you give the patch below a go and see if it works on your
> systems? I don't get any lockdep warnings on my platform with this and it
> still runs without the lock debugging.
This patch solves the issue for me. Should this be integrated into your patchset,
since that is the first perf code for ARM?
Cheers,
Will
======================================================
[ INFO: HARDIRQ-safe -> HARDIRQ-unsafe lock order detected ]
2.6.33-rc2-tip+ #1
------------------------------------------------------
swapper/0 [HC0[0]:SC0[0]:HE0:SE1] is trying to acquire:
(cpu_asid_lock){+.+...}, at: [<c0035c14>] __new_context+0x14/0xc4
and this task is already holding:
(&rq->lock){-.-.-.}, at: [<c030c948>] schedule+0xa8/0x834
which would create a new lock dependency:
(&rq->lock){-.-.-.} -> (cpu_asid_lock){+.+...}
but this new dependency connects a HARDIRQ-irq-safe lock:
(&rq->lock){-.-.-.}
... which became HARDIRQ-irq-safe at:
[<c0076074>] __lock_acquire+0x5c8/0x17b4
[<c0077334>] lock_acquire+0xd4/0xec
[<c030f2e8>] _raw_spin_lock+0x2c/0x3c
[<c004665c>] scheduler_tick+0x34/0x144
[<c0058670>] update_process_times+0x40/0x4c
[<c00720e0>] tick_periodic+0xdc/0x108
[<c0072130>] tick_handle_periodic+0x24/0xf0
[<c0036e20>] realview_timer_interrupt+0x24/0x34
[<c008aa30>] handle_IRQ_event+0x5c/0x144
[<c008c848>] handle_level_irq+0xc0/0x134
[<c002a084>] asm_do_IRQ+0x84/0xc0
[<c002aca4>] __irq_svc+0x44/0xe0
[<c0309c64>] calibrate_delay+0x84/0x1ac
[<c0008be0>] start_kernel+0x224/0x2c8
[<70008080>] 0x70008080
to a HARDIRQ-irq-unsafe lock:
(cpu_asid_lock){+.+...}
... which became HARDIRQ-irq-unsafe at:
... [<c0076100>] __lock_acquire+0x654/0x17b4
[<c0077334>] lock_acquire+0xd4/0xec
[<c030f2e8>] _raw_spin_lock+0x2c/0x3c
[<c0035c14>] __new_context+0x14/0xc4
[<c00d7d74>] flush_old_exec+0x3b8/0x75c
[<c0109fa8>] load_elf_binary+0x340/0x1288
[<c00d7500>] search_binary_handler+0x130/0x320
[<c00d8bcc>] do_execve+0x1c0/0x2d4
[<c002e558>] kernel_execve+0x34/0x84
[<c002a7ac>] init_post+0xc0/0x110
[<c0008730>] kernel_init+0x1b8/0x208
[<c002c2ec>] kernel_thread_exit+0x0/0x8
other info that might help us debug this:
1 lock held by swapper/0:
#0: (&rq->lock){-.-.-.}, at: [<c030c948>] schedule+0xa8/0x834
the dependencies between HARDIRQ-irq-safe lock and the holding lock:
-> (&rq->lock){-.-.-.} ops: 0 {
IN-HARDIRQ-W at:
[<c0076074>] __lock_acquire+0x5c8/0x17b4
[<c0077334>] lock_acquire+0xd4/0xec
[<c030f2e8>] _raw_spin_lock+0x2c/0x3c
[<c004665c>] scheduler_tick+0x34/0x144
[<c0058670>] update_process_times+0x40/0x4c
[<c00720e0>] tick_periodic+0xdc/0x108
[<c0072130>] tick_handle_periodic+0x24/0xf0
[<c0036e20>] realview_timer_interrupt+0x24/0x34
[<c008aa30>] handle_IRQ_event+0x5c/0x144
[<c008c848>] handle_level_irq+0xc0/0x134
[<c002a084>] asm_do_IRQ+0x84/0xc0
[<c002aca4>] __irq_svc+0x44/0xe0
[<c0309c64>] calibrate_delay+0x84/0x1ac
[<c0008be0>] start_kernel+0x224/0x2c8
[<70008080>] 0x70008080
IN-SOFTIRQ-W at:
[<c0076098>] __lock_acquire+0x5ec/0x17b4
[<c0077334>] lock_acquire+0xd4/0xec
[<c030f2e8>] _raw_spin_lock+0x2c/0x3c
[<c004492c>] double_rq_lock+0x40/0x84
[<c0045c90>] run_rebalance_domains+0x208/0x510
[<c0051510>] __do_softirq+0xe8/0x1e4
[<c002a3cc>] do_local_timer+0x50/0x80
[<c002aca4>] __irq_svc+0x44/0xe0
[<c002c388>] default_idle+0x28/0x2c
[<c002c8ac>] cpu_idle+0x8c/0xe4
[<c0008c28>] start_kernel+0x26c/0x2c8
[<70008080>] 0x70008080
IN-RECLAIM_FS-W at:
[<c0076170>] __lock_acquire+0x6c4/0x17b4
[<c0077334>] lock_acquire+0xd4/0xec
[<c030f2e8>] _raw_spin_lock+0x2c/0x3c
[<c003f35c>] task_rq_lock+0x40/0x78
[<c0045fc8>] set_cpus_allowed_ptr+0x30/0x1bc
[<c00b426c>] kswapd+0x78/0x620
[<c0066a5c>] kthread+0x7c/0x84
[<c002c2ec>] kernel_thread_exit+0x0/0x8
INITIAL USE at:
[<c0076188>] __lock_acquire+0x6dc/0x17b4
[<c0077334>] lock_acquire+0xd4/0xec
[<c030f3e4>] _raw_spin_lock_irqsave+0x40/0x54
[<c004302c>] rq_attach_root+0x14/0x10c
[<c000c780>] sched_init+0x234/0x35c
[<c0008b28>] start_kernel+0x16c/0x2c8
[<70008080>] 0x70008080
}
... key at: [<c044ef3c>] __key.45524+0x0/0x8
... acquired at:
[<c0075a4c>] check_irq_usage+0x58/0xb8
[<c0076b54>] __lock_acquire+0x10a8/0x17b4
[<c0077334>] lock_acquire+0xd4/0xec
[<c030f2e8>] _raw_spin_lock+0x2c/0x3c
[<c0035c14>] __new_context+0x14/0xc4
[<c030cf50>] schedule+0x6b0/0x834
[<c002c8ec>] cpu_idle+0xcc/0xe4
[<70008080>] 0x70008080
the dependencies between the lock to be acquired and HARDIRQ-irq-unsafe lock:
-> (cpu_asid_lock){+.+...} ops: 0 {
HARDIRQ-ON-W at:
[<c0076100>] __lock_acquire+0x654/0x17b4
[<c0077334>] lock_acquire+0xd4/0xec
[<c030f2e8>] _raw_spin_lock+0x2c/0x3c
[<c0035c14>] __new_context+0x14/0xc4
[<c00d7d74>] flush_old_exec+0x3b8/0x75c
[<c0109fa8>] load_elf_binary+0x340/0x1288
[<c00d7500>] search_binary_handler+0x130/0x320
[<c00d8bcc>] do_execve+0x1c0/0x2d4
[<c002e558>] kernel_execve+0x34/0x84
[<c002a7ac>] init_post+0xc0/0x110
[<c0008730>] kernel_init+0x1b8/0x208
[<c002c2ec>] kernel_thread_exit+0x0/0x8
SOFTIRQ-ON-W at:
[<c0076124>] __lock_acquire+0x678/0x17b4
[<c0077334>] lock_acquire+0xd4/0xec
[<c030f2e8>] _raw_spin_lock+0x2c/0x3c
[<c0035c14>] __new_context+0x14/0xc4
[<c00d7d74>] flush_old_exec+0x3b8/0x75c
[<c0109fa8>] load_elf_binary+0x340/0x1288
[<c00d7500>] search_binary_handler+0x130/0x320
[<c00d8bcc>] do_execve+0x1c0/0x2d4
[<c002e558>] kernel_execve+0x34/0x84
[<c002a7ac>] init_post+0xc0/0x110
[<c0008730>] kernel_init+0x1b8/0x208
[<c002c2ec>] kernel_thread_exit+0x0/0x8
INITIAL USE at:
[<c0076188>] __lock_acquire+0x6dc/0x17b4
[<c0077334>] lock_acquire+0xd4/0xec
[<c030f2e8>] _raw_spin_lock+0x2c/0x3c
[<c0035c14>] __new_context+0x14/0xc4
[<c00d7d74>] flush_old_exec+0x3b8/0x75c
[<c0109fa8>] load_elf_binary+0x340/0x1288
[<c00d7500>] search_binary_handler+0x130/0x320
[<c00d8bcc>] do_execve+0x1c0/0x2d4
[<c002e558>] kernel_execve+0x34/0x84
[<c002a7ac>] init_post+0xc0/0x110
[<c0008730>] kernel_init+0x1b8/0x208
[<c002c2ec>] kernel_thread_exit+0x0/0x8
}
... key at: [<c042c57c>] cpu_asid_lock+0x10/0x1c
... acquired at:
[<c0075a4c>] check_irq_usage+0x58/0xb8
[<c0076b54>] __lock_acquire+0x10a8/0x17b4
[<c0077334>] lock_acquire+0xd4/0xec
[<c030f2e8>] _raw_spin_lock+0x2c/0x3c
[<c0035c14>] __new_context+0x14/0xc4
[<c030cf50>] schedule+0x6b0/0x834
[<c002c8ec>] cpu_idle+0xcc/0xe4
[<70008080>] 0x70008080
stack backtrace:
[<c0031760>] (unwind_backtrace+0x0/0xd4) from [<c0075984>] (check_usage+0x3f0/0x460)
[<c0075984>] (check_usage+0x3f0/0x460) from [<c0075a4c>] (check_irq_usage+0x58/0xb8)
[<c0075a4c>] (check_irq_usage+0x58/0xb8) from [<c0076b54>] (__lock_acquire+0x10a8/0x17b4)
[<c0076b54>] (__lock_acquire+0x10a8/0x17b4) from [<c0077334>] (lock_acquire+0xd4/0xec)
[<c0077334>] (lock_acquire+0xd4/0xec) from [<c030f2e8>] (_raw_spin_lock+0x2c/0x3c)
[<c030f2e8>] (_raw_spin_lock+0x2c/0x3c) from [<c0035c14>] (__new_context+0x14/0xc4)
[<c0035c14>] (__new_context+0x14/0xc4) from [<c030cf50>] (schedule+0x6b0/0x834)
[<c030cf50>] (schedule+0x6b0/0x834) from [<c002c8ec>] (cpu_idle+0xcc/0xe4)
[<c002c8ec>] (cpu_idle+0xcc/0xe4) from [<70008080>] (0x70008080)
* [PATCH 5/5] arm/perfevents: implement perf event support for ARMv6
2010-01-06 12:09 ` Will Deacon
@ 2010-01-06 12:14 ` Jamie Iles
0 siblings, 0 replies; 30+ messages in thread
From: Jamie Iles @ 2010-01-06 12:14 UTC (permalink / raw)
To: linux-arm-kernel
On Wed, Jan 06, 2010 at 12:09:07PM -0000, Will Deacon wrote:
> Hi Jamie,
>
> * Jamie Iles wrote:
>
> > Ok, I've tried 2 things:
> > 1. disabling interrupts around perf_event_task_sched_in()
> > 2. undefining __ARCH_WANT_INTERRUPTS_ON_CTXSW
> >
> > As far as I can tell, both of these solutions work, although with 2, I had to
> > define __ARCH_WANT_INTERRUPTS_ON_CTXSW.
>
> I don't follow what you mean for point (2) when you say you have to define
> __ARCH_WANT_INTERRUPTS_ON_CTXSW. I tried defining __ARCH_WANT_INTERRUPTS_ON_CTXSW
> only when VIVT caches are present [as Russell mentioned], but I encountered
> further locking problems with __new_context [see below].
I got my defines mixed up. If you undef __ARCH_WANT_INTERRUPTS_ON_CTXSW then
you also need to define __ARCH_WANT_UNLOCKED_CTXSW to avoid the locking
problems you've described.
>
> > Will, Jean - could you give the patch below a go and see if it works on your
> > systems? I don't get any lockdep warnings on my platform with this and it
> > still runs without the lock debugging.
>
> This patch solves the issue for me. Should this be integrated into your patchset
> as that is the first perf code for ARM?
As long as no-one else has any objection.
Cheers,
Jamie
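Concretely, the combination described here would amount to something like the
following in the ARM headers (illustrative only - the VIVT test is the one
Russell suggested earlier, and the exact condition is an assumption, not a
posted patch):

#ifdef CONFIG_CPU_CACHE_VIVT
/* VIVT cache maintenance on switch is slow, so keep interrupts enabled. */
#define __ARCH_WANT_INTERRUPTS_ON_CTXSW
#else
/* Switch with interrupts disabled, but drop the rq lock across the
 * switch to avoid the cpu_asid_lock ordering problem seen above. */
#define __ARCH_WANT_UNLOCKED_CTXSW
#endif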
* [PATCH 1/5] arm: provide a mechanism to reserve performance counters
2010-01-06 12:00 ` [PATCH 1/5] arm: provide a mechanism to reserve performance counters Michał Nazarewicz
@ 2010-01-06 12:15 ` Jamie Iles
0 siblings, 0 replies; 30+ messages in thread
From: Jamie Iles @ 2010-01-06 12:15 UTC (permalink / raw)
To: linux-arm-kernel
On Wed, Jan 06, 2010 at 01:00:56PM +0100, Michał Nazarewicz wrote:
>> +#else /* CONFIG_CPU_HAS_PMU */
>> +
>> +static inline const struct pmu_irqs *
>> +reserve_pmu(void)
>> +{
>> + ERR_PTR(-ENODEV);
>
> - ERR_PTR(-ENODEV);
> + return ERR_PTR(-ENODEV);
>
>> +}
>> +
>> +static inline int
>> +release_pmu(const struct pmu_irqs *irqs)
>> +{
>
> + return -ENODEV;
>
>> +}
>> +
>> +static inline int
>> +init_pmu(void)
>> +{
>> + return -ENODEV;
>> +}
>> +
>> +#endif /* CONFIG_CPU_HAS_PMU */
>> +
>> +#endif /* __ARM_PMU_H__ */
Thanks, well spotted!
>> +static const struct pmu_irqs pmu_irqs = {
>> + .irqs = irqs,
>> + .num_irqs = ARRAY_SIZE(irqs),
>> +};
>> +
>> +static DECLARE_MUTEX(pmu_mutex);
>
> Isn't a mutex overkill? A bit field would be enough:
>
> -static DECLARE_MUTEX(pmu_mutex);
> +static volatile long pmu_mutex;
Yes, it probably is. I don't think performance is important here but that's a
simpler solution so I'll make that change.
Thanks,
Jamie
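For reference, the bit-based reservation agreed on here would end up looking
roughly like this (a sketch assembled from Michał's suggestion above; the
final patch may differ in detail):

static volatile long pmu_mutex;

const struct pmu_irqs *
reserve_pmu(void)
{
	/* Non-blocking: fail with -EBUSY if the PMU is already claimed. */
	return test_and_set_bit_lock(0, &pmu_mutex) ?
		ERR_PTR(-EBUSY) : &pmu_irqs;
}
EXPORT_SYMBOL_GPL(reserve_pmu);

int
release_pmu(const struct pmu_irqs *irqs)
{
	if (WARN_ON(irqs != &pmu_irqs))
		return -EINVAL;
	clear_bit_unlock(0, &pmu_mutex);
	return 0;
}
EXPORT_SYMBOL_GPL(release_pmu);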
* [PATCH 2/5] arm/oprofile: reserve the PMU when starting
2010-01-14 12:14 ` [PATCH 1/5] arm: provide a mechanism to reserve performance counters Jamie Iles
@ 2010-01-14 12:14 ` Jamie Iles
2010-02-05 6:01 ` George G. Davis
0 siblings, 1 reply; 30+ messages in thread
From: Jamie Iles @ 2010-01-14 12:14 UTC (permalink / raw)
To: linux-arm-kernel
Make sure that we have access to the performance counters and
that they aren't being used by perf events or anything else.
Cc: Will Deacon <will.deacon@arm.com>
Cc: Jean Pihet <jpihet@mvista.com>
Signed-off-by: Jamie Iles <jamie.iles@picochip.com>
---
arch/arm/oprofile/op_model_arm11_core.c | 4 +-
arch/arm/oprofile/op_model_arm11_core.h | 4 +-
arch/arm/oprofile/op_model_mpcore.c | 42 ++++++++++++++++--------------
arch/arm/oprofile/op_model_v6.c | 30 ++++++++++++++--------
arch/arm/oprofile/op_model_v7.c | 30 ++++++++++++++--------
arch/arm/oprofile/op_model_v7.h | 4 +-
arch/arm/oprofile/op_model_xscale.c | 35 ++++++++++++++-----------
7 files changed, 85 insertions(+), 64 deletions(-)
diff --git a/arch/arm/oprofile/op_model_arm11_core.c b/arch/arm/oprofile/op_model_arm11_core.c
index ad80752..ef3e265 100644
--- a/arch/arm/oprofile/op_model_arm11_core.c
+++ b/arch/arm/oprofile/op_model_arm11_core.c
@@ -132,7 +132,7 @@ static irqreturn_t arm11_pmu_interrupt(int irq, void *arg)
return IRQ_HANDLED;
}
-int arm11_request_interrupts(int *irqs, int nr)
+int arm11_request_interrupts(const int *irqs, int nr)
{
unsigned int i;
int ret = 0;
@@ -153,7 +153,7 @@ int arm11_request_interrupts(int *irqs, int nr)
return ret;
}
-void arm11_release_interrupts(int *irqs, int nr)
+void arm11_release_interrupts(const int *irqs, int nr)
{
unsigned int i;
diff --git a/arch/arm/oprofile/op_model_arm11_core.h b/arch/arm/oprofile/op_model_arm11_core.h
index 6f8538e..1902b99 100644
--- a/arch/arm/oprofile/op_model_arm11_core.h
+++ b/arch/arm/oprofile/op_model_arm11_core.h
@@ -39,7 +39,7 @@
int arm11_setup_pmu(void);
int arm11_start_pmu(void);
int arm11_stop_pmu(void);
-int arm11_request_interrupts(int *, int);
-void arm11_release_interrupts(int *, int);
+int arm11_request_interrupts(const int *, int);
+void arm11_release_interrupts(const int *, int);
#endif
diff --git a/arch/arm/oprofile/op_model_mpcore.c b/arch/arm/oprofile/op_model_mpcore.c
index 4ce0f98..f73ce87 100644
--- a/arch/arm/oprofile/op_model_mpcore.c
+++ b/arch/arm/oprofile/op_model_mpcore.c
@@ -32,6 +32,7 @@
/* #define DEBUG */
#include <linux/types.h>
#include <linux/errno.h>
+#include <linux/err.h>
#include <linux/sched.h>
#include <linux/oprofile.h>
#include <linux/interrupt.h>
@@ -43,6 +44,7 @@
#include <mach/hardware.h>
#include <mach/board-eb.h>
#include <asm/system.h>
+#include <asm/pmu.h>
#include "op_counter.h"
#include "op_arm_model.h"
@@ -58,6 +60,7 @@
* Bitmask of used SCU counters
*/
static unsigned int scu_em_used;
+static const struct pmu_irqs *pmu_irqs;
/*
* 2 helper fns take a counter number from 0-7 (not the userspace-visible counter number)
@@ -225,33 +228,40 @@ static int em_setup_ctrs(void)
return 0;
}
-static int arm11_irqs[] = {
- [0] = IRQ_EB11MP_PMU_CPU0,
- [1] = IRQ_EB11MP_PMU_CPU1,
- [2] = IRQ_EB11MP_PMU_CPU2,
- [3] = IRQ_EB11MP_PMU_CPU3
-};
-
static int em_start(void)
{
int ret;
- ret = arm11_request_interrupts(arm11_irqs, ARRAY_SIZE(arm11_irqs));
+ pmu_irqs = reserve_pmu();
+ if (IS_ERR(pmu_irqs)) {
+ ret = PTR_ERR(pmu_irqs);
+ goto out;
+ }
+
+ ret = arm11_request_interrupts(pmu_irqs->irqs, pmu_irqs->num_irqs);
if (ret == 0) {
em_call_function(arm11_start_pmu);
ret = scu_start();
- if (ret)
- arm11_release_interrupts(arm11_irqs, ARRAY_SIZE(arm11_irqs));
+ if (ret) {
+ arm11_release_interrupts(pmu_irqs->irqs,
+ pmu_irqs->num_irqs);
+ } else {
+ release_pmu(pmu_irqs);
+ pmu_irqs = NULL;
+ }
}
+
+out:
return ret;
}
static void em_stop(void)
{
em_call_function(arm11_stop_pmu);
- arm11_release_interrupts(arm11_irqs, ARRAY_SIZE(arm11_irqs));
+ arm11_release_interrupts(pmu_irqs->irqs, pmu_irqs->num_irqs);
scu_stop();
+ release_pmu(pmu_irqs);
}
/*
@@ -283,15 +293,7 @@ static int em_setup(void)
em_route_irq(IRQ_EB11MP_PMU_SCU6, 3);
em_route_irq(IRQ_EB11MP_PMU_SCU7, 3);
- /*
- * Send CP15 PMU interrupts to the owner CPU.
- */
- em_route_irq(IRQ_EB11MP_PMU_CPU0, 0);
- em_route_irq(IRQ_EB11MP_PMU_CPU1, 1);
- em_route_irq(IRQ_EB11MP_PMU_CPU2, 2);
- em_route_irq(IRQ_EB11MP_PMU_CPU3, 3);
-
- return 0;
+ return init_pmu();
}
struct op_arm_model_spec op_mpcore_spec = {
diff --git a/arch/arm/oprofile/op_model_v6.c b/arch/arm/oprofile/op_model_v6.c
index f7d2ec5..a22357a 100644
--- a/arch/arm/oprofile/op_model_v6.c
+++ b/arch/arm/oprofile/op_model_v6.c
@@ -19,39 +19,47 @@
/* #define DEBUG */
#include <linux/types.h>
#include <linux/errno.h>
+#include <linux/err.h>
#include <linux/sched.h>
#include <linux/oprofile.h>
#include <linux/interrupt.h>
#include <asm/irq.h>
#include <asm/system.h>
+#include <asm/pmu.h>
#include "op_counter.h"
#include "op_arm_model.h"
#include "op_model_arm11_core.h"
-static int irqs[] = {
-#ifdef CONFIG_ARCH_OMAP2
- 3,
-#endif
-#ifdef CONFIG_ARCH_BCMRING
- IRQ_PMUIRQ, /* for BCMRING, ARM PMU interrupt is 43 */
-#endif
-};
+static const struct pmu_irqs *pmu_irqs;
static void armv6_pmu_stop(void)
{
arm11_stop_pmu();
- arm11_release_interrupts(irqs, ARRAY_SIZE(irqs));
+ arm11_release_interrupts(pmu_irqs->irqs, pmu_irqs->num_irqs);
+ release_pmu(pmu_irqs);
+ pmu_irqs = NULL;
}
static int armv6_pmu_start(void)
{
int ret;
- ret = arm11_request_interrupts(irqs, ARRAY_SIZE(irqs));
- if (ret >= 0)
+ pmu_irqs = reserve_pmu();
+ if (IS_ERR(pmu_irqs)) {
+ ret = PTR_ERR(pmu_irqs);
+ goto out;
+ }
+
+ ret = arm11_request_interrupts(pmu_irqs->irqs, pmu_irqs->num_irqs);
+ if (ret >= 0) {
ret = arm11_start_pmu();
+ } else {
+ release_pmu(pmu_irqs);
+ pmu_irqs = NULL;
+ }
+out:
return ret;
}
diff --git a/arch/arm/oprofile/op_model_v7.c b/arch/arm/oprofile/op_model_v7.c
index 2088a6c..8642d08 100644
--- a/arch/arm/oprofile/op_model_v7.c
+++ b/arch/arm/oprofile/op_model_v7.c
@@ -11,11 +11,14 @@
*/
#include <linux/types.h>
#include <linux/errno.h>
+#include <linux/err.h>
#include <linux/oprofile.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/smp.h>
+#include <asm/pmu.h>
+
#include "op_counter.h"
#include "op_arm_model.h"
#include "op_model_v7.h"
@@ -295,7 +298,7 @@ static irqreturn_t armv7_pmnc_interrupt(int irq, void *arg)
return IRQ_HANDLED;
}
-int armv7_request_interrupts(int *irqs, int nr)
+int armv7_request_interrupts(const int *irqs, int nr)
{
unsigned int i;
int ret = 0;
@@ -318,7 +321,7 @@ int armv7_request_interrupts(int *irqs, int nr)
return ret;
}
-void armv7_release_interrupts(int *irqs, int nr)
+void armv7_release_interrupts(const int *irqs, int nr)
{
unsigned int i;
@@ -362,12 +365,7 @@ static void armv7_pmnc_dump_regs(void)
}
#endif
-
-static int irqs[] = {
-#ifdef CONFIG_ARCH_OMAP3
- INT_34XX_BENCH_MPU_EMUL,
-#endif
-};
+static const struct pmu_irqs *pmu_irqs;
static void armv7_pmnc_stop(void)
{
@@ -375,19 +373,29 @@ static void armv7_pmnc_stop(void)
armv7_pmnc_dump_regs();
#endif
armv7_stop_pmnc();
- armv7_release_interrupts(irqs, ARRAY_SIZE(irqs));
+ armv7_release_interrupts(pmu_irqs->irqs, pmu_irqs->num_irqs);
+ release_pmu(pmu_irqs);
+ pmu_irqs = NULL;
}
static int armv7_pmnc_start(void)
{
int ret;
+ pmu_irqs = reserve_pmu();
+ if (IS_ERR(pmu_irqs))
+ return PTR_ERR(pmu_irqs);
+
#ifdef DEBUG
armv7_pmnc_dump_regs();
#endif
- ret = armv7_request_interrupts(irqs, ARRAY_SIZE(irqs));
- if (ret >= 0)
+ ret = armv7_request_interrupts(pmu_irqs->irqs, pmu_irqs->num_irqs);
+ if (ret >= 0) {
armv7_start_pmnc();
+ } else {
+ release_pmu(pmu_irqs);
+ pmu_irqs = NULL;
+ }
return ret;
}
diff --git a/arch/arm/oprofile/op_model_v7.h b/arch/arm/oprofile/op_model_v7.h
index 0e19bcc..9ca334b 100644
--- a/arch/arm/oprofile/op_model_v7.h
+++ b/arch/arm/oprofile/op_model_v7.h
@@ -97,7 +97,7 @@
int armv7_setup_pmu(void);
int armv7_start_pmu(void);
int armv7_stop_pmu(void);
-int armv7_request_interrupts(int *, int);
-void armv7_release_interrupts(int *, int);
+int armv7_request_interrupts(const int *, int);
+void armv7_release_interrupts(const int *, int);
#endif
diff --git a/arch/arm/oprofile/op_model_xscale.c b/arch/arm/oprofile/op_model_xscale.c
index 724ab9c..1d34a02 100644
--- a/arch/arm/oprofile/op_model_xscale.c
+++ b/arch/arm/oprofile/op_model_xscale.c
@@ -17,12 +17,14 @@
/* #define DEBUG */
#include <linux/types.h>
#include <linux/errno.h>
+#include <linux/err.h>
#include <linux/sched.h>
#include <linux/oprofile.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <asm/cputype.h>
+#include <asm/pmu.h>
#include "op_counter.h"
#include "op_arm_model.h"
@@ -33,17 +35,6 @@
#define PMU_RESET (CCNT_RESET | PMN_RESET)
#define PMU_CNT64 0x008 /* Make CCNT count every 64th cycle */
-/* TODO do runtime detection */
-#ifdef CONFIG_ARCH_IOP32X
-#define XSCALE_PMU_IRQ IRQ_IOP32X_CORE_PMU
-#endif
-#ifdef CONFIG_ARCH_IOP33X
-#define XSCALE_PMU_IRQ IRQ_IOP33X_CORE_PMU
-#endif
-#ifdef CONFIG_ARCH_PXA
-#define XSCALE_PMU_IRQ IRQ_PMU
-#endif
-
/*
* Different types of events that can be counted by the XScale PMU
* as used by Oprofile userspace. Here primarily for documentation
@@ -367,6 +358,8 @@ static irqreturn_t xscale_pmu_interrupt(int irq, void *arg)
return IRQ_HANDLED;
}
+static const struct pmu_irqs *pmu_irqs;
+
static void xscale_pmu_stop(void)
{
u32 pmnc = read_pmnc();
@@ -374,20 +367,30 @@ static void xscale_pmu_stop(void)
pmnc &= ~PMU_ENABLE;
write_pmnc(pmnc);
- free_irq(XSCALE_PMU_IRQ, results);
+ free_irq(pmu_irqs->irqs[0], results);
+ release_pmu(pmu_irqs);
+ pmu_irqs = NULL;
}
static int xscale_pmu_start(void)
{
int ret;
- u32 pmnc = read_pmnc();
+ u32 pmnc;
+
+ pmu_irqs = reserve_pmu();
+ if (IS_ERR(pmu_irqs))
+ return PTR_ERR(pmu_irqs);
+
+ pmnc = read_pmnc();
- ret = request_irq(XSCALE_PMU_IRQ, xscale_pmu_interrupt, IRQF_DISABLED,
- "XScale PMU", (void *)results);
+ ret = request_irq(pmu_irqs->irqs[0], xscale_pmu_interrupt,
+ IRQF_DISABLED, "XScale PMU", (void *)results);
if (ret < 0) {
printk(KERN_ERR "oprofile: unable to request IRQ%d for XScale PMU\n",
- XSCALE_PMU_IRQ);
+ pmu_irqs->irqs[0]);
+ release_pmu(pmu_irqs);
+ pmu_irqs = NULL;
return ret;
}
--
1.6.5.4
* [PATCH 2/5] arm/oprofile: reserve the PMU when starting
2010-01-14 12:14 ` [PATCH 2/5] arm/oprofile: reserve the PMU when starting Jamie Iles
@ 2010-02-05 6:01 ` George G. Davis
2010-02-05 9:13 ` Jamie Iles
0 siblings, 1 reply; 30+ messages in thread
From: George G. Davis @ 2010-02-05 6:01 UTC (permalink / raw)
To: linux-arm-kernel
Hi,
On Thu, Jan 14, 2010 at 12:14:13PM +0000, Jamie Iles wrote:
> Make sure that we have access to the performance counters and
> that they aren't being used by perf events or anything else.
>
> Cc: Will Deacon <will.deacon@arm.com>
> Cc: Jean Pihet <jpihet@mvista.com>
> Signed-off-by: Jamie Iles <jamie.iles@picochip.com>
> ---
> arch/arm/oprofile/op_model_arm11_core.c | 4 +-
> arch/arm/oprofile/op_model_arm11_core.h | 4 +-
> arch/arm/oprofile/op_model_mpcore.c | 42 ++++++++++++++++--------------
> arch/arm/oprofile/op_model_v6.c | 30 ++++++++++++++--------
> arch/arm/oprofile/op_model_v7.c | 30 ++++++++++++++--------
> arch/arm/oprofile/op_model_v7.h | 4 +-
> arch/arm/oprofile/op_model_xscale.c | 35 ++++++++++++++-----------
> 7 files changed, 85 insertions(+), 64 deletions(-)
// CUT
> diff --git a/arch/arm/oprofile/op_model_mpcore.c b/arch/arm/oprofile/op_model_mpcore.c
> index 4ce0f98..f73ce87 100644
> --- a/arch/arm/oprofile/op_model_mpcore.c
> +++ b/arch/arm/oprofile/op_model_mpcore.c
> @@ -32,6 +32,7 @@
> /* #define DEBUG */
> #include <linux/types.h>
> #include <linux/errno.h>
> +#include <linux/err.h>
> #include <linux/sched.h>
> #include <linux/oprofile.h>
> #include <linux/interrupt.h>
> @@ -43,6 +44,7 @@
> #include <mach/hardware.h>
> #include <mach/board-eb.h>
> #include <asm/system.h>
> +#include <asm/pmu.h>
>
> #include "op_counter.h"
> #include "op_arm_model.h"
> @@ -58,6 +60,7 @@
> * Bitmask of used SCU counters
> */
> static unsigned int scu_em_used;
> +static const struct pmu_irqs *pmu_irqs;
>
> /*
> * 2 helper fns take a counter number from 0-7 (not the userspace-visible counter number)
> @@ -225,33 +228,40 @@ static int em_setup_ctrs(void)
> return 0;
> }
>
> -static int arm11_irqs[] = {
> - [0] = IRQ_EB11MP_PMU_CPU0,
> - [1] = IRQ_EB11MP_PMU_CPU1,
> - [2] = IRQ_EB11MP_PMU_CPU2,
> - [3] = IRQ_EB11MP_PMU_CPU3
> -};
> -
> static int em_start(void)
> {
> int ret;
>
> - ret = arm11_request_interrupts(arm11_irqs, ARRAY_SIZE(arm11_irqs));
> + pmu_irqs = reserve_pmu();
> + if (IS_ERR(pmu_irqs)) {
> + ret = PTR_ERR(pmu_irqs);
> + goto out;
> + }
> +
> + ret = arm11_request_interrupts(pmu_irqs->irqs, pmu_irqs->num_irqs);
> if (ret == 0) {
> em_call_function(arm11_start_pmu);
>
> ret = scu_start();
> - if (ret)
> - arm11_release_interrupts(arm11_irqs, ARRAY_SIZE(arm11_irqs));
> + if (ret) {
> + arm11_release_interrupts(pmu_irqs->irqs,
> + pmu_irqs->num_irqs);
> + } else {
> + release_pmu(pmu_irqs);
> + pmu_irqs = NULL;
> + }
> }
> +
> +out:
> return ret;
> }
The "} else {" clause above broke OProfile on ARM11 MPCore. Here's a
trivial fix tested on ARM Ltd. RealView EB ARM11 MPCore:
* [PATCH 2/5] arm/oprofile: reserve the PMU when starting
2010-02-05 6:01 ` George G. Davis
@ 2010-02-05 9:13 ` Jamie Iles
0 siblings, 0 replies; 30+ messages in thread
From: Jamie Iles @ 2010-02-05 9:13 UTC (permalink / raw)
To: linux-arm-kernel
On Fri, Feb 05, 2010 at 01:01:54AM -0500, George G. Davis wrote:
> From 18018f4e439ebcf9358790887502764b18459c9c Mon Sep 17 00:00:00 2001
> From: George G. Davis <gdavis@mvista.com>
> Date: Fri, 5 Feb 2010 00:38:01 -0500
> Subject: [PATCH] ARM: Fix ARM11 MPCore OProfile breakage due to ARM patch 5901/2
>
> The recent "fe6c67f ARM: 5901/2: arm/oprofile: reserve the PMU when
> starting" commit broke OProfile support on ARM11 MPCore targets by
> adding a misplaced "} else {" clause in function em_start() which
> results in a call to release_pmu() even though the PMU is in use
> which in turn results in an oops while using OProfile. Removing
> the stray else clause fixes the problem.
>
> Signed-off-by: George G. Davis <gdavis@mvista.com>
> ---
> arch/arm/oprofile/op_model_mpcore.c | 1 -
> 1 files changed, 0 insertions(+), 1 deletions(-)
>
> diff --git a/arch/arm/oprofile/op_model_mpcore.c b/arch/arm/oprofile/op_model_mpcore.c
> index f73ce87..0d4f099 100644
> --- a/arch/arm/oprofile/op_model_mpcore.c
> +++ b/arch/arm/oprofile/op_model_mpcore.c
> @@ -246,7 +246,6 @@ static int em_start(void)
> if (ret) {
> arm11_release_interrupts(pmu_irqs->irqs,
> pmu_irqs->num_irqs);
> - } else {
> release_pmu(pmu_irqs);
> pmu_irqs = NULL;
> }
>
>
> --
Hi George,
Good catch. Looks good to me.
Jamie
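With George's fix applied on top of the original patch, the error handling in
em_start() reads as follows (reassembled from the hunks above for clarity):

static int em_start(void)
{
	int ret;

	pmu_irqs = reserve_pmu();
	if (IS_ERR(pmu_irqs)) {
		ret = PTR_ERR(pmu_irqs);
		goto out;
	}

	ret = arm11_request_interrupts(pmu_irqs->irqs, pmu_irqs->num_irqs);
	if (ret == 0) {
		em_call_function(arm11_start_pmu);

		ret = scu_start();
		if (ret) {
			/* Only release on failure; the PMU must stay
			 * reserved while profiling is running. */
			arm11_release_interrupts(pmu_irqs->irqs,
						 pmu_irqs->num_irqs);
			release_pmu(pmu_irqs);
			pmu_irqs = NULL;
		}
	}

out:
	return ret;
}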