* performance counter support for ARM architecture
@ 2009-09-30 7:26 nelakurthi koteswararao
2009-09-30 7:56 ` Frederic Weisbecker
0 siblings, 1 reply; 2+ messages in thread
From: nelakurthi koteswararao @ 2009-09-30 7:26 UTC (permalink / raw)
To: linux-arm-kernel
Dear all,
I will change the naming conventions and symbolic names once perfcounter for
ARM is supported.
In the meantime, I want to post intermediate releases for review.
1. With the attached patch and test application, I am able to count page
faults on ARM (this is for the linux-2.6.29 kernel).
-bash-3.2# ./perf stat ./array
Performance counter stats for './array':
2005.297192 task-clock-msecs # 0.998 CPUs
7 context-switches # 0.000 M/sec
0 CPU-migrations # 0.000 M/sec
76 page-faults # 0.000 M/sec
<not counted> cycles
<not counted> instructions
<not counted> cache-references
<not counted> cache-misses
2.009101297 seconds time elapsed
Please look at it and give your review comments.
Regards,
Koteswararao.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.infradead.org/pipermail/linux-arm-kernel/attachments/20090930/7854d777/attachment.htm>
-------------- next part --------------
Add performance counter support for the ARM architecture.
ChangeLog:
2009/09/24
Location: Linux-2.6.29.y-BRANCH_SS
refs #6657
First changelog version.
---
arch/arm/Kconfig | 3 3 + 0 - 0 !
arch/arm/include/asm/atomic.h | 1 1 + 0 - 0 !
arch/arm/include/asm/perf_counter.h | 8 8 + 0 - 0 !
arch/arm/include/asm/unistd.h | 3 2 + 1 - 0 !
arch/arm/kernel/calls.S | 1 1 + 0 - 0 !
arch/arm/mm/fault.c | 10 9 + 1 - 0 !
include/asm-generic/atomic64.h | 42 42 + 0 - 0 !
lib/Kconfig | 6 6 + 0 - 0 !
lib/Makefile | 2 2 + 0 - 0 !
lib/atomic64.c | 175 175 + 0 - 0 !
tools/perf/perf.h | 6 6 + 0 - 0 !
11 files changed, 255 insertions(+), 2 deletions(-)
Index: b/arch/arm/include/asm/unistd.h
===================================================================
--- a/arch/arm/include/asm/unistd.h
+++ b/arch/arm/include/asm/unistd.h
@@ -387,8 +387,9 @@
#define __NR_dup3 (__NR_SYSCALL_BASE+358)
#define __NR_pipe2 (__NR_SYSCALL_BASE+359)
#define __NR_inotify_init1 (__NR_SYSCALL_BASE+360)
+#define __NR_perf_counter_open (__NR_SYSCALL_BASE+361)
-#define __NR_syscall_max 361
+#define __NR_syscall_max 362
/*
* The following SWIs are ARM private.
Index: b/arch/arm/kernel/calls.S
===================================================================
--- a/arch/arm/kernel/calls.S
+++ b/arch/arm/kernel/calls.S
@@ -370,6 +370,7 @@
CALL(sys_dup3)
CALL(sys_pipe2)
/* 360 */ CALL(sys_inotify_init1)
+ CALL(sys_perf_counter_open)
#ifndef syscalls_counted
.equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls
#define syscalls_counted
Index: b/tools/perf/perf.h
===================================================================
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -25,6 +25,12 @@
#define cpu_relax() asm volatile("" ::: "memory");
#endif
+#ifdef __arm__
+#include "../../arch/arm/include/asm/unistd.h"
+#define rmb() asm volatile("" ::: "memory")
+#define cpu_relax() asm volatile("" ::: "memory");
+#endif
+
#include <time.h>
#include <unistd.h>
#include <sys/types.h>
Index: b/arch/arm/Kconfig
===================================================================
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -20,6 +20,7 @@ config ARM
select HAVE_FUNCTION_TRACER if (!XIP_KERNEL)
select HAVE_FUNCTION_GRAPH_TRACER if (!XIP_KERNEL)
select HAVE_GENERIC_DMA_COHERENT
+ select GENERIC_ATOMIC64
help
The ARM series is a line of low-power-consumption RISC chip designs
licensed by ARM Ltd and targeted at embedded applications and
@@ -253,6 +254,7 @@ config ARCH_NE1
# select PCI
select GENERIC_TIME
select GENERIC_CLOCKEVENTS
+ select HAVE_PERF_COUNTERS
help
This enables support for NEC-EL NaviEngine1-based boards.
@@ -463,6 +465,7 @@ config ARCH_MXC
select ARCH_MTD_XIP
select GENERIC_GPIO
select ARCH_REQUIRE_GPIOLIB
+ select HAVE_PERF_COUNTERS
help
Support for Freescale MXC/iMX-based family of processors
Index: b/arch/arm/include/asm/perf_counter.h
===================================================================
--- /dev/null
+++ b/arch/arm/include/asm/perf_counter.h
@@ -0,0 +1,8 @@
+#ifndef _ASM_ARM_PERF_COUNTER_H
+#define _ASM_ARM_PERF_COUNTER_H
+
+#define PERF_COUNTER_INDEX_OFFSET 1
+/* ARM only supports software counters through this interface. */
+static inline void set_perf_counter_pending(void) { do { } while(0);
+}
+#endif /* _ASM_ARM_PERF_COUNTER_H */
Index: b/arch/arm/include/asm/atomic.h
===================================================================
--- a/arch/arm/include/asm/atomic.h
+++ b/arch/arm/include/asm/atomic.h
@@ -225,6 +225,7 @@ static inline int atomic_add_unless(atom
#define smp_mb__before_atomic_inc() barrier()
#define smp_mb__after_atomic_inc() barrier()
+#include <asm-generic/atomic64.h>
#include <asm-generic/atomic.h>
#endif
#endif
Index: b/include/asm-generic/atomic64.h
===================================================================
--- /dev/null
+++ b/include/asm-generic/atomic64.h
@@ -0,0 +1,42 @@
+/*
+ * Generic implementation of 64-bit atomics using spinlocks,
+ * useful on processors that don't have 64-bit atomic instructions.
+ *
+ * Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#ifndef _ASM_GENERIC_ATOMIC64_H
+#define _ASM_GENERIC_ATOMIC64_H
+
+typedef struct {
+ long long counter;
+} atomic64_t;
+
+#define ATOMIC64_INIT(i) { (i) }
+
+extern long long atomic64_read(const atomic64_t *v);
+extern void atomic64_set(atomic64_t *v, long long i);
+extern void atomic64_add(long long a, atomic64_t *v);
+extern long long atomic64_add_return(long long a, atomic64_t *v);
+extern void atomic64_sub(long long a, atomic64_t *v);
+extern long long atomic64_sub_return(long long a, atomic64_t *v);
+extern long long atomic64_dec_if_positive(atomic64_t *v);
+extern long long atomic64_cmpxchg(atomic64_t *v, long long o, long long n);
+extern long long atomic64_xchg(atomic64_t *v, long long new);
+extern int atomic64_add_unless(atomic64_t *v, long long a, long long u);
+
+#define atomic64_add_negative(a, v) (atomic64_add_return((a), (v)) < 0)
+#define atomic64_inc(v) atomic64_add(1LL, (v))
+#define atomic64_inc_return(v) atomic64_add_return(1LL, (v))
+#define atomic64_inc_and_test(v) (atomic64_inc_return(v) == 0)
+#define atomic64_sub_and_test(a, v) (atomic64_sub_return((a), (v)) == 0)
+#define atomic64_dec(v) atomic64_sub(1LL, (v))
+#define atomic64_dec_return(v) atomic64_sub_return(1LL, (v))
+#define atomic64_dec_and_test(v) (atomic64_dec_return((v)) == 0)
+#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1LL, 0LL)
+
+#endif /* _ASM_GENERIC_ATOMIC64_H */
Index: b/lib/atomic64.c
===================================================================
--- /dev/null
+++ b/lib/atomic64.c
@@ -0,0 +1,175 @@
+/*
+ * Generic implementation of 64-bit atomics using spinlocks,
+ * useful on processors that don't have 64-bit atomic instructions.
+ *
+ * Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/types.h>
+#include <linux/cache.h>
+#include <linux/spinlock.h>
+#include <linux/init.h>
+#include <asm/atomic.h>
+
+/*
+ * We use a hashed array of spinlocks to provide exclusive access
+ * to each atomic64_t variable. Since this is expected to be used on
+ * systems with small numbers of CPUs (<= 4 or so), we use a
+ * relatively small array of 16 spinlocks to avoid wasting too much
+ * memory on the spinlock array.
+ */
+#define NR_LOCKS 16
+
+/*
+ * Ensure each lock is in a separate cacheline.
+ */
+static union {
+ spinlock_t lock;
+ char pad[L1_CACHE_BYTES];
+} atomic64_lock[NR_LOCKS] __cacheline_aligned_in_smp;
+
+static inline spinlock_t *lock_addr(const atomic64_t *v)
+{
+ unsigned long addr = (unsigned long) v;
+
+ addr >>= L1_CACHE_SHIFT;
+ addr ^= (addr >> 8) ^ (addr >> 16);
+ return &atomic64_lock[addr & (NR_LOCKS - 1)].lock;
+}
+
+long long atomic64_read(const atomic64_t *v)
+{
+ unsigned long flags;
+ spinlock_t *lock = lock_addr(v);
+ long long val;
+
+ spin_lock_irqsave(lock, flags);
+ val = v->counter;
+ spin_unlock_irqrestore(lock, flags);
+ return val;
+}
+
+void atomic64_set(atomic64_t *v, long long i)
+{
+ unsigned long flags;
+ spinlock_t *lock = lock_addr(v);
+
+ spin_lock_irqsave(lock, flags);
+ v->counter = i;
+ spin_unlock_irqrestore(lock, flags);
+}
+
+void atomic64_add(long long a, atomic64_t *v)
+{
+ unsigned long flags;
+ spinlock_t *lock = lock_addr(v);
+
+ spin_lock_irqsave(lock, flags);
+ v->counter += a;
+ spin_unlock_irqrestore(lock, flags);
+}
+
+long long atomic64_add_return(long long a, atomic64_t *v)
+{
+ unsigned long flags;
+ spinlock_t *lock = lock_addr(v);
+ long long val;
+
+ spin_lock_irqsave(lock, flags);
+ val = v->counter += a;
+ spin_unlock_irqrestore(lock, flags);
+ return val;
+}
+
+void atomic64_sub(long long a, atomic64_t *v)
+{
+ unsigned long flags;
+ spinlock_t *lock = lock_addr(v);
+
+ spin_lock_irqsave(lock, flags);
+ v->counter -= a;
+ spin_unlock_irqrestore(lock, flags);
+}
+
+long long atomic64_sub_return(long long a, atomic64_t *v)
+{
+ unsigned long flags;
+ spinlock_t *lock = lock_addr(v);
+ long long val;
+
+ spin_lock_irqsave(lock, flags);
+ val = v->counter -= a;
+ spin_unlock_irqrestore(lock, flags);
+ return val;
+}
+
+long long atomic64_dec_if_positive(atomic64_t *v)
+{
+ unsigned long flags;
+ spinlock_t *lock = lock_addr(v);
+ long long val;
+
+ spin_lock_irqsave(lock, flags);
+ val = v->counter - 1;
+ if (val >= 0)
+ v->counter = val;
+ spin_unlock_irqrestore(lock, flags);
+ return val;
+}
+
+long long atomic64_cmpxchg(atomic64_t *v, long long o, long long n)
+{
+ unsigned long flags;
+ spinlock_t *lock = lock_addr(v);
+ long long val;
+
+ spin_lock_irqsave(lock, flags);
+ val = v->counter;
+ if (val == o)
+ v->counter = n;
+ spin_unlock_irqrestore(lock, flags);
+ return val;
+}
+
+long long atomic64_xchg(atomic64_t *v, long long new)
+{
+ unsigned long flags;
+ spinlock_t *lock = lock_addr(v);
+ long long val;
+
+ spin_lock_irqsave(lock, flags);
+ val = v->counter;
+ v->counter = new;
+ spin_unlock_irqrestore(lock, flags);
+ return val;
+}
+
+int atomic64_add_unless(atomic64_t *v, long long a, long long u)
+{
+	unsigned long flags;
+	spinlock_t *lock = lock_addr(v);
+	int ret = 0;	/* stays 0 when *v == u and nothing is added */
+
+	spin_lock_irqsave(lock, flags);
+	if (v->counter != u) {
+		v->counter += a;
+		ret = 1;	/* non-zero on success: atomic64_inc_not_zero() relies on it */
+	}
+	spin_unlock_irqrestore(lock, flags);
+	return ret;
+}
+
+static int init_atomic64_lock(void)
+{
+ int i;
+
+ for (i = 0; i < NR_LOCKS; ++i)
+ spin_lock_init(&atomic64_lock[i].lock);
+ return 0;
+}
+
+pure_initcall(init_atomic64_lock);
Index: b/lib/Makefile
===================================================================
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -88,6 +88,8 @@ obj-$(CONFIG_HAVE_ARCH_TRACEHOOK) += sys
obj-$(CONFIG_DYNAMIC_PRINTK_DEBUG) += dynamic_printk.o
+obj-$(CONFIG_GENERIC_ATOMIC64) += atomic64.o
+
hostprogs-y := gen_crc32table
clean-files := crc32table.h
Index: b/lib/Kconfig
===================================================================
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -177,4 +177,10 @@ config DISABLE_OBSOLETE_CPUMASK_FUNCTION
bool "Disable obsolete cpumask functions" if DEBUG_PER_CPU_MAPS
depends on EXPERIMENTAL && BROKEN
+#
+# Generic 64-bit atomic support is selected if needed
+#
+config GENERIC_ATOMIC64
+ bool
+
endmenu
Index: b/arch/arm/mm/fault.c
===================================================================
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -16,6 +16,7 @@
#include <linux/kprobes.h>
#include <linux/uaccess.h>
#include <linux/page-flags.h>
+#include <linux/perf_counter.h>
#include <asm/system.h>
#include <asm/pgtable.h>
@@ -145,7 +146,6 @@ __do_user_fault(struct task_struct *tsk,
show_regs(regs);
}
#endif
-
tsk->thread.address = addr;
tsk->thread.error_code = fsr;
tsk->thread.trap_no = 14;
@@ -254,6 +254,7 @@ do_page_fault(unsigned long addr, unsign
tsk = current;
mm = tsk->mm;
+ perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, addr);
/*
* If we're in an interrupt or have no user
* context, we must not take the fault..
@@ -281,6 +282,13 @@ do_page_fault(unsigned long addr, unsign
if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP | VM_FAULT_BADACCESS))))
return 0;
+ if(tsk->maj_flt)
+ perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
+ regs, addr);
+ if(tsk->min_flt)
+ perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
+ regs, addr);
+
/*
* If we are in kernel mode at this point, we
* have no context to handle this fault with.
-------------- next part --------------
/*
 * fast_multiply - multiply two integers with the built-in operator.
 * @x: first factor
 * @y: second factor
 *
 * Returns x * y. The parameters are declared explicitly as int; the
 * old-style "(x, y)" parameter list relied on implicit int, which is
 * no longer valid C.
 */
int fast_multiply(int x, int y)
{
    return x * y;
}
/*
 * slow_multiply - multiply by repeated addition.
 * @x: number of times @y is added; the loop does not run when x <= 0
 * @y: value that is accumulated
 *
 * Returns y added to zero x times, i.e. x * y for x >= 0 and 0 for
 * x <= 0. The run time grows linearly with x.
 */
int slow_multiply(int x, int y)
{
    int i;
    int z = 0;

    for (i = 0; i < x; i++) {
        z += y;
    }
    return z;
}
/*
 * main - workload driver for the multiply routines.
 *
 * Calls fast_multiply() and slow_multiply() for every pair (i, j) with
 * 0 <= i < 200 and 0 <= j < 3000. The products are not reported; the
 * program exists only to give a profiler something to measure.
 *
 * Returns 0.
 */
int main(void)
{
    int i, j;
    int x = 0, y = 0;

    for (i = 0; i < 200; i++) {
        for (j = 0; j < 3000; j++) {
            x = fast_multiply(i, j);
            y = slow_multiply(i, j);
        }
    }

    /* Consume the results so they are not flagged as set-but-unused. */
    (void)x;
    (void)y;
    return 0;
}
^ permalink raw reply [flat|nested] 2+ messages in thread* performance counter support for ARM architecture
2009-09-30 7:26 performance counter support for ARM architecture nelakurthi koteswararao
@ 2009-09-30 7:56 ` Frederic Weisbecker
0 siblings, 0 replies; 2+ messages in thread
From: Frederic Weisbecker @ 2009-09-30 7:56 UTC (permalink / raw)
To: linux-arm-kernel
On Wed, Sep 30, 2009 at 12:56:18PM +0530, nelakurthi koteswararao wrote:
> Dear all,
>
> I will change the naming conventions and symbolic names once perfcounter for
> ARM is supported.
> I want to do intermediate releases for review in mean time
>
> 1. I am able to support page faults in ARM with the attached patch along
> with application.( this is for linux-2.6.29 kernel)
Perf counters weren't even in the 2.6.29 kernel, so
I guess you are basing this work on a completely out-of-date
perf version.
We can't take patches based on 2.6.29. We can't even review
them; that wouldn't make sense given the tons of things that have
changed since 2.6.29.
We need patches against 2.6.32-rc1
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2009-09-30 7:56 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-09-30 7:26 performance counter support for ARM architecture nelakurthi koteswararao
2009-09-30 7:56 ` Frederic Weisbecker
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).