* [RFC][PATCH 00/11] tracepoint perf counter events and other stuff
@ 2009-03-17 21:56 Peter Zijlstra
2009-03-17 21:56 ` [RFC][PATCH 01/11] perf_counter: fix uninitialized usage of event_list Peter Zijlstra
` (10 more replies)
0 siblings, 11 replies; 24+ messages in thread
From: Peter Zijlstra @ 2009-03-17 21:56 UTC (permalink / raw)
To: mingo, paulus, rostedt; +Cc: linux-kernel, Peter Zijlstra
RFC because the patch stack seems to not quite work as expected, will
poke at it more in the morning, but thought I should post it so that
other folks can see what's happening.
--
^ permalink raw reply [flat|nested] 24+ messages in thread
* [RFC][PATCH 01/11] perf_counter: fix uninitialized usage of event_list
2009-03-17 21:56 [RFC][PATCH 00/11] tracepoint perf counter events and other stuff Peter Zijlstra
@ 2009-03-17 21:56 ` Peter Zijlstra
2009-03-17 21:56 ` [RFC][PATCH 02/11] perf_counter: generic context switch event Peter Zijlstra
` (9 subsequent siblings)
10 siblings, 0 replies; 24+ messages in thread
From: Peter Zijlstra @ 2009-03-17 21:56 UTC (permalink / raw)
To: mingo, paulus, rostedt; +Cc: linux-kernel, Peter Zijlstra
[-- Attachment #1: perf_swcounter_fix-init.patch --]
[-- Type: text/plain, Size: 1346 bytes --]
When doing the generic context switch event I ran into some early boot
hangs, which were caused by infinite function recursion (event, fault, event, fault).
I eventually tracked it down to event_list not being initialized at the
time of the first event. Fix this.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
kernel/perf_counter.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
Index: linux-2.6/kernel/perf_counter.c
===================================================================
--- linux-2.6.orig/kernel/perf_counter.c
+++ linux-2.6/kernel/perf_counter.c
@@ -1506,7 +1506,7 @@ static void perf_swcounter_ctx_event(str
{
struct perf_counter *counter;
- if (list_empty(&ctx->event_list))
+ if (!ctx->is_active || list_empty(&ctx->event_list))
return;
rcu_read_lock();
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 219748d..ca226a9 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -124,6 +124,8 @@ extern struct cred init_cred;
# define INIT_PERF_COUNTERS(tsk) \
.perf_counter_ctx.counter_list = \
LIST_HEAD_INIT(tsk.perf_counter_ctx.counter_list), \
+ .perf_counter_ctx.event_list = \
+ LIST_HEAD_INIT(tsk.perf_counter_ctx.event_list), \
.perf_counter_ctx.lock = \
__SPIN_LOCK_UNLOCKED(tsk.perf_counter_ctx.lock),
#else
--
^ permalink raw reply related [flat|nested] 24+ messages in thread
* [RFC][PATCH 02/11] perf_counter: generic context switch event
2009-03-17 21:56 [RFC][PATCH 00/11] tracepoint perf counter events and other stuff Peter Zijlstra
2009-03-17 21:56 ` [RFC][PATCH 01/11] perf_counter: fix uninitialized usage of event_list Peter Zijlstra
@ 2009-03-17 21:56 ` Peter Zijlstra
2009-03-17 21:56 ` [RFC][PATCH 03/11] ftrace: fix memory leak Peter Zijlstra
` (8 subsequent siblings)
10 siblings, 0 replies; 24+ messages in thread
From: Peter Zijlstra @ 2009-03-17 21:56 UTC (permalink / raw)
To: mingo, paulus, rostedt; +Cc: linux-kernel, Peter Zijlstra
[-- Attachment #1: perf_swcounter_ctx.patch --]
[-- Type: text/plain, Size: 3885 bytes --]
Use the generic software events for context switches.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
include/linux/sched.h | 1
kernel/perf_counter.c | 60 +++-----------------------------------------------
kernel/sched.c | 6 -----
3 files changed, 4 insertions(+), 63 deletions(-)
Index: linux-2.6/include/linux/sched.h
===================================================================
--- linux-2.6.orig/include/linux/sched.h
+++ linux-2.6/include/linux/sched.h
@@ -137,7 +137,6 @@ extern unsigned long nr_running(void);
extern unsigned long nr_uninterruptible(void);
extern unsigned long nr_active(void);
extern unsigned long nr_iowait(void);
-extern u64 cpu_nr_switches(int cpu);
extern u64 cpu_nr_migrations(int cpu);
extern unsigned long get_parent_ip(unsigned long addr);
Index: linux-2.6/kernel/perf_counter.c
===================================================================
--- linux-2.6.orig/kernel/perf_counter.c
+++ linux-2.6/kernel/perf_counter.c
@@ -710,10 +710,13 @@ void perf_counter_task_sched_out(struct
{
struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
struct perf_counter_context *ctx = &task->perf_counter_ctx;
+ struct pt_regs *regs;
if (likely(!cpuctx->task_ctx))
return;
+ regs = task_pt_regs(task);
+ perf_swcounter_event(PERF_COUNT_CONTEXT_SWITCHES, 1, 1, regs);
__perf_counter_sched_out(ctx, cpuctx);
cpuctx->task_ctx = NULL;
@@ -1671,58 +1674,6 @@ static const struct hw_perf_counter_ops
};
/*
- * Software counter: context switches
- */
-
-static u64 get_context_switches(struct perf_counter *counter)
-{
- struct task_struct *curr = counter->ctx->task;
-
- if (curr)
- return curr->nvcsw + curr->nivcsw;
- return cpu_nr_switches(smp_processor_id());
-}
-
-static void context_switches_perf_counter_update(struct perf_counter *counter)
-{
- u64 prev, now;
- s64 delta;
-
- prev = atomic64_read(&counter->hw.prev_count);
- now = get_context_switches(counter);
-
- atomic64_set(&counter->hw.prev_count, now);
-
- delta = now - prev;
-
- atomic64_add(delta, &counter->count);
-}
-
-static void context_switches_perf_counter_read(struct perf_counter *counter)
-{
- context_switches_perf_counter_update(counter);
-}
-
-static int context_switches_perf_counter_enable(struct perf_counter *counter)
-{
- if (counter->prev_state <= PERF_COUNTER_STATE_OFF)
- atomic64_set(&counter->hw.prev_count,
- get_context_switches(counter));
- return 0;
-}
-
-static void context_switches_perf_counter_disable(struct perf_counter *counter)
-{
- context_switches_perf_counter_update(counter);
-}
-
-static const struct hw_perf_counter_ops perf_ops_context_switches = {
- .enable = context_switches_perf_counter_enable,
- .disable = context_switches_perf_counter_disable,
- .read = context_switches_perf_counter_read,
-};
-
-/*
* Software counter: cpu migrations
*/
@@ -1811,11 +1762,8 @@ sw_perf_counter_init(struct perf_counter
case PERF_COUNT_PAGE_FAULTS:
case PERF_COUNT_PAGE_FAULTS_MIN:
case PERF_COUNT_PAGE_FAULTS_MAJ:
- hw_ops = &perf_ops_generic;
- break;
case PERF_COUNT_CONTEXT_SWITCHES:
- if (!counter->hw_event.exclude_kernel)
- hw_ops = &perf_ops_context_switches;
+ hw_ops = &perf_ops_generic;
break;
case PERF_COUNT_CPU_MIGRATIONS:
if (!counter->hw_event.exclude_kernel)
Index: linux-2.6/kernel/sched.c
===================================================================
--- linux-2.6.orig/kernel/sched.c
+++ linux-2.6/kernel/sched.c
@@ -2911,14 +2911,8 @@ unsigned long nr_active(void)
/*
* Externally visible per-cpu scheduler statistics:
- * cpu_nr_switches(cpu) - number of context switches on that cpu
* cpu_nr_migrations(cpu) - number of migrations into that cpu
*/
-u64 cpu_nr_switches(int cpu)
-{
- return cpu_rq(cpu)->nr_switches;
-}
-
u64 cpu_nr_migrations(int cpu)
{
return cpu_rq(cpu)->nr_migrations_in;
--
^ permalink raw reply [flat|nested] 24+ messages in thread
* [RFC][PATCH 03/11] ftrace: fix memory leak
2009-03-17 21:56 [RFC][PATCH 00/11] tracepoint perf counter events and other stuff Peter Zijlstra
2009-03-17 21:56 ` [RFC][PATCH 01/11] perf_counter: fix uninitialized usage of event_list Peter Zijlstra
2009-03-17 21:56 ` [RFC][PATCH 02/11] perf_counter: generic context switch event Peter Zijlstra
@ 2009-03-17 21:56 ` Peter Zijlstra
2009-03-17 22:49 ` Steven Rostedt
2009-03-17 21:56 ` [RFC][PATCH 04/11] ftrace: provide an id file for each event Peter Zijlstra
` (7 subsequent siblings)
10 siblings, 1 reply; 24+ messages in thread
From: Peter Zijlstra @ 2009-03-17 21:56 UTC (permalink / raw)
To: mingo, paulus, rostedt; +Cc: linux-kernel, Peter Zijlstra
[-- Attachment #1: ftrace_event-fix.patch --]
[-- Type: text/plain, Size: 809 bytes --]
Don't return after an allocation without freeing -- fix it by moving the check
up a few lines.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
kernel/trace/trace_events.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
Index: linux-2.6/kernel/trace/trace_events.c
===================================================================
--- linux-2.6.orig/kernel/trace/trace_events.c
+++ linux-2.6/kernel/trace/trace_events.c
@@ -380,15 +380,15 @@ event_format_read(struct file *filp, cha
char *buf;
int r;
+ if (*ppos)
+ return 0;
+
s = kmalloc(sizeof(*s), GFP_KERNEL);
if (!s)
return -ENOMEM;
trace_seq_init(s);
- if (*ppos)
- return 0;
-
/* If any of the first writes fail, so will the show_format. */
trace_seq_printf(s, "name: %s\n", call->name);
--
^ permalink raw reply [flat|nested] 24+ messages in thread
* [RFC][PATCH 04/11] ftrace: provide an id file for each event
2009-03-17 21:56 [RFC][PATCH 00/11] tracepoint perf counter events and other stuff Peter Zijlstra
` (2 preceding siblings ...)
2009-03-17 21:56 ` [RFC][PATCH 03/11] ftrace: fix memory leak Peter Zijlstra
@ 2009-03-17 21:56 ` Peter Zijlstra
2009-03-17 22:52 ` Steven Rostedt
2009-03-17 21:56 ` [RFC][PATCH 05/11] ftrace: ensure every event gets an id Peter Zijlstra
` (6 subsequent siblings)
10 siblings, 1 reply; 24+ messages in thread
From: Peter Zijlstra @ 2009-03-17 21:56 UTC (permalink / raw)
To: mingo, paulus, rostedt; +Cc: linux-kernel, Peter Zijlstra
[-- Attachment #1: ftrace_event_id.patch --]
[-- Type: text/plain, Size: 1826 bytes --]
Since not every event has a format file to read the id from, expose it
explicitly in a separate file.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
kernel/trace/trace_events.c | 36 ++++++++++++++++++++++++++++++++++++
1 file changed, 36 insertions(+)
Index: linux-2.6/kernel/trace/trace_events.c
===================================================================
--- linux-2.6.orig/kernel/trace/trace_events.c
+++ linux-2.6/kernel/trace/trace_events.c
@@ -414,6 +414,29 @@ event_format_read(struct file *filp, cha
return r;
}
+static ssize_t
+event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+ struct ftrace_event_call *call = filp->private_data;
+ struct trace_seq *s;
+ int r;
+
+ if (*ppos)
+ return 0;
+
+ s = kmalloc(sizeof(*s), GFP_KERNEL);
+ if (!s)
+ return -ENOMEM;
+
+ trace_seq_init(s);
+ trace_seq_printf(s, "%d\n", call->id);
+
+ r = simple_read_from_buffer(ubuf, cnt, ppos,
+ s->buffer, s->len);
+ kfree(s);
+ return r;
+}
+
static const struct seq_operations show_event_seq_ops = {
.start = t_start,
.next = t_next,
@@ -454,6 +477,11 @@ static const struct file_operations ftra
.read = event_format_read,
};
+static const struct file_operations ftrace_event_id_fops = {
+ .open = tracing_open_generic,
+ .read = event_id_read,
+};
+
static struct dentry *event_trace_events_dir(void)
{
static struct dentry *d_tracer;
@@ -552,6 +580,14 @@ event_create_dir(struct ftrace_event_cal
"'%s/enable' entry\n", call->name);
}
+ if (call->id) {
+ entry = debugfs_create_file("id", 0444, call->dir, call,
+ &ftrace_event_id_fops);
+ if (!entry)
+ pr_warning("Could not create debugfs '%s/id' entry\n",
+ call->name);
+ }
+
/* A trace may not want to export its format */
if (!call->show_format)
return 0;
--
^ permalink raw reply [flat|nested] 24+ messages in thread
* [RFC][PATCH 05/11] ftrace: ensure every event gets an id
2009-03-17 21:56 [RFC][PATCH 00/11] tracepoint perf counter events and other stuff Peter Zijlstra
` (3 preceding siblings ...)
2009-03-17 21:56 ` [RFC][PATCH 04/11] ftrace: provide an id file for each event Peter Zijlstra
@ 2009-03-17 21:56 ` Peter Zijlstra
2009-03-17 22:54 ` Steven Rostedt
2009-03-17 21:56 ` [RFC][PATCH 06/11] ftrace: event profile hooks Peter Zijlstra
` (5 subsequent siblings)
10 siblings, 1 reply; 24+ messages in thread
From: Peter Zijlstra @ 2009-03-17 21:56 UTC (permalink / raw)
To: mingo, paulus, rostedt; +Cc: linux-kernel, Peter Zijlstra
[-- Attachment #1: ftrace_event_fmt_id.patch --]
[-- Type: text/plain, Size: 1976 bytes --]
Previously only TRACE_EVENT events got ids, because only they generated
raw output which needs to be demuxed from the trace.
In order to provide a unique ID for each event, register everybody, regardless.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
kernel/trace/trace_events_stage_3.h | 15 ++++++++++++++-
kernel/trace/trace_output.c | 5 +++++
2 files changed, 19 insertions(+), 1 deletion(-)
Index: linux-2.6/kernel/trace/trace_events_stage_3.h
===================================================================
--- linux-2.6.orig/kernel/trace/trace_events_stage_3.h
+++ linux-2.6/kernel/trace/trace_events_stage_3.h
@@ -130,7 +130,19 @@ static void ftrace_unreg_event_##call(vo
{ \
unregister_trace_##call(ftrace_event_##call); \
} \
-
+ \
+static struct ftrace_event_call event_##call; \
+ \
+static int ftrace_init_event_##call(void) \
+{ \
+ int id; \
+ \
+ id = register_ftrace_event(NULL); \
+ if (!id) \
+ return -ENODEV; \
+ event_##call.id = id; \
+ return 0; \
+}
#undef TRACE_FORMAT
#define TRACE_FORMAT(call, proto, args, fmt) \
@@ -140,6 +152,7 @@ __attribute__((__aligned__(4))) \
__attribute__((section("_ftrace_events"))) event_##call = { \
.name = #call, \
.system = __stringify(TRACE_SYSTEM), \
+ .raw_init = ftrace_init_event_##call, \
.regfunc = ftrace_reg_event_##call, \
.unregfunc = ftrace_unreg_event_##call, \
}
Index: linux-2.6/kernel/trace/trace_output.c
===================================================================
--- linux-2.6.orig/kernel/trace/trace_output.c
+++ linux-2.6/kernel/trace/trace_output.c
@@ -444,6 +444,11 @@ int register_ftrace_event(struct trace_e
mutex_lock(&trace_event_mutex);
+ if (!event) {
+ ret = next_event_type++;
+ goto out;
+ }
+
if (!event->type)
event->type = next_event_type++;
else if (event->type > __TRACE_LAST_TYPE) {
--
^ permalink raw reply [flat|nested] 24+ messages in thread
* [RFC][PATCH 06/11] ftrace: event profile hooks
2009-03-17 21:56 [RFC][PATCH 00/11] tracepoint perf counter events and other stuff Peter Zijlstra
` (4 preceding siblings ...)
2009-03-17 21:56 ` [RFC][PATCH 05/11] ftrace: ensure every event gets an id Peter Zijlstra
@ 2009-03-17 21:56 ` Peter Zijlstra
2009-03-17 21:56 ` [RFC][PATCH 07/11] perf_counter: fix up counter free paths Peter Zijlstra
` (4 subsequent siblings)
10 siblings, 0 replies; 24+ messages in thread
From: Peter Zijlstra @ 2009-03-17 21:56 UTC (permalink / raw)
To: mingo, paulus, rostedt; +Cc: linux-kernel, Peter Zijlstra
[-- Attachment #1: ftrace_event_profile.patch --]
[-- Type: text/plain, Size: 6695 bytes --]
Provide infrastructure to generate software perf counter events from
tracepoints.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
kernel/trace/Makefile | 1
kernel/trace/events.c | 1
kernel/trace/trace.h | 11 +++++++++
kernel/trace/trace_event_profile.c | 31 +++++++++++++++++++++++++
kernel/trace/trace_events.c | 9 +------
kernel/trace/trace_events_stage_3.h | 44 ++++++++++++++++++++++++++++++++++++
6 files changed, 89 insertions(+), 8 deletions(-)
Index: linux-2.6/kernel/trace/trace.h
===================================================================
--- linux-2.6.orig/kernel/trace/trace.h
+++ linux-2.6/kernel/trace/trace.h
@@ -786,12 +786,23 @@ struct ftrace_event_call {
int id;
int (*raw_init)(void);
int (*show_format)(struct trace_seq *s);
+
+#ifdef CONFIG_EVENT_PROFILE
+ atomic_t profile_count;
+ int (*profile_enable)(struct ftrace_event_call *);
+ void (*profile_disable)(struct ftrace_event_call *);
+#endif
};
void event_trace_printk(unsigned long ip, const char *fmt, ...);
extern struct ftrace_event_call __start_ftrace_events[];
extern struct ftrace_event_call __stop_ftrace_events[];
+#define for_each_event(event) \
+ for (event = __start_ftrace_events; \
+ (unsigned long)event < (unsigned long)__stop_ftrace_events; \
+ event++)
+
extern const char *__start___trace_bprintk_fmt[];
extern const char *__stop___trace_bprintk_fmt[];
Index: linux-2.6/kernel/trace/trace_event_profile.c
===================================================================
--- /dev/null
+++ linux-2.6/kernel/trace/trace_event_profile.c
@@ -0,0 +1,31 @@
+/*
+ * trace event based perf counter profiling
+ *
+ * Copyright (C) 2009 Red Hat Inc, Peter Zijlstra <pzijlstr@redhat.com>
+ *
+ */
+
+#include "trace.h"
+
+int ftrace_profile_enable(int event_id)
+{
+ struct ftrace_event_call *event;
+
+ for_each_event(event) {
+ if (event->id == event_id)
+ return event->profile_enable(event);
+ }
+
+ return -EINVAL;
+}
+
+void ftrace_profile_disable(int event_id)
+{
+ struct ftrace_event_call *event;
+
+ for_each_event(event) {
+ if (event->id == event_id)
+ return event->profile_disable(event);
+ }
+}
+
Index: linux-2.6/kernel/trace/trace_events.c
===================================================================
--- linux-2.6.orig/kernel/trace/trace_events.c
+++ linux-2.6/kernel/trace/trace_events.c
@@ -19,11 +19,6 @@
static DEFINE_MUTEX(event_mutex);
-#define events_for_each(event) \
- for (event = __start_ftrace_events; \
- (unsigned long)event < (unsigned long)__stop_ftrace_events; \
- event++)
-
static void ftrace_clear_events(void)
{
struct ftrace_event_call *call = (void *)__start_ftrace_events;
@@ -90,7 +85,7 @@ static int ftrace_set_clr_event(char *bu
}
mutex_lock(&event_mutex);
- events_for_each(call) {
+ for_each_event(call) {
if (!call->name || !call->regfunc)
continue;
@@ -628,7 +623,7 @@ static __init int event_trace_init(void)
if (!d_events)
return 0;
- events_for_each(call) {
+ for_each_event(call) {
/* The linker may leave blanks */
if (!call->name)
continue;
Index: linux-2.6/kernel/trace/trace_events_stage_3.h
===================================================================
--- linux-2.6.orig/kernel/trace/trace_events_stage_3.h
+++ linux-2.6/kernel/trace/trace_events_stage_3.h
@@ -109,6 +109,40 @@
#undef TP_FMT
#define TP_FMT(fmt, args...) fmt "\n", ##args
+#ifdef CONFIG_EVENT_PROFILE
+#define _TRACE_PROFILE(call, proto, args) \
+static void ftrace_profile_##call(proto) \
+{ \
+ extern void perf_tpcounter_event(int); \
+ perf_tpcounter_event(event_##call.id); \
+} \
+ \
+static int ftrace_profile_enable_##call(struct ftrace_event_call *call) \
+{ \
+ int ret = 0; \
+ \
+ if (!atomic_inc_return(&call->profile_count)) \
+ ret = register_trace_##call(ftrace_profile_##call); \
+ \
+ return ret; \
+} \
+ \
+static void ftrace_profile_disable_##call(struct ftrace_event_call *call) \
+{ \
+ if (atomic_add_negative(-1, &call->profile_count)) \
+ unregister_trace_##call(ftrace_profile_##call); \
+}
+
+#define _TRACE_PROFILE_INIT(call) \
+ .profile_count = ATOMIC_INIT(-1), \
+ .profile_enable = ftrace_profile_enable_##call, \
+ .profile_disable = ftrace_profile_disable_##call,
+
+#else
+#define _TRACE_PROFILE(call, proto, args)
+#define _TRACE_PROFILE_INIT(call)
+#endif
+
#define _TRACE_FORMAT(call, proto, args, fmt) \
static void ftrace_event_##call(proto) \
{ \
@@ -147,6 +181,7 @@ static int ftrace_init_event_##call(void
#undef TRACE_FORMAT
#define TRACE_FORMAT(call, proto, args, fmt) \
_TRACE_FORMAT(call, PARAMS(proto), PARAMS(args), PARAMS(fmt)) \
+_TRACE_PROFILE(call, PARAMS(proto), PARAMS(args)) \
static struct ftrace_event_call __used \
__attribute__((__aligned__(4))) \
__attribute__((section("_ftrace_events"))) event_##call = { \
@@ -155,6 +190,7 @@ __attribute__((section("_ftrace_events")
.raw_init = ftrace_init_event_##call, \
.regfunc = ftrace_reg_event_##call, \
.unregfunc = ftrace_unreg_event_##call, \
+ _TRACE_PROFILE_INIT(call) \
}
#undef __entry
@@ -162,6 +198,7 @@ __attribute__((section("_ftrace_events")
#undef TRACE_EVENT
#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \
+_TRACE_PROFILE(call, PARAMS(proto), PARAMS(args)) \
\
static struct ftrace_event_call event_##call; \
\
@@ -227,4 +264,11 @@ __attribute__((section("_ftrace_events")
.regfunc = ftrace_raw_reg_event_##call, \
.unregfunc = ftrace_raw_unreg_event_##call, \
.show_format = ftrace_format_##call, \
+ _TRACE_PROFILE_INIT(call) \
}
+
+#include <trace/trace_event_types.h>
+
+#undef _TRACE_PROFILE
+#undef _TRACE_PROFILE_INIT
+
Index: linux-2.6/kernel/trace/Makefile
===================================================================
--- linux-2.6.orig/kernel/trace/Makefile
+++ linux-2.6/kernel/trace/Makefile
@@ -44,5 +44,6 @@ obj-$(CONFIG_EVENT_TRACER) += trace_even
obj-$(CONFIG_EVENT_TRACER) += events.o
obj-$(CONFIG_EVENT_TRACER) += trace_export.o
obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o
+obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o
libftrace-y := ftrace.o
Index: linux-2.6/kernel/trace/events.c
===================================================================
--- linux-2.6.orig/kernel/trace/events.c
+++ linux-2.6/kernel/trace/events.c
@@ -12,4 +12,3 @@
#include "trace_events_stage_2.h"
#include "trace_events_stage_3.h"
-#include <trace/trace_event_types.h>
--
^ permalink raw reply [flat|nested] 24+ messages in thread
* [RFC][PATCH 07/11] perf_counter: fix up counter free paths
2009-03-17 21:56 [RFC][PATCH 00/11] tracepoint perf counter events and other stuff Peter Zijlstra
` (5 preceding siblings ...)
2009-03-17 21:56 ` [RFC][PATCH 06/11] ftrace: event profile hooks Peter Zijlstra
@ 2009-03-17 21:56 ` Peter Zijlstra
2009-03-17 21:56 ` [RFC][PATCH 08/11] perf_counter: hook up the tracepoint events Peter Zijlstra
` (3 subsequent siblings)
10 siblings, 0 replies; 24+ messages in thread
From: Peter Zijlstra @ 2009-03-17 21:56 UTC (permalink / raw)
To: mingo, paulus, rostedt; +Cc: linux-kernel, Peter Zijlstra
[-- Attachment #1: perf_counter-free_counter.patch --]
[-- Type: text/plain, Size: 1230 bytes --]
I found another counter free path, create a free_counter() call to accommodate
generic tear-down.
Fixes an RCU bug.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
kernel/perf_counter.c | 11 ++++++++---
1 file changed, 8 insertions(+), 3 deletions(-)
Index: linux-2.6/kernel/perf_counter.c
===================================================================
--- linux-2.6.orig/kernel/perf_counter.c
+++ linux-2.6/kernel/perf_counter.c
@@ -1150,6 +1150,11 @@ static void free_counter_rcu(struct rcu_
kfree(counter);
}
+static void free_counter(struct perf_counter *counter)
+{
+ call_rcu(&counter->rcu_head, free_counter_rcu);
+}
+
/*
* Called when the last reference to the file is gone.
*/
@@ -1168,7 +1173,7 @@ static int perf_release(struct inode *in
mutex_unlock(&counter->mutex);
mutex_unlock(&ctx->mutex);
- call_rcu(&counter->rcu_head, free_counter_rcu);
+ free_counter(counter);
put_context(ctx);
return 0;
@@ -2128,10 +2133,10 @@ __perf_counter_exit_task(struct task_str
list_entry) {
if (sub->parent) {
sync_child_counter(sub, sub->parent);
- kfree(sub);
+ free_counter(sub);
}
}
- kfree(child_counter);
+ free_counter(child_counter);
}
}
--
^ permalink raw reply [flat|nested] 24+ messages in thread
* [RFC][PATCH 08/11] perf_counter: hook up the tracepoint events
2009-03-17 21:56 [RFC][PATCH 00/11] tracepoint perf counter events and other stuff Peter Zijlstra
` (6 preceding siblings ...)
2009-03-17 21:56 ` [RFC][PATCH 07/11] perf_counter: fix up counter free paths Peter Zijlstra
@ 2009-03-17 21:56 ` Peter Zijlstra
2009-03-17 21:56 ` [RFC][PATCH 09/11] perf_counter: revamp syscall input ABI Peter Zijlstra
` (2 subsequent siblings)
10 siblings, 0 replies; 24+ messages in thread
From: Peter Zijlstra @ 2009-03-17 21:56 UTC (permalink / raw)
To: mingo, paulus, rostedt; +Cc: linux-kernel, Peter Zijlstra
[-- Attachment #1: perf_swcounter_event.patch --]
[-- Type: text/plain, Size: 3057 bytes --]
Enable usage of tracepoints as perf counter events.
tracepoint event ids can be found in /debug/tracing/event/*/*/id
and (for now) are represented as -65536+id in the type field.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
include/linux/perf_counter.h | 3 +++
init/Kconfig | 5 +++++
kernel/perf_counter.c | 43 +++++++++++++++++++++++++++++++++++++++++++
3 files changed, 51 insertions(+)
Index: linux-2.6/include/linux/perf_counter.h
===================================================================
--- linux-2.6.orig/include/linux/perf_counter.h
+++ linux-2.6/include/linux/perf_counter.h
@@ -53,6 +53,8 @@ enum hw_event_types {
PERF_COUNT_PAGE_FAULTS_MAJ = -7,
PERF_SW_EVENTS_MIN = -8,
+
+ PERF_TP_EVENTS_MIN = -65536
};
/*
@@ -222,6 +224,7 @@ struct perf_counter {
struct perf_data *usrdata;
struct perf_data data[2];
+ void (*destroy)(struct perf_counter *);
struct rcu_head rcu_head;
#endif
};
Index: linux-2.6/kernel/perf_counter.c
===================================================================
--- linux-2.6.orig/kernel/perf_counter.c
+++ linux-2.6/kernel/perf_counter.c
@@ -1152,6 +1152,9 @@ static void free_counter_rcu(struct rcu_
static void free_counter(struct perf_counter *counter)
{
+ if (counter->destroy)
+ counter->destroy(counter);
+
call_rcu(&counter->rcu_head, free_counter_rcu);
}
@@ -1727,6 +1730,45 @@ static const struct hw_perf_counter_ops
.read = cpu_migrations_perf_counter_read,
};
+#ifdef CONFIG_EVENT_PROFILE
+void perf_tpcounter_event(int event_id)
+{
+ perf_swcounter_event(PERF_TP_EVENTS_MIN + event_id, 1, 1,
+ task_pt_regs(current));
+}
+
+extern int ftrace_profile_enable(int);
+extern void ftrace_profile_disable(int);
+
+static void tp_perf_counter_destroy(struct perf_counter *counter)
+{
+ int event_id = counter->hw_event.type - PERF_TP_EVENTS_MIN;
+
+ ftrace_profile_disable(event_id);
+}
+
+static const struct hw_perf_counter_ops *
+tp_perf_counter_init(struct perf_counter *counter)
+{
+ int event_id = counter->hw_event.type - PERF_TP_EVENTS_MIN;
+ int ret;
+
+ ret = ftrace_profile_enable(event_id);
+ if (ret)
+ return NULL;
+
+ counter->destroy = tp_perf_counter_destroy;
+
+ return &perf_ops_generic;
+}
+#else
+static const struct hw_perf_counter_ops *
+tp_perf_counter_init(struct perf_counter *counter)
+{
+ return NULL;
+}
+#endif
+
static const struct hw_perf_counter_ops *
sw_perf_counter_init(struct perf_counter *counter)
{
@@ -1772,6 +1814,7 @@ sw_perf_counter_init(struct perf_counter
hw_ops = &perf_ops_cpu_migrations;
break;
default:
+ hw_ops = tp_perf_counter_init(counter);
break;
}
Index: linux-2.6/init/Kconfig
===================================================================
--- linux-2.6.orig/init/Kconfig
+++ linux-2.6/init/Kconfig
@@ -945,6 +945,11 @@ config PERF_COUNTERS
Say Y if unsure.
+config EVENT_PROFILE
+ bool "Tracepoint profile sources"
+ depends on PERF_COUNTERS && EVENT_TRACER
+ default y
+
endmenu
config VM_EVENT_COUNTERS
--
^ permalink raw reply [flat|nested] 24+ messages in thread
* [RFC][PATCH 09/11] perf_counter: revamp syscall input ABI
2009-03-17 21:56 [RFC][PATCH 00/11] tracepoint perf counter events and other stuff Peter Zijlstra
` (7 preceding siblings ...)
2009-03-17 21:56 ` [RFC][PATCH 08/11] perf_counter: hook up the tracepoint events Peter Zijlstra
@ 2009-03-17 21:56 ` Peter Zijlstra
2009-03-18 2:29 ` Paul Mackerras
2009-03-18 4:31 ` Paul Mackerras
2009-03-17 21:56 ` [RFC][PATCH 10/11] perfcounters: abstract wakeup flag setting in core to fix powerpc build Peter Zijlstra
2009-03-17 21:56 ` [RFC][PATCH 11/11] perf_counter: unify irq output code Peter Zijlstra
10 siblings, 2 replies; 24+ messages in thread
From: Peter Zijlstra @ 2009-03-17 21:56 UTC (permalink / raw)
To: mingo, paulus, rostedt; +Cc: linux-kernel, Peter Zijlstra
[-- Attachment #1: perf_counter-input-abi.patch --]
[-- Type: text/plain, Size: 12129 bytes --]
The hardware/software classification in hw_event->type became a little strained
due to the addition of tracepoint tracing.
Instead split up the field and provide a type field to explicitly specify the
counter type, while using the event_id field to specify which event to use.
Raw counters still work as before, only the raw config now goes into raw_event.
Updated userspace at:
http://programming.kicks-ass.net/misc/perf_counter/
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
arch/powerpc/kernel/perf_counter.c | 2
arch/x86/kernel/cpu/perf_counter.c | 8 +--
include/linux/perf_counter.h | 83 ++++++++++++++++++++++---------------
kernel/perf_counter.c | 81 +++++++++++++++++++++---------------
4 files changed, 104 insertions(+), 70 deletions(-)
Index: linux-2.6/arch/powerpc/kernel/perf_counter.c
===================================================================
--- linux-2.6.orig/arch/powerpc/kernel/perf_counter.c
+++ linux-2.6/arch/powerpc/kernel/perf_counter.c
@@ -693,7 +693,7 @@ static void perf_handle_group(struct per
list_for_each_entry(sub, &leader->sibling_list, list_entry) {
if (sub != counter)
sub->hw_ops->read(sub);
- perf_store_irq_data(counter, sub->hw_event.type);
+ perf_store_irq_data(counter, sub->hw_event.raw_event);
perf_store_irq_data(counter, atomic64_read(&sub->count));
}
}
Index: linux-2.6/arch/x86/kernel/cpu/perf_counter.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/perf_counter.c
+++ linux-2.6/arch/x86/kernel/cpu/perf_counter.c
@@ -216,14 +216,14 @@ static int __hw_perf_counter_init(struct
* Raw event type provide the config in the event structure
*/
if (hw_event->raw) {
- hwc->config |= pmc_ops->raw_event(hw_event->type);
+ hwc->config |= pmc_ops->raw_event(hw_event->raw_event);
} else {
- if (hw_event->type >= pmc_ops->max_events)
+ if (hw_event->event_id >= pmc_ops->max_events)
return -EINVAL;
/*
* The generic map:
*/
- hwc->config |= pmc_ops->event_map(hw_event->type);
+ hwc->config |= pmc_ops->event_map(hw_event->event_id);
}
counter->wakeup_pending = 0;
@@ -713,7 +713,7 @@ perf_handle_group(struct perf_counter *s
list_for_each_entry(counter, &group_leader->sibling_list, list_entry) {
x86_perf_counter_update(counter, &counter->hw, counter->hw.idx);
- perf_store_irq_data(sibling, counter->hw_event.type);
+ perf_store_irq_data(sibling, counter->hw_event.raw_event);
perf_store_irq_data(sibling, atomic64_read(&counter->count));
}
}
Index: linux-2.6/include/linux/perf_counter.h
===================================================================
--- linux-2.6.orig/include/linux/perf_counter.h
+++ linux-2.6/include/linux/perf_counter.h
@@ -21,56 +21,72 @@
*/
/*
- * Generalized performance counter event types, used by the hw_event.type
+ * hw_event.type
+ */
+enum perf_event_types {
+ PERF_TYPE_HARDWARE = 0,
+ PERF_TYPE_SOFTWARE = 1,
+ PERF_TYPE_TRACEPOINT = 2,
+};
+
+/*
+ * Generalized performance counter event types, used by the hw_event.event_id
* parameter of the sys_perf_counter_open() syscall:
*/
-enum hw_event_types {
+enum hw_event_ids {
/*
* Common hardware events, generalized by the kernel:
*/
- PERF_COUNT_CPU_CYCLES = 0,
- PERF_COUNT_INSTRUCTIONS = 1,
- PERF_COUNT_CACHE_REFERENCES = 2,
- PERF_COUNT_CACHE_MISSES = 3,
- PERF_COUNT_BRANCH_INSTRUCTIONS = 4,
- PERF_COUNT_BRANCH_MISSES = 5,
- PERF_COUNT_BUS_CYCLES = 6,
-
- PERF_HW_EVENTS_MAX = 7,
+ PERF_COUNT_CPU_CYCLES = 0,
+ PERF_COUNT_INSTRUCTIONS = 1,
+ PERF_COUNT_CACHE_REFERENCES = 2,
+ PERF_COUNT_CACHE_MISSES = 3,
+ PERF_COUNT_BRANCH_INSTRUCTIONS = 4,
+ PERF_COUNT_BRANCH_MISSES = 5,
+ PERF_COUNT_BUS_CYCLES = 6,
- /*
- * Special "software" counters provided by the kernel, even if
- * the hardware does not support performance counters. These
- * counters measure various physical and sw events of the
- * kernel (and allow the profiling of them as well):
- */
- PERF_COUNT_CPU_CLOCK = -1,
- PERF_COUNT_TASK_CLOCK = -2,
- PERF_COUNT_PAGE_FAULTS = -3,
- PERF_COUNT_CONTEXT_SWITCHES = -4,
- PERF_COUNT_CPU_MIGRATIONS = -5,
- PERF_COUNT_PAGE_FAULTS_MIN = -6,
- PERF_COUNT_PAGE_FAULTS_MAJ = -7,
+ PERF_HW_EVENTS_MAX = 7,
+};
- PERF_SW_EVENTS_MIN = -8,
+/*
+ * Special "software" counters provided by the kernel, even if the hardware
+ * does not support performance counters. These counters measure various
+ * physical and sw events of the kernel (and allow the profiling of them as
+ * well):
+ */
+enum sw_event_ids {
+ PERF_COUNT_CPU_CLOCK = 0,
+ PERF_COUNT_TASK_CLOCK = 1,
+ PERF_COUNT_PAGE_FAULTS = 2,
+ PERF_COUNT_CONTEXT_SWITCHES = 3,
+ PERF_COUNT_CPU_MIGRATIONS = 4,
+ PERF_COUNT_PAGE_FAULTS_MIN = 5,
+ PERF_COUNT_PAGE_FAULTS_MAJ = 6,
- PERF_TP_EVENTS_MIN = -65536
+ PERF_SW_EVENTS_MAX = 7,
};
/*
* IRQ-notification data record type:
*/
enum perf_counter_record_type {
- PERF_RECORD_SIMPLE = 0,
- PERF_RECORD_IRQ = 1,
- PERF_RECORD_GROUP = 2,
+ PERF_RECORD_SIMPLE = 0,
+ PERF_RECORD_IRQ = 1,
+ PERF_RECORD_GROUP = 2,
};
/*
* Hardware event to monitor via a performance monitoring counter:
*/
struct perf_counter_hw_event {
- __s64 type;
+ union {
+ __u64 raw_event;
+ struct {
+ __u64 event_id : 32,
+ type : 8,
+ __reserved_0 : 24;
+ };
+ };
__u64 irq_period;
__u64 record_type;
@@ -298,10 +314,11 @@ extern int hw_perf_group_sched_in(struct
*/
static inline int is_software_counter(struct perf_counter *counter)
{
- return !counter->hw_event.raw && counter->hw_event.type < 0;
+ return !counter->hw_event.raw &&
+ counter->hw_event.type != PERF_TYPE_HARDWARE;
}
-extern void perf_swcounter_event(enum hw_event_types, u64, int, struct pt_regs *);
+extern void perf_swcounter_event(u32, u64, int, struct pt_regs *);
#else
static inline void
@@ -320,7 +337,7 @@ static inline u64 hw_perf_save_disable(v
static inline int perf_counter_task_disable(void) { return -EINVAL; }
static inline int perf_counter_task_enable(void) { return -EINVAL; }
-static inline void perf_swcounter_event(enum hw_event_types event, u64 nr,
+static inline void perf_swcounter_event(u32 event, u64 nr,
int nmi, struct pt_regs *regs) { }
#endif
Index: linux-2.6/kernel/perf_counter.c
===================================================================
--- linux-2.6.orig/kernel/perf_counter.c
+++ linux-2.6/kernel/perf_counter.c
@@ -1395,12 +1395,6 @@ static void perf_swcounter_set_period(st
atomic64_set(&hwc->count, -left);
}
-static void perf_swcounter_save_and_restart(struct perf_counter *counter)
-{
- perf_swcounter_update(counter);
- perf_swcounter_set_period(counter);
-}
-
static void perf_swcounter_store_irq(struct perf_counter *counter, u64 data)
{
struct perf_data *irqdata = counter->irqdata;
@@ -1421,7 +1415,7 @@ static void perf_swcounter_handle_group(
list_for_each_entry(counter, &group_leader->sibling_list, list_entry) {
counter->hw_ops->read(counter);
- perf_swcounter_store_irq(sibling, counter->hw_event.type);
+ perf_swcounter_store_irq(sibling, counter->hw_event.raw_event);
perf_swcounter_store_irq(sibling, atomic64_read(&counter->count));
}
}
@@ -1477,13 +1471,14 @@ static enum hrtimer_restart perf_swcount
static void perf_swcounter_overflow(struct perf_counter *counter,
int nmi, struct pt_regs *regs)
{
- perf_swcounter_save_and_restart(counter);
+ perf_swcounter_update(counter);
+ perf_swcounter_set_period(counter);
perf_swcounter_interrupt(counter, nmi, regs);
}
static int perf_swcounter_match(struct perf_counter *counter,
- enum hw_event_types event,
- struct pt_regs *regs)
+ enum perf_event_types type,
+ u32 event, struct pt_regs *regs)
{
if (counter->state != PERF_COUNTER_STATE_ACTIVE)
return 0;
@@ -1491,7 +1486,10 @@ static int perf_swcounter_match(struct p
if (counter->hw_event.raw)
return 0;
- if (counter->hw_event.type != event)
+ if (counter->hw_event.type != type)
+ return 0;
+
+ if (counter->hw_event.event_id != event)
return 0;
if (counter->hw_event.exclude_user && user_mode(regs))
@@ -1512,8 +1510,8 @@ static void perf_swcounter_add(struct pe
}
static void perf_swcounter_ctx_event(struct perf_counter_context *ctx,
- enum hw_event_types event, u64 nr,
- int nmi, struct pt_regs *regs)
+ enum perf_event_types type, u32 event,
+ u64 nr, int nmi, struct pt_regs *regs)
{
struct perf_counter *counter;
@@ -1522,24 +1520,31 @@ static void perf_swcounter_ctx_event(str
rcu_read_lock();
list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) {
- if (perf_swcounter_match(counter, event, regs))
+ if (perf_swcounter_match(counter, type, event, regs))
perf_swcounter_add(counter, nr, nmi, regs);
}
rcu_read_unlock();
}
-void perf_swcounter_event(enum hw_event_types event, u64 nr,
- int nmi, struct pt_regs *regs)
+static void __perf_swcounter_event(enum perf_event_types type, u32 event,
+ u64 nr, int nmi, struct pt_regs *regs)
{
struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context);
- perf_swcounter_ctx_event(&cpuctx->ctx, event, nr, nmi, regs);
- if (cpuctx->task_ctx)
- perf_swcounter_ctx_event(cpuctx->task_ctx, event, nr, nmi, regs);
+ perf_swcounter_ctx_event(&cpuctx->ctx, type, event, nr, nmi, regs);
+ if (cpuctx->task_ctx) {
+ perf_swcounter_ctx_event(cpuctx->task_ctx, type, event,
+ nr, nmi, regs);
+ }
put_cpu_var(perf_cpu_context);
}
+void perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs)
+{
+ __perf_swcounter_event(PERF_TYPE_SOFTWARE, event, nr, nmi, regs);
+}
+
static void perf_swcounter_read(struct perf_counter *counter)
{
perf_swcounter_update(counter);
@@ -1733,8 +1738,12 @@ static const struct hw_perf_counter_ops
#ifdef CONFIG_EVENT_PROFILE
void perf_tpcounter_event(int event_id)
{
- perf_swcounter_event(PERF_TP_EVENTS_MIN + event_id, 1, 1,
- task_pt_regs(current));
+ struct pt_regs *regs = get_irq_regs();
+
+ if (!regs)
+ regs = task_pt_regs(current);
+
+ __perf_swcounter_event(PERF_TYPE_TRACEPOINT, event_id, 1, 1, regs);
}
extern int ftrace_profile_enable(int);
@@ -1742,15 +1751,13 @@ extern void ftrace_profile_disable(int);
static void tp_perf_counter_destroy(struct perf_counter *counter)
{
- int event_id = counter->hw_event.type - PERF_TP_EVENTS_MIN;
-
- ftrace_profile_disable(event_id);
+ ftrace_profile_disable(counter->hw_event.event_id);
}
static const struct hw_perf_counter_ops *
tp_perf_counter_init(struct perf_counter *counter)
{
- int event_id = counter->hw_event.type - PERF_TP_EVENTS_MIN;
+ int event_id = counter->hw_event.event_id;
int ret;
ret = ftrace_profile_enable(event_id);
@@ -1758,6 +1765,7 @@ tp_perf_counter_init(struct perf_counter
return NULL;
counter->destroy = tp_perf_counter_destroy;
+ counter->hw.irq_period = counter->hw_event.irq_period;
return &perf_ops_generic;
}
@@ -1783,7 +1791,7 @@ sw_perf_counter_init(struct perf_counter
* to be kernel events, and page faults are never hypervisor
* events.
*/
- switch (counter->hw_event.type) {
+ switch (counter->hw_event.event_id) {
case PERF_COUNT_CPU_CLOCK:
hw_ops = &perf_ops_cpu_clock;
@@ -1813,9 +1821,6 @@ sw_perf_counter_init(struct perf_counter
if (!counter->hw_event.exclude_kernel)
hw_ops = &perf_ops_cpu_migrations;
break;
- default:
- hw_ops = tp_perf_counter_init(counter);
- break;
}
if (hw_ops)
@@ -1870,10 +1875,22 @@ perf_counter_alloc(struct perf_counter_h
counter->state = PERF_COUNTER_STATE_OFF;
hw_ops = NULL;
- if (!hw_event->raw && hw_event->type < 0)
- hw_ops = sw_perf_counter_init(counter);
- else
+
+ if (hw_event->raw)
+ hw_ops = hw_perf_counter_init(counter);
+ else switch (hw_event->type) {
+ case PERF_TYPE_HARDWARE:
hw_ops = hw_perf_counter_init(counter);
+ break;
+
+ case PERF_TYPE_SOFTWARE:
+ hw_ops = sw_perf_counter_init(counter);
+ break;
+
+ case PERF_TYPE_TRACEPOINT:
+ hw_ops = tp_perf_counter_init(counter);
+ break;
+ }
if (!hw_ops) {
kfree(counter);
--
^ permalink raw reply [flat|nested] 24+ messages in thread
* [RFC][PATCH 10/11] perfcounters: abstract wakeup flag setting in core to fix powerpc build
2009-03-17 21:56 [RFC][PATCH 00/11] tracepoint perf counter events and other stuff Peter Zijlstra
` (8 preceding siblings ...)
2009-03-17 21:56 ` [RFC][PATCH 09/11] perf_counter: revamp syscall input ABI Peter Zijlstra
@ 2009-03-17 21:56 ` Peter Zijlstra
2009-03-17 21:56 ` [RFC][PATCH 11/11] perf_counter: unify irq output code Peter Zijlstra
10 siblings, 0 replies; 24+ messages in thread
From: Peter Zijlstra @ 2009-03-17 21:56 UTC (permalink / raw)
To: mingo, paulus, rostedt; +Cc: linux-kernel, Peter Zijlstra
[-- Attachment #1: paulus_perfcounters-abstract_wakeup_flag_setting_in_core_to_fix_powerpc_build.patch --]
[-- Type: text/plain, Size: 5301 bytes --]
From: Paul Mackerras <paulus@samba.org>
Impact: build fix for powerpc
Commit bd753921015e7905 ("perf_counter: software counter event
infrastructure") introduced a use of TIF_PERF_COUNTERS into the core
perfcounter code. This breaks the build on powerpc because we use
a flag in a per-cpu area to signal wakeups on powerpc rather than
a thread_info flag, because the thread_info flags have to be
manipulated with atomic operations and are thus slower than per-cpu
flags.
This fixes it by changing the core to use an abstracted
set_perf_counter_pending() function, which is defined on x86 to set
the TIF_PERF_COUNTERS flag and on powerpc to set the per-cpu flag
(paca->perf_counter_pending). It changes the previous powerpc
definition of set_perf_counter_pending to not take an argument and
adds a clear_perf_counter_pending, so as to simplify the definition
on x86.
On x86, set_perf_counter_pending() is defined as a macro. Defining
it as a static inline in arch/x86/include/asm/perf_counters.h causes
compile failures because <asm/perf_counters.h> gets included early in
<linux/sched.h>, and the definitions of set_tsk_thread_flag etc. are
therefore not available in <asm/perf_counters.h>. (On powerpc this
problem is avoided by defining set_perf_counter_pending etc. in
<asm/hw_irq.h>.)
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
arch/powerpc/include/asm/hw_irq.h | 14 +++++++++++---
arch/powerpc/kernel/irq.c | 11 +++--------
arch/powerpc/kernel/perf_counter.c | 3 +--
arch/x86/include/asm/perf_counter.h | 3 +++
kernel/perf_counter.c | 2 +-
5 files changed, 19 insertions(+), 14 deletions(-)
Index: linux-2.6/arch/powerpc/include/asm/hw_irq.h
===================================================================
--- linux-2.6.orig/arch/powerpc/include/asm/hw_irq.h
+++ linux-2.6/arch/powerpc/include/asm/hw_irq.h
@@ -142,10 +142,17 @@ static inline unsigned long get_perf_cou
return x;
}
-static inline void set_perf_counter_pending(int x)
+static inline void set_perf_counter_pending(void)
{
asm volatile("stb %0,%1(13)" : :
- "r" (x),
+ "r" (1),
+ "i" (offsetof(struct paca_struct, perf_counter_pending)));
+}
+
+static inline void clear_perf_counter_pending(void)
+{
+ asm volatile("stb %0,%1(13)" : :
+ "r" (0),
"i" (offsetof(struct paca_struct, perf_counter_pending)));
}
@@ -158,7 +165,8 @@ static inline unsigned long get_perf_cou
return 0;
}
-static inline void set_perf_counter_pending(int x) {}
+static inline void set_perf_counter_pending(void) {}
+static inline void clear_perf_counter_pending(void) {}
static inline void perf_counter_do_pending(void) {}
#endif /* CONFIG_PERF_COUNTERS */
Index: linux-2.6/arch/powerpc/kernel/irq.c
===================================================================
--- linux-2.6.orig/arch/powerpc/kernel/irq.c
+++ linux-2.6/arch/powerpc/kernel/irq.c
@@ -104,13 +104,6 @@ static inline notrace void set_soft_enab
: : "r" (enable), "i" (offsetof(struct paca_struct, soft_enabled)));
}
-#ifdef CONFIG_PERF_COUNTERS
-notrace void __weak perf_counter_do_pending(void)
-{
- set_perf_counter_pending(0);
-}
-#endif
-
notrace void raw_local_irq_restore(unsigned long en)
{
/*
@@ -142,8 +135,10 @@ notrace void raw_local_irq_restore(unsig
iseries_handle_interrupts();
}
- if (get_perf_counter_pending())
+ if (get_perf_counter_pending()) {
+ clear_perf_counter_pending();
perf_counter_do_pending();
+ }
/*
* if (get_paca()->hard_enabled) return;
Index: linux-2.6/arch/powerpc/kernel/perf_counter.c
===================================================================
--- linux-2.6.orig/arch/powerpc/kernel/perf_counter.c
+++ linux-2.6/arch/powerpc/kernel/perf_counter.c
@@ -653,7 +653,6 @@ void perf_counter_do_pending(void)
struct cpu_hw_counters *cpuhw = &__get_cpu_var(cpu_hw_counters);
struct perf_counter *counter;
- set_perf_counter_pending(0);
for (i = 0; i < cpuhw->n_counters; ++i) {
counter = cpuhw->counter[i];
if (counter && counter->wakeup_pending) {
@@ -811,7 +810,7 @@ static void perf_counter_interrupt(struc
perf_counter_do_pending();
irq_exit();
} else {
- set_perf_counter_pending(1);
+ set_perf_counter_pending();
}
}
}
Index: linux-2.6/arch/x86/include/asm/perf_counter.h
===================================================================
--- linux-2.6.orig/arch/x86/include/asm/perf_counter.h
+++ linux-2.6/arch/x86/include/asm/perf_counter.h
@@ -84,6 +84,9 @@ union cpuid10_edx {
#define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b
#define X86_PMC_IDX_FIXED_BUS_CYCLES (X86_PMC_IDX_FIXED + 2)
+#define set_perf_counter_pending() \
+ set_tsk_thread_flag(current, TIF_PERF_COUNTERS);
+
#ifdef CONFIG_PERF_COUNTERS
extern void init_hw_perf_counters(void);
extern void perf_counters_lapic_init(int nmi);
Index: linux-2.6/kernel/perf_counter.c
===================================================================
--- linux-2.6.orig/kernel/perf_counter.c
+++ linux-2.6/kernel/perf_counter.c
@@ -1438,7 +1438,7 @@ static void perf_swcounter_interrupt(str
if (nmi) {
counter->wakeup_pending = 1;
- set_tsk_thread_flag(current, TIF_PERF_COUNTERS);
+ set_perf_counter_pending();
} else
wake_up(&counter->waitq);
}
--
^ permalink raw reply [flat|nested] 24+ messages in thread
* [RFC][PATCH 11/11] perf_counter: unify irq output code
2009-03-17 21:56 [RFC][PATCH 00/11] tracepoint perf counter events and other stuff Peter Zijlstra
` (9 preceding siblings ...)
2009-03-17 21:56 ` [RFC][PATCH 10/11] perfcounters: abstract wakeup flag setting in core to fix powerpc build Peter Zijlstra
@ 2009-03-17 21:56 ` Peter Zijlstra
2009-03-18 2:37 ` Paul Mackerras
10 siblings, 1 reply; 24+ messages in thread
From: Peter Zijlstra @ 2009-03-17 21:56 UTC (permalink / raw)
To: mingo, paulus, rostedt; +Cc: linux-kernel, Peter Zijlstra
[-- Attachment #1: perf_counter-output.patch --]
[-- Type: text/plain, Size: 8662 bytes --]
Having 3 slightly different copies of the same code around does nobody any
good. First step in revamping the output format.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
arch/powerpc/kernel/perf_counter.c | 51 -----------------
arch/x86/kernel/cpu/perf_counter.c | 53 ------------------
include/linux/perf_counter.h | 2
kernel/perf_counter.c | 106 +++++++++++++++++++------------------
4 files changed, 61 insertions(+), 151 deletions(-)
Index: linux-2.6/arch/powerpc/kernel/perf_counter.c
===================================================================
--- linux-2.6.orig/arch/powerpc/kernel/perf_counter.c
+++ linux-2.6/arch/powerpc/kernel/perf_counter.c
@@ -663,41 +663,6 @@ void perf_counter_do_pending(void)
}
/*
- * Record data for an irq counter.
- * This function was lifted from the x86 code; maybe it should
- * go in the core?
- */
-static void perf_store_irq_data(struct perf_counter *counter, u64 data)
-{
- struct perf_data *irqdata = counter->irqdata;
-
- if (irqdata->len > PERF_DATA_BUFLEN - sizeof(u64)) {
- irqdata->overrun++;
- } else {
- u64 *p = (u64 *) &irqdata->data[irqdata->len];
-
- *p = data;
- irqdata->len += sizeof(u64);
- }
-}
-
-/*
- * Record all the values of the counters in a group
- */
-static void perf_handle_group(struct perf_counter *counter)
-{
- struct perf_counter *leader, *sub;
-
- leader = counter->group_leader;
- list_for_each_entry(sub, &leader->sibling_list, list_entry) {
- if (sub != counter)
- sub->hw_ops->read(sub);
- perf_store_irq_data(counter, sub->hw_event.raw_event);
- perf_store_irq_data(counter, atomic64_read(&sub->count));
- }
-}
-
-/*
* A counter has overflowed; update its count and record
* things if requested. Note that interrupts are hard-disabled
* here so there is no possibility of being interrupted.
@@ -736,20 +701,8 @@ static void record_and_restart(struct pe
/*
* Finally record data if requested.
*/
- if (record) {
- switch (counter->hw_event.record_type) {
- case PERF_RECORD_SIMPLE:
- break;
- case PERF_RECORD_IRQ:
- perf_store_irq_data(counter, instruction_pointer(regs));
- counter->wakeup_pending = 1;
- break;
- case PERF_RECORD_GROUP:
- perf_handle_group(counter);
- counter->wakeup_pending = 1;
- break;
- }
- }
+ if (record)
+ perf_counter_output(counter, 1, regs);
}
/*
Index: linux-2.6/arch/x86/kernel/cpu/perf_counter.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/perf_counter.c
+++ linux-2.6/arch/x86/kernel/cpu/perf_counter.c
@@ -672,20 +672,6 @@ static void pmc_generic_disable(struct p
x86_perf_counter_update(counter, hwc, idx);
}
-static void perf_store_irq_data(struct perf_counter *counter, u64 data)
-{
- struct perf_data *irqdata = counter->irqdata;
-
- if (irqdata->len > PERF_DATA_BUFLEN - sizeof(u64)) {
- irqdata->overrun++;
- } else {
- u64 *p = (u64 *) &irqdata->data[irqdata->len];
-
- *p = data;
- irqdata->len += sizeof(u64);
- }
-}
-
/*
* Save and restart an expired counter. Called by NMI contexts,
* so it has to be careful about preempting normal counter ops:
@@ -702,22 +688,6 @@ static void perf_save_and_restart(struct
__pmc_generic_enable(counter, hwc, idx);
}
-static void
-perf_handle_group(struct perf_counter *sibling, u64 *status, u64 *overflown)
-{
- struct perf_counter *counter, *group_leader = sibling->group_leader;
-
- /*
- * Store sibling timestamps (if any):
- */
- list_for_each_entry(counter, &group_leader->sibling_list, list_entry) {
-
- x86_perf_counter_update(counter, &counter->hw, counter->hw.idx);
- perf_store_irq_data(sibling, counter->hw_event.raw_event);
- perf_store_irq_data(sibling, atomic64_read(&counter->count));
- }
-}
-
/*
* Maximum interrupt frequency of 100KHz per CPU
*/
@@ -752,28 +722,7 @@ again:
continue;
perf_save_and_restart(counter);
-
- switch (counter->hw_event.record_type) {
- case PERF_RECORD_SIMPLE:
- continue;
- case PERF_RECORD_IRQ:
- perf_store_irq_data(counter, instruction_pointer(regs));
- break;
- case PERF_RECORD_GROUP:
- perf_handle_group(counter, &status, &ack);
- break;
- }
- /*
- * From NMI context we cannot call into the scheduler to
- * do a task wakeup - but we mark these generic as
- * wakeup_pending and initate a wakeup callback:
- */
- if (nmi) {
- counter->wakeup_pending = 1;
- set_tsk_thread_flag(current, TIF_PERF_COUNTERS);
- } else {
- wake_up(&counter->waitq);
- }
+ perf_counter_output(counter, nmi, regs);
}
hw_perf_ack_status(ack);
Index: linux-2.6/include/linux/perf_counter.h
===================================================================
--- linux-2.6.orig/include/linux/perf_counter.h
+++ linux-2.6/include/linux/perf_counter.h
@@ -309,6 +309,8 @@ extern int hw_perf_group_sched_in(struct
struct perf_cpu_context *cpuctx,
struct perf_counter_context *ctx, int cpu);
+extern void perf_counter_output(struct perf_counter *counter,
+ int nmi, struct pt_regs *regs);
/*
* Return 1 for a software counter, 0 for a hardware counter
*/
Index: linux-2.6/kernel/perf_counter.c
===================================================================
--- linux-2.6.orig/kernel/perf_counter.c
+++ linux-2.6/kernel/perf_counter.c
@@ -1354,6 +1354,60 @@ static const struct file_operations perf
};
/*
+ * Output
+ */
+
+static void perf_counter_store_irq(struct perf_counter *counter, u64 data)
+{
+ struct perf_data *irqdata = counter->irqdata;
+
+ if (irqdata->len > PERF_DATA_BUFLEN - sizeof(u64)) {
+ irqdata->overrun++;
+ } else {
+ u64 *p = (u64 *) &irqdata->data[irqdata->len];
+
+ *p = data;
+ irqdata->len += sizeof(u64);
+ }
+}
+
+static void perf_counter_handle_group(struct perf_counter *counter)
+{
+ struct perf_counter *leader, *sub;
+
+ leader = counter->group_leader;
+ list_for_each_entry(sub, &leader->sibling_list, list_entry) {
+ if (sub != counter)
+ sub->hw_ops->read(sub);
+ perf_counter_store_irq(counter, sub->hw_event.raw_event);
+ perf_counter_store_irq(counter, atomic64_read(&sub->count));
+ }
+}
+
+void perf_counter_output(struct perf_counter *counter,
+ int nmi, struct pt_regs *regs)
+{
+ switch (counter->hw_event.record_type) {
+ case PERF_RECORD_SIMPLE:
+ return;
+
+ case PERF_RECORD_IRQ:
+ perf_counter_store_irq(counter, instruction_pointer(regs));
+ break;
+
+ case PERF_RECORD_GROUP:
+ perf_counter_handle_group(counter);
+ break;
+ }
+
+ if (nmi) {
+ counter->wakeup_pending = 1;
+ set_perf_counter_pending();
+ } else
+ wake_up(&counter->waitq);
+}
+
+/*
* Generic software counter infrastructure
*/
@@ -1395,54 +1449,6 @@ static void perf_swcounter_set_period(st
atomic64_set(&hwc->count, -left);
}
-static void perf_swcounter_store_irq(struct perf_counter *counter, u64 data)
-{
- struct perf_data *irqdata = counter->irqdata;
-
- if (irqdata->len > PERF_DATA_BUFLEN - sizeof(u64)) {
- irqdata->overrun++;
- } else {
- u64 *p = (u64 *) &irqdata->data[irqdata->len];
-
- *p = data;
- irqdata->len += sizeof(u64);
- }
-}
-
-static void perf_swcounter_handle_group(struct perf_counter *sibling)
-{
- struct perf_counter *counter, *group_leader = sibling->group_leader;
-
- list_for_each_entry(counter, &group_leader->sibling_list, list_entry) {
- counter->hw_ops->read(counter);
- perf_swcounter_store_irq(sibling, counter->hw_event.raw_event);
- perf_swcounter_store_irq(sibling, atomic64_read(&counter->count));
- }
-}
-
-static void perf_swcounter_interrupt(struct perf_counter *counter,
- int nmi, struct pt_regs *regs)
-{
- switch (counter->hw_event.record_type) {
- case PERF_RECORD_SIMPLE:
- break;
-
- case PERF_RECORD_IRQ:
- perf_swcounter_store_irq(counter, instruction_pointer(regs));
- break;
-
- case PERF_RECORD_GROUP:
- perf_swcounter_handle_group(counter);
- break;
- }
-
- if (nmi) {
- counter->wakeup_pending = 1;
- set_perf_counter_pending();
- } else
- wake_up(&counter->waitq);
-}
-
static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer)
{
struct perf_counter *counter;
@@ -1461,7 +1467,7 @@ static enum hrtimer_restart perf_swcount
regs = task_pt_regs(current);
if (regs)
- perf_swcounter_interrupt(counter, 0, regs);
+ perf_counter_output(counter, 0, regs);
hrtimer_forward_now(hrtimer, ns_to_ktime(counter->hw.irq_period));
@@ -1473,7 +1479,7 @@ static void perf_swcounter_overflow(stru
{
perf_swcounter_update(counter);
perf_swcounter_set_period(counter);
- perf_swcounter_interrupt(counter, nmi, regs);
+ perf_counter_output(counter, nmi, regs);
}
static int perf_swcounter_match(struct perf_counter *counter,
--
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [RFC][PATCH 03/11] ftrace: fix memory leak
2009-03-17 21:56 ` [RFC][PATCH 03/11] ftrace: fix memory leak Peter Zijlstra
@ 2009-03-17 22:49 ` Steven Rostedt
0 siblings, 0 replies; 24+ messages in thread
From: Steven Rostedt @ 2009-03-17 22:49 UTC (permalink / raw)
To: Peter Zijlstra; +Cc: Ingo Molnar, paulus, LKML, Tom Zanussi
Tom Zanussi beat you to it ;-)
http://marc.info/?l=linux-kernel&m=123727898221734&w=2
-- Steve
On Tue, 17 Mar 2009, Peter Zijlstra wrote:
> Don't return after an allocation without freeing -- fix it by moving the check
> up a few lines.
>
> Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
> ---
> kernel/trace/trace_events.c | 6 +++---
> 1 file changed, 3 insertions(+), 3 deletions(-)
>
> Index: linux-2.6/kernel/trace/trace_events.c
> ===================================================================
> --- linux-2.6.orig/kernel/trace/trace_events.c
> +++ linux-2.6/kernel/trace/trace_events.c
> @@ -380,15 +380,15 @@ event_format_read(struct file *filp, cha
> char *buf;
> int r;
>
> + if (*ppos)
> + return 0;
> +
> s = kmalloc(sizeof(*s), GFP_KERNEL);
> if (!s)
> return -ENOMEM;
>
> trace_seq_init(s);
>
> - if (*ppos)
> - return 0;
> -
> /* If any of the first writes fail, so will the show_format. */
>
> trace_seq_printf(s, "name: %s\n", call->name);
>
> --
>
>
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [RFC][PATCH 04/11] ftrace: provide an id file for each event
2009-03-17 21:56 ` [RFC][PATCH 04/11] ftrace: provide an id file for each event Peter Zijlstra
@ 2009-03-17 22:52 ` Steven Rostedt
0 siblings, 0 replies; 24+ messages in thread
From: Steven Rostedt @ 2009-03-17 22:52 UTC (permalink / raw)
To: Peter Zijlstra; +Cc: mingo, paulus, linux-kernel
On Tue, 17 Mar 2009, Peter Zijlstra wrote:
> Since not every event has a format file to read the id from, expose it
> explicitly in a separate file.
If the format is not exported, the ID is pretty useless. It's used for
binary formats.
But if you have a generic format defined in trace.h (I'll have to look
at your other patches), then those are exported in
kernel/trace/trace_event_types.h
-- Steve
>
> Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
> ---
> kernel/trace/trace_events.c | 36 ++++++++++++++++++++++++++++++++++++
> 1 file changed, 36 insertions(+)
>
> Index: linux-2.6/kernel/trace/trace_events.c
> ===================================================================
> --- linux-2.6.orig/kernel/trace/trace_events.c
> +++ linux-2.6/kernel/trace/trace_events.c
> @@ -414,6 +414,29 @@ event_format_read(struct file *filp, cha
> return r;
> }
>
> +static ssize_t
> +event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
> +{
> + struct ftrace_event_call *call = filp->private_data;
> + struct trace_seq *s;
> + int r;
> +
> + if (*ppos)
> + return 0;
> +
> + s = kmalloc(sizeof(*s), GFP_KERNEL);
> + if (!s)
> + return -ENOMEM;
> +
> + trace_seq_init(s);
> + trace_seq_printf(s, "%d\n", call->id);
> +
> + r = simple_read_from_buffer(ubuf, cnt, ppos,
> + s->buffer, s->len);
> + kfree(s);
> + return r;
> +}
> +
> static const struct seq_operations show_event_seq_ops = {
> .start = t_start,
> .next = t_next,
> @@ -454,6 +477,11 @@ static const struct file_operations ftra
> .read = event_format_read,
> };
>
> +static const struct file_operations ftrace_event_id_fops = {
> + .open = tracing_open_generic,
> + .read = event_id_read,
> +};
> +
> static struct dentry *event_trace_events_dir(void)
> {
> static struct dentry *d_tracer;
> @@ -552,6 +580,14 @@ event_create_dir(struct ftrace_event_cal
> "'%s/enable' entry\n", call->name);
> }
>
> + if (call->id) {
> + entry = debugfs_create_file("id", 0444, call->dir, call,
> + &ftrace_event_id_fops);
> + if (!entry)
> + pr_warning("Could not create debugfs '%s/id' entry\n",
> + call->name);
> + }
> +
> /* A trace may not want to export its format */
> if (!call->show_format)
> return 0;
>
> --
>
>
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [RFC][PATCH 05/11] ftrace: ensure every event gets an id
2009-03-17 21:56 ` [RFC][PATCH 05/11] ftrace: ensure every event gets an id Peter Zijlstra
@ 2009-03-17 22:54 ` Steven Rostedt
0 siblings, 0 replies; 24+ messages in thread
From: Steven Rostedt @ 2009-03-17 22:54 UTC (permalink / raw)
To: Peter Zijlstra; +Cc: mingo, paulus, linux-kernel
On Tue, 17 Mar 2009, Peter Zijlstra wrote:
> Previously only TRACE_EVENT events got ids, because only they generated
> raw output which needs to be demuxed from the trace.
>
> In order to provide a unique ID for each event, register everybody, regardless.
If this is about the TRACE_FORMAT, they use the TRACE_PRINT format, which
is exported.
Again, I need to look at the rest of the patches ;-)
-- Steve
>
> Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
> ---
> kernel/trace/trace_events_stage_3.h | 15 ++++++++++++++-
> kernel/trace/trace_output.c | 5 +++++
> 2 files changed, 19 insertions(+), 1 deletion(-)
>
> Index: linux-2.6/kernel/trace/trace_events_stage_3.h
> ===================================================================
> --- linux-2.6.orig/kernel/trace/trace_events_stage_3.h
> +++ linux-2.6/kernel/trace/trace_events_stage_3.h
> @@ -130,7 +130,19 @@ static void ftrace_unreg_event_##call(vo
> { \
> unregister_trace_##call(ftrace_event_##call); \
> } \
> -
> + \
> +static struct ftrace_event_call event_##call; \
> + \
> +static int ftrace_init_event_##call(void) \
> +{ \
> + int id; \
> + \
> + id = register_ftrace_event(NULL); \
> + if (!id) \
> + return -ENODEV; \
> + event_##call.id = id; \
> + return 0; \
> +}
>
> #undef TRACE_FORMAT
> #define TRACE_FORMAT(call, proto, args, fmt) \
> @@ -140,6 +152,7 @@ __attribute__((__aligned__(4))) \
> __attribute__((section("_ftrace_events"))) event_##call = { \
> .name = #call, \
> .system = __stringify(TRACE_SYSTEM), \
> + .raw_init = ftrace_init_event_##call, \
> .regfunc = ftrace_reg_event_##call, \
> .unregfunc = ftrace_unreg_event_##call, \
> }
> Index: linux-2.6/kernel/trace/trace_output.c
> ===================================================================
> --- linux-2.6.orig/kernel/trace/trace_output.c
> +++ linux-2.6/kernel/trace/trace_output.c
> @@ -444,6 +444,11 @@ int register_ftrace_event(struct trace_e
>
> mutex_lock(&trace_event_mutex);
>
> + if (!event) {
> + ret = next_event_type++;
> + goto out;
> + }
> +
> if (!event->type)
> event->type = next_event_type++;
> else if (event->type > __TRACE_LAST_TYPE) {
>
> --
>
>
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [RFC][PATCH 09/11] perf_counter: revamp syscall input ABI
2009-03-17 21:56 ` [RFC][PATCH 09/11] perf_counter: revamp syscall input ABI Peter Zijlstra
@ 2009-03-18 2:29 ` Paul Mackerras
2009-03-18 8:47 ` Peter Zijlstra
2009-03-18 4:31 ` Paul Mackerras
1 sibling, 1 reply; 24+ messages in thread
From: Paul Mackerras @ 2009-03-18 2:29 UTC (permalink / raw)
To: Peter Zijlstra; +Cc: mingo, rostedt, linux-kernel
Peter Zijlstra writes:
> The hardware/software classification in hw_event->type became a little strained
> due to the addition of tracepoint tracing.
>
> Instead split up the field and provide a type field to explicitly specify the
> counter type, while using the event_id field to specify which event to use.
>
> Raw counters still work as before, only the raw config now goes into raw_event.
Interesting idea, but why not also use it to express the distinction
between generic and raw hardware events ids? Why not add a
PERF_TYPE_RAW_HARDWARE to this list:
> + * hw_event.type
> + */
> +enum perf_event_types {
> + PERF_TYPE_HARDWARE = 0,
> + PERF_TYPE_SOFTWARE = 1,
> + PERF_TYPE_TRACEPOINT = 2,
> +};
and get rid of the raw bit? That way, hw_event.raw_event is unique
for every different event, whereas the way you have it, you still need
to include the raw bit to get a unique id.
Paul.
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [RFC][PATCH 11/11] perf_counter: unify irq output code
2009-03-17 21:56 ` [RFC][PATCH 11/11] perf_counter: unify irq output code Peter Zijlstra
@ 2009-03-18 2:37 ` Paul Mackerras
2009-03-18 5:28 ` Paul Mackerras
0 siblings, 1 reply; 24+ messages in thread
From: Paul Mackerras @ 2009-03-18 2:37 UTC (permalink / raw)
To: Peter Zijlstra; +Cc: mingo, rostedt, linux-kernel
Peter Zijlstra writes:
> Having 3 slightly different copies of the same code around does nobody any
> good. First step in revamping the output format.
Nice cleanup, and thanks for updating powerpc too. With this, I think
we'll need to update the powerpc perf_counter_interrupt a bit: we
might as well use get_perf_counter_pending() instead of the wakeup
variable now. I'll send an extra patch once I have done some testing.
Paul.
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [RFC][PATCH 09/11] perf_counter: revamp syscall input ABI
2009-03-17 21:56 ` [RFC][PATCH 09/11] perf_counter: revamp syscall input ABI Peter Zijlstra
2009-03-18 2:29 ` Paul Mackerras
@ 2009-03-18 4:31 ` Paul Mackerras
2009-03-18 8:55 ` Peter Zijlstra
1 sibling, 1 reply; 24+ messages in thread
From: Paul Mackerras @ 2009-03-18 4:31 UTC (permalink / raw)
To: Peter Zijlstra; +Cc: mingo, rostedt, linux-kernel
Peter Zijlstra writes:
> The hardware/software classification in hw_event->type became a little strained
> due to the addition of tracepoint tracing.
>
> Instead split up the field and provide a type field to explicitly specify the
> counter type, while using the event_id field to specify which event to use.
It would be nice if you didn't reuse the name 'type' but instead
called the field something different ('class', perhaps?) to force a
compile error on code that needs to be updated. For example, you
missed a spot in arch/powerpc/kernel/perf_counter.c and you need to
add on the patch below. (Thanks for updating powerpc BTW.)
Paul.
diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index efaeecf..88b72eb 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -602,7 +602,7 @@ hw_perf_counter_init(struct perf_counter *counter)
return NULL;
if ((s64)counter->hw_event.irq_period < 0)
return NULL;
- ev = counter->hw_event.type;
+ ev = counter->hw_event.event_id;
if (!counter->hw_event.raw) {
if (ev >= ppmu->n_generic ||
ppmu->generic_events[ev] == 0)
^ permalink raw reply related [flat|nested] 24+ messages in thread
* Re: [RFC][PATCH 11/11] perf_counter: unify irq output code
2009-03-18 2:37 ` Paul Mackerras
@ 2009-03-18 5:28 ` Paul Mackerras
0 siblings, 0 replies; 24+ messages in thread
From: Paul Mackerras @ 2009-03-18 5:28 UTC (permalink / raw)
To: Peter Zijlstra, mingo, rostedt, linux-kernel
I wrote:
> Nice cleanup, and thanks for updating powerpc too. With this, I think
> we'll need to update the powerpc perf_counter_interrupt a bit: we
> might as well use get_perf_counter_pending() instead of the wakeup
> variable now. I'll send an extra patch once I have done some testing.
This seems to work OK.
Paul.
diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index 88b72eb..830ca9c 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -723,8 +723,6 @@ static void perf_counter_interrupt(struct pt_regs *regs)
/* counter has overflowed */
found = 1;
record_and_restart(counter, val, regs);
- if (counter->wakeup_pending)
- need_wakeup = 1;
}
}
@@ -754,17 +752,14 @@ static void perf_counter_interrupt(struct pt_regs *regs)
/*
* If we need a wakeup, check whether interrupts were soft-enabled
* when we took the interrupt. If they were, we can wake stuff up
- * immediately; otherwise we'll have to set a flag and do the
- * wakeup when interrupts get soft-enabled.
+ * immediately; otherwise we'll have to do the wakeup when interrupts
+ * get soft-enabled.
*/
- if (need_wakeup) {
- if (regs->softe) {
- irq_enter();
- perf_counter_do_pending();
- irq_exit();
- } else {
- set_perf_counter_pending();
- }
+ if (get_perf_counter_pending() && regs->softe) {
+ irq_enter();
+ clear_perf_counter_pending();
+ perf_counter_do_pending();
+ irq_exit();
}
}
^ permalink raw reply related [flat|nested] 24+ messages in thread
* Re: [RFC][PATCH 09/11] perf_counter: revamp syscall input ABI
2009-03-18 2:29 ` Paul Mackerras
@ 2009-03-18 8:47 ` Peter Zijlstra
2009-03-18 22:15 ` Paul Mackerras
0 siblings, 1 reply; 24+ messages in thread
From: Peter Zijlstra @ 2009-03-18 8:47 UTC (permalink / raw)
To: Paul Mackerras; +Cc: mingo, rostedt, linux-kernel
On Wed, 2009-03-18 at 13:29 +1100, Paul Mackerras wrote:
> Peter Zijlstra writes:
>
> > The hardware/software classification in hw_event->type became a little strained
> > due to the addition of tracepoint tracing.
> >
> > Instead split up the field and provide a type field to explicitly specify the
> > counter type, while using the event_id field to specify which event to use.
> >
> > Raw counters still work as before, only the raw config now goes into raw_event.
>
> Interesting idea, but why not also use it to express the distinction
> between generic and raw hardware events ids? Why not add a
> PERF_TYPE_RAW_HARDWARE to this list:
>
> > + * hw_event.type
> > + */
> > +enum perf_event_types {
> > + PERF_TYPE_HARDWARE = 0,
> > + PERF_TYPE_SOFTWARE = 1,
> > + PERF_TYPE_TRACEPOINT = 2,
> > +};
>
> and get rid of the raw bit? That way, hw_event.raw_event is unique
> for every different event, whereas the way you have it, you still need
> to include the raw bit to get a unique id.
Ah, I thought we should keep a pure 64 bit raw value. You never know
what hardware will do.
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [RFC][PATCH 09/11] perf_counter: revamp syscall input ABI
2009-03-18 4:31 ` Paul Mackerras
@ 2009-03-18 8:55 ` Peter Zijlstra
0 siblings, 0 replies; 24+ messages in thread
From: Peter Zijlstra @ 2009-03-18 8:55 UTC (permalink / raw)
To: Paul Mackerras; +Cc: mingo, rostedt, linux-kernel
On Wed, 2009-03-18 at 15:31 +1100, Paul Mackerras wrote:
> Peter Zijlstra writes:
>
> > The hardware/software classification in hw_event->type became a little strained
> > due to the addition of tracepoint tracing.
> >
> > Instead split up the field and provide a type field to explicitly specify the
> > counter type, while using the event_id field to specify which event to use.
>
> It would be nice if you didn't reuse the name 'type' but instead
> called the field something different ('class', perhaps?) to force a
> compile error on code that needs to be updated. For example, you
> missed a spot in arch/powerpc/kernel/perf_counter.c and you need to
> add on the patch below. (Thanks for updating powerpc BTW.)
Yeah, thought of that after I did the patch... :-)
Thanks.
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [RFC][PATCH 09/11] perf_counter: revamp syscall input ABI
2009-03-18 8:47 ` Peter Zijlstra
@ 2009-03-18 22:15 ` Paul Mackerras
2009-03-19 11:41 ` Peter Zijlstra
0 siblings, 1 reply; 24+ messages in thread
From: Paul Mackerras @ 2009-03-18 22:15 UTC (permalink / raw)
To: Peter Zijlstra; +Cc: mingo, rostedt, linux-kernel
Peter Zijlstra writes:
> Ah, I thought we should keep a pure 64 bit raw value. You never know
> what hardware will do.
Oh I see, you use hw_event->raw_event if hw_event->raw is set. I
missed that before.
Still, you're putting that into hwc->config along with other bits like
ARCH_PERFMON_EVENTSEL_USR and ARCH_PERFMON_EVENTSEL_OS, so I would
think we could spare two bits for the type, leaving 62 bits for the
raw event code. And if that isn't enough, there's the
hw_event.extra_config_len field, which allows userspace to pass in
arbitrary amounts of extra PMU configuration data.
Paul.
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [RFC][PATCH 09/11] perf_counter: revamp syscall input ABI
2009-03-18 22:15 ` Paul Mackerras
@ 2009-03-19 11:41 ` Peter Zijlstra
2009-03-19 11:45 ` Peter Zijlstra
0 siblings, 1 reply; 24+ messages in thread
From: Peter Zijlstra @ 2009-03-19 11:41 UTC (permalink / raw)
To: Paul Mackerras; +Cc: mingo, rostedt, linux-kernel
On Thu, 2009-03-19 at 09:15 +1100, Paul Mackerras wrote:
> Peter Zijlstra writes:
>
> > Ah, I thought we should keep a pure 64 bit raw value. You never know
> > what hardware will do.
>
> Oh I see, you use hw_event->raw_event if hw_event->raw is set. I
> missed that before.
>
> Still, you're putting that into hwc->config along with other bits like
> ARCH_PERFMON_EVENTSEL_USR and ARCH_PERFMON_EVENTSEL_OS, so I would
> think we could spare two bits for the type, leaving 62 bits for the
> raw event code. And if that isn't enough, there's the
> hw_event.extra_config_len field, which allows userspace to pass in
> arbitrary amounts of extra PMU configuration data.
Good point, overflow interrupt, and usr/os/hv event filter and enable
bits are usually in the config word.
OK, how about the below? I didn't cut it to 2 bits, as that would
already exhaust the TYPE space -- then again, 60 does feel cramped a
bit..
Index: linux-2.6/include/linux/perf_counter.h
===================================================================
--- linux-2.6.orig/include/linux/perf_counter.h
+++ linux-2.6/include/linux/perf_counter.h
@@ -27,6 +27,12 @@ enum perf_event_types {
PERF_TYPE_HARDWARE = 0,
PERF_TYPE_SOFTWARE = 1,
PERF_TYPE_TRACEPOINT = 2,
+
+ /*
+ * available TYPE space, raw is the max value.
+ */
+
+ PERF_TYPE_RAW = 15,
};
/*
@@ -79,14 +85,8 @@ enum perf_counter_record_type {
* Hardware event to monitor via a performance monitoring counter:
*/
struct perf_counter_hw_event {
- union {
- __u64 raw_event;
- struct {
- __u64 event_id : 32,
- type : 8,
- __reserved_0 : 24;
- };
- };
+ __u64 event_id : 60,
+ type : 4;
__u64 irq_period;
__u64 record_type;
@@ -94,7 +94,6 @@ struct perf_counter_hw_event {
__u64 disabled : 1, /* off by default */
nmi : 1, /* NMI sampling */
- raw : 1, /* raw event type */
inherit : 1, /* children inherit it */
pinned : 1, /* must always be on PMU */
exclusive : 1, /* only group on PMU */
@@ -103,7 +102,7 @@ struct perf_counter_hw_event {
exclude_hv : 1, /* ditto hypervisor */
exclude_idle : 1, /* don't count when idle */
- __reserved_1 : 54;
+ __reserved_1 : 55;
__u32 extra_config_len;
__u32 __reserved_4;
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [RFC][PATCH 09/11] perf_counter: revamp syscall input ABI
2009-03-19 11:41 ` Peter Zijlstra
@ 2009-03-19 11:45 ` Peter Zijlstra
0 siblings, 0 replies; 24+ messages in thread
From: Peter Zijlstra @ 2009-03-19 11:45 UTC (permalink / raw)
To: Paul Mackerras; +Cc: mingo, rostedt, linux-kernel
On Thu, 2009-03-19 at 12:41 +0100, Peter Zijlstra wrote:
> On Thu, 2009-03-19 at 09:15 +1100, Paul Mackerras wrote:
> > Peter Zijlstra writes:
> >
> > > Ah, I thought we should keep a pure 64 bit raw value. You never know
> > > what hardware will do.
> >
> > Oh I see, you use hw_event->raw_event if hw_event->raw is set. I
> > missed that before.
> >
> > Still, you're putting that into hwc->config along with other bits like
> > ARCH_PERFMON_EVENTSEL_USR and ARCH_PERFMON_EVENTSEL_OS, so I would
> > think we could spare two bits for the type, leaving 62 bits for the
> > raw event code. And if that isn't enough, there's the
> > hw_event.extra_config_len field, which allows userspace to pass in
> > arbitrary amounts of extra PMU configuration data.
>
> Good point, overflow interrupt, and usr/os/hv event filter and enable
> bits are usually in the config word.
>
> OK, how about the below? I didn't cut it to 2 bits, as that would
> already exhaust the TYPE space -- then again, 60 does feel cramped a
> bit..
Hmm, we could play dirty and do:
union {
struct {
__u64 raw_event_id : 63,
raw : 1;
};
struct {
__u64 event_id : 56,
type : 8;
};
};
Giving us 7 bit type space.
^ permalink raw reply [flat|nested] 24+ messages in thread
end of thread, other threads:[~2009-03-19 11:46 UTC | newest]
Thread overview: 24+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-03-17 21:56 [RFC][PATCH 00/11] tracepoint perf counter events and other stuff Peter Zijlstra
2009-03-17 21:56 ` [RFC][PATCH 01/11] perf_counter: fix uninitialized usage of event_list Peter Zijlstra
2009-03-17 21:56 ` [RFC][PATCH 02/11] perf_counter: generic context switch event Peter Zijlstra
2009-03-17 21:56 ` [RFC][PATCH 03/11] ftrace: fix memory leak Peter Zijlstra
2009-03-17 22:49 ` Steven Rostedt
2009-03-17 21:56 ` [RFC][PATCH 04/11] ftrace: provide an id file for each event Peter Zijlstra
2009-03-17 22:52 ` Steven Rostedt
2009-03-17 21:56 ` [RFC][PATCH 05/11] ftrace: ensure every event gets an id Peter Zijlstra
2009-03-17 22:54 ` Steven Rostedt
2009-03-17 21:56 ` [RFC][PATCH 06/11] ftrace: event profile hooks Peter Zijlstra
2009-03-17 21:56 ` [RFC][PATCH 07/11] perf_counter: fix up counter free paths Peter Zijlstra
2009-03-17 21:56 ` [RFC][PATCH 08/11] perf_counter: hook up the tracepoint events Peter Zijlstra
2009-03-17 21:56 ` [RFC][PATCH 09/11] perf_counter: revamp syscall input ABI Peter Zijlstra
2009-03-18 2:29 ` Paul Mackerras
2009-03-18 8:47 ` Peter Zijlstra
2009-03-18 22:15 ` Paul Mackerras
2009-03-19 11:41 ` Peter Zijlstra
2009-03-19 11:45 ` Peter Zijlstra
2009-03-18 4:31 ` Paul Mackerras
2009-03-18 8:55 ` Peter Zijlstra
2009-03-17 21:56 ` [RFC][PATCH 10/11] perfcounters: abstract wakeup flag setting in core to fix powerpc build Peter Zijlstra
2009-03-17 21:56 ` [RFC][PATCH 11/11] perf_counter: unify irq output code Peter Zijlstra
2009-03-18 2:37 ` Paul Mackerras
2009-03-18 5:28 ` Paul Mackerras
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).