* [PATCH 00/10] perf tracepoint and output optimizations
@ 2010-05-21 9:02 Peter Zijlstra
2010-05-21 9:02 ` [PATCH 01/10] perf, trace: Remove IRQ-disable from perf/tracepoint interaction Peter Zijlstra
` (9 more replies)
0 siblings, 10 replies; 53+ messages in thread
From: Peter Zijlstra @ 2010-05-21 9:02 UTC (permalink / raw)
To: Ingo Molnar, Paul Mackerras, Arnaldo Carvalho de Melo
Cc: Frederic Weisbecker, Steven Rostedt, LKML, Peter Zijlstra
A number of patches that rework the perf-tracepoint infrastructure and
optimize some of the generic perf output paths.
* [PATCH 01/10] perf, trace: Remove IRQ-disable from perf/tracepoint interaction
2010-05-21 9:02 [PATCH 00/10] perf tracepoint and output optimizations Peter Zijlstra
@ 2010-05-21 9:02 ` Peter Zijlstra
2010-05-21 17:43 ` Frank Ch. Eigler
` (2 more replies)
2010-05-21 9:02 ` [PATCH 02/10] perf, trace: Use per-tracepoint-per-cpu hlist to track events Peter Zijlstra
` (8 subsequent siblings)
9 siblings, 3 replies; 53+ messages in thread
From: Peter Zijlstra @ 2010-05-21 9:02 UTC (permalink / raw)
To: Ingo Molnar, Paul Mackerras, Arnaldo Carvalho de Melo
Cc: Frederic Weisbecker, Steven Rostedt, LKML, Peter Zijlstra
[-- Attachment #1: perf-trace-fix-irq.patch --]
[-- Type: text/plain, Size: 11059 bytes --]
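In short, the change below stops disabling IRQs around the per-cpu trace
buffers and instead keeps one buffer per swevent recursion context (task,
softirq, hardirq, NMI), indexed by the rctx value; the entry's irq flags are
now taken from the passed-in pt_regs. A minimal userspace sketch of the idea,
with illustrative names only (not the kernel's):

#include <stdio.h>

enum ctx { CTX_TASK, CTX_SOFTIRQ, CTX_HARDIRQ, CTX_NMI, NR_CTX };

static char perf_buf[NR_CTX][128];      /* perf_trace_buf[4] analogue */
static int recursion[NR_CTX];           /* per-context recursion counts */

static int get_recursion_context(enum ctx c)
{
        if (recursion[c])
                return -1;              /* recursing in one context: drop */
        recursion[c]++;
        return c;                       /* rctx doubles as the buffer index */
}

static void put_recursion_context(int rctx)
{
        recursion[rctx]--;
}

int main(void)
{
        /*
         * An NMI interrupting this write would get CTX_NMI and thus a
         * different buffer, so no IRQ disabling is needed for exclusion.
         */
        int rctx = get_recursion_context(CTX_TASK);

        if (rctx < 0)
                return 1;
        snprintf(perf_buf[rctx], sizeof(perf_buf[rctx]), "event payload");
        printf("buffer %d: %s\n", rctx, perf_buf[rctx]);
        put_recursion_context(rctx);
        return 0;
}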
Acked-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1274259525.5605.10352.camel@twins>
---
include/linux/ftrace_event.h | 9 +---
include/trace/ftrace.h | 17 +++------
kernel/trace/trace_event_perf.c | 73 +++++++++++++++-------------------------
kernel/trace/trace_kprobe.c | 10 ++---
kernel/trace/trace_syscalls.c | 10 ++---
5 files changed, 47 insertions(+), 72 deletions(-)
Index: linux-2.6/include/linux/ftrace_event.h
===================================================================
--- linux-2.6.orig/include/linux/ftrace_event.h
+++ linux-2.6/include/linux/ftrace_event.h
@@ -197,20 +197,17 @@ extern void perf_trace_disable(int event
extern int ftrace_profile_set_filter(struct perf_event *event, int event_id,
char *filter_str);
extern void ftrace_profile_free_filter(struct perf_event *event);
-extern void *
-perf_trace_buf_prepare(int size, unsigned short type, int *rctxp,
- unsigned long *irq_flags);
+extern void *perf_trace_buf_prepare(int size, unsigned short type,
+ struct pt_regs *regs, int *rctxp);
static inline void
perf_trace_buf_submit(void *raw_data, int size, int rctx, u64 addr,
- u64 count, unsigned long irq_flags, struct pt_regs *regs,
- void *event)
+ u64 count, struct pt_regs *regs, void *event)
{
struct trace_entry *entry = raw_data;
perf_tp_event(entry->type, addr, count, raw_data, size, regs, event);
perf_swevent_put_recursion_context(rctx);
- local_irq_restore(irq_flags);
}
#endif
Index: linux-2.6/include/trace/ftrace.h
===================================================================
--- linux-2.6.orig/include/trace/ftrace.h
+++ linux-2.6/include/trace/ftrace.h
@@ -768,7 +768,6 @@ perf_trace_templ_##call(struct ftrace_ev
struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\
struct ftrace_raw_##call *entry; \
u64 __addr = 0, __count = 1; \
- unsigned long irq_flags; \
int __entry_size; \
int __data_size; \
int rctx; \
@@ -781,17 +780,18 @@ perf_trace_templ_##call(struct ftrace_ev
if (WARN_ONCE(__entry_size > PERF_MAX_TRACE_SIZE, \
"profile buffer not large enough")) \
return; \
+ \
entry = (struct ftrace_raw_##call *)perf_trace_buf_prepare( \
- __entry_size, event_call->id, &rctx, &irq_flags); \
+ __entry_size, event_call->id, __regs, &rctx); \
if (!entry) \
return; \
+ \
tstruct \
\
{ assign; } \
\
perf_trace_buf_submit(entry, __entry_size, rctx, __addr, \
- __count, irq_flags, __regs, \
- event_call->perf_data); \
+ __count, __regs, event_call->perf_data); \
}
#undef DEFINE_EVENT
@@ -799,13 +799,10 @@ perf_trace_templ_##call(struct ftrace_ev
static notrace void perf_trace_##call(proto) \
{ \
struct ftrace_event_call *event_call = &event_##call; \
- struct pt_regs *__regs = &get_cpu_var(perf_trace_regs); \
- \
- perf_fetch_caller_regs(__regs, 1); \
- \
- perf_trace_templ_##template(event_call, __regs, args); \
+ struct pt_regs __regs; \
\
- put_cpu_var(perf_trace_regs); \
+ perf_fetch_caller_regs(&__regs, 1); \
+ perf_trace_templ_##template(event_call, &__regs, args); \
}
#undef DEFINE_EVENT_PRINT
Index: linux-2.6/kernel/trace/trace_event_perf.c
===================================================================
--- linux-2.6.orig/kernel/trace/trace_event_perf.c
+++ linux-2.6/kernel/trace/trace_event_perf.c
@@ -9,13 +9,9 @@
#include <linux/kprobes.h>
#include "trace.h"
-DEFINE_PER_CPU(struct pt_regs, perf_trace_regs);
-EXPORT_PER_CPU_SYMBOL_GPL(perf_trace_regs);
-
EXPORT_SYMBOL_GPL(perf_arch_fetch_caller_regs);
-static char *perf_trace_buf;
-static char *perf_trace_buf_nmi;
+static char *perf_trace_buf[4];
/*
* Force it to be aligned to unsigned long to avoid misaligned accesses
@@ -29,7 +25,6 @@ static int total_ref_count;
static int perf_trace_event_enable(struct ftrace_event_call *event, void *data)
{
- char *buf;
int ret = -ENOMEM;
if (event->perf_refcount++ > 0) {
@@ -38,17 +33,16 @@ static int perf_trace_event_enable(struc
}
if (!total_ref_count) {
- buf = (char *)alloc_percpu(perf_trace_t);
- if (!buf)
- goto fail_buf;
-
- rcu_assign_pointer(perf_trace_buf, buf);
+ char *buf;
+ int i;
- buf = (char *)alloc_percpu(perf_trace_t);
- if (!buf)
- goto fail_buf_nmi;
+ for (i = 0; i < 4; i++) {
+ buf = (char *)alloc_percpu(perf_trace_t);
+ if (!buf)
+ goto fail_buf;
- rcu_assign_pointer(perf_trace_buf_nmi, buf);
+ rcu_assign_pointer(perf_trace_buf[i], buf);
+ }
}
ret = event->perf_event_enable(event);
@@ -58,14 +52,15 @@ static int perf_trace_event_enable(struc
return 0;
}
-fail_buf_nmi:
+fail_buf:
if (!total_ref_count) {
- free_percpu(perf_trace_buf_nmi);
- free_percpu(perf_trace_buf);
- perf_trace_buf_nmi = NULL;
- perf_trace_buf = NULL;
+ int i;
+
+ for (i = 0; i < 4; i++) {
+ free_percpu(perf_trace_buf[i]);
+ perf_trace_buf[i] = NULL;
+ }
}
-fail_buf:
event->perf_refcount--;
return ret;
@@ -91,19 +86,19 @@ int perf_trace_enable(int event_id, void
static void perf_trace_event_disable(struct ftrace_event_call *event)
{
- char *buf, *nmi_buf;
-
if (--event->perf_refcount > 0)
return;
event->perf_event_disable(event);
if (!--total_ref_count) {
- buf = perf_trace_buf;
- rcu_assign_pointer(perf_trace_buf, NULL);
+ char *buf[4];
+ int i;
- nmi_buf = perf_trace_buf_nmi;
- rcu_assign_pointer(perf_trace_buf_nmi, NULL);
+ for (i = 0; i < 4; i++) {
+ buf[i] = perf_trace_buf[i];
+ rcu_assign_pointer(perf_trace_buf[i], NULL);
+ }
/*
* Ensure every events in profiling have finished before
@@ -111,8 +106,8 @@ static void perf_trace_event_disable(str
*/
synchronize_sched();
- free_percpu(buf);
- free_percpu(nmi_buf);
+ for (i = 0; i < 4; i++)
+ free_percpu(buf[i]);
}
}
@@ -132,47 +127,37 @@ void perf_trace_disable(int event_id)
}
__kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
- int *rctxp, unsigned long *irq_flags)
+ struct pt_regs *regs, int *rctxp)
{
struct trace_entry *entry;
char *trace_buf, *raw_data;
- int pc, cpu;
+ int pc;
BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long));
pc = preempt_count();
- /* Protect the per cpu buffer, begin the rcu read side */
- local_irq_save(*irq_flags);
-
*rctxp = perf_swevent_get_recursion_context();
if (*rctxp < 0)
goto err_recursion;
- cpu = smp_processor_id();
-
- if (in_nmi())
- trace_buf = rcu_dereference_sched(perf_trace_buf_nmi);
- else
- trace_buf = rcu_dereference_sched(perf_trace_buf);
-
+ trace_buf = rcu_dereference_sched(perf_trace_buf[*rctxp]);
if (!trace_buf)
goto err;
- raw_data = per_cpu_ptr(trace_buf, cpu);
+ raw_data = per_cpu_ptr(trace_buf, smp_processor_id());
/* zero the dead bytes from align to not leak stack to user */
memset(&raw_data[size - sizeof(u64)], 0, sizeof(u64));
entry = (struct trace_entry *)raw_data;
- tracing_generic_entry_update(entry, *irq_flags, pc);
+ tracing_generic_entry_update(entry, regs->flags, pc);
entry->type = type;
return raw_data;
err:
perf_swevent_put_recursion_context(*rctxp);
err_recursion:
- local_irq_restore(*irq_flags);
return NULL;
}
EXPORT_SYMBOL_GPL(perf_trace_buf_prepare);
Index: linux-2.6/kernel/trace/trace_kprobe.c
===================================================================
--- linux-2.6.orig/kernel/trace/trace_kprobe.c
+++ linux-2.6/kernel/trace/trace_kprobe.c
@@ -1343,7 +1343,6 @@ static __kprobes void kprobe_perf_func(s
struct kprobe_trace_entry_head *entry;
u8 *data;
int size, __size, i;
- unsigned long irq_flags;
int rctx;
__size = sizeof(*entry) + tp->size;
@@ -1353,7 +1352,7 @@ static __kprobes void kprobe_perf_func(s
"profile buffer not large enough"))
return;
- entry = perf_trace_buf_prepare(size, call->id, &rctx, &irq_flags);
+ entry = perf_trace_buf_prepare(size, call->id, regs, &rctx);
if (!entry)
return;
@@ -1362,7 +1361,7 @@ static __kprobes void kprobe_perf_func(s
for (i = 0; i < tp->nr_args; i++)
call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset);
- perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, irq_flags, regs, call->perf_data);
+ perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, call->perf_data);
}
/* Kretprobe profile handler */
@@ -1374,7 +1373,6 @@ static __kprobes void kretprobe_perf_fun
struct kretprobe_trace_entry_head *entry;
u8 *data;
int size, __size, i;
- unsigned long irq_flags;
int rctx;
__size = sizeof(*entry) + tp->size;
@@ -1384,7 +1382,7 @@ static __kprobes void kretprobe_perf_fun
"profile buffer not large enough"))
return;
- entry = perf_trace_buf_prepare(size, call->id, &rctx, &irq_flags);
+ entry = perf_trace_buf_prepare(size, call->id, regs, &rctx);
if (!entry)
return;
@@ -1395,7 +1393,7 @@ static __kprobes void kretprobe_perf_fun
call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset);
perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1,
- irq_flags, regs, call->perf_data);
+ regs, call->perf_data);
}
static int probe_perf_enable(struct ftrace_event_call *call)
Index: linux-2.6/kernel/trace/trace_syscalls.c
===================================================================
--- linux-2.6.orig/kernel/trace/trace_syscalls.c
+++ linux-2.6/kernel/trace/trace_syscalls.c
@@ -438,7 +438,6 @@ static void perf_syscall_enter(struct pt
{
struct syscall_metadata *sys_data;
struct syscall_trace_enter *rec;
- unsigned long flags;
int syscall_nr;
int rctx;
int size;
@@ -461,14 +460,14 @@ static void perf_syscall_enter(struct pt
return;
rec = (struct syscall_trace_enter *)perf_trace_buf_prepare(size,
- sys_data->enter_event->id, &rctx, &flags);
+ sys_data->enter_event->id, regs, &rctx);
if (!rec)
return;
rec->nr = syscall_nr;
syscall_get_arguments(current, regs, 0, sys_data->nb_args,
(unsigned long *)&rec->args);
- perf_trace_buf_submit(rec, size, rctx, 0, 1, flags, regs,
+ perf_trace_buf_submit(rec, size, rctx, 0, 1, regs,
sys_data->enter_event->perf_data);
}
@@ -511,7 +510,6 @@ static void perf_syscall_exit(struct pt_
{
struct syscall_metadata *sys_data;
struct syscall_trace_exit *rec;
- unsigned long flags;
int syscall_nr;
int rctx;
int size;
@@ -537,14 +535,14 @@ static void perf_syscall_exit(struct pt_
return;
rec = (struct syscall_trace_exit *)perf_trace_buf_prepare(size,
- sys_data->exit_event->id, &rctx, &flags);
+ sys_data->exit_event->id, regs, &rctx);
if (!rec)
return;
rec->nr = syscall_nr;
rec->ret = syscall_get_return_value(current, regs);
- perf_trace_buf_submit(rec, size, rctx, 0, 1, flags, regs,
+ perf_trace_buf_submit(rec, size, rctx, 0, 1, regs,
sys_data->exit_event->perf_data);
}
* [PATCH 02/10] perf, trace: Use per-tracepoint-per-cpu hlist to track events
2010-05-21 9:02 [PATCH 00/10] perf tracepoint and output optimizations Peter Zijlstra
2010-05-21 9:02 ` [PATCH 01/10] perf, trace: Remove IRQ-disable from perf/tracepoint interaction Peter Zijlstra
@ 2010-05-21 9:02 ` Peter Zijlstra
2010-05-21 9:40 ` Frederic Weisbecker
` (3 more replies)
2010-05-21 9:02 ` [PATCH 03/10] perf: Ensure IOC_OUTPUT isn't used to create multi-writer buffers Peter Zijlstra
` (7 subsequent siblings)
9 siblings, 4 replies; 53+ messages in thread
From: Peter Zijlstra @ 2010-05-21 9:02 UTC (permalink / raw)
To: Ingo Molnar, Paul Mackerras, Arnaldo Carvalho de Melo
Cc: Frederic Weisbecker, Steven Rostedt, LKML, Peter Zijlstra
[-- Attachment #1: perf-trace-per-cpu-fold.patch --]
[-- Type: text/plain, Size: 17665 bytes --]
Avoid the swevent hash-table by using per-tracepoint hlists.
Also, avoid conditionals on the fast path by ordering against probe
unregister so that we never get on the callback path without the data
being there.
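As a rough userspace model of the new data structure (illustrative names;
plain pointers stand in for the RCU hlist primitives): each tracepoint owns a
per-cpu list of the perf events attached to it, and the callback walks exactly
that list instead of hashing into the global swevent table.

#include <stdio.h>

#define NR_CPUS 2

struct p_event {                        /* stands in for struct perf_event */
        int id;
        struct p_event *next;           /* hlist_entry analogue */
};

struct tp_event {                       /* stands in for ftrace_event_call */
        struct p_event *perf_events[NR_CPUS];   /* per-cpu list heads */
};

static void tp_enable(struct tp_event *tp, struct p_event *e, int cpu)
{
        /* hlist_add_head_rcu() analogue */
        e->next = tp->perf_events[cpu];
        tp->perf_events[cpu] = e;
}

static void tp_fire(struct tp_event *tp, int cpu)
{
        struct p_event *e;

        /* the callback walks only this tracepoint's per-cpu list */
        for (e = tp->perf_events[cpu]; e; e = e->next)
                printf("deliver to perf event %d on cpu %d\n", e->id, cpu);
}

int main(void)
{
        struct tp_event tp = { { NULL } };
        struct p_event a = { .id = 1 }, b = { .id = 2 };

        tp_enable(&tp, &a, 0);
        tp_enable(&tp, &b, 0);
        tp_fire(&tp, 0);        /* delivers to event 2, then event 1 */
        return 0;
}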
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
include/linux/ftrace_event.h | 16 ++---
include/linux/perf_event.h | 6 +
include/trace/ftrace.h | 4 -
kernel/perf_event.c | 94 +++++++++++++++--------------
kernel/trace/trace_event_perf.c | 127 +++++++++++++++++++++-------------------
kernel/trace/trace_kprobe.c | 9 +-
kernel/trace/trace_syscalls.c | 11 ++-
7 files changed, 143 insertions(+), 124 deletions(-)
Index: linux-2.6/include/linux/ftrace_event.h
===================================================================
--- linux-2.6.orig/include/linux/ftrace_event.h
+++ linux-2.6/include/linux/ftrace_event.h
@@ -133,7 +133,7 @@ struct ftrace_event_call {
void *data;
int perf_refcount;
- void *perf_data;
+ struct hlist_head *perf_events;
int (*perf_event_enable)(struct ftrace_event_call *);
void (*perf_event_disable)(struct ftrace_event_call *);
};
@@ -192,9 +192,11 @@ struct perf_event;
DECLARE_PER_CPU(struct pt_regs, perf_trace_regs);
-extern int perf_trace_enable(int event_id, void *data);
-extern void perf_trace_disable(int event_id);
-extern int ftrace_profile_set_filter(struct perf_event *event, int event_id,
+extern int perf_trace_init(struct perf_event *event);
+extern void perf_trace_destroy(struct perf_event *event);
+extern int perf_trace_enable(struct perf_event *event);
+extern void perf_trace_disable(struct perf_event *event);
+extern int ftrace_profile_set_filter(struct perf_event *event, int event_id,
char *filter_str);
extern void ftrace_profile_free_filter(struct perf_event *event);
extern void *perf_trace_buf_prepare(int size, unsigned short type,
@@ -202,11 +204,9 @@ extern void *perf_trace_buf_prepare(int
static inline void
perf_trace_buf_submit(void *raw_data, int size, int rctx, u64 addr,
- u64 count, struct pt_regs *regs, void *event)
+ u64 count, struct pt_regs *regs, void *head)
{
- struct trace_entry *entry = raw_data;
-
- perf_tp_event(entry->type, addr, count, raw_data, size, regs, event);
+ perf_tp_event(addr, count, raw_data, size, regs, head);
perf_swevent_put_recursion_context(rctx);
}
#endif
Index: linux-2.6/include/trace/ftrace.h
===================================================================
--- linux-2.6.orig/include/trace/ftrace.h
+++ linux-2.6/include/trace/ftrace.h
@@ -768,6 +768,7 @@ perf_trace_templ_##call(struct ftrace_ev
struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\
struct ftrace_raw_##call *entry; \
u64 __addr = 0, __count = 1; \
+ struct hlist_head *head; \
int __entry_size; \
int __data_size; \
int rctx; \
@@ -790,8 +791,9 @@ perf_trace_templ_##call(struct ftrace_ev
\
{ assign; } \
\
+ head = per_cpu_ptr(event_call->perf_events, smp_processor_id());\
perf_trace_buf_submit(entry, __entry_size, rctx, __addr, \
- __count, __regs, event_call->perf_data); \
+ __count, __regs, head); \
}
#undef DEFINE_EVENT
Index: linux-2.6/kernel/trace/trace_event_perf.c
===================================================================
--- linux-2.6.orig/kernel/trace/trace_event_perf.c
+++ linux-2.6/kernel/trace/trace_event_perf.c
@@ -23,14 +23,25 @@ typedef typeof(unsigned long [PERF_MAX_T
/* Count the events in use (per event id, not per instance) */
static int total_ref_count;
-static int perf_trace_event_enable(struct ftrace_event_call *event, void *data)
+static int perf_trace_event_init(struct ftrace_event_call *tp_event,
+ struct perf_event *p_event)
{
+ struct hlist_head *list;
int ret = -ENOMEM;
+ int cpu;
- if (event->perf_refcount++ > 0) {
- event->perf_data = NULL;
+ p_event->tp_event = tp_event;
+ if (tp_event->perf_refcount++ > 0)
return 0;
- }
+
+ list = alloc_percpu(struct hlist_head);
+ if (!list)
+ goto fail;
+
+ for_each_possible_cpu(cpu)
+ INIT_HLIST_HEAD(per_cpu_ptr(list, cpu));
+
+ tp_event->perf_events = list;
if (!total_ref_count) {
char *buf;
@@ -39,20 +50,20 @@ static int perf_trace_event_enable(struc
for (i = 0; i < 4; i++) {
buf = (char *)alloc_percpu(perf_trace_t);
if (!buf)
- goto fail_buf;
+ goto fail;
- rcu_assign_pointer(perf_trace_buf[i], buf);
+ perf_trace_buf[i] = buf;
}
}
- ret = event->perf_event_enable(event);
- if (!ret) {
- event->perf_data = data;
- total_ref_count++;
- return 0;
- }
+ ret = tp_event->perf_event_enable(tp_event);
+ if (ret)
+ goto fail;
-fail_buf:
+ total_ref_count++;
+ return 0;
+
+fail:
if (!total_ref_count) {
int i;
@@ -61,21 +72,26 @@ fail_buf:
perf_trace_buf[i] = NULL;
}
}
- event->perf_refcount--;
+
+ if (!--tp_event->perf_refcount) {
+ free_percpu(tp_event->perf_events);
+ tp_event->perf_events = NULL;
+ }
return ret;
}
-int perf_trace_enable(int event_id, void *data)
+int perf_trace_init(struct perf_event *p_event)
{
- struct ftrace_event_call *event;
+ struct ftrace_event_call *tp_event;
+ int event_id = p_event->attr.config;
int ret = -EINVAL;
mutex_lock(&event_mutex);
- list_for_each_entry(event, &ftrace_events, list) {
- if (event->id == event_id && event->perf_event_enable &&
- try_module_get(event->mod)) {
- ret = perf_trace_event_enable(event, data);
+ list_for_each_entry(tp_event, &ftrace_events, list) {
+ if (tp_event->id == event_id && tp_event->perf_event_enable &&
+ try_module_get(tp_event->mod)) {
+ ret = perf_trace_event_init(tp_event, p_event);
break;
}
}
@@ -84,53 +100,52 @@ int perf_trace_enable(int event_id, void
return ret;
}
-static void perf_trace_event_disable(struct ftrace_event_call *event)
+int perf_trace_enable(struct perf_event *p_event)
{
- if (--event->perf_refcount > 0)
- return;
+ struct ftrace_event_call *tp_event = p_event->tp_event;
+ struct hlist_head *list;
- event->perf_event_disable(event);
+ list = tp_event->perf_events;
+ if (WARN_ON_ONCE(!list))
+ return -EINVAL;
- if (!--total_ref_count) {
- char *buf[4];
- int i;
-
- for (i = 0; i < 4; i++) {
- buf[i] = perf_trace_buf[i];
- rcu_assign_pointer(perf_trace_buf[i], NULL);
- }
+ list = per_cpu_ptr(list, smp_processor_id());
+ hlist_add_head_rcu(&p_event->hlist_entry, list);
- /*
- * Ensure every events in profiling have finished before
- * releasing the buffers
- */
- synchronize_sched();
+ return 0;
+}
- for (i = 0; i < 4; i++)
- free_percpu(buf[i]);
- }
+void perf_trace_disable(struct perf_event *p_event)
+{
+ hlist_del_rcu(&p_event->hlist_entry);
}
-void perf_trace_disable(int event_id)
+void perf_trace_destroy(struct perf_event *p_event)
{
- struct ftrace_event_call *event;
+ struct ftrace_event_call *tp_event = p_event->tp_event;
+ int i;
- mutex_lock(&event_mutex);
- list_for_each_entry(event, &ftrace_events, list) {
- if (event->id == event_id) {
- perf_trace_event_disable(event);
- module_put(event->mod);
- break;
+ if (--tp_event->perf_refcount > 0)
+ return;
+
+ tp_event->perf_event_disable(tp_event);
+
+ free_percpu(tp_event->perf_events);
+ tp_event->perf_events = NULL;
+
+ if (!--total_ref_count) {
+ for (i = 0; i < 4; i++) {
+ free_percpu(perf_trace_buf[i]);
+ perf_trace_buf[i] = NULL;
}
}
- mutex_unlock(&event_mutex);
}
__kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
struct pt_regs *regs, int *rctxp)
{
struct trace_entry *entry;
- char *trace_buf, *raw_data;
+ char *raw_data;
int pc;
BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long));
@@ -139,13 +154,9 @@ __kprobes void *perf_trace_buf_prepare(i
*rctxp = perf_swevent_get_recursion_context();
if (*rctxp < 0)
- goto err_recursion;
-
- trace_buf = rcu_dereference_sched(perf_trace_buf[*rctxp]);
- if (!trace_buf)
- goto err;
+ return NULL;
- raw_data = per_cpu_ptr(trace_buf, smp_processor_id());
+ raw_data = per_cpu_ptr(perf_trace_buf[*rctxp], smp_processor_id());
/* zero the dead bytes from align to not leak stack to user */
memset(&raw_data[size - sizeof(u64)], 0, sizeof(u64));
@@ -155,9 +166,5 @@ __kprobes void *perf_trace_buf_prepare(i
entry->type = type;
return raw_data;
-err:
- perf_swevent_put_recursion_context(*rctxp);
-err_recursion:
- return NULL;
}
EXPORT_SYMBOL_GPL(perf_trace_buf_prepare);
Index: linux-2.6/kernel/trace/trace_kprobe.c
===================================================================
--- linux-2.6.orig/kernel/trace/trace_kprobe.c
+++ linux-2.6/kernel/trace/trace_kprobe.c
@@ -1341,6 +1341,7 @@ static __kprobes void kprobe_perf_func(s
struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
struct ftrace_event_call *call = &tp->call;
struct kprobe_trace_entry_head *entry;
+ struct hlist_head *head;
u8 *data;
int size, __size, i;
int rctx;
@@ -1361,7 +1362,8 @@ static __kprobes void kprobe_perf_func(s
for (i = 0; i < tp->nr_args; i++)
call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset);
- perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, call->perf_data);
+ head = per_cpu_ptr(call->perf_events, smp_processor_id());
+ perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head);
}
/* Kretprobe profile handler */
@@ -1371,6 +1373,7 @@ static __kprobes void kretprobe_perf_fun
struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
struct ftrace_event_call *call = &tp->call;
struct kretprobe_trace_entry_head *entry;
+ struct hlist_head *head;
u8 *data;
int size, __size, i;
int rctx;
@@ -1392,8 +1395,8 @@ static __kprobes void kretprobe_perf_fun
for (i = 0; i < tp->nr_args; i++)
call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset);
- perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1,
- regs, call->perf_data);
+ head = per_cpu_ptr(call->perf_events, smp_processor_id());
+ perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1, regs, head);
}
static int probe_perf_enable(struct ftrace_event_call *call)
Index: linux-2.6/kernel/trace/trace_syscalls.c
===================================================================
--- linux-2.6.orig/kernel/trace/trace_syscalls.c
+++ linux-2.6/kernel/trace/trace_syscalls.c
@@ -438,6 +438,7 @@ static void perf_syscall_enter(struct pt
{
struct syscall_metadata *sys_data;
struct syscall_trace_enter *rec;
+ struct hlist_head *head;
int syscall_nr;
int rctx;
int size;
@@ -467,8 +468,9 @@ static void perf_syscall_enter(struct pt
rec->nr = syscall_nr;
syscall_get_arguments(current, regs, 0, sys_data->nb_args,
(unsigned long *)&rec->args);
- perf_trace_buf_submit(rec, size, rctx, 0, 1, regs,
- sys_data->enter_event->perf_data);
+
+ head = per_cpu_ptr(sys_data->enter_event->perf_events, smp_processor_id());
+ perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head);
}
int perf_sysenter_enable(struct ftrace_event_call *call)
@@ -510,6 +512,7 @@ static void perf_syscall_exit(struct pt_
{
struct syscall_metadata *sys_data;
struct syscall_trace_exit *rec;
+ struct hlist_head *head;
int syscall_nr;
int rctx;
int size;
@@ -542,8 +545,8 @@ static void perf_syscall_exit(struct pt_
rec->nr = syscall_nr;
rec->ret = syscall_get_return_value(current, regs);
- perf_trace_buf_submit(rec, size, rctx, 0, 1, regs,
- sys_data->exit_event->perf_data);
+ head = per_cpu_ptr(sys_data->exit_event->perf_events, smp_processor_id());
+ perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head);
}
int perf_sysexit_enable(struct ftrace_event_call *call)
Index: linux-2.6/kernel/perf_event.c
===================================================================
--- linux-2.6.orig/kernel/perf_event.c
+++ linux-2.6/kernel/perf_event.c
@@ -4004,9 +4004,6 @@ static void perf_swevent_add(struct perf
perf_swevent_overflow(event, 0, nmi, data, regs);
}
-static int perf_tp_event_match(struct perf_event *event,
- struct perf_sample_data *data);
-
static int perf_exclude_event(struct perf_event *event,
struct pt_regs *regs)
{
@@ -4036,10 +4033,6 @@ static int perf_swevent_match(struct per
if (perf_exclude_event(event, regs))
return 0;
- if (event->attr.type == PERF_TYPE_TRACEPOINT &&
- !perf_tp_event_match(event, data))
- return 0;
-
return 1;
}
@@ -4094,7 +4087,7 @@ end:
int perf_swevent_get_recursion_context(void)
{
- struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context);
+ struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
int rctx;
if (in_nmi())
@@ -4106,10 +4099,8 @@ int perf_swevent_get_recursion_context(v
else
rctx = 0;
- if (cpuctx->recursion[rctx]) {
- put_cpu_var(perf_cpu_context);
+ if (cpuctx->recursion[rctx])
return -1;
- }
cpuctx->recursion[rctx]++;
barrier();
@@ -4123,7 +4114,6 @@ void perf_swevent_put_recursion_context(
struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
barrier();
cpuctx->recursion[rctx]--;
- put_cpu_var(perf_cpu_context);
}
EXPORT_SYMBOL_GPL(perf_swevent_put_recursion_context);
@@ -4134,6 +4124,7 @@ void __perf_sw_event(u32 event_id, u64 n
struct perf_sample_data data;
int rctx;
+ preempt_disable_notrace();
rctx = perf_swevent_get_recursion_context();
if (rctx < 0)
return;
@@ -4143,6 +4134,7 @@ void __perf_sw_event(u32 event_id, u64 n
do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, nmi, &data, regs);
perf_swevent_put_recursion_context(rctx);
+ preempt_enable_notrace();
}
static void perf_swevent_read(struct perf_event *event)
@@ -4451,11 +4443,43 @@ static int swevent_hlist_get(struct perf
#ifdef CONFIG_EVENT_TRACING
-void perf_tp_event(int event_id, u64 addr, u64 count, void *record,
- int entry_size, struct pt_regs *regs, void *event)
+static const struct pmu perf_ops_tracepoint = {
+ .enable = perf_trace_enable,
+ .disable = perf_trace_disable,
+ .read = perf_swevent_read,
+ .unthrottle = perf_swevent_unthrottle,
+};
+
+static int perf_tp_filter_match(struct perf_event *event,
+ struct perf_sample_data *data)
+{
+ void *record = data->raw->data;
+
+ if (likely(!event->filter) || filter_match_preds(event->filter, record))
+ return 1;
+ return 0;
+}
+
+static int perf_tp_event_match(struct perf_event *event,
+ struct perf_sample_data *data,
+ struct pt_regs *regs)
+{
+ if (perf_exclude_event(event, regs))
+ return 0;
+
+ if (!perf_tp_filter_match(event, data))
+ return 0;
+
+ return 1;
+}
+
+void perf_tp_event(u64 addr, u64 count, void *record, int entry_size,
+ struct pt_regs *regs, struct hlist_head *head)
{
- const int type = PERF_TYPE_TRACEPOINT;
struct perf_sample_data data;
+ struct perf_event *event;
+ struct hlist_node *node;
+
struct perf_raw_record raw = {
.size = entry_size,
.data = record,
@@ -4464,30 +4488,18 @@ void perf_tp_event(int event_id, u64 add
perf_sample_data_init(&data, addr);
data.raw = &raw;
- if (!event) {
- do_perf_sw_event(type, event_id, count, 1, &data, regs);
- return;
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(event, node, head, hlist_entry) {
+ if (perf_tp_event_match(event, &data, regs))
+ perf_swevent_add(event, count, 1, &data, regs);
}
-
- if (perf_swevent_match(event, type, event_id, &data, regs))
- perf_swevent_add(event, count, 1, &data, regs);
+ rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(perf_tp_event);
-static int perf_tp_event_match(struct perf_event *event,
- struct perf_sample_data *data)
-{
- void *record = data->raw->data;
-
- if (likely(!event->filter) || filter_match_preds(event->filter, record))
- return 1;
- return 0;
-}
-
static void tp_perf_event_destroy(struct perf_event *event)
{
- perf_trace_disable(event->attr.config);
- swevent_hlist_put(event);
+ perf_trace_destroy(event);
}
static const struct pmu *tp_perf_event_init(struct perf_event *event)
@@ -4503,17 +4515,13 @@ static const struct pmu *tp_perf_event_i
!capable(CAP_SYS_ADMIN))
return ERR_PTR(-EPERM);
- if (perf_trace_enable(event->attr.config, event))
+ err = perf_trace_init(event);
+ if (err)
return NULL;
event->destroy = tp_perf_event_destroy;
- err = swevent_hlist_get(event);
- if (err) {
- perf_trace_disable(event->attr.config);
- return ERR_PTR(err);
- }
- return &perf_ops_generic;
+ return &perf_ops_tracepoint;
}
static int perf_event_set_filter(struct perf_event *event, void __user *arg)
@@ -4541,12 +4549,6 @@ static void perf_event_free_filter(struc
#else
-static int perf_tp_event_match(struct perf_event *event,
- struct perf_sample_data *data)
-{
- return 1;
-}
-
static const struct pmu *tp_perf_event_init(struct perf_event *event)
{
return NULL;
Index: linux-2.6/include/linux/perf_event.h
===================================================================
--- linux-2.6.orig/include/linux/perf_event.h
+++ linux-2.6/include/linux/perf_event.h
@@ -727,6 +727,7 @@ struct perf_event {
perf_overflow_handler_t overflow_handler;
#ifdef CONFIG_EVENT_TRACING
+ struct ftrace_event_call *tp_event;
struct event_filter *filter;
#endif
@@ -992,8 +993,9 @@ static inline bool perf_paranoid_kernel(
}
extern void perf_event_init(void);
-extern void perf_tp_event(int event_id, u64 addr, u64 count, void *record,
- int entry_size, struct pt_regs *regs, void *event);
+extern void perf_tp_event(u64 addr, u64 count, void *record,
+ int entry_size, struct pt_regs *regs,
+ struct hlist_head *head);
extern void perf_bp_event(struct perf_event *event, void *data);
#ifndef perf_misc_flags
* [PATCH 03/10] perf: Ensure IOC_OUTPUT isn't used to create multi-writer buffers
2010-05-21 9:02 [PATCH 00/10] perf tracepoint and output optimizations Peter Zijlstra
2010-05-21 9:02 ` [PATCH 01/10] perf, trace: Remove IRQ-disable from perf/tracepoint interaction Peter Zijlstra
2010-05-21 9:02 ` [PATCH 02/10] perf, trace: Use per-tracepoint-per-cpu hlist to track events Peter Zijlstra
@ 2010-05-21 9:02 ` Peter Zijlstra
2010-05-21 11:28 ` [tip:perf/core] perf: Ensure that IOC_OUTPUT isn't " tip-bot for Peter Zijlstra
2010-05-21 9:02 ` [PATCH 04/10] perf-record: Remove -M Peter Zijlstra
` (6 subsequent siblings)
9 siblings, 1 reply; 53+ messages in thread
From: Peter Zijlstra @ 2010-05-21 9:02 UTC (permalink / raw)
To: Ingo Molnar, Paul Mackerras, Arnaldo Carvalho de Melo
Cc: Frederic Weisbecker, Steven Rostedt, LKML, Peter Zijlstra
[-- Attachment #1: perf-fix-output-ioctl.patch --]
[-- Type: text/plain, Size: 1167 bytes --]
Since we want to ensure that buffers have only a single writer, we must
avoid creating one with multiple writers.
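For reference, a minimal userspace sketch of the interface being constrained
here (a raw syscall wrapper is assumed since glibc provides none; error
handling trimmed): two events on the same cpu may share a buffer via
PERF_EVENT_IOC_SET_OUTPUT, while a cross-cpu redirect now fails with -EINVAL.

#include <linux/perf_event.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <string.h>
#include <stdio.h>
#include <unistd.h>

static int perf_event_open(struct perf_event_attr *attr, pid_t pid,
                           int cpu, int group_fd, unsigned long flags)
{
        return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
        struct perf_event_attr attr;
        size_t page = sysconf(_SC_PAGESIZE);
        int fd0, fd1;

        memset(&attr, 0, sizeof(attr));
        attr.size = sizeof(attr);
        attr.type = PERF_TYPE_SOFTWARE;
        attr.config = PERF_COUNT_SW_CPU_CLOCK;
        attr.sample_period = 100000;

        fd0 = perf_event_open(&attr, -1, 0, -1, 0);     /* cpu 0 */
        fd1 = perf_event_open(&attr, -1, 0, -1, 0);     /* cpu 0 */

        /* give fd0 a buffer: 1 control page + 8 data pages */
        mmap(NULL, 9 * page, PROT_READ | PROT_WRITE, MAP_SHARED, fd0, 0);

        /* same cpu: redirecting fd1's output into fd0's buffer is fine */
        if (ioctl(fd1, PERF_EVENT_IOC_SET_OUTPUT, fd0))
                perror("SET_OUTPUT");

        /* an event opened on another cpu would now get -EINVAL here */
        return 0;
}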
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
kernel/perf_event.c | 19 +++++++++++++++++++
1 file changed, 19 insertions(+)
Index: linux-2.6/kernel/perf_event.c
===================================================================
--- linux-2.6.orig/kernel/perf_event.c
+++ linux-2.6/kernel/perf_event.c
@@ -4885,6 +4885,13 @@ static int perf_event_set_output(struct
int fput_needed = 0;
int ret = -EINVAL;
+ /*
+ * Don't allow output of inherited per-task events. This would
+ * create performance issues due to cross cpu access.
+ */
+ if (event->cpu == -1 && event->attr.inherit)
+ return -EINVAL;
+
if (!output_fd)
goto set;
@@ -4905,6 +4912,18 @@ static int perf_event_set_output(struct
if (event->data)
goto out;
+ /*
+ * Don't allow cross-cpu buffers
+ */
+ if (output_event->cpu != event->cpu)
+ goto out;
+
+ /*
+ * If its not a per-cpu buffer, it must be the same task.
+ */
+ if (output_event->cpu == -1 && output_event->ctx != event->ctx)
+ goto out;
+
atomic_long_inc(&output_file->f_count);
set:
* [PATCH 04/10] perf-record: Remove -M
2010-05-21 9:02 [PATCH 00/10] perf tracepoint and output optimizations Peter Zijlstra
` (2 preceding siblings ...)
2010-05-21 9:02 ` [PATCH 03/10] perf: Ensure IOC_OUTPUT isn't used to create multi-writer buffers Peter Zijlstra
@ 2010-05-21 9:02 ` Peter Zijlstra
2010-05-21 11:28 ` [tip:perf/core] " tip-bot for Peter Zijlstra
2010-05-21 9:02 ` [PATCH 05/10] perf-record: Share per-cpu buffers Peter Zijlstra
` (5 subsequent siblings)
9 siblings, 1 reply; 53+ messages in thread
From: Peter Zijlstra @ 2010-05-21 9:02 UTC (permalink / raw)
To: Ingo Molnar, Paul Mackerras, Arnaldo Carvalho de Melo
Cc: Frederic Weisbecker, Steven Rostedt, LKML, Peter Zijlstra
[-- Attachment #1: perf-record-output.patch --]
[-- Type: text/plain, Size: 2835 bytes --]
Since it is not allowed to create cross-cpu (or cross-task) buffers,
this option is no longer valid.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
tools/perf/builtin-record.c | 36 ++++++++++++------------------------
1 file changed, 12 insertions(+), 24 deletions(-)
Index: linux-2.6/tools/perf/builtin-record.c
===================================================================
--- linux-2.6.orig/tools/perf/builtin-record.c
+++ linux-2.6/tools/perf/builtin-record.c
@@ -61,8 +61,6 @@ static bool call_graph = false;
static bool inherit_stat = false;
static bool no_samples = false;
static bool sample_address = false;
-static bool multiplex = false;
-static int multiplex_fd = -1;
static long samples = 0;
static struct timeval last_read;
@@ -381,27 +379,19 @@ try_again:
*/
if (group && group_fd == -1)
group_fd = fd[nr_cpu][counter][thread_index];
- if (multiplex && multiplex_fd == -1)
- multiplex_fd = fd[nr_cpu][counter][thread_index];
- if (multiplex && fd[nr_cpu][counter][thread_index] != multiplex_fd) {
-
- ret = ioctl(fd[nr_cpu][counter][thread_index], PERF_EVENT_IOC_SET_OUTPUT, multiplex_fd);
- assert(ret != -1);
- } else {
- event_array[nr_poll].fd = fd[nr_cpu][counter][thread_index];
- event_array[nr_poll].events = POLLIN;
- nr_poll++;
-
- mmap_array[nr_cpu][counter][thread_index].counter = counter;
- mmap_array[nr_cpu][counter][thread_index].prev = 0;
- mmap_array[nr_cpu][counter][thread_index].mask = mmap_pages*page_size - 1;
- mmap_array[nr_cpu][counter][thread_index].base = mmap(NULL, (mmap_pages+1)*page_size,
- PROT_READ|PROT_WRITE, MAP_SHARED, fd[nr_cpu][counter][thread_index], 0);
- if (mmap_array[nr_cpu][counter][thread_index].base == MAP_FAILED) {
- error("failed to mmap with %d (%s)\n", errno, strerror(errno));
- exit(-1);
- }
+ event_array[nr_poll].fd = fd[nr_cpu][counter][thread_index];
+ event_array[nr_poll].events = POLLIN;
+ nr_poll++;
+
+ mmap_array[nr_cpu][counter][thread_index].counter = counter;
+ mmap_array[nr_cpu][counter][thread_index].prev = 0;
+ mmap_array[nr_cpu][counter][thread_index].mask = mmap_pages*page_size - 1;
+ mmap_array[nr_cpu][counter][thread_index].base = mmap(NULL, (mmap_pages+1)*page_size,
+ PROT_READ|PROT_WRITE, MAP_SHARED, fd[nr_cpu][counter][thread_index], 0);
+ if (mmap_array[nr_cpu][counter][thread_index].base == MAP_FAILED) {
+ error("failed to mmap with %d (%s)\n", errno, strerror(errno));
+ exit(-1);
}
if (filter != NULL) {
@@ -835,8 +825,6 @@ static const struct option options[] = {
"Sample addresses"),
OPT_BOOLEAN('n', "no-samples", &no_samples,
"don't sample"),
- OPT_BOOLEAN('M', "multiplex", &multiplex,
- "multiplex counter output in a single channel"),
OPT_END()
};
* [PATCH 05/10] perf-record: Share per-cpu buffers
2010-05-21 9:02 [PATCH 00/10] perf tracepoint and output optimizations Peter Zijlstra
` (3 preceding siblings ...)
2010-05-21 9:02 ` [PATCH 04/10] perf-record: Remove -M Peter Zijlstra
@ 2010-05-21 9:02 ` Peter Zijlstra
2010-05-21 9:44 ` Frederic Weisbecker
2010-05-21 11:29 ` [tip:perf/core] " tip-bot for Peter Zijlstra
2010-05-21 9:02 ` [PATCH 06/10] perf: Fix wakeup storm for RO mmap()s Peter Zijlstra
` (4 subsequent siblings)
9 siblings, 2 replies; 53+ messages in thread
From: Peter Zijlstra @ 2010-05-21 9:02 UTC (permalink / raw)
To: Ingo Molnar, Paul Mackerras, Arnaldo Carvalho de Melo
Cc: Frederic Weisbecker, Steven Rostedt, LKML, Peter Zijlstra
[-- Attachment #1: perf-record-output-2.patch --]
[-- Type: text/plain, Size: 3181 bytes --]
It seems a waste of space to create a buffer per event; share a single buffer per cpu instead.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
tools/perf/builtin-record.c | 52 +++++++++++++++++++++++---------------------
1 file changed, 28 insertions(+), 24 deletions(-)
Index: linux-2.6/tools/perf/builtin-record.c
===================================================================
--- linux-2.6.orig/tools/perf/builtin-record.c
+++ linux-2.6/tools/perf/builtin-record.c
@@ -85,7 +85,7 @@ struct mmap_data {
unsigned int prev;
};
-static struct mmap_data *mmap_array[MAX_NR_CPUS][MAX_COUNTERS];
+static struct mmap_data mmap_array[MAX_NR_CPUS];
static unsigned long mmap_read_head(struct mmap_data *md)
{
@@ -380,18 +380,29 @@ try_again:
if (group && group_fd == -1)
group_fd = fd[nr_cpu][counter][thread_index];
- event_array[nr_poll].fd = fd[nr_cpu][counter][thread_index];
- event_array[nr_poll].events = POLLIN;
- nr_poll++;
-
- mmap_array[nr_cpu][counter][thread_index].counter = counter;
- mmap_array[nr_cpu][counter][thread_index].prev = 0;
- mmap_array[nr_cpu][counter][thread_index].mask = mmap_pages*page_size - 1;
- mmap_array[nr_cpu][counter][thread_index].base = mmap(NULL, (mmap_pages+1)*page_size,
- PROT_READ|PROT_WRITE, MAP_SHARED, fd[nr_cpu][counter][thread_index], 0);
- if (mmap_array[nr_cpu][counter][thread_index].base == MAP_FAILED) {
- error("failed to mmap with %d (%s)\n", errno, strerror(errno));
- exit(-1);
+ if (counter || thread_index) {
+ ret = ioctl(fd[nr_cpu][counter][thread_index],
+ PERF_EVENT_IOC_SET_OUTPUT,
+ fd[nr_cpu][0][0]);
+ if (ret) {
+ error("failed to set output: %d (%s)\n", errno,
+ strerror(errno));
+ exit(-1);
+ }
+ } else {
+ mmap_array[nr_cpu].counter = counter;
+ mmap_array[nr_cpu].prev = 0;
+ mmap_array[nr_cpu].mask = mmap_pages*page_size - 1;
+ mmap_array[nr_cpu].base = mmap(NULL, (mmap_pages+1)*page_size,
+ PROT_READ|PROT_WRITE, MAP_SHARED, fd[nr_cpu][counter][thread_index], 0);
+ if (mmap_array[nr_cpu].base == MAP_FAILED) {
+ error("failed to mmap with %d (%s)\n", errno, strerror(errno));
+ exit(-1);
+ }
+
+ event_array[nr_poll].fd = fd[nr_cpu][counter][thread_index];
+ event_array[nr_poll].events = POLLIN;
+ nr_poll++;
}
if (filter != NULL) {
@@ -492,16 +503,11 @@ static struct perf_event_header finished
static void mmap_read_all(void)
{
- int i, counter, thread;
+ int i;
for (i = 0; i < nr_cpu; i++) {
- for (counter = 0; counter < nr_counters; counter++) {
- for (thread = 0; thread < thread_num; thread++) {
- if (mmap_array[i][counter][thread].base)
- mmap_read(&mmap_array[i][counter][thread]);
- }
-
- }
+ if (mmap_array[i].base)
+ mmap_read(&mmap_array[i]);
}
if (perf_header__has_feat(&session->header, HEADER_TRACE_INFO))
@@ -876,9 +882,7 @@ int cmd_record(int argc, const char **ar
for (i = 0; i < MAX_NR_CPUS; i++) {
for (j = 0; j < MAX_COUNTERS; j++) {
fd[i][j] = malloc(sizeof(int)*thread_num);
- mmap_array[i][j] = zalloc(
- sizeof(struct mmap_data)*thread_num);
- if (!fd[i][j] || !mmap_array[i][j])
+ if (!fd[i][j])
return -ENOMEM;
}
}
* [PATCH 06/10] perf: Fix wakeup storm for RO mmap()s
2010-05-21 9:02 [PATCH 00/10] perf tracepoint and output optimizations Peter Zijlstra
` (4 preceding siblings ...)
2010-05-21 9:02 ` [PATCH 05/10] perf-record: Share per-cpu buffers Peter Zijlstra
@ 2010-05-21 9:02 ` Peter Zijlstra
2010-05-21 11:29 ` [tip:perf/core] " tip-bot for Peter Zijlstra
2010-05-21 9:02 ` [PATCH 07/10] perf: Optimize perf_output_copy Peter Zijlstra
` (3 subsequent siblings)
9 siblings, 1 reply; 53+ messages in thread
From: Peter Zijlstra @ 2010-05-21 9:02 UTC (permalink / raw)
To: Ingo Molnar, Paul Mackerras, Arnaldo Carvalho de Melo
Cc: Frederic Weisbecker, Steven Rostedt, LKML, Peter Zijlstra
[-- Attachment #1: perf-fix-RO-wakeups.patch --]
[-- Type: text/plain, Size: 1481 bytes --]
RO mmap()s don't update the tail pointer, so comparing against it to
determine the amount of data written doesn't do any good.
Keep track of when we last did a wakeup, and compare against that.
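A userspace sketch of the new scheme (plain integers standing in for
local_t): remember the head position of the last wakeup and arm a new one for
every further watermark's worth of data, with unsigned subtraction keeping
the test correct across counter wrap.

#include <stdio.h>

static unsigned long head;              /* write position */
static unsigned long wakeup;            /* head value at the last wakeup */
static const unsigned long watermark = 4096;

static void output_end(unsigned long len)
{
        head += len;
        if (head - wakeup > watermark) {        /* wrap-safe comparison */
                wakeup += watermark;
                printf("wakeup, head=%lu\n", head);     /* poll() readers */
        }
}

int main(void)
{
        int i;

        for (i = 0; i < 8; i++)
                output_end(1500);
        return 0;
}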
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
include/linux/perf_event.h | 2 +-
kernel/perf_event.c | 4 ++--
2 files changed, 3 insertions(+), 3 deletions(-)
Index: linux-2.6/include/linux/perf_event.h
===================================================================
--- linux-2.6.orig/include/linux/perf_event.h
+++ linux-2.6/include/linux/perf_event.h
@@ -599,7 +599,7 @@ struct perf_mmap_data {
local_t head; /* write position */
local_t nest; /* nested writers */
local_t events; /* event limit */
- local_t wakeup; /* needs a wakeup */
+ local_t wakeup; /* wakeup stamp */
local_t lost; /* nr records lost */
long watermark; /* wakeup watermark */
Index: linux-2.6/kernel/perf_event.c
===================================================================
--- linux-2.6.orig/kernel/perf_event.c
+++ linux-2.6/kernel/perf_event.c
@@ -3055,8 +3055,8 @@ int perf_output_begin(struct perf_output
handle->offset = offset;
handle->head = head;
- if (head - tail > data->watermark)
- local_inc(&data->wakeup);
+ if (head - local_read(&data->wakeup) > data->watermark)
+ local_add(data->watermark, &data->wakeup);
if (have_lost) {
lost_event.header.type = PERF_RECORD_LOST;
* [PATCH 07/10] perf: Optimize perf_output_copy
2010-05-21 9:02 [PATCH 00/10] perf tracepoint and output optimizations Peter Zijlstra
` (5 preceding siblings ...)
2010-05-21 9:02 ` [PATCH 06/10] perf: Fix wakeup storm for RO mmap()s Peter Zijlstra
@ 2010-05-21 9:02 ` Peter Zijlstra
2010-05-21 11:29 ` [tip:perf/core] perf: Optimize perf_output_copy() tip-bot for Peter Zijlstra
2010-05-21 9:02 ` [PATCH 08/10] perf: Optimize the !vmalloc backed buffer Peter Zijlstra
` (2 subsequent siblings)
9 siblings, 1 reply; 53+ messages in thread
From: Peter Zijlstra @ 2010-05-21 9:02 UTC (permalink / raw)
To: Ingo Molnar, Paul Mackerras, Arnaldo Carvalho de Melo
Cc: Frederic Weisbecker, Steven Rostedt, LKML, Peter Zijlstra
[-- Attachment #1: perf-opt-output_copy.patch --]
[-- Type: text/plain, Size: 3047 bytes --]
Reduce the clutter in perf_output_copy() by keeping an iterator in
perf_output_handle.
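In userspace terms the iterator looks roughly like this (illustrative names):
the page index, write address, and bytes left in the current page are
computed once when the handle is set up, and the copy loop merely advances
them instead of redoing the page arithmetic for every chunk.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define PAGE_SIZE       4096
#define NR_PAGES        4       /* power of two, as for the perf buffer */

struct handle {
        char            *pages[NR_PAGES];
        int             page;   /* current page index */
        char            *addr;  /* current write position */
        unsigned long   size;   /* bytes left in the current page */
};

static void output_copy(struct handle *h, const void *buf, unsigned int len)
{
        const char *src = buf;

        do {
                unsigned long size = len < h->size ? len : h->size;

                memcpy(h->addr, src, size);
                len -= size;
                src += size;
                h->addr += size;
                h->size -= size;
                if (!h->size) {         /* step the iterator to the next page */
                        h->page = (h->page + 1) & (NR_PAGES - 1);
                        h->addr = h->pages[h->page];
                        h->size = PAGE_SIZE;
                }
        } while (len);
}

int main(void)
{
        struct handle h = { .page = 0 };
        char record[6000] = "payload";
        int i;

        for (i = 0; i < NR_PAGES; i++)
                h.pages[i] = malloc(PAGE_SIZE);
        h.addr = h.pages[0];
        h.size = PAGE_SIZE;

        output_copy(&h, record, sizeof(record));        /* spans two pages */
        printf("ended on page %d with %lu bytes left\n", h.page, h.size);
        return 0;
}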
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
include/linux/perf_event.h | 3 ++
kernel/perf_event.c | 54 +++++++++++++++++++++------------------------
2 files changed, 29 insertions(+), 28 deletions(-)
Index: linux-2.6/include/linux/perf_event.h
===================================================================
--- linux-2.6.orig/include/linux/perf_event.h
+++ linux-2.6/include/linux/perf_event.h
@@ -806,6 +806,9 @@ struct perf_output_handle {
unsigned long head;
unsigned long offset;
unsigned long wakeup;
+ unsigned long size;
+ void *addr;
+ int page;
int nmi;
int sample;
};
Index: linux-2.6/kernel/perf_event.c
===================================================================
--- linux-2.6.orig/kernel/perf_event.c
+++ linux-2.6/kernel/perf_event.c
@@ -2960,39 +2960,30 @@ again:
void perf_output_copy(struct perf_output_handle *handle,
const void *buf, unsigned int len)
{
- unsigned int pages_mask;
- unsigned long offset;
- unsigned int size;
- void **pages;
-
- offset = handle->offset;
- pages_mask = handle->data->nr_pages - 1;
- pages = handle->data->data_pages;
-
- do {
- unsigned long page_offset;
- unsigned long page_size;
- int nr;
-
- nr = (offset >> PAGE_SHIFT) & pages_mask;
- page_size = 1UL << (handle->data->data_order + PAGE_SHIFT);
- page_offset = offset & (page_size - 1);
- size = min_t(unsigned int, page_size - page_offset, len);
-
- memcpy(pages[nr] + page_offset, buf, size);
-
- len -= size;
- buf += size;
- offset += size;
- } while (len);
-
- handle->offset = offset;
+ handle->offset += len;
/*
* Check we didn't copy past our reservation window, taking the
* possible unsigned int wrap into account.
*/
- WARN_ON_ONCE(((long)(handle->head - handle->offset)) < 0);
+ if (WARN_ON_ONCE(((long)(handle->head - handle->offset)) < 0))
+ return;
+
+ do {
+ unsigned long size = min(handle->size, len);
+
+ memcpy(handle->addr, buf, size);
+
+ len -= size;
+ handle->addr += size;
+ handle->size -= size;
+ if (!handle->size) {
+ handle->page++;
+ handle->page &= handle->data->nr_pages - 1;
+ handle->addr = handle->data->data_pages[handle->page];
+ handle->size = PAGE_SIZE << handle->data->data_order;
+ }
+ } while (len);
}
int perf_output_begin(struct perf_output_handle *handle,
@@ -3058,6 +3049,13 @@ int perf_output_begin(struct perf_output
if (head - local_read(&data->wakeup) > data->watermark)
local_add(data->watermark, &data->wakeup);
+ handle->page = handle->offset >> (PAGE_SHIFT + data->data_order);
+ handle->page &= data->nr_pages - 1;
+ handle->size = handle->offset & ((PAGE_SIZE << data->data_order) - 1);
+ handle->addr = data->data_pages[handle->page];
+ handle->addr += handle->size;
+ handle->size = (PAGE_SIZE << data->data_order) - handle->size;
+
if (have_lost) {
lost_event.header.type = PERF_RECORD_LOST;
lost_event.header.misc = 0;
* [PATCH 08/10] perf: Optimize the !vmalloc backed buffer
2010-05-21 9:02 [PATCH 00/10] perf tracepoint and output optimizations Peter Zijlstra
` (6 preceding siblings ...)
2010-05-21 9:02 ` [PATCH 07/10] perf: Optimize perf_output_copy Peter Zijlstra
@ 2010-05-21 9:02 ` Peter Zijlstra
2010-05-21 11:29 ` [tip:perf/core] " tip-bot for Peter Zijlstra
2010-05-21 9:02 ` [PATCH 09/10] perf: Remove more fastpath code Peter Zijlstra
2010-05-21 9:02 ` [PATCH 10/10] perf: Optimize perf_tp_event_match Peter Zijlstra
9 siblings, 1 reply; 53+ messages in thread
From: Peter Zijlstra @ 2010-05-21 9:02 UTC (permalink / raw)
To: Ingo Molnar, Paul Mackerras, Arnaldo Carvalho de Melo
Cc: Frederic Weisbecker, Steven Rostedt, LKML, Peter Zijlstra
[-- Attachment #1: perf-opt-output-order.patch --]
[-- Type: text/plain, Size: 4263 bytes --]
Reduce code and data by using the knowledge that for !PERF_USE_VMALLOC
data_order is always 0.
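Sketched in plain C (USE_VMALLOC standing in for CONFIG_PERF_USE_VMALLOC):
page_order() becomes an inline returning the constant 0 in the common case,
so every shift and mask derived from it folds away at compile time, and the
field only exists in the configuration where it can actually vary.

#include <stdio.h>

#define PAGE_SHIFT 12

struct mmap_data {
        int nr_pages;
#ifdef USE_VMALLOC
        int page_order;         /* only exists for the vmalloc case */
#endif
};

#ifdef USE_VMALLOC
static inline int page_order(struct mmap_data *data)
{
        return data->page_order;
}
#else
static inline int page_order(struct mmap_data *data)
{
        return 0;               /* constant: the shift below folds away */
}
#endif

static unsigned long data_size(struct mmap_data *data)
{
        return (unsigned long)data->nr_pages << (PAGE_SHIFT + page_order(data));
}

int main(void)
{
        struct mmap_data d = { .nr_pages = 8 };

        printf("%lu\n", data_size(&d));
        return 0;
}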
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
include/linux/perf_event.h | 2 +-
kernel/perf_event.c | 41 ++++++++++++++++++++++++++---------------
2 files changed, 27 insertions(+), 16 deletions(-)
Index: linux-2.6/include/linux/perf_event.h
===================================================================
--- linux-2.6.orig/include/linux/perf_event.h
+++ linux-2.6/include/linux/perf_event.h
@@ -588,8 +588,8 @@ struct perf_mmap_data {
struct rcu_head rcu_head;
#ifdef CONFIG_PERF_USE_VMALLOC
struct work_struct work;
+ int page_order; /* allocation order */
#endif
- int data_order; /* allocation order */
int nr_pages; /* nr of data pages */
int writable; /* are we writable */
int nr_locked; /* nr pages mlocked */
Index: linux-2.6/kernel/perf_event.c
===================================================================
--- linux-2.6.orig/kernel/perf_event.c
+++ linux-2.6/kernel/perf_event.c
@@ -2297,11 +2297,6 @@ unlock:
rcu_read_unlock();
}
-static unsigned long perf_data_size(struct perf_mmap_data *data)
-{
- return data->nr_pages << (PAGE_SHIFT + data->data_order);
-}
-
#ifndef CONFIG_PERF_USE_VMALLOC
/*
@@ -2359,7 +2354,6 @@ perf_mmap_data_alloc(struct perf_event *
goto fail_data_pages;
}
- data->data_order = 0;
data->nr_pages = nr_pages;
return data;
@@ -2395,6 +2389,11 @@ static void perf_mmap_data_free(struct p
kfree(data);
}
+static inline int page_order(struct perf_mmap_data *data)
+{
+ return 0;
+}
+
#else
/*
@@ -2403,10 +2402,15 @@ static void perf_mmap_data_free(struct p
* Required for architectures that have d-cache aliasing issues.
*/
+static inline int page_order(struct perf_mmap_data *data)
+{
+ return data->page_order;
+}
+
static struct page *
perf_mmap_to_page(struct perf_mmap_data *data, unsigned long pgoff)
{
- if (pgoff > (1UL << data->data_order))
+ if (pgoff > (1UL << page_order(data)))
return NULL;
return vmalloc_to_page((void *)data->user_page + pgoff * PAGE_SIZE);
@@ -2426,7 +2430,7 @@ static void perf_mmap_data_free_work(str
int i, nr;
data = container_of(work, struct perf_mmap_data, work);
- nr = 1 << data->data_order;
+ nr = 1 << page_order(data);
base = data->user_page;
for (i = 0; i < nr + 1; i++)
@@ -2465,7 +2469,7 @@ perf_mmap_data_alloc(struct perf_event *
data->user_page = all_buf;
data->data_pages[0] = all_buf + PAGE_SIZE;
- data->data_order = ilog2(nr_pages);
+ data->page_order = ilog2(nr_pages);
data->nr_pages = 1;
return data;
@@ -2479,6 +2483,11 @@ fail:
#endif
+static unsigned long perf_data_size(struct perf_mmap_data *data)
+{
+ return data->nr_pages << (PAGE_SHIFT + page_order(data));
+}
+
static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
struct perf_event *event = vma->vm_file->private_data;
@@ -2978,10 +2987,12 @@ void perf_output_copy(struct perf_output
handle->addr += size;
handle->size -= size;
if (!handle->size) {
+ struct perf_mmap_data *data = handle->data;
+
handle->page++;
- handle->page &= handle->data->nr_pages - 1;
- handle->addr = handle->data->data_pages[handle->page];
- handle->size = PAGE_SIZE << handle->data->data_order;
+ handle->page &= data->nr_pages - 1;
+ handle->addr = data->data_pages[handle->page];
+ handle->size = PAGE_SIZE << page_order(data);
}
} while (len);
}
@@ -3049,12 +3060,12 @@ int perf_output_begin(struct perf_output
if (head - local_read(&data->wakeup) > data->watermark)
local_add(data->watermark, &data->wakeup);
- handle->page = handle->offset >> (PAGE_SHIFT + data->data_order);
+ handle->page = handle->offset >> (PAGE_SHIFT + page_order(data));
handle->page &= data->nr_pages - 1;
- handle->size = handle->offset & ((PAGE_SIZE << data->data_order) - 1);
+ handle->size = handle->offset & ((PAGE_SIZE << page_order(data)) - 1);
handle->addr = data->data_pages[handle->page];
handle->addr += handle->size;
- handle->size = (PAGE_SIZE << data->data_order) - handle->size;
+ handle->size = (PAGE_SIZE << page_order(data)) - handle->size;
if (have_lost) {
lost_event.header.type = PERF_RECORD_LOST;
* [PATCH 09/10] perf: Remove more fastpath code
2010-05-21 9:02 [PATCH 00/10] perf tracepoint and output optimizations Peter Zijlstra
` (7 preceding siblings ...)
2010-05-21 9:02 ` [PATCH 08/10] perf: Optimize the !vmalloc backed buffer Peter Zijlstra
@ 2010-05-21 9:02 ` Peter Zijlstra
2010-05-21 11:15 ` Steven Rostedt
2010-05-21 11:30 ` [tip:perf/core] perf: Remove more code from the fastpath tip-bot for Peter Zijlstra
2010-05-21 9:02 ` [PATCH 10/10] perf: Optimize perf_tp_event_match Peter Zijlstra
9 siblings, 2 replies; 53+ messages in thread
From: Peter Zijlstra @ 2010-05-21 9:02 UTC (permalink / raw)
To: Ingo Molnar, Paul Mackerras, Arnaldo Carvalho de Melo
Cc: Frederic Weisbecker, Steven Rostedt, LKML, Peter Zijlstra
[-- Attachment #1: perf-opt-output-more.patch --]
[-- Type: text/plain, Size: 2247 bytes --]
Sanity checks cost instructions, so remove them from the output fast path.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
include/linux/perf_event.h | 2 --
kernel/perf_event.c | 20 ++++----------------
2 files changed, 4 insertions(+), 18 deletions(-)
Index: linux-2.6/include/linux/perf_event.h
===================================================================
--- linux-2.6.orig/include/linux/perf_event.h
+++ linux-2.6/include/linux/perf_event.h
@@ -803,8 +803,6 @@ struct perf_cpu_context {
struct perf_output_handle {
struct perf_event *event;
struct perf_mmap_data *data;
- unsigned long head;
- unsigned long offset;
unsigned long wakeup;
unsigned long size;
void *addr;
Index: linux-2.6/kernel/perf_event.c
===================================================================
--- linux-2.6.orig/kernel/perf_event.c
+++ linux-2.6/kernel/perf_event.c
@@ -2966,20 +2966,11 @@ again:
preempt_enable();
}
-void perf_output_copy(struct perf_output_handle *handle,
+__always_inline void perf_output_copy(struct perf_output_handle *handle,
const void *buf, unsigned int len)
{
- handle->offset += len;
-
- /*
- * Check we didn't copy past our reservation window, taking the
- * possible unsigned int wrap into account.
- */
- if (WARN_ON_ONCE(((long)(handle->head - handle->offset)) < 0))
- return;
-
do {
- unsigned long size = min(handle->size, len);
+ unsigned long size = min_t(unsigned long, handle->size, len);
memcpy(handle->addr, buf, size);
@@ -3054,15 +3045,12 @@ int perf_output_begin(struct perf_output
goto fail;
} while (local_cmpxchg(&data->head, offset, head) != offset);
- handle->offset = offset;
- handle->head = head;
-
if (head - local_read(&data->wakeup) > data->watermark)
local_add(data->watermark, &data->wakeup);
- handle->page = handle->offset >> (PAGE_SHIFT + page_order(data));
+ handle->page = offset >> (PAGE_SHIFT + page_order(data));
handle->page &= data->nr_pages - 1;
- handle->size = handle->offset & ((PAGE_SIZE << page_order(data)) - 1);
+ handle->size = offset & ((PAGE_SIZE << page_order(data)) - 1);
handle->addr = data->data_pages[handle->page];
handle->addr += handle->size;
handle->size = (PAGE_SIZE << page_order(data)) - handle->size;
* [PATCH 10/10] perf: Optimize perf_tp_event_match
2010-05-21 9:02 [PATCH 00/10] perf tracepoint and output optimizations Peter Zijlstra
` (8 preceding siblings ...)
2010-05-21 9:02 ` [PATCH 09/10] perf: Remove more fastpath code Peter Zijlstra
@ 2010-05-21 9:02 ` Peter Zijlstra
2010-05-21 11:30 ` [tip:perf/core] perf: Optimize perf_tp_event_match() tip-bot for Peter Zijlstra
9 siblings, 1 reply; 53+ messages in thread
From: Peter Zijlstra @ 2010-05-21 9:02 UTC (permalink / raw)
To: Ingo Molnar, Paul Mackerras, Arnaldo Carvalho de Melo
Cc: Frederic Weisbecker, Steven Rostedt, LKML, Peter Zijlstra
[-- Attachment #1: perf-opt-exl.patch --]
[-- Type: text/plain, Size: 751 bytes --]
Since we know tracepoints come from kernel context, avoid conditionals
that try to establish that very fact.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
kernel/perf_event.c | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
Index: linux-2.6/kernel/perf_event.c
===================================================================
--- linux-2.6.orig/kernel/perf_event.c
+++ linux-2.6/kernel/perf_event.c
@@ -4477,7 +4477,10 @@ static int perf_tp_event_match(struct pe
struct perf_sample_data *data,
struct pt_regs *regs)
{
- if (perf_exclude_event(event, regs))
+ /*
+ * All tracepoints are from kernel-space.
+ */
+ if (event->attr.exclude_kernel)
return 0;
if (!perf_tp_filter_match(event, data))
* Re: [PATCH 02/10] perf, trace: Use per-tracepoint-per-cpu hlist to track events
2010-05-21 9:02 ` [PATCH 02/10] perf, trace: Use per-tracepoint-per-cpu hlist to track events Peter Zijlstra
@ 2010-05-21 9:40 ` Frederic Weisbecker
2010-05-21 10:02 ` Peter Zijlstra
2010-05-21 10:41 ` [PATCH 02b/10] perf, trace: Fix probe unregister race Peter Zijlstra
` (2 subsequent siblings)
3 siblings, 1 reply; 53+ messages in thread
From: Frederic Weisbecker @ 2010-05-21 9:40 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Ingo Molnar, Paul Mackerras, Arnaldo Carvalho de Melo,
Steven Rostedt, LKML
On Fri, May 21, 2010 at 11:02:03AM +0200, Peter Zijlstra wrote:
> Avoid the swevent hash-table by using per-tracepoint hlists.
>
> Also, avoid conditionals on the fast path by ordering against probe
> unregister so that we never get on the callback path without the data
> being there.
>
> Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
> ---
> include/linux/ftrace_event.h | 16 ++---
> include/linux/perf_event.h | 6 +
> include/trace/ftrace.h | 4 -
> kernel/perf_event.c | 94 +++++++++++++++--------------
> kernel/trace/trace_event_perf.c | 127 +++++++++++++++++++++-------------------
> kernel/trace/trace_kprobe.c | 9 +-
> kernel/trace/trace_syscalls.c | 11 ++-
> 7 files changed, 143 insertions(+), 124 deletions(-)
>
> Index: linux-2.6/include/linux/ftrace_event.h
> ===================================================================
> --- linux-2.6.orig/include/linux/ftrace_event.h
> +++ linux-2.6/include/linux/ftrace_event.h
> @@ -133,7 +133,7 @@ struct ftrace_event_call {
> void *data;
>
> int perf_refcount;
> - void *perf_data;
> + struct hlist_head *perf_events;
> int (*perf_event_enable)(struct ftrace_event_call *);
> void (*perf_event_disable)(struct ftrace_event_call *);
> };
> @@ -192,9 +192,11 @@ struct perf_event;
>
> DECLARE_PER_CPU(struct pt_regs, perf_trace_regs);
>
> -extern int perf_trace_enable(int event_id, void *data);
> -extern void perf_trace_disable(int event_id);
> -extern int ftrace_profile_set_filter(struct perf_event *event, int event_id,
> +extern int perf_trace_init(struct perf_event *event);
> +extern void perf_trace_destroy(struct perf_event *event);
> +extern int perf_trace_enable(struct perf_event *event);
> +extern void perf_trace_disable(struct perf_event *event);
> +extern int ftrace_profile_set_filter(struct perf_event *event, int event_id,
> char *filter_str);
> extern void ftrace_profile_free_filter(struct perf_event *event);
> extern void *perf_trace_buf_prepare(int size, unsigned short type,
> @@ -202,11 +204,9 @@ extern void *perf_trace_buf_prepare(int
>
> static inline void
> perf_trace_buf_submit(void *raw_data, int size, int rctx, u64 addr,
> - u64 count, struct pt_regs *regs, void *event)
> + u64 count, struct pt_regs *regs, void *head)
> {
> - struct trace_entry *entry = raw_data;
> -
> - perf_tp_event(entry->type, addr, count, raw_data, size, regs, event);
> + perf_tp_event(addr, count, raw_data, size, regs, head);
> perf_swevent_put_recursion_context(rctx);
> }
> #endif
> Index: linux-2.6/include/trace/ftrace.h
> ===================================================================
> --- linux-2.6.orig/include/trace/ftrace.h
> +++ linux-2.6/include/trace/ftrace.h
> @@ -768,6 +768,7 @@ perf_trace_templ_##call(struct ftrace_ev
> struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\
> struct ftrace_raw_##call *entry; \
> u64 __addr = 0, __count = 1; \
> + struct hlist_head *head; \
> int __entry_size; \
> int __data_size; \
> int rctx; \
> @@ -790,8 +791,9 @@ perf_trace_templ_##call(struct ftrace_ev
> \
> { assign; } \
> \
> + head = per_cpu_ptr(event_call->perf_events, smp_processor_id());\
Should this be rcu_dereference_sched()?
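That is, something like this (a hypothetical sketch of the suggested
form; see the discussion below on whether it is actually needed):

	head = per_cpu_ptr(rcu_dereference_sched(event_call->perf_events),
			   smp_processor_id());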
> perf_trace_buf_submit(entry, __entry_size, rctx, __addr, \
> - __count, __regs, event_call->perf_data); \
> + __count, __regs, head); \
> }
>
> #undef DEFINE_EVENT
> Index: linux-2.6/kernel/trace/trace_event_perf.c
> ===================================================================
> --- linux-2.6.orig/kernel/trace/trace_event_perf.c
> +++ linux-2.6/kernel/trace/trace_event_perf.c
> @@ -23,14 +23,25 @@ typedef typeof(unsigned long [PERF_MAX_T
> /* Count the events in use (per event id, not per instance) */
> static int total_ref_count;
>
> -static int perf_trace_event_enable(struct ftrace_event_call *event, void *data)
> +static int perf_trace_event_init(struct ftrace_event_call *tp_event,
> + struct perf_event *p_event)
> {
> + struct hlist_head *list;
> int ret = -ENOMEM;
> + int cpu;
>
> - if (event->perf_refcount++ > 0) {
> - event->perf_data = NULL;
> + p_event->tp_event = tp_event;
> + if (tp_event->perf_refcount++ > 0)
> return 0;
> - }
> +
> + list = alloc_percpu(struct hlist_head);
> + if (!list)
> + goto fail;
> +
> + for_each_possible_cpu(cpu)
> + INIT_HLIST_HEAD(per_cpu_ptr(list, cpu));
> +
> + tp_event->perf_events = list;
I suspect this must be rcu_assign_pointer.
>
> if (!total_ref_count) {
> char *buf;
> @@ -39,20 +50,20 @@ static int perf_trace_event_enable(struc
> for (i = 0; i < 4; i++) {
> buf = (char *)alloc_percpu(perf_trace_t);
> if (!buf)
> - goto fail_buf;
> + goto fail;
>
> - rcu_assign_pointer(perf_trace_buf[i], buf);
> + perf_trace_buf[i] = buf;
> }
> }
>
> - ret = event->perf_event_enable(event);
> - if (!ret) {
> - event->perf_data = data;
> - total_ref_count++;
> - return 0;
> - }
> + ret = tp_event->perf_event_enable(tp_event);
> + if (ret)
> + goto fail;
>
> -fail_buf:
> + total_ref_count++;
> + return 0;
> +
> +fail:
> if (!total_ref_count) {
> int i;
>
> @@ -61,21 +72,26 @@ fail_buf:
> perf_trace_buf[i] = NULL;
> }
> }
> - event->perf_refcount--;
> +
> + if (!--tp_event->perf_refcount) {
> + free_percpu(tp_event->perf_events);
> + tp_event->perf_events = NULL;
> + }
>
> return ret;
> }
>
> -int perf_trace_enable(int event_id, void *data)
> +int perf_trace_init(struct perf_event *p_event)
> {
> - struct ftrace_event_call *event;
> + struct ftrace_event_call *tp_event;
> + int event_id = p_event->attr.config;
> int ret = -EINVAL;
>
> mutex_lock(&event_mutex);
> - list_for_each_entry(event, &ftrace_events, list) {
> - if (event->id == event_id && event->perf_event_enable &&
> - try_module_get(event->mod)) {
> - ret = perf_trace_event_enable(event, data);
> + list_for_each_entry(tp_event, &ftrace_events, list) {
> + if (tp_event->id == event_id && tp_event->perf_event_enable &&
> + try_module_get(tp_event->mod)) {
> + ret = perf_trace_event_init(tp_event, p_event);
> break;
> }
> }
> @@ -84,53 +100,52 @@ int perf_trace_enable(int event_id, void
> return ret;
> }
>
> -static void perf_trace_event_disable(struct ftrace_event_call *event)
> +int perf_trace_enable(struct perf_event *p_event)
> {
> - if (--event->perf_refcount > 0)
> - return;
> + struct ftrace_event_call *tp_event = p_event->tp_event;
> + struct hlist_head *list;
>
> - event->perf_event_disable(event);
> + list = tp_event->perf_events;
> + if (WARN_ON_ONCE(!list))
> + return -EINVAL;
>
> - if (!--total_ref_count) {
> - char *buf[4];
> - int i;
> -
> - for (i = 0; i < 4; i++) {
> - buf[i] = perf_trace_buf[i];
> - rcu_assign_pointer(perf_trace_buf[i], NULL);
> - }
> + list = per_cpu_ptr(list, smp_processor_id());
> + hlist_add_head_rcu(&p_event->hlist_entry, list);
Ah, and maybe add a small comment, because using the hlist API here
may puzzle more people than just me ;)
> - /*
> - * Ensure every events in profiling have finished before
> - * releasing the buffers
> - */
> - synchronize_sched();
> + return 0;
> +}
>
> - for (i = 0; i < 4; i++)
> - free_percpu(buf[i]);
> - }
> +void perf_trace_disable(struct perf_event *p_event)
> +{
> + hlist_del_rcu(&p_event->hlist_entry);
> }
>
> -void perf_trace_disable(int event_id)
> +void perf_trace_destroy(struct perf_event *p_event)
> {
> - struct ftrace_event_call *event;
> + struct ftrace_event_call *tp_event = p_event->tp_event;
> + int i;
>
> - mutex_lock(&event_mutex);
> - list_for_each_entry(event, &ftrace_events, list) {
> - if (event->id == event_id) {
> - perf_trace_event_disable(event);
> - module_put(event->mod);
> - break;
> + if (--tp_event->perf_refcount > 0)
> + return;
> +
> + tp_event->perf_event_disable(tp_event);
Don't we need a synchronize_sched() here?
> + free_percpu(tp_event->perf_events);
> + tp_event->perf_events = NULL;
And rcu_assign_pointer()?
> +
> + if (!--total_ref_count) {
> + for (i = 0; i < 4; i++) {
> + free_percpu(perf_trace_buf[i]);
> + perf_trace_buf[i] = NULL;
> }
> }
> - mutex_unlock(&event_mutex);
> }
>
> __kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
> struct pt_regs *regs, int *rctxp)
> {
> struct trace_entry *entry;
> - char *trace_buf, *raw_data;
> + char *raw_data;
> int pc;
>
> BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long));
> @@ -139,13 +154,9 @@ __kprobes void *perf_trace_buf_prepare(i
>
> *rctxp = perf_swevent_get_recursion_context();
> if (*rctxp < 0)
> - goto err_recursion;
> -
> - trace_buf = rcu_dereference_sched(perf_trace_buf[*rctxp]);
> - if (!trace_buf)
> - goto err;
> + return NULL;
>
> - raw_data = per_cpu_ptr(trace_buf, smp_processor_id());
> + raw_data = per_cpu_ptr(perf_trace_buf[*rctxp], smp_processor_id());
Needs rcu_dereference_sched() too. And this could be this_cpu_ptr()
>
> /* zero the dead bytes from align to not leak stack to user */
> memset(&raw_data[size - sizeof(u64)], 0, sizeof(u64));
> @@ -155,9 +166,5 @@ __kprobes void *perf_trace_buf_prepare(i
> entry->type = type;
>
> return raw_data;
> -err:
> - perf_swevent_put_recursion_context(*rctxp);
> -err_recursion:
> - return NULL;
> }
> EXPORT_SYMBOL_GPL(perf_trace_buf_prepare);
> Index: linux-2.6/kernel/trace/trace_kprobe.c
> ===================================================================
> --- linux-2.6.orig/kernel/trace/trace_kprobe.c
> +++ linux-2.6/kernel/trace/trace_kprobe.c
> @@ -1341,6 +1341,7 @@ static __kprobes void kprobe_perf_func(s
> struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
> struct ftrace_event_call *call = &tp->call;
> struct kprobe_trace_entry_head *entry;
> + struct hlist_head *head;
> u8 *data;
> int size, __size, i;
> int rctx;
> @@ -1361,7 +1362,8 @@ static __kprobes void kprobe_perf_func(s
> for (i = 0; i < tp->nr_args; i++)
> call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset);
>
> - perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, call->perf_data);
> + head = per_cpu_ptr(call->perf_events, smp_processor_id());
Same
> + perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head);
> }
>
> /* Kretprobe profile handler */
> @@ -1371,6 +1373,7 @@ static __kprobes void kretprobe_perf_fun
> struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
> struct ftrace_event_call *call = &tp->call;
> struct kretprobe_trace_entry_head *entry;
> + struct hlist_head *head;
> u8 *data;
> int size, __size, i;
> int rctx;
> @@ -1392,8 +1395,8 @@ static __kprobes void kretprobe_perf_fun
> for (i = 0; i < tp->nr_args; i++)
> call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset);
>
> - perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1,
> - regs, call->perf_data);
> + head = per_cpu_ptr(call->perf_events, smp_processor_id());
ditto
> + perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1, regs, head);
> }
>
> static int probe_perf_enable(struct ftrace_event_call *call)
> Index: linux-2.6/kernel/trace/trace_syscalls.c
> ===================================================================
> --- linux-2.6.orig/kernel/trace/trace_syscalls.c
> +++ linux-2.6/kernel/trace/trace_syscalls.c
> @@ -438,6 +438,7 @@ static void perf_syscall_enter(struct pt
> {
> struct syscall_metadata *sys_data;
> struct syscall_trace_enter *rec;
> + struct hlist_head *head;
> int syscall_nr;
> int rctx;
> int size;
> @@ -467,8 +468,9 @@ static void perf_syscall_enter(struct pt
> rec->nr = syscall_nr;
> syscall_get_arguments(current, regs, 0, sys_data->nb_args,
> (unsigned long *)&rec->args);
> - perf_trace_buf_submit(rec, size, rctx, 0, 1, regs,
> - sys_data->enter_event->perf_data);
> +
> + head = per_cpu_ptr(sys_data->enter_event->perf_events, smp_processor_id());
ditto
> + perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head);
> }
>
> int perf_sysenter_enable(struct ftrace_event_call *call)
> @@ -510,6 +512,7 @@ static void perf_syscall_exit(struct pt_
> {
> struct syscall_metadata *sys_data;
> struct syscall_trace_exit *rec;
> + struct hlist_head *head;
> int syscall_nr;
> int rctx;
> int size;
> @@ -542,8 +545,8 @@ static void perf_syscall_exit(struct pt_
> rec->nr = syscall_nr;
> rec->ret = syscall_get_return_value(current, regs);
>
> - perf_trace_buf_submit(rec, size, rctx, 0, 1, regs,
> - sys_data->exit_event->perf_data);
> + head = per_cpu_ptr(sys_data->exit_event->perf_events, smp_processor_id());
and ditto :)
> + perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head);
> }
>
> int perf_sysexit_enable(struct ftrace_event_call *call)
> Index: linux-2.6/kernel/perf_event.c
> ===================================================================
> --- linux-2.6.orig/kernel/perf_event.c
> +++ linux-2.6/kernel/perf_event.c
> @@ -4004,9 +4004,6 @@ static void perf_swevent_add(struct perf
> perf_swevent_overflow(event, 0, nmi, data, regs);
> }
>
> -static int perf_tp_event_match(struct perf_event *event,
> - struct perf_sample_data *data);
> -
> static int perf_exclude_event(struct perf_event *event,
> struct pt_regs *regs)
> {
> @@ -4036,10 +4033,6 @@ static int perf_swevent_match(struct per
> if (perf_exclude_event(event, regs))
> return 0;
>
> - if (event->attr.type == PERF_TYPE_TRACEPOINT &&
> - !perf_tp_event_match(event, data))
> - return 0;
> -
> return 1;
> }
>
> @@ -4094,7 +4087,7 @@ end:
>
> int perf_swevent_get_recursion_context(void)
> {
> - struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context);
> + struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
> int rctx;
>
> if (in_nmi())
> @@ -4106,10 +4099,8 @@ int perf_swevent_get_recursion_context(v
> else
> rctx = 0;
>
> - if (cpuctx->recursion[rctx]) {
> - put_cpu_var(perf_cpu_context);
> + if (cpuctx->recursion[rctx])
> return -1;
> - }
>
> cpuctx->recursion[rctx]++;
> barrier();
> @@ -4123,7 +4114,6 @@ void perf_swevent_put_recursion_context(
> struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
> barrier();
> cpuctx->recursion[rctx]--;
> - put_cpu_var(perf_cpu_context);
> }
> EXPORT_SYMBOL_GPL(perf_swevent_put_recursion_context);
>
> @@ -4134,6 +4124,7 @@ void __perf_sw_event(u32 event_id, u64 n
> struct perf_sample_data data;
> int rctx;
>
> + preempt_disable_notrace();
Why is this needed? We have the recursion context protection already.
Ok, otherwise looks good.
^ permalink raw reply [flat|nested] 53+ messages in thread
* Re: [PATCH 05/10] perf-record: Share per-cpu buffers
2010-05-21 9:02 ` [PATCH 05/10] perf-record: Share per-cpu buffers Peter Zijlstra
@ 2010-05-21 9:44 ` Frederic Weisbecker
2010-05-21 10:03 ` Peter Zijlstra
2010-05-21 11:29 ` [tip:perf/core] " tip-bot for Peter Zijlstra
1 sibling, 1 reply; 53+ messages in thread
From: Frederic Weisbecker @ 2010-05-21 9:44 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Ingo Molnar, Paul Mackerras, Arnaldo Carvalho de Melo,
Steven Rostedt, LKML
On Fri, May 21, 2010 at 11:02:06AM +0200, Peter Zijlstra wrote:
> It seems a waste of space to create a buffer per event; share it per-cpu instead.
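Condensed from the hunk below: every non-leader event gets its output
redirected into the first event on that cpu (sketch, not verbatim):

	/* redirect this event's output into the per-cpu leader's mmap: */
	ret = ioctl(fd[nr_cpu][counter][thread_index],
		    PERF_EVENT_IOC_SET_OUTPUT, fd[nr_cpu][0][0]);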
>
> Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
> ---
> tools/perf/builtin-record.c | 52 +++++++++++++++++++++++---------------------
> 1 file changed, 28 insertions(+), 24 deletions(-)
>
> Index: linux-2.6/tools/perf/builtin-record.c
> ===================================================================
> --- linux-2.6.orig/tools/perf/builtin-record.c
> +++ linux-2.6/tools/perf/builtin-record.c
> @@ -85,7 +85,7 @@ struct mmap_data {
> unsigned int prev;
> };
>
> -static struct mmap_data *mmap_array[MAX_NR_CPUS][MAX_COUNTERS];
> +static struct mmap_data mmap_array[MAX_NR_CPUS];
>
> static unsigned long mmap_read_head(struct mmap_data *md)
> {
> @@ -380,18 +380,29 @@ try_again:
> if (group && group_fd == -1)
> group_fd = fd[nr_cpu][counter][thread_index];
>
> - event_array[nr_poll].fd = fd[nr_cpu][counter][thread_index];
> - event_array[nr_poll].events = POLLIN;
> - nr_poll++;
> -
> - mmap_array[nr_cpu][counter][thread_index].counter = counter;
> - mmap_array[nr_cpu][counter][thread_index].prev = 0;
> - mmap_array[nr_cpu][counter][thread_index].mask = mmap_pages*page_size - 1;
> - mmap_array[nr_cpu][counter][thread_index].base = mmap(NULL, (mmap_pages+1)*page_size,
> - PROT_READ|PROT_WRITE, MAP_SHARED, fd[nr_cpu][counter][thread_index], 0);
> - if (mmap_array[nr_cpu][counter][thread_index].base == MAP_FAILED) {
> - error("failed to mmap with %d (%s)\n", errno, strerror(errno));
> - exit(-1);
> + if (counter || thread_index) {
> + ret = ioctl(fd[nr_cpu][counter][thread_index],
> + PERF_EVENT_IOC_SET_OUTPUT,
> + fd[nr_cpu][0][0]);
> + if (ret) {
> + error("failed to set output: %d (%s)\n", errno,
> + strerror(errno));
> + exit(-1);
> + }
> + } else {
> + mmap_array[nr_cpu].counter = counter;
> + mmap_array[nr_cpu].prev = 0;
> + mmap_array[nr_cpu].mask = mmap_pages*page_size - 1;
If you do this multiplexing per cpu (which I think is a very good thing), you
should also increase the default value of mmap_pages, as it might not be big
enough anymore.
> + mmap_array[nr_cpu].base = mmap(NULL, (mmap_pages+1)*page_size,
> + PROT_READ|PROT_WRITE, MAP_SHARED, fd[nr_cpu][counter][thread_index], 0);
> + if (mmap_array[nr_cpu].base == MAP_FAILED) {
> + error("failed to mmap with %d (%s)\n", errno, strerror(errno));
> + exit(-1);
> + }
> +
> + event_array[nr_poll].fd = fd[nr_cpu][counter][thread_index];
> + event_array[nr_poll].events = POLLIN;
> + nr_poll++;
> }
>
> if (filter != NULL) {
> @@ -492,16 +503,11 @@ static struct perf_event_header finished
>
> static void mmap_read_all(void)
> {
> - int i, counter, thread;
> + int i;
>
> for (i = 0; i < nr_cpu; i++) {
> - for (counter = 0; counter < nr_counters; counter++) {
> - for (thread = 0; thread < thread_num; thread++) {
> - if (mmap_array[i][counter][thread].base)
> - mmap_read(&mmap_array[i][counter][thread]);
> - }
> -
> - }
> + if (mmap_array[i].base)
> + mmap_read(&mmap_array[i]);
> }
>
> if (perf_header__has_feat(&session->header, HEADER_TRACE_INFO))
> @@ -876,9 +882,7 @@ int cmd_record(int argc, const char **ar
> for (i = 0; i < MAX_NR_CPUS; i++) {
> for (j = 0; j < MAX_COUNTERS; j++) {
> fd[i][j] = malloc(sizeof(int)*thread_num);
> - mmap_array[i][j] = zalloc(
> - sizeof(struct mmap_data)*thread_num);
> - if (!fd[i][j] || !mmap_array[i][j])
> + if (!fd[i][j])
> return -ENOMEM;
> }
> }
>
>
^ permalink raw reply [flat|nested] 53+ messages in thread
* Re: [PATCH 02/10] perf, trace: Use per-tracepoint-per-cpu hlist to track events
2010-05-21 9:40 ` Frederic Weisbecker
@ 2010-05-21 10:02 ` Peter Zijlstra
2010-05-21 10:13 ` Frederic Weisbecker
0 siblings, 1 reply; 53+ messages in thread
From: Peter Zijlstra @ 2010-05-21 10:02 UTC (permalink / raw)
To: Frederic Weisbecker
Cc: Ingo Molnar, Paul Mackerras, Arnaldo Carvalho de Melo,
Steven Rostedt, LKML
On Fri, 2010-05-21 at 11:40 +0200, Frederic Weisbecker wrote:
> On Fri, May 21, 2010 at 11:02:03AM +0200, Peter Zijlstra wrote:
> > Also, avoid conditionals on the fast path by ordering with probe unregister
> > so that we should never get on the callback path without the data being there.
> >
> \
> > + head = per_cpu_ptr(event_call->perf_events, smp_processor_id());\
> Should this be rcu_dereference_sched()?
No, I removed all that rcu stuff and synchronized against the probe
unregister.
I assumed that after probe unregister a tracepoint callback doesn't
happen, which then guarantees we should never get !head.
> > + for_each_possible_cpu(cpu)
> > + INIT_HLIST_HEAD(per_cpu_ptr(list, cpu));
> > +
> > + tp_event->perf_events = list;
>
>
>
> I suspect this must be rcu_assign_pointer.
Same thing as above, I do this before probe register, so I see no need
for RCU.
> > + list = per_cpu_ptr(list, smp_processor_id());
> > + hlist_add_head_rcu(&p_event->hlist_entry, list);
>
>
>
> Ah, and maybe add a small comment, because using the hlist API here
> may puzzle more people than just me ;)
What exactly is the puzzlement about?
> > + if (--tp_event->perf_refcount > 0)
> > + return;
> > +
> > + tp_event->perf_event_disable(tp_event);
>
>
>
> Don't we need a synchronize_sched() here?
Doesn't probe unregister synchronize things against its own callback?
> > + free_percpu(tp_event->perf_events);
> > + tp_event->perf_events = NULL;
>
>
>
> And rcu_assign_pointer()?
Which again, makes any use of RCU unneeded.
> > + raw_data = per_cpu_ptr(perf_trace_buf[*rctxp], smp_processor_id());
>
>
>
> Needs rcu_dereference_sched() too. And this could be this_cpu_ptr()
Ahh! So that's what it's called.
> > + preempt_disable_notrace();
>
>
> Why is this needed? We have the recursion context protection already.
Because:
@@ -4094,7 +4087,7 @@ end:
int perf_swevent_get_recursion_context(void)
{
- struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context);
+ struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
int rctx;
if (in_nmi())
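get_cpu_var() disables preemption as a side effect, __get_cpu_var()
does not, so the caller now has to provide that itself. A sketch of
the classic definitions (simplified, assuming the percpu macros of
this era):

	#define get_cpu_var(var) (*({		\
		preempt_disable();		\
		&__get_cpu_var(var);		\
	}))
	#define put_cpu_var(var)	preempt_enable()

Hence the preempt_disable_notrace()/preempt_enable_notrace() pair
around the recursion context in __perf_sw_event().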
^ permalink raw reply [flat|nested] 53+ messages in thread
* Re: [PATCH 05/10] perf-record: Share per-cpu buffers
2010-05-21 9:44 ` Frederic Weisbecker
@ 2010-05-21 10:03 ` Peter Zijlstra
0 siblings, 0 replies; 53+ messages in thread
From: Peter Zijlstra @ 2010-05-21 10:03 UTC (permalink / raw)
To: Frederic Weisbecker
Cc: Ingo Molnar, Paul Mackerras, Arnaldo Carvalho de Melo,
Steven Rostedt, LKML
On Fri, 2010-05-21 at 11:44 +0200, Frederic Weisbecker wrote:
> If you do this multiplexing per cpu (which I think is a very good thing), you
> should also increase the default value of mmap_pages, as it might not be big
> enough anymore.
Hmm, I didn't notice any problems with that.
^ permalink raw reply [flat|nested] 53+ messages in thread
* Re: [PATCH 02/10] perf, trace: Use per-tracepoint-per-cpu hlist to track events
2010-05-21 10:02 ` Peter Zijlstra
@ 2010-05-21 10:13 ` Frederic Weisbecker
2010-05-21 10:15 ` Peter Zijlstra
2010-05-21 10:19 ` Peter Zijlstra
0 siblings, 2 replies; 53+ messages in thread
From: Frederic Weisbecker @ 2010-05-21 10:13 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Ingo Molnar, Paul Mackerras, Arnaldo Carvalho de Melo,
Steven Rostedt, LKML
On Fri, May 21, 2010 at 12:02:05PM +0200, Peter Zijlstra wrote:
> On Fri, 2010-05-21 at 11:40 +0200, Frederic Weisbecker wrote:
> > On Fri, May 21, 2010 at 11:02:03AM +0200, Peter Zijlstra wrote:
>
> > > Also, avoid conditionals on the fast path by ordering with probe unregister
> > > so that we should never get on the callback path without the data being there.
> > >
> > \
> > > + head = per_cpu_ptr(event_call->perf_events, smp_processor_id());\
>
> > Should this be rcu_dereference_sched()?
>
> No, I removed all that rcu stuff and synchronized against the probe
> unregister.
>
> I assumed that after probe unregister a tracepoint callback doesn't
> happen, which then guarantees we should never get !head.
I'm not sure about this. The tracepoints are called under rcu_read_lock(),
but there is no synchronize_rcu() after we unregister a tracepoint, which
means you can have a pending preempted one somewhere.
There is a call_rcu that removes the callbacks, but that only protects
the callbacks themselves.
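For reference, the probe call site is guarded by sched-RCU (a
condensed sketch of __DO_TRACE() from this era; the real macro has
more wrapping):

	rcu_read_lock_sched_notrace();
	it_func = rcu_dereference_sched((tp)->funcs);
	if (it_func) {
		do {
			((void (*)(proto))(*it_func))(args);
		} while (*(++it_func));
	}
	rcu_read_unlock_sched_notrace();

So the matching grace period to wait out in-flight callbacks is
synchronize_sched().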
>
> > > + for_each_possible_cpu(cpu)
> > > + INIT_HLIST_HEAD(per_cpu_ptr(list, cpu));
> > > +
> > > + tp_event->perf_events = list;
> >
> >
> >
> > I suspect this must be rcu_assign_pointer.
>
> Same thing as above, I do this before probe register, so I see no need
> for RCU.
>
> > > + list = per_cpu_ptr(list, smp_processor_id());
> > > + hlist_add_head_rcu(&p_event->hlist_entry, list);
> >
> >
> >
> > Ah, and maybe add a small comment, because using the hlist API here
> > may puzzle more people than just me ;)
>
> What exactly is the puzzlement about?
The fact that we use the hlist API not for its hash-list purpose but for a plain list.
> > > + if (--tp_event->perf_refcount > 0)
> > > + return;
> > > +
> > > + tp_event->perf_event_disable(tp_event);
> >
> >
> >
> > Don't we need a synchronize_sched() here?
>
> Doesn't probe unregister synchronize things against its own callback?
Maybe I missed it, but it doesn't seem so.
> > > + raw_data = per_cpu_ptr(perf_trace_buf[*rctxp], smp_processor_id());
> >
> >
> >
> > Needs rcu_dereference_sched() too. And this could be this_cpu_ptr()
>
> Ahh! So that's what it's called.
:)
> > > + preempt_disable_notrace();
> >
> >
> > Why is this needed? We have the recursion context protection already.
>
> Because:
>
> @@ -4094,7 +4087,7 @@ end:
>
> int perf_swevent_get_recursion_context(void)
> {
> - struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context);
> + struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
> int rctx;
>
> if (in_nmi())
>
Right.
Thanks.
^ permalink raw reply [flat|nested] 53+ messages in thread
* Re: [PATCH 02/10] perf, trace: Use per-tracepoint-per-cpu hlist to track events
2010-05-21 10:13 ` Frederic Weisbecker
@ 2010-05-21 10:15 ` Peter Zijlstra
2010-05-21 10:19 ` Frederic Weisbecker
2010-05-21 10:38 ` Ingo Molnar
2010-05-21 10:19 ` Peter Zijlstra
1 sibling, 2 replies; 53+ messages in thread
From: Peter Zijlstra @ 2010-05-21 10:15 UTC (permalink / raw)
To: Frederic Weisbecker
Cc: Ingo Molnar, Paul Mackerras, Arnaldo Carvalho de Melo,
Steven Rostedt, LKML
On Fri, 2010-05-21 at 12:13 +0200, Frederic Weisbecker wrote:
> > What exactly is the puzzlement about?
> The fact that we use the hlist API not for its hash-list purpose but for a plain list.
I might be missing the confusion, but hlists _are_ lists. It's just that their
structure is slightly different from the regular struct list_head stuff.
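Side by side (the definitions from include/linux/list.h):

	struct list_head {
		struct list_head *next, *prev;
	};

	struct hlist_head {
		struct hlist_node *first;
	};

	struct hlist_node {
		struct hlist_node *next, **pprev;
	};

The head being a single pointer is what makes hlists attractive here:
one word per cpu per tracepoint.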
^ permalink raw reply [flat|nested] 53+ messages in thread
* Re: [PATCH 02/10] perf, trace: Use per-tracepoint-per-cpu hlist to track events
2010-05-21 10:13 ` Frederic Weisbecker
2010-05-21 10:15 ` Peter Zijlstra
@ 2010-05-21 10:19 ` Peter Zijlstra
2010-05-21 10:21 ` Frederic Weisbecker
1 sibling, 1 reply; 53+ messages in thread
From: Peter Zijlstra @ 2010-05-21 10:19 UTC (permalink / raw)
To: Frederic Weisbecker
Cc: Ingo Molnar, Paul Mackerras, Arnaldo Carvalho de Melo,
Steven Rostedt, LKML
On Fri, 2010-05-21 at 12:13 +0200, Frederic Weisbecker wrote:
> > I assumed that after probe unregister a tracepoint callback doesn't
> > happen, which then guarantees we should never get !head.
> I'm not sure about this. The tracepoints are called under rcu_read_lock(),
> but there is no synchronize_rcu() after we unregister a tracepoint, which
> means you can have a pending preempted one somewhere.
>
> There is a call_rcu that removes the callbacks, but that only protects
> the callbacks themselves.
Ah, ok, so we should do probe_unregister + synchronize_sched().
That should ensure __DO_TRACE() doesn't call into it anymore.
/me goes make a patch
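The resulting teardown ordering, as it appears in patch 02b below:

	tp_event->perf_event_disable(tp_event); /* tracepoint_probe_unregister() */

	/*
	 * Wait for all preempt-disabled tracepoint callbacks to finish
	 * before freeing the data they dereference.
	 */
	synchronize_sched();

	free_percpu(tp_event->perf_events);
	tp_event->perf_events = NULL;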
^ permalink raw reply [flat|nested] 53+ messages in thread
* Re: [PATCH 02/10] perf, trace: Use per-tracepoint-per-cpu hlist to track events
2010-05-21 10:15 ` Peter Zijlstra
@ 2010-05-21 10:19 ` Frederic Weisbecker
2010-05-21 10:38 ` Ingo Molnar
1 sibling, 0 replies; 53+ messages in thread
From: Frederic Weisbecker @ 2010-05-21 10:19 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Ingo Molnar, Paul Mackerras, Arnaldo Carvalho de Melo,
Steven Rostedt, LKML
On Fri, May 21, 2010 at 12:15:43PM +0200, Peter Zijlstra wrote:
> On Fri, 2010-05-21 at 12:13 +0200, Frederic Weisbecker wrote:
> > > What exactly is the puzzlement about?
>
> > The fact that we use the hlist API not for its hash-list purpose but for a plain list.
>
> I might be missing the confusion, but hlists _are_ lists. It's just that their
> structure is slightly different from the regular struct list_head stuff.
>
It's just that people might expect this to be used for a hash-list purpose.
But never mind, that's just a small issue.
^ permalink raw reply [flat|nested] 53+ messages in thread
* Re: [PATCH 02/10] perf, trace: Use per-tracepoint-per-cpu hlist to track events
2010-05-21 10:19 ` Peter Zijlstra
@ 2010-05-21 10:21 ` Frederic Weisbecker
2010-05-21 10:34 ` Peter Zijlstra
0 siblings, 1 reply; 53+ messages in thread
From: Frederic Weisbecker @ 2010-05-21 10:21 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Ingo Molnar, Paul Mackerras, Arnaldo Carvalho de Melo,
Steven Rostedt, LKML
On Fri, May 21, 2010 at 12:19:09PM +0200, Peter Zijlstra wrote:
> On Fri, 2010-05-21 at 12:13 +0200, Frederic Weisbecker wrote:
> > > I assumed that after probe unregister a tracepoint callback doesn't
> > > happen, which then guarantees we should never get !head.
>
> > I'm not sure about this. The tracepoints are called under rcu_read_lock(),
> > but there is no synchronize_rcu() after we unregister a tracepoint, which
> > means you can have a pending preempted one somewhere.
> >
> > There is a call_rcu that removes the callbacks, but that only protects
> > the callbacks themselves.
>
> Ah, ok, so we should do probe_unregister + synchronize_sched().
> That should ensure __DO_TRACE() doesn't call into it anymore.
>
> /me goes make a patch
>
Yep. But that also means we need to rcu_dereference_sched() to access
the per cpu list of events.
Thanks.
^ permalink raw reply [flat|nested] 53+ messages in thread
* Re: [PATCH 02/10] perf, trace: Use per-tracepoint-per-cpu hlist to track events
2010-05-21 10:21 ` Frederic Weisbecker
@ 2010-05-21 10:34 ` Peter Zijlstra
2010-05-21 10:38 ` Frederic Weisbecker
0 siblings, 1 reply; 53+ messages in thread
From: Peter Zijlstra @ 2010-05-21 10:34 UTC (permalink / raw)
To: Frederic Weisbecker
Cc: Ingo Molnar, Paul Mackerras, Arnaldo Carvalho de Melo,
Steven Rostedt, LKML
On Fri, 2010-05-21 at 12:21 +0200, Frederic Weisbecker wrote:
> On Fri, May 21, 2010 at 12:19:09PM +0200, Peter Zijlstra wrote:
> > On Fri, 2010-05-21 at 12:13 +0200, Frederic Weisbecker wrote:
> > > > I assumed that after probe unregister a tracepoint callback doesn't
> > > > happen, which then guarantees we should never get !head.
> >
> > > I'm not sure about this. The tracepoints are called under rcu_read_lock(),
> > > but there is no synchronize_rcu() after we unregister a tracepoint, which
> > > means you can have a pending preempted one somewhere.
> > >
> > > There is a call_rcu that removes the callbacks, but that only protects
> > > the callbacks themselves.
> >
> > Ah, ok, so we should do probe_unregister + synchronize_sched().
> > That should ensure __DO_TRACE() doesn't call into it anymore.
> >
> > /me goes make a patch
> >
>
>
> Yep. But that also means we need to rcu_dereference_sched() to access
> the per cpu list of events.
Why?
The per-cpu vars are allocated and freed in a fully serialized manner;
there should be no races whatsoever.
^ permalink raw reply [flat|nested] 53+ messages in thread
* Re: [PATCH 02/10] perf, trace: Use per-tracepoint-per-cpu hlist to track events
2010-05-21 10:34 ` Peter Zijlstra
@ 2010-05-21 10:38 ` Frederic Weisbecker
0 siblings, 0 replies; 53+ messages in thread
From: Frederic Weisbecker @ 2010-05-21 10:38 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Ingo Molnar, Paul Mackerras, Arnaldo Carvalho de Melo,
Steven Rostedt, LKML
On Fri, May 21, 2010 at 12:34:03PM +0200, Peter Zijlstra wrote:
> On Fri, 2010-05-21 at 12:21 +0200, Frederic Weisbecker wrote:
> > On Fri, May 21, 2010 at 12:19:09PM +0200, Peter Zijlstra wrote:
> > > On Fri, 2010-05-21 at 12:13 +0200, Frederic Weisbecker wrote:
> > > > > I assumed that after probe unregister a tracepoint callback doesn't
> > > > > happen, which then guarantees we should never get !head.
> > >
> > > > I'm not sure about this. The tracepoints are called under rcu_read_lock(),
> > > > but there is no synchronize_rcu() after we unregister a tracepoint, which
> > > > means you can have a pending preempted one somewhere.
> > > >
> > > > There is a call_rcu that removes the callbacks, but that only protects
> > > > the callbacks themselves.
> > >
> > > Ah, ok, so we should do probe_unregister + synchronize_sched().
> > > That should ensure __DO_TRACE() doesn't call into it anymore.
> > >
> > > /me goes make a patch
> > >
> >
> >
> > Yep. But that also means we need to rcu_dereference_sched() to access
> > the per cpu list of events.
>
> Why?
>
> The per-cpu vars are allocated and freed in a fully serialized manner;
> there should be no races whatsoever.
Ah right, I was confused.
^ permalink raw reply [flat|nested] 53+ messages in thread
* Re: [PATCH 02/10] perf, trace: Use per-tracepoint-per-cpu hlist to track events
2010-05-21 10:15 ` Peter Zijlstra
2010-05-21 10:19 ` Frederic Weisbecker
@ 2010-05-21 10:38 ` Ingo Molnar
2010-05-21 10:51 ` Ingo Molnar
1 sibling, 1 reply; 53+ messages in thread
From: Ingo Molnar @ 2010-05-21 10:38 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Frederic Weisbecker, Paul Mackerras, Arnaldo Carvalho de Melo,
Steven Rostedt, LKML
* Peter Zijlstra <peterz@infradead.org> wrote:
> On Fri, 2010-05-21 at 12:13 +0200, Frederic Weisbecker wrote:
> > > What exactly is the puzzlement about?
>
> > The fact that we use the hlist API not for its hash-list
> > purpose but for a plain list.
>
> I might be missing the confusion, but hlists _are_ lists.
> It's just that their structure is slightly different from
> the regular struct list_head stuff.
Using an API in such a mixed way may cause puzzlement ;-)
Fortunately we've got the ultimate anti-puzzlement weapon:
code comments.
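E.g. something like (hypothetical):

	/*
	 * We use the hlist machinery as a plain RCU-managed linked
	 * list of events here; there is no hashing involved.
	 */
	hlist_add_head_rcu(&p_event->hlist_entry, list);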
Ingo
^ permalink raw reply [flat|nested] 53+ messages in thread
* [PATCH 02b/10] perf, trace: Fix probe unregister race
2010-05-21 9:02 ` [PATCH 02/10] perf, trace: Use per-tracepoint-per-cpu hlist to track events Peter Zijlstra
2010-05-21 9:40 ` Frederic Weisbecker
@ 2010-05-21 10:41 ` Peter Zijlstra
2010-05-21 10:43 ` Frederic Weisbecker
2010-05-31 7:19 ` [tip:perf/urgent] perf_events, " tip-bot for Peter Zijlstra
2010-05-21 11:28 ` [tip:perf/core] perf, trace: Optimize tracepoints by using per-tracepoint-per-cpu hlist to track events tip-bot for Peter Zijlstra
2010-05-21 14:04 ` [PATCH 02/10] perf, trace: Use " Steven Rostedt
3 siblings, 2 replies; 53+ messages in thread
From: Peter Zijlstra @ 2010-05-21 10:41 UTC (permalink / raw)
To: Ingo Molnar
Cc: Paul Mackerras, Arnaldo Carvalho de Melo, Frederic Weisbecker,
Steven Rostedt, LKML
tracepoint_probe_unregister() does not synchronize against the probe callbacks,
so do that explicitly. This properly serializes the callbacks and the freeing of
the data used therein.
Also, use this_cpu_ptr() where possible.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
include/trace/ftrace.h | 2 +-
kernel/trace/trace_event_perf.c | 10 ++++++++--
kernel/trace/trace_kprobe.c | 4 ++--
kernel/trace/trace_syscalls.c | 4 ++--
4 files changed, 13 insertions(+), 7 deletions(-)
Index: linux-2.6/include/trace/ftrace.h
===================================================================
--- linux-2.6.orig/include/trace/ftrace.h
+++ linux-2.6/include/trace/ftrace.h
@@ -791,7 +791,7 @@ perf_trace_templ_##call(struct ftrace_ev
\
{ assign; } \
\
- head = per_cpu_ptr(event_call->perf_events, smp_processor_id());\
+ head = this_cpu_ptr(event_call->perf_events); \
perf_trace_buf_submit(entry, __entry_size, rctx, __addr, \
__count, __regs, head); \
}
Index: linux-2.6/kernel/trace/trace_event_perf.c
===================================================================
--- linux-2.6.orig/kernel/trace/trace_event_perf.c
+++ linux-2.6/kernel/trace/trace_event_perf.c
@@ -109,7 +109,7 @@ int perf_trace_enable(struct perf_event
if (WARN_ON_ONCE(!list))
return -EINVAL;
- list = per_cpu_ptr(list, smp_processor_id());
+ list = this_cpu_ptr(list);
hlist_add_head_rcu(&p_event->hlist_entry, list);
return 0;
@@ -130,6 +130,12 @@ void perf_trace_destroy(struct perf_even
tp_event->perf_event_disable(tp_event);
+ /*
+ * Ensure our callback won't be called anymore. See
+ * tracepoint_probe_unregister() and __DO_TRACE().
+ */
+ synchronize_sched();
+
free_percpu(tp_event->perf_events);
tp_event->perf_events = NULL;
@@ -156,7 +162,7 @@ __kprobes void *perf_trace_buf_prepare(i
if (*rctxp < 0)
return NULL;
- raw_data = per_cpu_ptr(perf_trace_buf[*rctxp], smp_processor_id());
+ raw_data = this_cpu_ptr(perf_trace_buf[*rctxp]);
/* zero the dead bytes from align to not leak stack to user */
memset(&raw_data[size - sizeof(u64)], 0, sizeof(u64));
Index: linux-2.6/kernel/trace/trace_kprobe.c
===================================================================
--- linux-2.6.orig/kernel/trace/trace_kprobe.c
+++ linux-2.6/kernel/trace/trace_kprobe.c
@@ -1362,7 +1362,7 @@ static __kprobes void kprobe_perf_func(s
for (i = 0; i < tp->nr_args; i++)
call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset);
- head = per_cpu_ptr(call->perf_events, smp_processor_id());
+ head = this_cpu_ptr(call->perf_events);
perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head);
}
@@ -1395,7 +1395,7 @@ static __kprobes void kretprobe_perf_fun
for (i = 0; i < tp->nr_args; i++)
call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset);
- head = per_cpu_ptr(call->perf_events, smp_processor_id());
+ head = this_cpu_ptr(call->perf_events);
perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1, regs, head);
}
Index: linux-2.6/kernel/trace/trace_syscalls.c
===================================================================
--- linux-2.6.orig/kernel/trace/trace_syscalls.c
+++ linux-2.6/kernel/trace/trace_syscalls.c
@@ -469,7 +469,7 @@ static void perf_syscall_enter(struct pt
syscall_get_arguments(current, regs, 0, sys_data->nb_args,
(unsigned long *)&rec->args);
- head = per_cpu_ptr(sys_data->enter_event->perf_events, smp_processor_id());
+ head = this_cpu_ptr(sys_data->enter_event->perf_events);
perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head);
}
@@ -545,7 +545,7 @@ static void perf_syscall_exit(struct pt_
rec->nr = syscall_nr;
rec->ret = syscall_get_return_value(current, regs);
- head = per_cpu_ptr(sys_data->exit_event->perf_events, smp_processor_id());
+ head = this_cpu_ptr(sys_data->exit_event->perf_events);
perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head);
}
^ permalink raw reply [flat|nested] 53+ messages in thread
* Re: [PATCH 02b/10] perf, trace: Fix probe unregister race
2010-05-21 10:41 ` [PATCH 02b/10] perf, trace: Fix probe unregister race Peter Zijlstra
@ 2010-05-21 10:43 ` Frederic Weisbecker
2010-05-31 7:19 ` [tip:perf/urgent] perf_events, " tip-bot for Peter Zijlstra
1 sibling, 0 replies; 53+ messages in thread
From: Frederic Weisbecker @ 2010-05-21 10:43 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Ingo Molnar, Paul Mackerras, Arnaldo Carvalho de Melo,
Steven Rostedt, LKML
On Fri, May 21, 2010 at 12:41:16PM +0200, Peter Zijlstra wrote:
> tracepoint_probe_unregister() does not synchronize against the probe callbacks,
> so do that explicitly. This properly serializes the callbacks and the freeing of
> the data used therein.
>
> Also, use this_cpu_ptr() where possible.
>
> Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Frederic Weisbecker <fweisbec@gmail.com>
(and your previous patches too)
> ---
> include/trace/ftrace.h | 2 +-
> kernel/trace/trace_event_perf.c | 10 ++++++++--
> kernel/trace/trace_kprobe.c | 4 ++--
> kernel/trace/trace_syscalls.c | 4 ++--
> 4 files changed, 13 insertions(+), 7 deletions(-)
>
> Index: linux-2.6/include/trace/ftrace.h
> ===================================================================
> --- linux-2.6.orig/include/trace/ftrace.h
> +++ linux-2.6/include/trace/ftrace.h
> @@ -791,7 +791,7 @@ perf_trace_templ_##call(struct ftrace_ev
> \
> { assign; } \
> \
> - head = per_cpu_ptr(event_call->perf_events, smp_processor_id());\
> + head = this_cpu_ptr(event_call->perf_events); \
> perf_trace_buf_submit(entry, __entry_size, rctx, __addr, \
> __count, __regs, head); \
> }
> Index: linux-2.6/kernel/trace/trace_event_perf.c
> ===================================================================
> --- linux-2.6.orig/kernel/trace/trace_event_perf.c
> +++ linux-2.6/kernel/trace/trace_event_perf.c
> @@ -109,7 +109,7 @@ int perf_trace_enable(struct perf_event
> if (WARN_ON_ONCE(!list))
> return -EINVAL;
>
> - list = per_cpu_ptr(list, smp_processor_id());
> + list = this_cpu_ptr(list);
> hlist_add_head_rcu(&p_event->hlist_entry, list);
>
> return 0;
> @@ -130,6 +130,12 @@ void perf_trace_destroy(struct perf_even
>
> tp_event->perf_event_disable(tp_event);
>
> + /*
> + * Ensure our callback won't be called anymore. See
> + * tracepoint_probe_unregister() and __DO_TRACE().
> + */
> + synchronize_sched();
> +
> free_percpu(tp_event->perf_events);
> tp_event->perf_events = NULL;
>
> @@ -156,7 +162,7 @@ __kprobes void *perf_trace_buf_prepare(i
> if (*rctxp < 0)
> return NULL;
>
> - raw_data = per_cpu_ptr(perf_trace_buf[*rctxp], smp_processor_id());
> + raw_data = this_cpu_ptr(perf_trace_buf[*rctxp]);
>
> /* zero the dead bytes from align to not leak stack to user */
> memset(&raw_data[size - sizeof(u64)], 0, sizeof(u64));
> Index: linux-2.6/kernel/trace/trace_kprobe.c
> ===================================================================
> --- linux-2.6.orig/kernel/trace/trace_kprobe.c
> +++ linux-2.6/kernel/trace/trace_kprobe.c
> @@ -1362,7 +1362,7 @@ static __kprobes void kprobe_perf_func(s
> for (i = 0; i < tp->nr_args; i++)
> call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset);
>
> - head = per_cpu_ptr(call->perf_events, smp_processor_id());
> + head = this_cpu_ptr(call->perf_events);
> perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head);
> }
>
> @@ -1395,7 +1395,7 @@ static __kprobes void kretprobe_perf_fun
> for (i = 0; i < tp->nr_args; i++)
> call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset);
>
> - head = per_cpu_ptr(call->perf_events, smp_processor_id());
> + head = this_cpu_ptr(call->perf_events);
> perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1, regs, head);
> }
>
> Index: linux-2.6/kernel/trace/trace_syscalls.c
> ===================================================================
> --- linux-2.6.orig/kernel/trace/trace_syscalls.c
> +++ linux-2.6/kernel/trace/trace_syscalls.c
> @@ -469,7 +469,7 @@ static void perf_syscall_enter(struct pt
> syscall_get_arguments(current, regs, 0, sys_data->nb_args,
> (unsigned long *)&rec->args);
>
> - head = per_cpu_ptr(sys_data->enter_event->perf_events, smp_processor_id());
> + head = this_cpu_ptr(sys_data->enter_event->perf_events);
> perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head);
> }
>
> @@ -545,7 +545,7 @@ static void perf_syscall_exit(struct pt_
> rec->nr = syscall_nr;
> rec->ret = syscall_get_return_value(current, regs);
>
> - head = per_cpu_ptr(sys_data->exit_event->perf_events, smp_processor_id());
> + head = this_cpu_ptr(sys_data->exit_event->perf_events);
> perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head);
> }
>
>
>
^ permalink raw reply [flat|nested] 53+ messages in thread
* Re: [PATCH 02/10] perf, trace: Use per-tracepoint-per-cpu hlist to track events
2010-05-21 10:38 ` Ingo Molnar
@ 2010-05-21 10:51 ` Ingo Molnar
0 siblings, 0 replies; 53+ messages in thread
From: Ingo Molnar @ 2010-05-21 10:51 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Frederic Weisbecker, Paul Mackerras, Arnaldo Carvalho de Melo,
Steven Rostedt, LKML
* Ingo Molnar <mingo@elte.hu> wrote:
>
> * Peter Zijlstra <peterz@infradead.org> wrote:
>
> > On Fri, 2010-05-21 at 12:13 +0200, Frederic Weisbecker wrote:
> > > > What exactly is the puzzlement about?
> >
> > > The fact that we use the hlist API not for its hash-list
> > > purpose but for a plain list.
> >
> > I might be missing the confusion, but hlists _are_ lists.
> > It's just that their structure is slightly different from
> > the regular struct list_head stuff.
>
> Using an API in such a mixed way may cause puzzlement
> ;-)
I take that back - it's clear from the function prototype
that we are using a mixed API: (hlist,list).
Sorry,
Ingo
^ permalink raw reply [flat|nested] 53+ messages in thread
* Re: [PATCH 09/10] perf: Remove more fastpath code
2010-05-21 9:02 ` [PATCH 09/10] perf: Remove more fastpath code Peter Zijlstra
@ 2010-05-21 11:15 ` Steven Rostedt
2010-05-21 11:18 ` Peter Zijlstra
2010-05-21 11:30 ` [tip:perf/core] perf: Remove more code from the fastpath tip-bot for Peter Zijlstra
1 sibling, 1 reply; 53+ messages in thread
From: Steven Rostedt @ 2010-05-21 11:15 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Ingo Molnar, Paul Mackerras, Arnaldo Carvalho de Melo,
Frederic Weisbecker, LKML
On Fri, 2010-05-21 at 11:02 +0200, Peter Zijlstra wrote:
> plain text document attachment (perf-opt-output-more.patch)
> Sanity checks cost instructions
>
> Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
> ---
> -
> - /*
> - * Check we didn't copy past our reservation window, taking the
> - * possible unsigned int wrap into account.
> - */
> - if (WARN_ON_ONCE(((long)(handle->head - handle->offset)) < 0))
> - return;
Are you sure you want to remove this? I mean, sanity checks are a small
cost (I have several in the ftrace ring buffer) and can catch bugs in
case something happens during development. Especially when the code is
under a lot of flux.
I even have a sanity check that Ingo asked me to add, which would detect
if a tracer (not the ring buffer, but the user of the ring buffer)
recursed on itself. I think that check detected one bug in the function
tracer in the beginning, but hasn't caught anything since.
-- Steve
> -
> do {
> - unsigned long size = min(handle->size, len);
> + unsigned long size = min_t(unsigned long, handle->size, len);
>
> memcpy(handle->addr, buf, size);
>
> @@ -3054,15 +3045,12 @@ int perf_output_begin(struct perf_output
> goto fail;
> } while (local_cmpxchg(&data->head, offset, head) != offset);
>
> - handle->offset = offset;
> - handle->head = head;
> -
> if (head - local_read(&data->wakeup) > data->watermark)
> local_add(data->watermark, &data->wakeup);
>
> - handle->page = handle->offset >> (PAGE_SHIFT + page_order(data));
> + handle->page = offset >> (PAGE_SHIFT + page_order(data));
> handle->page &= data->nr_pages - 1;
> - handle->size = handle->offset & ((PAGE_SIZE << page_order(data)) - 1);
> + handle->size = offset & ((PAGE_SIZE << page_order(data)) - 1);
> handle->addr = data->data_pages[handle->page];
> handle->addr += handle->size;
> handle->size = (PAGE_SIZE << page_order(data)) - handle->size;
>
>
^ permalink raw reply [flat|nested] 53+ messages in thread
* Re: [PATCH 09/10] perf: Remove more fastpath code
2010-05-21 11:15 ` Steven Rostedt
@ 2010-05-21 11:18 ` Peter Zijlstra
0 siblings, 0 replies; 53+ messages in thread
From: Peter Zijlstra @ 2010-05-21 11:18 UTC (permalink / raw)
To: rostedt
Cc: Ingo Molnar, Paul Mackerras, Arnaldo Carvalho de Melo,
Frederic Weisbecker, LKML
On Fri, 2010-05-21 at 07:15 -0400, Steven Rostedt wrote:
> > - if (WARN_ON_ONCE(((long)(handle->head - handle->offset)) < 0))
> > - return;
>
> Are you sure you want to remove this? I mean, sanity checks are a small
> cost (I have several in the ftrace ring buffer) and can catch bugs in
> case something happens during development. Especially when the code is
> under a lot of flux.
Yeah, I pondered adding a CONFIG_PERF_DEBUG for that... the thing is, I was
tracking 2 variables just for this, and that seemed a bit excessive.
> I even have a sanity check that Ingo asked me to add, which would detect
> if a tracer (not the ring buffer, but the user of the ring buffer)
> recursed on itself. I think that check detected one bug in the function
> tracer in the begging, but hasn't caught anything since.
I still have recursion checks.
^ permalink raw reply [flat|nested] 53+ messages in thread
* [tip:perf/core] perf, trace: Optimize tracepoints by using per-tracepoint-per-cpu hlist to track events
2010-05-21 9:02 ` [PATCH 02/10] perf, trace: Use per-tracepoint-per-cpu hlist to track events Peter Zijlstra
2010-05-21 9:40 ` Frederic Weisbecker
2010-05-21 10:41 ` [PATCH 02b/10] perf, trace: Fix probe unregister race Peter Zijlstra
@ 2010-05-21 11:28 ` tip-bot for Peter Zijlstra
2010-05-21 14:04 ` [PATCH 02/10] perf, trace: Use " Steven Rostedt
3 siblings, 0 replies; 53+ messages in thread
From: tip-bot for Peter Zijlstra @ 2010-05-21 11:28 UTC (permalink / raw)
To: linux-tip-commits
Cc: linux-kernel, paulus, acme, hpa, mingo, a.p.zijlstra, efault,
fweisbec, rostedt, tglx, mingo
Commit-ID: 1c024eca51fdc965290acf342ae16a476c2189d0
Gitweb: http://git.kernel.org/tip/1c024eca51fdc965290acf342ae16a476c2189d0
Author: Peter Zijlstra <a.p.zijlstra@chello.nl>
AuthorDate: Wed, 19 May 2010 14:02:22 +0200
Committer: Ingo Molnar <mingo@elte.hu>
CommitDate: Fri, 21 May 2010 11:37:56 +0200
perf, trace: Optimize tracepoints by using per-tracepoint-per-cpu hlist to track events
Avoid the swevent hash-table by using per-tracepoint
hlists.
Also, avoid conditionals on the fast path by ordering
with probe unregister so that we should never get on
the callback path without the data being there.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
LKML-Reference: <20100521090710.473188012@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
include/linux/ftrace_event.h | 16 +++---
include/linux/perf_event.h | 6 +-
include/trace/ftrace.h | 4 +-
kernel/perf_event.c | 94 +++++++++++++++--------------
kernel/trace/trace_event_perf.c | 127 ++++++++++++++++++++------------------
kernel/trace/trace_kprobe.c | 9 ++-
kernel/trace/trace_syscalls.c | 11 ++-
7 files changed, 143 insertions(+), 124 deletions(-)
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 126071b..7024b7d 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -133,7 +133,7 @@ struct ftrace_event_call {
void *data;
int perf_refcount;
- void *perf_data;
+ struct hlist_head *perf_events;
int (*perf_event_enable)(struct ftrace_event_call *);
void (*perf_event_disable)(struct ftrace_event_call *);
};
@@ -192,9 +192,11 @@ struct perf_event;
DECLARE_PER_CPU(struct pt_regs, perf_trace_regs);
-extern int perf_trace_enable(int event_id, void *data);
-extern void perf_trace_disable(int event_id);
-extern int ftrace_profile_set_filter(struct perf_event *event, int event_id,
+extern int perf_trace_init(struct perf_event *event);
+extern void perf_trace_destroy(struct perf_event *event);
+extern int perf_trace_enable(struct perf_event *event);
+extern void perf_trace_disable(struct perf_event *event);
+extern int ftrace_profile_set_filter(struct perf_event *event, int event_id,
char *filter_str);
extern void ftrace_profile_free_filter(struct perf_event *event);
extern void *perf_trace_buf_prepare(int size, unsigned short type,
@@ -202,11 +204,9 @@ extern void *perf_trace_buf_prepare(int size, unsigned short type,
static inline void
perf_trace_buf_submit(void *raw_data, int size, int rctx, u64 addr,
- u64 count, struct pt_regs *regs, void *event)
+ u64 count, struct pt_regs *regs, void *head)
{
- struct trace_entry *entry = raw_data;
-
- perf_tp_event(entry->type, addr, count, raw_data, size, regs, event);
+ perf_tp_event(addr, count, raw_data, size, regs, head);
perf_swevent_put_recursion_context(rctx);
}
#endif
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index fe50347..7cd7b35 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -727,6 +727,7 @@ struct perf_event {
perf_overflow_handler_t overflow_handler;
#ifdef CONFIG_EVENT_TRACING
+ struct ftrace_event_call *tp_event;
struct event_filter *filter;
#endif
@@ -992,8 +993,9 @@ static inline bool perf_paranoid_kernel(void)
}
extern void perf_event_init(void);
-extern void perf_tp_event(int event_id, u64 addr, u64 count, void *record,
- int entry_size, struct pt_regs *regs, void *event);
+extern void perf_tp_event(u64 addr, u64 count, void *record,
+ int entry_size, struct pt_regs *regs,
+ struct hlist_head *head);
extern void perf_bp_event(struct perf_event *event, void *data);
#ifndef perf_misc_flags
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index f282885..4eb2148 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -768,6 +768,7 @@ perf_trace_templ_##call(struct ftrace_event_call *event_call, \
struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\
struct ftrace_raw_##call *entry; \
u64 __addr = 0, __count = 1; \
+ struct hlist_head *head; \
int __entry_size; \
int __data_size; \
int rctx; \
@@ -790,8 +791,9 @@ perf_trace_templ_##call(struct ftrace_event_call *event_call, \
\
{ assign; } \
\
+ head = per_cpu_ptr(event_call->perf_events, smp_processor_id());\
perf_trace_buf_submit(entry, __entry_size, rctx, __addr, \
- __count, __regs, event_call->perf_data); \
+ __count, __regs, head); \
}
#undef DEFINE_EVENT
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 45b7aec..3f2cc31 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -4005,9 +4005,6 @@ static void perf_swevent_add(struct perf_event *event, u64 nr,
perf_swevent_overflow(event, 0, nmi, data, regs);
}
-static int perf_tp_event_match(struct perf_event *event,
- struct perf_sample_data *data);
-
static int perf_exclude_event(struct perf_event *event,
struct pt_regs *regs)
{
@@ -4037,10 +4034,6 @@ static int perf_swevent_match(struct perf_event *event,
if (perf_exclude_event(event, regs))
return 0;
- if (event->attr.type == PERF_TYPE_TRACEPOINT &&
- !perf_tp_event_match(event, data))
- return 0;
-
return 1;
}
@@ -4122,7 +4115,7 @@ end:
int perf_swevent_get_recursion_context(void)
{
- struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context);
+ struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
int rctx;
if (in_nmi())
@@ -4134,10 +4127,8 @@ int perf_swevent_get_recursion_context(void)
else
rctx = 0;
- if (cpuctx->recursion[rctx]) {
- put_cpu_var(perf_cpu_context);
+ if (cpuctx->recursion[rctx])
return -1;
- }
cpuctx->recursion[rctx]++;
barrier();
@@ -4151,7 +4142,6 @@ void perf_swevent_put_recursion_context(int rctx)
struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
barrier();
cpuctx->recursion[rctx]--;
- put_cpu_var(perf_cpu_context);
}
EXPORT_SYMBOL_GPL(perf_swevent_put_recursion_context);
@@ -4162,6 +4152,7 @@ void __perf_sw_event(u32 event_id, u64 nr, int nmi,
struct perf_sample_data data;
int rctx;
+ preempt_disable_notrace();
rctx = perf_swevent_get_recursion_context();
if (rctx < 0)
return;
@@ -4171,6 +4162,7 @@ void __perf_sw_event(u32 event_id, u64 nr, int nmi,
do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, nmi, &data, regs);
perf_swevent_put_recursion_context(rctx);
+ preempt_enable_notrace();
}
static void perf_swevent_read(struct perf_event *event)
@@ -4486,11 +4478,43 @@ static int swevent_hlist_get(struct perf_event *event)
#ifdef CONFIG_EVENT_TRACING
-void perf_tp_event(int event_id, u64 addr, u64 count, void *record,
- int entry_size, struct pt_regs *regs, void *event)
+static const struct pmu perf_ops_tracepoint = {
+ .enable = perf_trace_enable,
+ .disable = perf_trace_disable,
+ .read = perf_swevent_read,
+ .unthrottle = perf_swevent_unthrottle,
+};
+
+static int perf_tp_filter_match(struct perf_event *event,
+ struct perf_sample_data *data)
+{
+ void *record = data->raw->data;
+
+ if (likely(!event->filter) || filter_match_preds(event->filter, record))
+ return 1;
+ return 0;
+}
+
+static int perf_tp_event_match(struct perf_event *event,
+ struct perf_sample_data *data,
+ struct pt_regs *regs)
+{
+ if (perf_exclude_event(event, regs))
+ return 0;
+
+ if (!perf_tp_filter_match(event, data))
+ return 0;
+
+ return 1;
+}
+
+void perf_tp_event(u64 addr, u64 count, void *record, int entry_size,
+ struct pt_regs *regs, struct hlist_head *head)
{
- const int type = PERF_TYPE_TRACEPOINT;
struct perf_sample_data data;
+ struct perf_event *event;
+ struct hlist_node *node;
+
struct perf_raw_record raw = {
.size = entry_size,
.data = record,
@@ -4499,30 +4523,18 @@ void perf_tp_event(int event_id, u64 addr, u64 count, void *record,
perf_sample_data_init(&data, addr);
data.raw = &raw;
- if (!event) {
- do_perf_sw_event(type, event_id, count, 1, &data, regs);
- return;
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(event, node, head, hlist_entry) {
+ if (perf_tp_event_match(event, &data, regs))
+ perf_swevent_add(event, count, 1, &data, regs);
}
-
- if (perf_swevent_match(event, type, event_id, &data, regs))
- perf_swevent_add(event, count, 1, &data, regs);
+ rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(perf_tp_event);
-static int perf_tp_event_match(struct perf_event *event,
- struct perf_sample_data *data)
-{
- void *record = data->raw->data;
-
- if (likely(!event->filter) || filter_match_preds(event->filter, record))
- return 1;
- return 0;
-}
-
static void tp_perf_event_destroy(struct perf_event *event)
{
- perf_trace_disable(event->attr.config);
- swevent_hlist_put(event);
+ perf_trace_destroy(event);
}
static const struct pmu *tp_perf_event_init(struct perf_event *event)
@@ -4538,17 +4550,13 @@ static const struct pmu *tp_perf_event_init(struct perf_event *event)
!capable(CAP_SYS_ADMIN))
return ERR_PTR(-EPERM);
- if (perf_trace_enable(event->attr.config, event))
+ err = perf_trace_init(event);
+ if (err)
return NULL;
event->destroy = tp_perf_event_destroy;
- err = swevent_hlist_get(event);
- if (err) {
- perf_trace_disable(event->attr.config);
- return ERR_PTR(err);
- }
- return &perf_ops_generic;
+ return &perf_ops_tracepoint;
}
static int perf_event_set_filter(struct perf_event *event, void __user *arg)
@@ -4576,12 +4584,6 @@ static void perf_event_free_filter(struct perf_event *event)
#else
-static int perf_tp_event_match(struct perf_event *event,
- struct perf_sample_data *data)
-{
- return 1;
-}
-
static const struct pmu *tp_perf_event_init(struct perf_event *event)
{
return NULL;
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index a1304f8..39d5ea7 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -23,14 +23,25 @@ typedef typeof(unsigned long [PERF_MAX_TRACE_SIZE / sizeof(unsigned long)])
/* Count the events in use (per event id, not per instance) */
static int total_ref_count;
-static int perf_trace_event_enable(struct ftrace_event_call *event, void *data)
+static int perf_trace_event_init(struct ftrace_event_call *tp_event,
+ struct perf_event *p_event)
{
+ struct hlist_head *list;
int ret = -ENOMEM;
+ int cpu;
- if (event->perf_refcount++ > 0) {
- event->perf_data = NULL;
+ p_event->tp_event = tp_event;
+ if (tp_event->perf_refcount++ > 0)
return 0;
- }
+
+ list = alloc_percpu(struct hlist_head);
+ if (!list)
+ goto fail;
+
+ for_each_possible_cpu(cpu)
+ INIT_HLIST_HEAD(per_cpu_ptr(list, cpu));
+
+ tp_event->perf_events = list;
if (!total_ref_count) {
char *buf;
@@ -39,20 +50,20 @@ static int perf_trace_event_enable(struct ftrace_event_call *event, void *data)
for (i = 0; i < 4; i++) {
buf = (char *)alloc_percpu(perf_trace_t);
if (!buf)
- goto fail_buf;
+ goto fail;
- rcu_assign_pointer(perf_trace_buf[i], buf);
+ perf_trace_buf[i] = buf;
}
}
- ret = event->perf_event_enable(event);
- if (!ret) {
- event->perf_data = data;
- total_ref_count++;
- return 0;
- }
+ ret = tp_event->perf_event_enable(tp_event);
+ if (ret)
+ goto fail;
-fail_buf:
+ total_ref_count++;
+ return 0;
+
+fail:
if (!total_ref_count) {
int i;
@@ -61,21 +72,26 @@ fail_buf:
perf_trace_buf[i] = NULL;
}
}
- event->perf_refcount--;
+
+ if (!--tp_event->perf_refcount) {
+ free_percpu(tp_event->perf_events);
+ tp_event->perf_events = NULL;
+ }
return ret;
}
-int perf_trace_enable(int event_id, void *data)
+int perf_trace_init(struct perf_event *p_event)
{
- struct ftrace_event_call *event;
+ struct ftrace_event_call *tp_event;
+ int event_id = p_event->attr.config;
int ret = -EINVAL;
mutex_lock(&event_mutex);
- list_for_each_entry(event, &ftrace_events, list) {
- if (event->id == event_id && event->perf_event_enable &&
- try_module_get(event->mod)) {
- ret = perf_trace_event_enable(event, data);
+ list_for_each_entry(tp_event, &ftrace_events, list) {
+ if (tp_event->id == event_id && tp_event->perf_event_enable &&
+ try_module_get(tp_event->mod)) {
+ ret = perf_trace_event_init(tp_event, p_event);
break;
}
}
@@ -84,53 +100,52 @@ int perf_trace_enable(int event_id, void *data)
return ret;
}
-static void perf_trace_event_disable(struct ftrace_event_call *event)
+int perf_trace_enable(struct perf_event *p_event)
{
- if (--event->perf_refcount > 0)
- return;
+ struct ftrace_event_call *tp_event = p_event->tp_event;
+ struct hlist_head *list;
- event->perf_event_disable(event);
+ list = tp_event->perf_events;
+ if (WARN_ON_ONCE(!list))
+ return -EINVAL;
- if (!--total_ref_count) {
- char *buf[4];
- int i;
-
- for (i = 0; i < 4; i++) {
- buf[i] = perf_trace_buf[i];
- rcu_assign_pointer(perf_trace_buf[i], NULL);
- }
+ list = per_cpu_ptr(list, smp_processor_id());
+ hlist_add_head_rcu(&p_event->hlist_entry, list);
- /*
- * Ensure every events in profiling have finished before
- * releasing the buffers
- */
- synchronize_sched();
+ return 0;
+}
- for (i = 0; i < 4; i++)
- free_percpu(buf[i]);
- }
+void perf_trace_disable(struct perf_event *p_event)
+{
+ hlist_del_rcu(&p_event->hlist_entry);
}
-void perf_trace_disable(int event_id)
+void perf_trace_destroy(struct perf_event *p_event)
{
- struct ftrace_event_call *event;
+ struct ftrace_event_call *tp_event = p_event->tp_event;
+ int i;
- mutex_lock(&event_mutex);
- list_for_each_entry(event, &ftrace_events, list) {
- if (event->id == event_id) {
- perf_trace_event_disable(event);
- module_put(event->mod);
- break;
+ if (--tp_event->perf_refcount > 0)
+ return;
+
+ tp_event->perf_event_disable(tp_event);
+
+ free_percpu(tp_event->perf_events);
+ tp_event->perf_events = NULL;
+
+ if (!--total_ref_count) {
+ for (i = 0; i < 4; i++) {
+ free_percpu(perf_trace_buf[i]);
+ perf_trace_buf[i] = NULL;
}
}
- mutex_unlock(&event_mutex);
}
__kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
struct pt_regs *regs, int *rctxp)
{
struct trace_entry *entry;
- char *trace_buf, *raw_data;
+ char *raw_data;
int pc;
BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long));
@@ -139,13 +154,9 @@ __kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
*rctxp = perf_swevent_get_recursion_context();
if (*rctxp < 0)
- goto err_recursion;
-
- trace_buf = rcu_dereference_sched(perf_trace_buf[*rctxp]);
- if (!trace_buf)
- goto err;
+ return NULL;
- raw_data = per_cpu_ptr(trace_buf, smp_processor_id());
+ raw_data = per_cpu_ptr(perf_trace_buf[*rctxp], smp_processor_id());
/* zero the dead bytes from align to not leak stack to user */
memset(&raw_data[size - sizeof(u64)], 0, sizeof(u64));
@@ -155,9 +166,5 @@ __kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
entry->type = type;
return raw_data;
-err:
- perf_swevent_put_recursion_context(*rctxp);
-err_recursion:
- return NULL;
}
EXPORT_SYMBOL_GPL(perf_trace_buf_prepare);
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 20c96de..4681f60 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1341,6 +1341,7 @@ static __kprobes void kprobe_perf_func(struct kprobe *kp,
struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
struct ftrace_event_call *call = &tp->call;
struct kprobe_trace_entry_head *entry;
+ struct hlist_head *head;
u8 *data;
int size, __size, i;
int rctx;
@@ -1361,7 +1362,8 @@ static __kprobes void kprobe_perf_func(struct kprobe *kp,
for (i = 0; i < tp->nr_args; i++)
call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset);
- perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, call->perf_data);
+ head = per_cpu_ptr(call->perf_events, smp_processor_id());
+ perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head);
}
/* Kretprobe profile handler */
@@ -1371,6 +1373,7 @@ static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri,
struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
struct ftrace_event_call *call = &tp->call;
struct kretprobe_trace_entry_head *entry;
+ struct hlist_head *head;
u8 *data;
int size, __size, i;
int rctx;
@@ -1392,8 +1395,8 @@ static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri,
for (i = 0; i < tp->nr_args; i++)
call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset);
- perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1,
- regs, call->perf_data);
+ head = per_cpu_ptr(call->perf_events, smp_processor_id());
+ perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1, regs, head);
}
static int probe_perf_enable(struct ftrace_event_call *call)
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index a657cef..eb769f2 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -438,6 +438,7 @@ static void perf_syscall_enter(struct pt_regs *regs, long id)
{
struct syscall_metadata *sys_data;
struct syscall_trace_enter *rec;
+ struct hlist_head *head;
int syscall_nr;
int rctx;
int size;
@@ -467,8 +468,9 @@ static void perf_syscall_enter(struct pt_regs *regs, long id)
rec->nr = syscall_nr;
syscall_get_arguments(current, regs, 0, sys_data->nb_args,
(unsigned long *)&rec->args);
- perf_trace_buf_submit(rec, size, rctx, 0, 1, regs,
- sys_data->enter_event->perf_data);
+
+ head = per_cpu_ptr(sys_data->enter_event->perf_events, smp_processor_id());
+ perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head);
}
int perf_sysenter_enable(struct ftrace_event_call *call)
@@ -510,6 +512,7 @@ static void perf_syscall_exit(struct pt_regs *regs, long ret)
{
struct syscall_metadata *sys_data;
struct syscall_trace_exit *rec;
+ struct hlist_head *head;
int syscall_nr;
int rctx;
int size;
@@ -542,8 +545,8 @@ static void perf_syscall_exit(struct pt_regs *regs, long ret)
rec->nr = syscall_nr;
rec->ret = syscall_get_return_value(current, regs);
- perf_trace_buf_submit(rec, size, rctx, 0, 1, regs,
- sys_data->exit_event->perf_data);
+ head = per_cpu_ptr(sys_data->exit_event->perf_events, smp_processor_id());
+ perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head);
}
int perf_sysexit_enable(struct ftrace_event_call *call)
^ permalink raw reply related [flat|nested] 53+ messages in thread
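The heart of the change above is structural: instead of hashing software
events globally, each tracepoint keeps a per-cpu list of its listeners, so
firing a tracepoint walks exactly the events registered on that cpu. A
minimal user-space sketch of that shape (editorial, not from the patch;
tp_event, p_event, tp_register, tp_fire and the fixed NCPU standing in for
alloc_percpu() are all illustrative names):

#include <stdio.h>

#define NCPU 4

struct p_event {
        int id;
        struct p_event *next;                   /* singly linked, like hlist */
};

struct tp_event {
        struct p_event *per_cpu_head[NCPU];     /* stands in for perf_events */
};

void tp_register(struct tp_event *tp, struct p_event *ev, int cpu)
{
        ev->next = tp->per_cpu_head[cpu];       /* hlist_add_head_rcu() analogue */
        tp->per_cpu_head[cpu] = ev;
}

void tp_fire(struct tp_event *tp, int cpu)
{
        struct p_event *ev;

        /* walk only this cpu's listeners: no hash lookup, no strangers */
        for (ev = tp->per_cpu_head[cpu]; ev; ev = ev->next)
                printf("deliver to event %d on cpu %d\n", ev->id, cpu);
}

int main(void)
{
        struct tp_event tp = { { NULL } };
        struct p_event a = { .id = 1 }, b = { .id = 2 };

        tp_register(&tp, &a, 0);
        tp_register(&tp, &b, 1);
        tp_fire(&tp, 0);                        /* delivers only to event 1 */
        return 0;
}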
* [tip:perf/core] perf: Ensure that IOC_OUTPUT isn't used to create multi-writer buffers
2010-05-21 9:02 ` [PATCH 03/10] perf: Ensure IOC_OUTPUT isnt used to create multi-writer buffers Peter Zijlstra
@ 2010-05-21 11:28 ` tip-bot for Peter Zijlstra
0 siblings, 0 replies; 53+ messages in thread
From: tip-bot for Peter Zijlstra @ 2010-05-21 11:28 UTC (permalink / raw)
To: linux-tip-commits
Cc: linux-kernel, paulus, acme, hpa, mingo, a.p.zijlstra, efault,
fweisbec, rostedt, tglx, mingo
Commit-ID: 0f139300c9057c16b5833a4636b715b104fe0baa
Gitweb: http://git.kernel.org/tip/0f139300c9057c16b5833a4636b715b104fe0baa
Author: Peter Zijlstra <a.p.zijlstra@chello.nl>
AuthorDate: Thu, 20 May 2010 14:35:15 +0200
Committer: Ingo Molnar <mingo@elte.hu>
CommitDate: Fri, 21 May 2010 11:37:57 +0200
perf: Ensure that IOC_OUTPUT isn't used to create multi-writer buffers
Since we want to ensure each buffer has only a single
writer, we must avoid creating buffers with multiple writers.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
LKML-Reference: <20100521090710.528215873@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
kernel/perf_event.c | 19 +++++++++++++++++++
1 files changed, 19 insertions(+), 0 deletions(-)
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 3f2cc31..7a93252 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -4920,6 +4920,13 @@ static int perf_event_set_output(struct perf_event *event, int output_fd)
int fput_needed = 0;
int ret = -EINVAL;
+ /*
+ * Don't allow output of inherited per-task events. This would
+ * create performance issues due to cross cpu access.
+ */
+ if (event->cpu == -1 && event->attr.inherit)
+ return -EINVAL;
+
if (!output_fd)
goto set;
@@ -4940,6 +4947,18 @@ static int perf_event_set_output(struct perf_event *event, int output_fd)
if (event->data)
goto out;
+ /*
+ * Don't allow cross-cpu buffers
+ */
+ if (output_event->cpu != event->cpu)
+ goto out;
+
+ /*
+ * If its not a per-cpu buffer, it must be the same task.
+ */
+ if (output_event->cpu == -1 && output_event->ctx != event->ctx)
+ goto out;
+
atomic_long_inc(&output_file->f_count);
set:
^ permalink raw reply related [flat|nested] 53+ messages in thread
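Distilled from the two hunks above, the redirection policy amounts to a
three-clause predicate. A standalone sketch (the struct below is a
stand-in for illustration, not the kernel's struct perf_event):

#include <stdbool.h>
#include <stdio.h>

struct ev {
        int cpu;                /* -1 means per-task */
        bool inherit;
        void *ctx;              /* task context, compared by identity */
};

bool can_redirect(const struct ev *event, const struct ev *output)
{
        /* inherited per-task events would write cross-cpu: refuse */
        if (event->cpu == -1 && event->inherit)
                return false;
        /* both events must be bound to the same cpu ... */
        if (output->cpu != event->cpu)
                return false;
        /* ... and per-task buffers must also share the task context */
        if (output->cpu == -1 && output->ctx != event->ctx)
                return false;
        return true;
}

int main(void)
{
        struct ev a = { .cpu = 0 }, b = { .cpu = 1 };

        printf("%d\n", can_redirect(&a, &b));   /* 0: cross-cpu refused */
        return 0;
}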
* [tip:perf/core] perf-record: Remove -M
2010-05-21 9:02 ` [PATCH 04/10] perf-record: Remove -M Peter Zijlstra
@ 2010-05-21 11:28 ` tip-bot for Peter Zijlstra
0 siblings, 0 replies; 53+ messages in thread
From: tip-bot for Peter Zijlstra @ 2010-05-21 11:28 UTC (permalink / raw)
To: linux-tip-commits
Cc: linux-kernel, paulus, acme, hpa, mingo, a.p.zijlstra, efault,
fweisbec, rostedt, tglx, mingo
Commit-ID: 57adc51dce9102b6641269dd04f5b99aac83b820
Gitweb: http://git.kernel.org/tip/57adc51dce9102b6641269dd04f5b99aac83b820
Author: Peter Zijlstra <a.p.zijlstra@chello.nl>
AuthorDate: Thu, 20 May 2010 14:39:55 +0200
Committer: Ingo Molnar <mingo@elte.hu>
CommitDate: Fri, 21 May 2010 11:37:57 +0200
perf-record: Remove -M
Since it is not allowed to create cross-cpu (or
cross-task) buffers, this option is no longer valid.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
LKML-Reference: <20100521090710.582740993@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
tools/perf/builtin-record.c | 36 ++++++++++++------------------------
1 files changed, 12 insertions(+), 24 deletions(-)
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index e672269..94e210f 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -61,8 +61,6 @@ static bool call_graph = false;
static bool inherit_stat = false;
static bool no_samples = false;
static bool sample_address = false;
-static bool multiplex = false;
-static int multiplex_fd = -1;
static long samples = 0;
static u64 bytes_written = 0;
@@ -366,27 +364,19 @@ try_again:
*/
if (group && group_fd == -1)
group_fd = fd[nr_cpu][counter][thread_index];
- if (multiplex && multiplex_fd == -1)
- multiplex_fd = fd[nr_cpu][counter][thread_index];
- if (multiplex && fd[nr_cpu][counter][thread_index] != multiplex_fd) {
-
- ret = ioctl(fd[nr_cpu][counter][thread_index], PERF_EVENT_IOC_SET_OUTPUT, multiplex_fd);
- assert(ret != -1);
- } else {
- event_array[nr_poll].fd = fd[nr_cpu][counter][thread_index];
- event_array[nr_poll].events = POLLIN;
- nr_poll++;
-
- mmap_array[nr_cpu][counter][thread_index].counter = counter;
- mmap_array[nr_cpu][counter][thread_index].prev = 0;
- mmap_array[nr_cpu][counter][thread_index].mask = mmap_pages*page_size - 1;
- mmap_array[nr_cpu][counter][thread_index].base = mmap(NULL, (mmap_pages+1)*page_size,
- PROT_READ|PROT_WRITE, MAP_SHARED, fd[nr_cpu][counter][thread_index], 0);
- if (mmap_array[nr_cpu][counter][thread_index].base == MAP_FAILED) {
- error("failed to mmap with %d (%s)\n", errno, strerror(errno));
- exit(-1);
- }
+ event_array[nr_poll].fd = fd[nr_cpu][counter][thread_index];
+ event_array[nr_poll].events = POLLIN;
+ nr_poll++;
+
+ mmap_array[nr_cpu][counter][thread_index].counter = counter;
+ mmap_array[nr_cpu][counter][thread_index].prev = 0;
+ mmap_array[nr_cpu][counter][thread_index].mask = mmap_pages*page_size - 1;
+ mmap_array[nr_cpu][counter][thread_index].base = mmap(NULL, (mmap_pages+1)*page_size,
+ PROT_READ|PROT_WRITE, MAP_SHARED, fd[nr_cpu][counter][thread_index], 0);
+ if (mmap_array[nr_cpu][counter][thread_index].base == MAP_FAILED) {
+ error("failed to mmap with %d (%s)\n", errno, strerror(errno));
+ exit(-1);
}
if (filter != NULL) {
@@ -820,8 +810,6 @@ static const struct option options[] = {
"Sample addresses"),
OPT_BOOLEAN('n', "no-samples", &no_samples,
"don't sample"),
- OPT_BOOLEAN('M', "multiplex", &multiplex,
- "multiplex counter output in a single channel"),
OPT_END()
};
^ permalink raw reply related [flat|nested] 53+ messages in thread
* [tip:perf/core] perf-record: Share per-cpu buffers
2010-05-21 9:02 ` [PATCH 05/10] perf-record: Share per-cpu buffers Peter Zijlstra
2010-05-21 9:44 ` Frederic Weisbecker
@ 2010-05-21 11:29 ` tip-bot for Peter Zijlstra
1 sibling, 0 replies; 53+ messages in thread
From: tip-bot for Peter Zijlstra @ 2010-05-21 11:29 UTC (permalink / raw)
To: linux-tip-commits
Cc: linux-kernel, paulus, acme, hpa, mingo, a.p.zijlstra, efault,
fweisbec, rostedt, tglx, mingo
Commit-ID: 0e2e63dd608bf5844ffae7bf7d860de18a62724c
Gitweb: http://git.kernel.org/tip/0e2e63dd608bf5844ffae7bf7d860de18a62724c
Author: Peter Zijlstra <a.p.zijlstra@chello.nl>
AuthorDate: Thu, 20 May 2010 14:45:26 +0200
Committer: Ingo Molnar <mingo@elte.hu>
CommitDate: Fri, 21 May 2010 11:37:58 +0200
perf-record: Share per-cpu buffers
It seems a waste of space to create a buffer per
event, so share one buffer per cpu instead.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
LKML-Reference: <20100521090710.634824884@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
tools/perf/builtin-record.c | 52 +++++++++++++++++++++++-------------------
1 files changed, 28 insertions(+), 24 deletions(-)
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 94e210f..9bc8905 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -82,7 +82,7 @@ struct mmap_data {
unsigned int prev;
};
-static struct mmap_data *mmap_array[MAX_NR_CPUS][MAX_COUNTERS];
+static struct mmap_data mmap_array[MAX_NR_CPUS];
static unsigned long mmap_read_head(struct mmap_data *md)
{
@@ -365,18 +365,29 @@ try_again:
if (group && group_fd == -1)
group_fd = fd[nr_cpu][counter][thread_index];
- event_array[nr_poll].fd = fd[nr_cpu][counter][thread_index];
- event_array[nr_poll].events = POLLIN;
- nr_poll++;
-
- mmap_array[nr_cpu][counter][thread_index].counter = counter;
- mmap_array[nr_cpu][counter][thread_index].prev = 0;
- mmap_array[nr_cpu][counter][thread_index].mask = mmap_pages*page_size - 1;
- mmap_array[nr_cpu][counter][thread_index].base = mmap(NULL, (mmap_pages+1)*page_size,
- PROT_READ|PROT_WRITE, MAP_SHARED, fd[nr_cpu][counter][thread_index], 0);
- if (mmap_array[nr_cpu][counter][thread_index].base == MAP_FAILED) {
- error("failed to mmap with %d (%s)\n", errno, strerror(errno));
- exit(-1);
+ if (counter || thread_index) {
+ ret = ioctl(fd[nr_cpu][counter][thread_index],
+ PERF_EVENT_IOC_SET_OUTPUT,
+ fd[nr_cpu][0][0]);
+ if (ret) {
+ error("failed to set output: %d (%s)\n", errno,
+ strerror(errno));
+ exit(-1);
+ }
+ } else {
+ mmap_array[nr_cpu].counter = counter;
+ mmap_array[nr_cpu].prev = 0;
+ mmap_array[nr_cpu].mask = mmap_pages*page_size - 1;
+ mmap_array[nr_cpu].base = mmap(NULL, (mmap_pages+1)*page_size,
+ PROT_READ|PROT_WRITE, MAP_SHARED, fd[nr_cpu][counter][thread_index], 0);
+ if (mmap_array[nr_cpu].base == MAP_FAILED) {
+ error("failed to mmap with %d (%s)\n", errno, strerror(errno));
+ exit(-1);
+ }
+
+ event_array[nr_poll].fd = fd[nr_cpu][counter][thread_index];
+ event_array[nr_poll].events = POLLIN;
+ nr_poll++;
}
if (filter != NULL) {
@@ -477,16 +488,11 @@ static struct perf_event_header finished_round_event = {
static void mmap_read_all(void)
{
- int i, counter, thread;
+ int i;
for (i = 0; i < nr_cpu; i++) {
- for (counter = 0; counter < nr_counters; counter++) {
- for (thread = 0; thread < thread_num; thread++) {
- if (mmap_array[i][counter][thread].base)
- mmap_read(&mmap_array[i][counter][thread]);
- }
-
- }
+ if (mmap_array[i].base)
+ mmap_read(&mmap_array[i]);
}
if (perf_header__has_feat(&session->header, HEADER_TRACE_INFO))
@@ -861,9 +867,7 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
for (i = 0; i < MAX_NR_CPUS; i++) {
for (j = 0; j < MAX_COUNTERS; j++) {
fd[i][j] = malloc(sizeof(int)*thread_num);
- mmap_array[i][j] = zalloc(
- sizeof(struct mmap_data)*thread_num);
- if (!fd[i][j] || !mmap_array[i][j])
+ if (!fd[i][j])
return -ENOMEM;
}
}
^ permalink raw reply related [flat|nested] 53+ messages in thread
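The scheme above reduces to: mmap() only the first counter opened on each
cpu, and redirect every later counter on that cpu into the same buffer
with PERF_EVENT_IOC_SET_OUTPUT. A compilable sketch of just that loop,
assuming a hypothetical open_counter() wrapper around perf_event_open()
and eliding all error handling:

#include <sys/ioctl.h>
#include <sys/mman.h>
#include <linux/perf_event.h>

#define MMAP_PAGES 128

extern int open_counter(int counter, int cpu);  /* hypothetical wrapper */
extern long page_size;

void *map_cpu_buffers(int nr_counters, int cpu)
{
        void *base = NULL;
        int first = -1, fd, i;

        for (i = 0; i < nr_counters; i++) {
                fd = open_counter(i, cpu);
                if (first == -1) {
                        first = fd;
                        base = mmap(NULL, (MMAP_PAGES + 1) * page_size,
                                    PROT_READ | PROT_WRITE, MAP_SHARED,
                                    fd, 0);
                } else {
                        /* route this counter's output into the first buffer */
                        ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, first);
                }
        }
        return base;
}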
* [tip:perf/core] perf: Fix wakeup storm for RO mmap()s
2010-05-21 9:02 ` [PATCH 06/10] perf: Fix wakeup storm for RO mmap()s Peter Zijlstra
@ 2010-05-21 11:29 ` tip-bot for Peter Zijlstra
0 siblings, 0 replies; 53+ messages in thread
From: tip-bot for Peter Zijlstra @ 2010-05-21 11:29 UTC (permalink / raw)
To: linux-tip-commits
Cc: linux-kernel, paulus, acme, hpa, mingo, a.p.zijlstra, efault,
fweisbec, rostedt, tglx, mingo
Commit-ID: adb8e118f288dc4c569ac9a89010b81a4745fbf0
Gitweb: http://git.kernel.org/tip/adb8e118f288dc4c569ac9a89010b81a4745fbf0
Author: Peter Zijlstra <a.p.zijlstra@chello.nl>
AuthorDate: Thu, 20 May 2010 16:21:55 +0200
Committer: Ingo Molnar <mingo@elte.hu>
CommitDate: Fri, 21 May 2010 11:37:58 +0200
perf: Fix wakeup storm for RO mmap()s
RO mmap()s don't update the tail pointer, so
comparing against it for determining the written data
size doesn't really do any good.
Keep track of when we last did a wakeup, and compare
against that.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
LKML-Reference: <20100521090710.684479310@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
include/linux/perf_event.h | 2 +-
kernel/perf_event.c | 4 ++--
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 7cd7b35..7098ebb 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -599,7 +599,7 @@ struct perf_mmap_data {
local_t head; /* write position */
local_t nest; /* nested writers */
local_t events; /* event limit */
- local_t wakeup; /* needs a wakeup */
+ local_t wakeup; /* wakeup stamp */
local_t lost; /* nr records lost */
long watermark; /* wakeup watermark */
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 7a93252..1531e0b 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -3056,8 +3056,8 @@ int perf_output_begin(struct perf_output_handle *handle,
handle->offset = offset;
handle->head = head;
- if (head - tail > data->watermark)
- local_inc(&data->wakeup);
+ if (head - local_read(&data->wakeup) > data->watermark)
+ local_add(data->watermark, &data->wakeup);
if (have_lost) {
lost_event.header.type = PERF_RECORD_LOST;
^ permalink raw reply related [flat|nested] 53+ messages in thread
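Side by side, the old and new wakeup tests look like this (an editorial
sketch; plain unsigned longs stand in for local_t, and "tail" is the
pointer a read-only mapping never advances):

/* before: once head - tail exceeds the watermark it stays exceeded,
 * so every subsequent record triggers a wakeup */
int need_wakeup_old(unsigned long head, unsigned long tail, long watermark)
{
        return head - tail > watermark;
}

/* after: compare against where we last woke up and advance that stamp
 * by one watermark each time, so wakeups fire once per watermark */
int need_wakeup_new(unsigned long head, unsigned long *wakeup, long watermark)
{
        if (head - *wakeup > watermark) {
                *wakeup += watermark;
                return 1;
        }
        return 0;
}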
* [tip:perf/core] perf: Optimize perf_output_copy()
2010-05-21 9:02 ` [PATCH 07/10] perf: Optimize perf_output_copy Peter Zijlstra
@ 2010-05-21 11:29 ` tip-bot for Peter Zijlstra
0 siblings, 0 replies; 53+ messages in thread
From: tip-bot for Peter Zijlstra @ 2010-05-21 11:29 UTC (permalink / raw)
To: linux-tip-commits
Cc: linux-kernel, paulus, acme, hpa, mingo, a.p.zijlstra, efault,
fweisbec, rostedt, tglx, mingo
Commit-ID: 5d967a8be636a4f301a8daad642bd1007299d9ec
Gitweb: http://git.kernel.org/tip/5d967a8be636a4f301a8daad642bd1007299d9ec
Author: Peter Zijlstra <a.p.zijlstra@chello.nl>
AuthorDate: Thu, 20 May 2010 16:46:39 +0200
Committer: Ingo Molnar <mingo@elte.hu>
CommitDate: Fri, 21 May 2010 11:37:59 +0200
perf: Optimize perf_output_copy()
Reduce the clutter in perf_output_copy() by keeping
an iterator in perf_output_handle.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
LKML-Reference: <20100521090710.742809176@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
include/linux/perf_event.h | 3 ++
kernel/perf_event.c | 54 +++++++++++++++++++++----------------------
2 files changed, 29 insertions(+), 28 deletions(-)
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 7098ebb..7bd17f0 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -806,6 +806,9 @@ struct perf_output_handle {
unsigned long head;
unsigned long offset;
unsigned long wakeup;
+ unsigned long size;
+ void *addr;
+ int page;
int nmi;
int sample;
};
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 1531e0b..b67549a 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -2961,39 +2961,30 @@ again:
void perf_output_copy(struct perf_output_handle *handle,
const void *buf, unsigned int len)
{
- unsigned int pages_mask;
- unsigned long offset;
- unsigned int size;
- void **pages;
-
- offset = handle->offset;
- pages_mask = handle->data->nr_pages - 1;
- pages = handle->data->data_pages;
-
- do {
- unsigned long page_offset;
- unsigned long page_size;
- int nr;
-
- nr = (offset >> PAGE_SHIFT) & pages_mask;
- page_size = 1UL << (handle->data->data_order + PAGE_SHIFT);
- page_offset = offset & (page_size - 1);
- size = min_t(unsigned int, page_size - page_offset, len);
-
- memcpy(pages[nr] + page_offset, buf, size);
-
- len -= size;
- buf += size;
- offset += size;
- } while (len);
-
- handle->offset = offset;
+ handle->offset += len;
/*
* Check we didn't copy past our reservation window, taking the
* possible unsigned int wrap into account.
*/
- WARN_ON_ONCE(((long)(handle->head - handle->offset)) < 0);
+ if (WARN_ON_ONCE(((long)(handle->head - handle->offset)) < 0))
+ return;
+
+ do {
+ unsigned long size = min(handle->size, len);
+
+ memcpy(handle->addr, buf, size);
+
+ len -= size;
+ handle->addr += size;
+ handle->size -= size;
+ if (!handle->size) {
+ handle->page++;
+ handle->page &= handle->data->nr_pages - 1;
+ handle->addr = handle->data->data_pages[handle->page];
+ handle->size = PAGE_SIZE << handle->data->data_order;
+ }
+ } while (len);
}
int perf_output_begin(struct perf_output_handle *handle,
@@ -3059,6 +3050,13 @@ int perf_output_begin(struct perf_output_handle *handle,
if (head - local_read(&data->wakeup) > data->watermark)
local_add(data->watermark, &data->wakeup);
+ handle->page = handle->offset >> (PAGE_SHIFT + data->data_order);
+ handle->page &= data->nr_pages - 1;
+ handle->size = handle->offset & ((PAGE_SIZE << data->data_order) - 1);
+ handle->addr = data->data_pages[handle->page];
+ handle->addr += handle->size;
+ handle->size = (PAGE_SIZE << data->data_order) - handle->size;
+
if (have_lost) {
lost_event.header.type = PERF_RECORD_LOST;
lost_event.header.misc = 0;
^ permalink raw reply related [flat|nested] 53+ messages in thread
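Stripped of data_order and the reservation check, the iterator fits in a
few lines. A sketch over a power-of-two ring of fixed-size pages (names
and the flat PAGE_SZ are illustrative; the handle must be primed the way
perf_output_begin() primes it, with addr and size pointing into a page):

#include <string.h>

#define PAGE_SZ 4096

struct handle {
        char **pages;           /* power-of-two ring of data pages */
        unsigned int nr_pages;
        int page;               /* current page index */
        char *addr;             /* write cursor within that page */
        unsigned long size;     /* bytes left on the current page */
};

void output_copy(struct handle *h, const char *buf, unsigned int len)
{
        do {
                unsigned long size = len < h->size ? len : h->size;

                memcpy(h->addr, buf, size);
                buf += size;
                len -= size;
                h->addr += size;
                h->size -= size;
                if (!h->size) {         /* step the cursor to the next page */
                        h->page = (h->page + 1) & (h->nr_pages - 1);
                        h->addr = h->pages[h->page];
                        h->size = PAGE_SZ;
                }
        } while (len);
}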
* [tip:perf/core] perf: Optimize the !vmalloc backed buffer
2010-05-21 9:02 ` [PATCH 08/10] perf: Optimize the !vmalloc backed buffer Peter Zijlstra
@ 2010-05-21 11:29 ` tip-bot for Peter Zijlstra
0 siblings, 0 replies; 53+ messages in thread
From: tip-bot for Peter Zijlstra @ 2010-05-21 11:29 UTC (permalink / raw)
To: linux-tip-commits
Cc: linux-kernel, paulus, acme, hpa, mingo, a.p.zijlstra, efault,
fweisbec, rostedt, tglx, mingo
Commit-ID: 3cafa9fbb5c1d564b7b8e7224f493effbf04ffee
Gitweb: http://git.kernel.org/tip/3cafa9fbb5c1d564b7b8e7224f493effbf04ffee
Author: Peter Zijlstra <a.p.zijlstra@chello.nl>
AuthorDate: Thu, 20 May 2010 19:07:56 +0200
Committer: Ingo Molnar <mingo@elte.hu>
CommitDate: Fri, 21 May 2010 11:37:59 +0200
perf: Optimize the !vmalloc backed buffer
Reduce code and data by using the knowledge that for
!PERF_USE_VMALLOC data_order is always 0.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
LKML-Reference: <20100521090710.795019386@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
include/linux/perf_event.h | 2 +-
kernel/perf_event.c | 41 ++++++++++++++++++++++++++---------------
2 files changed, 27 insertions(+), 16 deletions(-)
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 7bd17f0..09cd9c1 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -588,8 +588,8 @@ struct perf_mmap_data {
struct rcu_head rcu_head;
#ifdef CONFIG_PERF_USE_VMALLOC
struct work_struct work;
+ int page_order; /* allocation order */
#endif
- int data_order; /* allocation order */
int nr_pages; /* nr of data pages */
int writable; /* are we writable */
int nr_locked; /* nr pages mlocked */
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index b67549a..953ce46 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -2297,11 +2297,6 @@ unlock:
rcu_read_unlock();
}
-static unsigned long perf_data_size(struct perf_mmap_data *data)
-{
- return data->nr_pages << (PAGE_SHIFT + data->data_order);
-}
-
#ifndef CONFIG_PERF_USE_VMALLOC
/*
@@ -2359,7 +2354,6 @@ perf_mmap_data_alloc(struct perf_event *event, int nr_pages)
goto fail_data_pages;
}
- data->data_order = 0;
data->nr_pages = nr_pages;
return data;
@@ -2395,6 +2389,11 @@ static void perf_mmap_data_free(struct perf_mmap_data *data)
kfree(data);
}
+static inline int page_order(struct perf_mmap_data *data)
+{
+ return 0;
+}
+
#else
/*
@@ -2403,10 +2402,15 @@ static void perf_mmap_data_free(struct perf_mmap_data *data)
* Required for architectures that have d-cache aliasing issues.
*/
+static inline int page_order(struct perf_mmap_data *data)
+{
+ return data->page_order;
+}
+
static struct page *
perf_mmap_to_page(struct perf_mmap_data *data, unsigned long pgoff)
{
- if (pgoff > (1UL << data->data_order))
+ if (pgoff > (1UL << page_order(data)))
return NULL;
return vmalloc_to_page((void *)data->user_page + pgoff * PAGE_SIZE);
@@ -2426,7 +2430,7 @@ static void perf_mmap_data_free_work(struct work_struct *work)
int i, nr;
data = container_of(work, struct perf_mmap_data, work);
- nr = 1 << data->data_order;
+ nr = 1 << page_order(data);
base = data->user_page;
for (i = 0; i < nr + 1; i++)
@@ -2465,7 +2469,7 @@ perf_mmap_data_alloc(struct perf_event *event, int nr_pages)
data->user_page = all_buf;
data->data_pages[0] = all_buf + PAGE_SIZE;
- data->data_order = ilog2(nr_pages);
+ data->page_order = ilog2(nr_pages);
data->nr_pages = 1;
return data;
@@ -2479,6 +2483,11 @@ fail:
#endif
+static unsigned long perf_data_size(struct perf_mmap_data *data)
+{
+ return data->nr_pages << (PAGE_SHIFT + page_order(data));
+}
+
static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
struct perf_event *event = vma->vm_file->private_data;
@@ -2979,10 +2988,12 @@ void perf_output_copy(struct perf_output_handle *handle,
handle->addr += size;
handle->size -= size;
if (!handle->size) {
+ struct perf_mmap_data *data = handle->data;
+
handle->page++;
- handle->page &= handle->data->nr_pages - 1;
- handle->addr = handle->data->data_pages[handle->page];
- handle->size = PAGE_SIZE << handle->data->data_order;
+ handle->page &= data->nr_pages - 1;
+ handle->addr = data->data_pages[handle->page];
+ handle->size = PAGE_SIZE << page_order(data);
}
} while (len);
}
@@ -3050,12 +3061,12 @@ int perf_output_begin(struct perf_output_handle *handle,
if (head - local_read(&data->wakeup) > data->watermark)
local_add(data->watermark, &data->wakeup);
- handle->page = handle->offset >> (PAGE_SHIFT + data->data_order);
+ handle->page = handle->offset >> (PAGE_SHIFT + page_order(data));
handle->page &= data->nr_pages - 1;
- handle->size = handle->offset & ((PAGE_SIZE << data->data_order) - 1);
+ handle->size = handle->offset & ((PAGE_SIZE << page_order(data)) - 1);
handle->addr = data->data_pages[handle->page];
handle->addr += handle->size;
- handle->size = (PAGE_SIZE << data->data_order) - handle->size;
+ handle->size = (PAGE_SIZE << page_order(data)) - handle->size;
if (have_lost) {
lost_event.header.type = PERF_RECORD_LOST;
^ permalink raw reply related [flat|nested] 53+ messages in thread
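The trick is worth spelling out: hide the allocation order behind an
inline accessor so the !vmalloc build sees a compile-time 0 and every
shift by it folds away, along with the struct field itself. A sketch,
with USE_VMALLOC standing in for CONFIG_PERF_USE_VMALLOC and 12 for
PAGE_SHIFT:

#ifdef USE_VMALLOC
struct mmap_data { int page_order; int nr_pages; };
static inline int page_order(struct mmap_data *d) { return d->page_order; }
#else
struct mmap_data { int nr_pages; };     /* no order field at all */
static inline int page_order(struct mmap_data *d) { return 0; }
#endif

unsigned long data_size(struct mmap_data *d)
{
        /* with page_order() == 0 this compiles to nr_pages << 12 */
        return (unsigned long)d->nr_pages << (12 + page_order(d));
}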
* [tip:perf/core] perf: Remove more code from the fastpath
2010-05-21 9:02 ` [PATCH 09/10] perf: Remove more fastpath code Peter Zijlstra
2010-05-21 11:15 ` Steven Rostedt
@ 2010-05-21 11:30 ` tip-bot for Peter Zijlstra
1 sibling, 0 replies; 53+ messages in thread
From: tip-bot for Peter Zijlstra @ 2010-05-21 11:30 UTC (permalink / raw)
To: linux-tip-commits
Cc: linux-kernel, paulus, acme, hpa, mingo, a.p.zijlstra, efault,
fweisbec, rostedt, tglx, mingo
Commit-ID: a94ffaaf55552769af328eaca9260fe6291c66c7
Gitweb: http://git.kernel.org/tip/a94ffaaf55552769af328eaca9260fe6291c66c7
Author: Peter Zijlstra <a.p.zijlstra@chello.nl>
AuthorDate: Thu, 20 May 2010 19:50:07 +0200
Committer: Ingo Molnar <mingo@elte.hu>
CommitDate: Fri, 21 May 2010 11:37:59 +0200
perf: Remove more code from the fastpath
Sanity checks cost instructions.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
LKML-Reference: <20100521090710.852926930@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
include/linux/perf_event.h | 2 --
kernel/perf_event.c | 20 ++++----------------
2 files changed, 4 insertions(+), 18 deletions(-)
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 09cd9c1..fb6c91e 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -803,8 +803,6 @@ struct perf_cpu_context {
struct perf_output_handle {
struct perf_event *event;
struct perf_mmap_data *data;
- unsigned long head;
- unsigned long offset;
unsigned long wakeup;
unsigned long size;
void *addr;
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 953ce46..d25c864 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -2967,20 +2967,11 @@ again:
preempt_enable();
}
-void perf_output_copy(struct perf_output_handle *handle,
+__always_inline void perf_output_copy(struct perf_output_handle *handle,
const void *buf, unsigned int len)
{
- handle->offset += len;
-
- /*
- * Check we didn't copy past our reservation window, taking the
- * possible unsigned int wrap into account.
- */
- if (WARN_ON_ONCE(((long)(handle->head - handle->offset)) < 0))
- return;
-
do {
- unsigned long size = min(handle->size, len);
+ unsigned long size = min_t(unsigned long, handle->size, len);
memcpy(handle->addr, buf, size);
@@ -3055,15 +3046,12 @@ int perf_output_begin(struct perf_output_handle *handle,
goto fail;
} while (local_cmpxchg(&data->head, offset, head) != offset);
- handle->offset = offset;
- handle->head = head;
-
if (head - local_read(&data->wakeup) > data->watermark)
local_add(data->watermark, &data->wakeup);
- handle->page = handle->offset >> (PAGE_SHIFT + page_order(data));
+ handle->page = offset >> (PAGE_SHIFT + page_order(data));
handle->page &= data->nr_pages - 1;
- handle->size = handle->offset & ((PAGE_SIZE << page_order(data)) - 1);
+ handle->size = offset & ((PAGE_SIZE << page_order(data)) - 1);
handle->addr = data->data_pages[handle->page];
handle->addr += handle->size;
handle->size = (PAGE_SIZE << page_order(data)) - handle->size;
^ permalink raw reply related [flat|nested] 53+ messages in thread
* [tip:perf/core] perf: Optimize perf_tp_event_match()
2010-05-21 9:02 ` [PATCH 10/10] perf: Optimize perf_tp_event_match Peter Zijlstra
@ 2010-05-21 11:30 ` tip-bot for Peter Zijlstra
0 siblings, 0 replies; 53+ messages in thread
From: tip-bot for Peter Zijlstra @ 2010-05-21 11:30 UTC (permalink / raw)
To: linux-tip-commits
Cc: linux-kernel, paulus, acme, hpa, mingo, a.p.zijlstra, efault,
fweisbec, rostedt, tglx, mingo
Commit-ID: 580d607cd666dfabfc1c7b0fb08c8ac690c7c87f
Gitweb: http://git.kernel.org/tip/580d607cd666dfabfc1c7b0fb08c8ac690c7c87f
Author: Peter Zijlstra <a.p.zijlstra@chello.nl>
AuthorDate: Thu, 20 May 2010 20:54:31 +0200
Committer: Ingo Molnar <mingo@elte.hu>
CommitDate: Fri, 21 May 2010 11:38:00 +0200
perf: Optimize perf_tp_event_match()
Since we know tracepoints come from kernel context,
avoid conditionals that try and establish that very
fact.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
LKML-Reference: <20100521090710.904944001@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
kernel/perf_event.c | 5 ++++-
1 files changed, 4 insertions(+), 1 deletions(-)
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index d25c864..e099650 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -4496,7 +4496,10 @@ static int perf_tp_event_match(struct perf_event *event,
struct perf_sample_data *data,
struct pt_regs *regs)
{
- if (perf_exclude_event(event, regs))
+ /*
+ * All tracepoints are from kernel-space.
+ */
+ if (event->attr.exclude_kernel)
return 0;
if (!perf_tp_filter_match(event, data))
^ permalink raw reply related [flat|nested] 53+ messages in thread
* Re: [PATCH 02/10] perf, trace: Use per-tracepoint-per-cpu hlist to track events
2010-05-21 9:02 ` [PATCH 02/10] perf, trace: Use per-tracepoint-per-cpu hlist to track events Peter Zijlstra
` (2 preceding siblings ...)
2010-05-21 11:28 ` [tip:perf/core] perf, trace: Optimize tracepoints by using per-tracepoint-per-cpu hlist to track events tip-bot for Peter Zijlstra
@ 2010-05-21 14:04 ` Steven Rostedt
2010-05-21 14:18 ` Peter Zijlstra
3 siblings, 1 reply; 53+ messages in thread
From: Steven Rostedt @ 2010-05-21 14:04 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Ingo Molnar, Paul Mackerras, Arnaldo Carvalho de Melo,
Frederic Weisbecker, LKML
On Fri, 2010-05-21 at 11:02 +0200, Peter Zijlstra wrote:
> plain text document attachment (perf-trace-per-cpu-fold.patch)
> Avoid the swevent hash-table by using per-tracepoint hlists.
>
> Also, avoid conditionals on the fast path by ordering with probe unregister
> so that we should never get on the callback path without the data being there.
>
> -void perf_trace_disable(int event_id)
> +void perf_trace_destroy(struct perf_event *p_event)
> {
> - struct ftrace_event_call *event;
> + struct ftrace_event_call *tp_event = p_event->tp_event;
> + int i;
>
> - mutex_lock(&event_mutex);
> - list_for_each_entry(event, &ftrace_events, list) {
> - if (event->id == event_id) {
> - perf_trace_event_disable(event);
> - module_put(event->mod);
> - break;
> + if (--tp_event->perf_refcount > 0)
> + return;
> +
You remove the event_mutex and then update the tp_event->perf_refcount
without any protection.
That tp_event->perf_refcount is global to the event not to the perf
event.
-- Steve
> + tp_event->perf_event_disable(tp_event);
> +
> + free_percpu(tp_event->perf_events);
> + tp_event->perf_events = NULL;
> +
> + if (!--total_ref_count) {
> + for (i = 0; i < 4; i++) {
> + free_percpu(perf_trace_buf[i]);
> + perf_trace_buf[i] = NULL;
> }
> }
> - mutex_unlock(&event_mutex);
> }
>
^ permalink raw reply [flat|nested] 53+ messages in thread
* Re: [PATCH 02/10] perf, trace: Use per-tracepoint-per-cpu hlist to track events
2010-05-21 14:04 ` [PATCH 02/10] perf, trace: Use " Steven Rostedt
@ 2010-05-21 14:18 ` Peter Zijlstra
2010-05-21 14:25 ` Peter Zijlstra
0 siblings, 1 reply; 53+ messages in thread
From: Peter Zijlstra @ 2010-05-21 14:18 UTC (permalink / raw)
To: rostedt
Cc: Ingo Molnar, Paul Mackerras, Arnaldo Carvalho de Melo,
Frederic Weisbecker, LKML
On Fri, 2010-05-21 at 10:04 -0400, Steven Rostedt wrote:
> On Fri, 2010-05-21 at 11:02 +0200, Peter Zijlstra wrote:
> > plain text document attachment (perf-trace-per-cpu-fold.patch)
> > Avoid the swevent hash-table by using per-tracepoint hlists.
> >
> > Also, avoid conditionals on the fast path by ordering with probe unregister
> > so that we should never get on the callback path without the data being there.
> >
>
> > -void perf_trace_disable(int event_id)
> > +void perf_trace_destroy(struct perf_event *p_event)
> > {
> > - struct ftrace_event_call *event;
> > + struct ftrace_event_call *tp_event = p_event->tp_event;
> > + int i;
> >
> > - mutex_lock(&event_mutex);
> > - list_for_each_entry(event, &ftrace_events, list) {
> > - if (event->id == event_id) {
> > - perf_trace_event_disable(event);
> > - module_put(event->mod);
> > - break;
> > + if (--tp_event->perf_refcount > 0)
> > + return;
> > +
>
> You remove the event_mutex and then update the tp_event->perf_refcount
> without any protection.
>
> That tp_event->perf_refcount is global to the event not to the perf
> event.
Oh, bugger, yes. I seem to have lost it on destroy..
Thanks!
^ permalink raw reply [flat|nested] 53+ messages in thread
* Re: [PATCH 02/10] perf, trace: Use per-tracepoint-per-cpu hlist to track events
2010-05-21 14:18 ` Peter Zijlstra
@ 2010-05-21 14:25 ` Peter Zijlstra
2010-05-31 7:20 ` [tip:perf/urgent] perf_events, trace: Fix perf_trace_destroy(), mutex went missing tip-bot for Peter Zijlstra
0 siblings, 1 reply; 53+ messages in thread
From: Peter Zijlstra @ 2010-05-21 14:25 UTC (permalink / raw)
To: rostedt
Cc: Ingo Molnar, Paul Mackerras, Arnaldo Carvalho de Melo,
Frederic Weisbecker, LKML
On Fri, 2010-05-21 at 16:18 +0200, Peter Zijlstra wrote:
>
> > That tp_event->perf_refcount is global to the event not to the perf
> > event.
>
> Oh, bugger, yes. I seem to have lost it on destroy..
---
Subject: perf, trace: Fix perf_trace_destroy(), mutex went missing
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri May 21 16:22:33 CEST 2010
Steve spotted I forgot to do the destroy under event_mutex.
Reported-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
---
kernel/trace/trace_event_perf.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
Index: linux-2.6/kernel/trace/trace_event_perf.c
===================================================================
--- linux-2.6.orig/kernel/trace/trace_event_perf.c
+++ linux-2.6/kernel/trace/trace_event_perf.c
@@ -125,8 +125,9 @@ void perf_trace_destroy(struct perf_even
struct ftrace_event_call *tp_event = p_event->tp_event;
int i;
+ mutex_lock(&event_mutex);
if (--tp_event->perf_refcount > 0)
- return;
+ goto out;
tp_event->perf_event_disable(tp_event);
@@ -145,6 +146,8 @@ void perf_trace_destroy(struct perf_even
perf_trace_buf[i] = NULL;
}
}
+out:
+ mutex_unlock(&event_mutex);
}
__kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
^ permalink raw reply [flat|nested] 53+ messages in thread
* Re: [PATCH 01/10] perf, trace: Remove IRQ-disable from perf/tracepoint interaction
2010-05-21 9:02 ` [PATCH 01/10] perf, trace: Remove IRQ-disable from perf/tracepoint interaction Peter Zijlstra
@ 2010-05-21 17:43 ` Frank Ch. Eigler
2010-05-21 17:53 ` Steven Rostedt
2010-05-23 12:11 ` Paul Mackerras
2010-05-25 7:30 ` [PATCH 01a/10] perf, trace: Fix !x86 build issue Peter Zijlstra
2 siblings, 1 reply; 53+ messages in thread
From: Frank Ch. Eigler @ 2010-05-21 17:43 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Ingo Molnar, Paul Mackerras, Arnaldo Carvalho de Melo,
Frederic Weisbecker, Steven Rostedt, LKML
Peter Zijlstra <a.p.zijlstra@chello.nl> writes:
> [...]
> @@ -799,13 +799,10 @@ perf_trace_templ_##call(struct ftrace_ev
> static notrace void perf_trace_##call(proto) \
> { \
> struct ftrace_event_call *event_call = &event_##call; \
> - struct pt_regs *__regs = &get_cpu_var(perf_trace_regs); \
> - \
> - perf_fetch_caller_regs(__regs, 1); \
> - \
> - perf_trace_templ_##template(event_call, __regs, args); \
> + struct pt_regs __regs; \
> \
> - put_cpu_var(perf_trace_regs); \
> + perf_fetch_caller_regs(&__regs, 1); \
> + perf_trace_templ_##template(event_call, &__regs, args); \
> }
> [...]
To what extent are you worried about something the size of struct
pt_regs being auto/stack allocated?
- FChE
^ permalink raw reply [flat|nested] 53+ messages in thread
* Re: [PATCH 01/10] perf, trace: Remove IRQ-disable from perf/tracepoint interaction
2010-05-21 17:43 ` Frank Ch. Eigler
@ 2010-05-21 17:53 ` Steven Rostedt
2010-05-21 18:07 ` Frank Ch. Eigler
0 siblings, 1 reply; 53+ messages in thread
From: Steven Rostedt @ 2010-05-21 17:53 UTC (permalink / raw)
To: Frank Ch. Eigler
Cc: Peter Zijlstra, Ingo Molnar, Paul Mackerras,
Arnaldo Carvalho de Melo, Frederic Weisbecker, LKML
On Fri, 2010-05-21 at 13:43 -0400, Frank Ch. Eigler wrote:
> Peter Zijlstra <a.p.zijlstra@chello.nl> writes:
>
> > [...]
> > @@ -799,13 +799,10 @@ perf_trace_templ_##call(struct ftrace_ev
> > static notrace void perf_trace_##call(proto) \
> > { \
> > struct ftrace_event_call *event_call = &event_##call; \
> > - struct pt_regs *__regs = &get_cpu_var(perf_trace_regs); \
> > - \
> > - perf_fetch_caller_regs(__regs, 1); \
> > - \
> > - perf_trace_templ_##template(event_call, __regs, args); \
> > + struct pt_regs __regs; \
> > \
> > - put_cpu_var(perf_trace_regs); \
> > + perf_fetch_caller_regs(&__regs, 1); \
> > + perf_trace_templ_##template(event_call, &__regs, args); \
> > }
> > [...]
>
> To what extent are you worried about something the size of struct
> pt_regs being auto/stack allocated?
Isn't pt_regs already allocated on the stack when interrupted?
-- Steve
^ permalink raw reply [flat|nested] 53+ messages in thread
* Re: [PATCH 01/10] perf, trace: Remove IRQ-disable from perf/tracepoint interaction
2010-05-21 17:53 ` Steven Rostedt
@ 2010-05-21 18:07 ` Frank Ch. Eigler
0 siblings, 0 replies; 53+ messages in thread
From: Frank Ch. Eigler @ 2010-05-21 18:07 UTC (permalink / raw)
To: Steven Rostedt
Cc: Peter Zijlstra, Ingo Molnar, Paul Mackerras,
Arnaldo Carvalho de Melo, Frederic Weisbecker, LKML
Hi -
On Fri, May 21, 2010 at 01:53:00PM -0400, Steven Rostedt wrote:
> [...]
> > > - struct pt_regs *__regs = &get_cpu_var(perf_trace_regs); \
> > > - [...]
> > > + struct pt_regs __regs; \
> > > [...]
> >
> > To what extent are you worried about something the size of struct
> > pt_regs being auto/stack allocated?
>
> Isn't pt_regs already allocated on the stack when interrupted?
Maybe, but here we're about to ask for another multi-hundred-byte one.
Does your stack-frame-size ftrace widget gripe about this change?
- FChE
^ permalink raw reply [flat|nested] 53+ messages in thread
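For scale: on x86-64 struct pt_regs is 21 eight-byte register slots,
i.e. about 168 bytes per (possibly nested) tracepoint hit. A quick
user-space check against the exported layout, assuming the uapi struct
matches the kernel's (on x86-64 it does):

#include <stdio.h>
#include <asm/ptrace.h>         /* uapi struct pt_regs */

int main(void)
{
        printf("sizeof(struct pt_regs) = %zu\n", sizeof(struct pt_regs));
        return 0;
}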
* Re: [PATCH 01/10] perf, trace: Remove IRQ-disable from perf/tracepoint interaction
2010-05-21 9:02 ` [PATCH 01/10] perf, trace: Remove IRQ-disable from perf/tracepoint interaction Peter Zijlstra
2010-05-21 17:43 ` Frank Ch. Eigler
@ 2010-05-23 12:11 ` Paul Mackerras
2010-05-23 18:16 ` Peter Zijlstra
2010-05-25 7:30 ` [PATCH 01a/10] perf, trace: Fix !x86 build issue Peter Zijlstra
2 siblings, 1 reply; 53+ messages in thread
From: Paul Mackerras @ 2010-05-23 12:11 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Ingo Molnar, Arnaldo Carvalho de Melo, Frederic Weisbecker,
Steven Rostedt, LKML
On Fri, May 21, 2010 at 11:02:02AM +0200, Peter Zijlstra wrote:
> entry = (struct trace_entry *)raw_data;
> - tracing_generic_entry_update(entry, *irq_flags, pc);
> + tracing_generic_entry_update(entry, regs->flags, pc);
> entry->type = type;
This is breaking on powerpc -- we don't have a "flags" element in
struct pt_regs. What is it trying to do? Get the interrupt enable
state, as local_irq_save(flags) would provide?
Paul.
^ permalink raw reply [flat|nested] 53+ messages in thread
* Re: [PATCH 01/10] perf, trace: Remove IRQ-disable from perf/tracepoint interaction
2010-05-23 12:11 ` Paul Mackerras
@ 2010-05-23 18:16 ` Peter Zijlstra
2010-05-24 4:29 ` Paul Mackerras
2010-05-24 11:31 ` [PATCH 01/10] perf, trace: Remove IRQ-disable from perf/tracepoint interaction Frederic Weisbecker
0 siblings, 2 replies; 53+ messages in thread
From: Peter Zijlstra @ 2010-05-23 18:16 UTC (permalink / raw)
To: Paul Mackerras
Cc: Ingo Molnar, Arnaldo Carvalho de Melo, Frederic Weisbecker,
Steven Rostedt, LKML
On Sun, 2010-05-23 at 22:11 +1000, Paul Mackerras wrote:
> On Fri, May 21, 2010 at 11:02:02AM +0200, Peter Zijlstra wrote:
>
> > entry = (struct trace_entry *)raw_data;
> > - tracing_generic_entry_update(entry, *irq_flags, pc);
> > + tracing_generic_entry_update(entry, regs->flags, pc);
> > entry->type = type;
>
> This is breaking on powerpc -- we don't have a "flags" element in
> struct pt_regs. What is it trying to do? Get the interrupt enable
> state, as local_irq_save(flags) would provide?
Yes, I fouled that up.
On x86 perf_arch_fetch_caller_regs() stores pt_regs::flags, which is the
same as used in local_save_flags().
So to avoid another local_save_flags() for the ftrace code, I wanted to
reuse it, and totally forgot the !x86 side of things.
Possibly we could remove the local_save_flags() from the
perf_arch_fetch_caller_regs() as I'm not sure we actually use
pt_regs::flags (Frederic?) and leave the ftrace thing to use
local_save_flags().
Something like:
---
arch/x86/kernel/cpu/perf_event.c | 1 -
kernel/trace/trace_event_perf.c | 4 +++-
2 files changed, 3 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index fd4db0d..856aeeb 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1717,7 +1717,6 @@ void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int ski
*/
regs->bp = rewind_frame_pointer(skip + 1);
regs->cs = __KERNEL_CS;
- local_save_flags(regs->flags);
}
unsigned long perf_instruction_pointer(struct pt_regs *regs)
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 26b8607..cb6f365 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -157,6 +157,7 @@ __kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
struct pt_regs *regs, int *rctxp)
{
struct trace_entry *entry;
+ unsigned long flags;
char *raw_data;
int pc;
@@ -174,7 +175,8 @@ __kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
memset(&raw_data[size - sizeof(u64)], 0, sizeof(u64));
entry = (struct trace_entry *)raw_data;
- tracing_generic_entry_update(entry, regs->flags, pc);
+ local_save_flags(flags);
+ tracing_generic_entry_update(entry, flags, pc);
entry->type = type;
return raw_data;
^ permalink raw reply related [flat|nested] 53+ messages in thread
* Re: [PATCH 01/10] perf, trace: Remove IRQ-disable from perf/tracepoint interaction
2010-05-23 18:16 ` Peter Zijlstra
@ 2010-05-24 4:29 ` Paul Mackerras
2010-05-25 8:06 ` [tip:perf/core] perf, trace: Fix IRQ-disable removal " tip-bot for Peter Zijlstra
2010-05-24 11:31 ` [PATCH 01/10] perf, trace: Remove IRQ-disable from perf/tracepoint interaction Frederic Weisbecker
1 sibling, 1 reply; 53+ messages in thread
From: Paul Mackerras @ 2010-05-24 4:29 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Ingo Molnar, Arnaldo Carvalho de Melo, Frederic Weisbecker,
Steven Rostedt, LKML
On Sun, May 23, 2010 at 08:16:27PM +0200, Peter Zijlstra wrote:
> On Sun, 2010-05-23 at 22:11 +1000, Paul Mackerras wrote:
> > This is breaking on powerpc -- we don't have a "flags" element in
> > struct pt_regs. What is it trying to do? Get the interrupt enable
> > state, as local_irq_save(flags) would provide?
>
> Yes, I fouled that up.
>
> On x86 perf_arch_fetch_caller_regs() stores pt_regs::flags, which is the
> same as used in local_save_flags().
>
> So to avoid another local_save_flags() for the ftrace code, I wanted to
> reuse it, and totally forgot the !x86 side of things.
>
> Possibly we could remove the local_save_flags() from the
> perf_arch_fetch_caller_regs() as I'm not sure we actually use
> pt_regs::flags (Frederic?) and leave the ftrace thing to use
> local_save_flags().
>
> Something like:
Yes, that works -- at least, it compiles and seems to run. :)
Acked-by: Paul Mackerras <paulus@samba.org>
Paul.
^ permalink raw reply [flat|nested] 53+ messages in thread
* Re: [PATCH 01/10] perf, trace: Remove IRQ-disable from perf/tracepoint interaction
2010-05-23 18:16 ` Peter Zijlstra
2010-05-24 4:29 ` Paul Mackerras
@ 2010-05-24 11:31 ` Frederic Weisbecker
1 sibling, 0 replies; 53+ messages in thread
From: Frederic Weisbecker @ 2010-05-24 11:31 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Paul Mackerras, Ingo Molnar, Arnaldo Carvalho de Melo,
Steven Rostedt, LKML
On Sun, May 23, 2010 at 08:16:27PM +0200, Peter Zijlstra wrote:
> On Sun, 2010-05-23 at 22:11 +1000, Paul Mackerras wrote:
> > On Fri, May 21, 2010 at 11:02:02AM +0200, Peter Zijlstra wrote:
> >
> > > entry = (struct trace_entry *)raw_data;
> > > - tracing_generic_entry_update(entry, *irq_flags, pc);
> > > + tracing_generic_entry_update(entry, regs->flags, pc);
> > > entry->type = type;
> >
> > This is breaking on powerpc -- we don't have a "flags" element in
> > struct pt_regs. What is it trying to do? Get the interrupt enable
> > state, as local_irq_save(flags) would provide?
>
> Yes, I fouled that up.
>
> On x86 perf_arch_fetch_caller_regs() stores pt_regs::flags, which is the
> same as used in local_save_flags().
>
> So to avoid another local_save_flags() for the ftrace code, I wanted to
> reuse it, and totally forgot the !x86 side of things.
>
> Possibly we could remove the local_save_flags() from the
> perf_arch_fetch_caller_regs() as I'm not sure we actually use
> pt_regs::flags (Frederic?) and leave the ftrace thing to use
> local_save_flags().
It was added in anticipation of further uses. But since those have not
appeared, you can safely remove it.
Thanks.
^ permalink raw reply [flat|nested] 53+ messages in thread
* [PATCH 01a/10] perf, trace: Fix !x86 build issue
2010-05-21 9:02 ` [PATCH 01/10] perf, trace: Remove IRQ-disable from perf/tracepoint interaction Peter Zijlstra
2010-05-21 17:43 ` Frank Ch. Eigler
2010-05-23 12:11 ` Paul Mackerras
@ 2010-05-25 7:30 ` Peter Zijlstra
2 siblings, 0 replies; 53+ messages in thread
From: Peter Zijlstra @ 2010-05-25 7:30 UTC (permalink / raw)
To: Ingo Molnar
Cc: Paul Mackerras, Arnaldo Carvalho de Melo, Frederic Weisbecker,
Steven Rostedt, LKML
Subject: perf, trace: Fix !x86 build issue
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Tue May 25 09:22:38 CEST 2010
Patch b7e2ecef92 (perf, trace: Optimize tracepoints by removing
IRQ-disable from perf/tracepoint interaction) made the unfortunate
mistake of assuming the world is x86-only; correct this.
The problem was that perf_fetch_caller_regs() did local_save_flags()
into regs->flags, and I re-used that to remove another
local_save_flags(), forgetting !x86 doesn't have regs->flags.
Do the reverse, remove the local_save_flags() from
perf_fetch_caller_regs() and let the ftrace site do the
local_save_flags() instead.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
---
arch/x86/kernel/cpu/perf_event.c | 6 +++++-
kernel/trace/trace_event_perf.c | 4 +++-
2 files changed, 8 insertions(+), 2 deletions(-)
Index: linux-2.6/arch/x86/kernel/cpu/perf_event.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/perf_event.c
+++ linux-2.6/arch/x86/kernel/cpu/perf_event.c
@@ -1717,7 +1717,11 @@ void perf_arch_fetch_caller_regs(struct
*/
regs->bp = rewind_frame_pointer(skip + 1);
regs->cs = __KERNEL_CS;
- local_save_flags(regs->flags);
+ /*
+ * We abuse bit 3 to pass exact information, see perf_misc_flags
+ * and the comment with PERF_EFLAGS_EXACT.
+ */
+ regs->flags = 0;
}
unsigned long perf_instruction_pointer(struct pt_regs *regs)
Index: linux-2.6/kernel/trace/trace_event_perf.c
===================================================================
--- linux-2.6.orig/kernel/trace/trace_event_perf.c
+++ linux-2.6/kernel/trace/trace_event_perf.c
@@ -157,6 +157,7 @@ __kprobes void *perf_trace_buf_prepare(i
struct pt_regs *regs, int *rctxp)
{
struct trace_entry *entry;
+ unsigned long flags;
char *raw_data;
int pc;
@@ -174,7 +175,8 @@ __kprobes void *perf_trace_buf_prepare(i
memset(&raw_data[size - sizeof(u64)], 0, sizeof(u64));
entry = (struct trace_entry *)raw_data;
- tracing_generic_entry_update(entry, regs->flags, pc);
+ local_save_flags(flags);
+ tracing_generic_entry_update(entry, flags, pc);
entry->type = type;
return raw_data;
^ permalink raw reply [flat|nested] 53+ messages in thread
* [tip:perf/core] perf, trace: Fix IRQ-disable removal from perf/tracepoint interaction
2010-05-24 4:29 ` Paul Mackerras
@ 2010-05-25 8:06 ` tip-bot for Peter Zijlstra
2010-05-25 9:02 ` Peter Zijlstra
0 siblings, 1 reply; 53+ messages in thread
From: tip-bot for Peter Zijlstra @ 2010-05-25 8:06 UTC (permalink / raw)
To: linux-tip-commits
Cc: linux-kernel, acme, paulus, hpa, mingo, a.p.zijlstra, efault,
peterz, fweisbec, rostedt, tglx, mingo
Commit-ID: 23ef672d1e8236e6fe606a39963ba2d576972d84
Gitweb: http://git.kernel.org/tip/23ef672d1e8236e6fe606a39963ba2d576972d84
Author: Peter Zijlstra <peterz@infradead.org>
AuthorDate: Sun, 23 May 2010 20:16:27 +0200
Committer: Ingo Molnar <mingo@elte.hu>
CommitDate: Tue, 25 May 2010 09:02:08 +0200
perf, trace: Fix IRQ-disable removal from perf/tracepoint interaction
On x86 perf_arch_fetch_caller_regs() stores pt_regs::flags, which is the
same as used in local_save_flags().
So to avoid another local_save_flags() for the ftrace code, I wanted to
reuse it, and totally forgot the !x86 side of things.
[ Possibly we could remove the local_save_flags() from the
perf_arch_fetch_caller_regs() as I'm not sure we actually use
pt_regs::flags (Frederic?) and leave the ftrace thing to use
local_save_flags(). ]
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
LKML-Reference: <20100524042958.GA10628@drongo>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
arch/x86/kernel/cpu/perf_event.c | 1 -
kernel/trace/trace_event_perf.c | 4 +++-
2 files changed, 3 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index fd4db0d..856aeeb 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1717,7 +1717,6 @@ void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int ski
*/
regs->bp = rewind_frame_pointer(skip + 1);
regs->cs = __KERNEL_CS;
- local_save_flags(regs->flags);
}
unsigned long perf_instruction_pointer(struct pt_regs *regs)
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 26b8607..cb6f365 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -157,6 +157,7 @@ __kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
struct pt_regs *regs, int *rctxp)
{
struct trace_entry *entry;
+ unsigned long flags;
char *raw_data;
int pc;
@@ -174,7 +175,8 @@ __kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
memset(&raw_data[size - sizeof(u64)], 0, sizeof(u64));
entry = (struct trace_entry *)raw_data;
- tracing_generic_entry_update(entry, regs->flags, pc);
+ local_save_flags(flags);
+ tracing_generic_entry_update(entry, flags, pc);
entry->type = type;
return raw_data;
^ permalink raw reply related [flat|nested] 53+ messages in thread
* Re: [tip:perf/core] perf, trace: Fix IRQ-disable removal from perf/tracepoint interaction
2010-05-25 8:06 ` [tip:perf/core] perf, trace: Fix IRQ-disable removal " tip-bot for Peter Zijlstra
@ 2010-05-25 9:02 ` Peter Zijlstra
2010-05-25 9:30 ` [tip:perf/core] perf, trace: Fix !x86 build bug tip-bot for Peter Zijlstra
0 siblings, 1 reply; 53+ messages in thread
From: Peter Zijlstra @ 2010-05-25 9:02 UTC (permalink / raw)
To: mingo, hpa, paulus, acme, linux-kernel, efault, fweisbec, rostedt,
tglx, mingo
Cc: linux-tip-commits
On Tue, 2010-05-25 at 08:06 +0000, tip-bot for Peter Zijlstra wrote:
> Commit-ID: 23ef672d1e8236e6fe606a39963ba2d576972d84
> Gitweb: http://git.kernel.org/tip/23ef672d1e8236e6fe606a39963ba2d576972d84
> Author: Peter Zijlstra <peterz@infradead.org>
> AuthorDate: Sun, 23 May 2010 20:16:27 +0200
> Committer: Ingo Molnar <mingo@elte.hu>
> CommitDate: Tue, 25 May 2010 09:02:08 +0200
>
> perf, trace: Fix IRQ-disable removal from perf/tracepoint interaction
>
> On x86 perf_arch_fetch_caller_regs() stores pt_regs::flags, the same
> value that local_save_flags() captures.
>
> So to avoid another local_save_flags() for the ftrace code, I wanted to
> reuse it, and totally forgot the !x86 side of things.
>
> [ Possibly we could remove the local_save_flags() from the
> perf_arch_fetch_caller_regs() as I'm not sure we actually use
> pt_regs::flags (Frederic?) and leave the ftrace thing to use
> local_save_flags(). ]
>
> Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
> Acked-by: Paul Mackerras <paulus@samba.org>
> Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
> Cc: Frederic Weisbecker <fweisbec@gmail.com>
> Cc: Mike Galbraith <efault@gmx.de>
> Cc: Steven Rostedt <rostedt@goodmis.org>
> LKML-Reference: <20100524042958.GA10628@drongo>
> Signed-off-by: Ingo Molnar <mingo@elte.hu>
Could you replace this with:
---
Subject: perf, trace: Fix !x86 build issue
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Tue May 25 09:22:38 CEST 2010
Patch b7e2ecef92 (perf, trace: Optimize tracepoints by removing
IRQ-disable from perf/tracepoint interaction) made the unfortunate
mistake of assuming the world is x86 only; correct this.
The problem was that perf_fetch_caller_regs() did local_save_flags()
into regs->flags, and I re-used that to remove another
local_save_flags(), forgetting that !x86 doesn't have regs->flags.
Do the reverse, remove the local_save_flags() from
perf_fetch_caller_regs() and let the ftrace site do the
local_save_flags() instead.
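[ Why plain zeroing suffices on x86: for these software-constructed
  regs the only remaining consumer of the flags word is
  perf_misc_flags(), which tests the single overloaded bit. A sketch
  using that era's names -- an illustration, not the literal code:

	#define PERF_EFLAGS_EXACT	(1UL << 3)

	unsigned long perf_misc_flags(struct pt_regs *regs)
	{
		unsigned long misc = user_mode(regs) ?
			PERF_RECORD_MISC_USER : PERF_RECORD_MISC_KERNEL;

		if (regs->flags & PERF_EFLAGS_EXACT)
			misc |= PERF_RECORD_MISC_EXACT;

		return misc;
	}

  With regs->flags = 0 a caller-regs sample simply never claims to
  be an exact (PEBS) sample, which is the correct answer. ]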
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
---
arch/x86/kernel/cpu/perf_event.c | 6 +++++-
kernel/trace/trace_event_perf.c | 4 +++-
2 files changed, 8 insertions(+), 2 deletions(-)
Index: linux-2.6/arch/x86/kernel/cpu/perf_event.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/perf_event.c
+++ linux-2.6/arch/x86/kernel/cpu/perf_event.c
@@ -1717,7 +1717,11 @@ void perf_arch_fetch_caller_regs(struct
*/
regs->bp = rewind_frame_pointer(skip + 1);
regs->cs = __KERNEL_CS;
- local_save_flags(regs->flags);
+ /*
+ * We abuse bit 3 to pass exact information, see perf_misc_flags
+ * and the comment with PERF_EFLAGS_EXACT.
+ */
+ regs->flags = 0;
}
unsigned long perf_instruction_pointer(struct pt_regs *regs)
Index: linux-2.6/kernel/trace/trace_event_perf.c
===================================================================
--- linux-2.6.orig/kernel/trace/trace_event_perf.c
+++ linux-2.6/kernel/trace/trace_event_perf.c
@@ -157,6 +157,7 @@ __kprobes void *perf_trace_buf_prepare(i
struct pt_regs *regs, int *rctxp)
{
struct trace_entry *entry;
+ unsigned long flags;
char *raw_data;
int pc;
@@ -174,7 +175,8 @@ __kprobes void *perf_trace_buf_prepare(i
memset(&raw_data[size - sizeof(u64)], 0, sizeof(u64));
entry = (struct trace_entry *)raw_data;
- tracing_generic_entry_update(entry, regs->flags, pc);
+ local_save_flags(flags);
+ tracing_generic_entry_update(entry, flags, pc);
entry->type = type;
return raw_data;
^ permalink raw reply [flat|nested] 53+ messages in thread
* [tip:perf/core] perf, trace: Fix !x86 build bug
2010-05-25 9:02 ` Peter Zijlstra
@ 2010-05-25 9:30 ` tip-bot for Peter Zijlstra
0 siblings, 0 replies; 53+ messages in thread
From: tip-bot for Peter Zijlstra @ 2010-05-25 9:30 UTC (permalink / raw)
To: linux-tip-commits
Cc: linux-kernel, paulus, hpa, mingo, a.p.zijlstra, peterz, tglx,
mingo
Commit-ID: 87f44bbc246c5244c76a701f8eefba7788bce64a
Gitweb: http://git.kernel.org/tip/87f44bbc246c5244c76a701f8eefba7788bce64a
Author: Peter Zijlstra <peterz@infradead.org>
AuthorDate: Tue, 25 May 2010 11:02:55 +0200
Committer: Ingo Molnar <mingo@elte.hu>
CommitDate: Tue, 25 May 2010 11:28:49 +0200
perf, trace: Fix !x86 build bug
Patch b7e2ecef92 (perf, trace: Optimize tracepoints by removing
IRQ-disable from perf/tracepoint interaction) made the
unfortunate mistake of assuming the world is x86 only; correct
this.
The problem was that perf_fetch_caller_regs() did
local_save_flags() into regs->flags, and I re-used that to
remove another local_save_flags(), forgetting that !x86 doesn't
have regs->flags.
Do the reverse, remove the local_save_flags() from
perf_fetch_caller_regs() and let the ftrace site do the
local_save_flags() instead.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
Cc: acme@redhat.com
Cc: efault@gmx.de
Cc: fweisbec@gmail.com
Cc: rostedt@goodmis.org
LKML-Reference: <1274778175.5882.623.camel@twins>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
arch/x86/kernel/cpu/perf_event.c | 6 +++++-
kernel/trace/trace_event_perf.c | 4 +++-
2 files changed, 8 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index fd4db0d..c775860 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1717,7 +1717,11 @@ void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int ski
*/
regs->bp = rewind_frame_pointer(skip + 1);
regs->cs = __KERNEL_CS;
- local_save_flags(regs->flags);
+ /*
+ * We abuse bit 3 to pass exact information, see perf_misc_flags
+ * and the comment with PERF_EFLAGS_EXACT.
+ */
+ regs->flags = 0;
}
unsigned long perf_instruction_pointer(struct pt_regs *regs)
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 26b8607..cb6f365 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -157,6 +157,7 @@ __kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
struct pt_regs *regs, int *rctxp)
{
struct trace_entry *entry;
+ unsigned long flags;
char *raw_data;
int pc;
@@ -174,7 +175,8 @@ __kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
memset(&raw_data[size - sizeof(u64)], 0, sizeof(u64));
entry = (struct trace_entry *)raw_data;
- tracing_generic_entry_update(entry, regs->flags, pc);
+ local_save_flags(flags);
+ tracing_generic_entry_update(entry, flags, pc);
entry->type = type;
return raw_data;
^ permalink raw reply related [flat|nested] 53+ messages in thread
* [tip:perf/urgent] perf_events, trace: Fix probe unregister race
2010-05-21 10:41 ` [PATCH 02b/10] perf, trace: Fix probe unregister race Peter Zijlstra
2010-05-21 10:43 ` Frederic Weisbecker
@ 2010-05-31 7:19 ` tip-bot for Peter Zijlstra
1 sibling, 0 replies; 53+ messages in thread
From: tip-bot for Peter Zijlstra @ 2010-05-31 7:19 UTC (permalink / raw)
To: linux-tip-commits
Cc: linux-kernel, hpa, mingo, fweisbec, a.p.zijlstra, tglx, mingo
Commit-ID: 3771f0771154675d4a0ca780be2411f3cc357208
Gitweb: http://git.kernel.org/tip/3771f0771154675d4a0ca780be2411f3cc357208
Author: Peter Zijlstra <a.p.zijlstra@chello.nl>
AuthorDate: Fri, 21 May 2010 12:31:09 +0200
Committer: Ingo Molnar <mingo@elte.hu>
CommitDate: Mon, 31 May 2010 08:46:09 +0200
perf_events, trace: Fix probe unregister race
tracepoint_probe_unregister() does not synchronize against the probe
callbacks, so do that explicitly. This properly serializes the callbacks
and the free of the data used therein.
Also, use this_cpu_ptr() where possible.
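[ Schematically -- a sketch of the ordering, not the literal code:
  __DO_TRACE() runs the probes under rcu_read_lock_sched_notrace(),
  so the teardown side must wait out a sched-RCU grace period before
  freeing anything a still-running probe may dereference:

	/* teardown */
	tracepoint_probe_unregister(tp_event->name,
				    tp_event->class->perf_probe,
				    tp_event);
	synchronize_sched();	/* in-flight probe callbacks finish */
	free_percpu(tp_event->perf_events);	/* now safe */

  As for this_cpu_ptr(): it is equivalent to
  per_cpu_ptr(ptr, smp_processor_id()) but lets the architecture use
  its per-cpu base register directly instead of doing an explicit
  smp_processor_id() lookup. ]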
Acked-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1274438476.1674.1702.camel@laptop>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
include/trace/ftrace.h | 2 +-
kernel/trace/trace_event_perf.c | 10 ++++++++--
kernel/trace/trace_kprobe.c | 4 ++--
kernel/trace/trace_syscalls.c | 4 ++--
4 files changed, 13 insertions(+), 7 deletions(-)
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 3d685d1..5a64905 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -725,7 +725,7 @@ perf_trace_##call(void *__data, proto) \
\
{ assign; } \
\
- head = per_cpu_ptr(event_call->perf_events, smp_processor_id());\
+ head = this_cpu_ptr(event_call->perf_events); \
perf_trace_buf_submit(entry, __entry_size, rctx, __addr, \
__count, &__regs, head); \
}
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index cb6f365..49c7abf 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -116,7 +116,7 @@ int perf_trace_enable(struct perf_event *p_event)
if (WARN_ON_ONCE(!list))
return -EINVAL;
- list = per_cpu_ptr(list, smp_processor_id());
+ list = this_cpu_ptr(list);
hlist_add_head_rcu(&p_event->hlist_entry, list);
return 0;
@@ -142,6 +142,12 @@ void perf_trace_destroy(struct perf_event *p_event)
tp_event->class->perf_probe,
tp_event);
+ /*
+ * Ensure our callback won't be called anymore. See
+ * tracepoint_probe_unregister() and __DO_TRACE().
+ */
+ synchronize_sched();
+
free_percpu(tp_event->perf_events);
tp_event->perf_events = NULL;
@@ -169,7 +175,7 @@ __kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
if (*rctxp < 0)
return NULL;
- raw_data = per_cpu_ptr(perf_trace_buf[*rctxp], smp_processor_id());
+ raw_data = this_cpu_ptr(perf_trace_buf[*rctxp]);
/* zero the dead bytes from align to not leak stack to user */
memset(&raw_data[size - sizeof(u64)], 0, sizeof(u64));
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index faf7cef..f52b5f5 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1359,7 +1359,7 @@ static __kprobes void kprobe_perf_func(struct kprobe *kp,
for (i = 0; i < tp->nr_args; i++)
call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset);
- head = per_cpu_ptr(call->perf_events, smp_processor_id());
+ head = this_cpu_ptr(call->perf_events);
perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head);
}
@@ -1392,7 +1392,7 @@ static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri,
for (i = 0; i < tp->nr_args; i++)
call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset);
- head = per_cpu_ptr(call->perf_events, smp_processor_id());
+ head = this_cpu_ptr(call->perf_events);
perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1, regs, head);
}
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index d2c859c..34e3580 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -519,7 +519,7 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
syscall_get_arguments(current, regs, 0, sys_data->nb_args,
(unsigned long *)&rec->args);
- head = per_cpu_ptr(sys_data->enter_event->perf_events, smp_processor_id());
+ head = this_cpu_ptr(sys_data->enter_event->perf_events);
perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head);
}
@@ -595,7 +595,7 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
rec->nr = syscall_nr;
rec->ret = syscall_get_return_value(current, regs);
- head = per_cpu_ptr(sys_data->exit_event->perf_events, smp_processor_id());
+ head = this_cpu_ptr(sys_data->exit_event->perf_events);
perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head);
}
^ permalink raw reply related [flat|nested] 53+ messages in thread
* [tip:perf/urgent] perf_events, trace: Fix perf_trace_destroy(), mutex went missing
2010-05-21 14:25 ` Peter Zijlstra
@ 2010-05-31 7:20 ` tip-bot for Peter Zijlstra
0 siblings, 0 replies; 53+ messages in thread
From: tip-bot for Peter Zijlstra @ 2010-05-31 7:20 UTC (permalink / raw)
To: linux-tip-commits
Cc: linux-kernel, hpa, mingo, rostedt, a.p.zijlstra, tglx, mingo
Commit-ID: 2e97942fe57864588774f173cf4cd7bb68968b76
Gitweb: http://git.kernel.org/tip/2e97942fe57864588774f173cf4cd7bb68968b76
Author: Peter Zijlstra <a.p.zijlstra@chello.nl>
AuthorDate: Fri, 21 May 2010 16:22:33 +0200
Committer: Ingo Molnar <mingo@elte.hu>
CommitDate: Mon, 31 May 2010 08:46:09 +0200
perf_events, trace: Fix perf_trace_destroy(), mutex went missing
Steve spotted that I forgot to do the destroy under event_mutex.
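[ The window, sketched (not the literal code): perf_trace_init()
  bumps perf_refcount under event_mutex, but the destroy path
  dropped it without the lock, so a teardown could race with a new
  user being attached. The shape the patch below restores:

	mutex_lock(&event_mutex);
	if (--tp_event->perf_refcount == 0) {
		/* unregister the probe, synchronize, free buffers */
	}
	mutex_unlock(&event_mutex);

  i.e. the decrement and any teardown it triggers are atomic with
  respect to the increment on the init side. ]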
Reported-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1274451913.1674.1707.camel@laptop>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
kernel/trace/trace_event_perf.c | 5 ++++-
1 files changed, 4 insertions(+), 1 deletions(-)
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 49c7abf..e6f6588 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -132,8 +132,9 @@ void perf_trace_destroy(struct perf_event *p_event)
struct ftrace_event_call *tp_event = p_event->tp_event;
int i;
+ mutex_lock(&event_mutex);
if (--tp_event->perf_refcount > 0)
- return;
+ goto out;
if (tp_event->class->reg)
tp_event->class->reg(tp_event, TRACE_REG_PERF_UNREGISTER);
@@ -157,6 +158,8 @@ void perf_trace_destroy(struct perf_event *p_event)
perf_trace_buf[i] = NULL;
}
}
+out:
+ mutex_unlock(&event_mutex);
}
__kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
^ permalink raw reply related [flat|nested] 53+ messages in thread
end of thread, other threads:[~2010-05-31 7:20 UTC | newest]
Thread overview: 53+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-05-21 9:02 [PATCH 00/10] perf tracepoint and output optimizations Peter Zijlstra
2010-05-21 9:02 ` [PATCH 01/10] perf, trace: Remove IRQ-disable from perf/tracepoint interaction Peter Zijlstra
2010-05-21 17:43 ` Frank Ch. Eigler
2010-05-21 17:53 ` Steven Rostedt
2010-05-21 18:07 ` Frank Ch. Eigler
2010-05-23 12:11 ` Paul Mackerras
2010-05-23 18:16 ` Peter Zijlstra
2010-05-24 4:29 ` Paul Mackerras
2010-05-25 8:06 ` [tip:perf/core] perf, trace: Fix IRQ-disable removal " tip-bot for Peter Zijlstra
2010-05-25 9:02 ` Peter Zijlstra
2010-05-25 9:30 ` [tip:perf/core] perf, trace: Fix !x86 build bug tip-bot for Peter Zijlstra
2010-05-24 11:31 ` [PATCH 01/10] perf, trace: Remove IRQ-disable from perf/tracepoint interaction Frederic Weisbecker
2010-05-25 7:30 ` [PATCH 01a/10] perf, trace: Fix !x86 build issue Peter Zijlstra
2010-05-21 9:02 ` [PATCH 02/10] perf, trace: Use per-tracepoint-per-cpu hlist to track events Peter Zijlstra
2010-05-21 9:40 ` Frederic Weisbecker
2010-05-21 10:02 ` Peter Zijlstra
2010-05-21 10:13 ` Frederic Weisbecker
2010-05-21 10:15 ` Peter Zijlstra
2010-05-21 10:19 ` Frederic Weisbecker
2010-05-21 10:38 ` Ingo Molnar
2010-05-21 10:51 ` Ingo Molnar
2010-05-21 10:19 ` Peter Zijlstra
2010-05-21 10:21 ` Frederic Weisbecker
2010-05-21 10:34 ` Peter Zijlstra
2010-05-21 10:38 ` Frederic Weisbecker
2010-05-21 10:41 ` [PATCH 02b/10] perf, trace: Fix probe unregister race Peter Zijlstra
2010-05-21 10:43 ` Frederic Weisbecker
2010-05-31 7:19 ` [tip:perf/urgent] perf_events, " tip-bot for Peter Zijlstra
2010-05-21 11:28 ` [tip:perf/core] perf, trace: Optimize tracepoints by using per-tracepoint-per-cpu hlist to track events tip-bot for Peter Zijlstra
2010-05-21 14:04 ` [PATCH 02/10] perf, trace: Use " Steven Rostedt
2010-05-21 14:18 ` Peter Zijlstra
2010-05-21 14:25 ` Peter Zijlstra
2010-05-31 7:20 ` [tip:perf/urgent] perf_events, trace: Fix perf_trace_destroy(), mutex went missing tip-bot for Peter Zijlstra
2010-05-21 9:02 ` [PATCH 03/10] perf: Ensure IOC_OUTPUT isnt used to create multi-writer buffers Peter Zijlstra
2010-05-21 11:28 ` [tip:perf/core] perf: Ensure that IOC_OUTPUT isn't " tip-bot for Peter Zijlstra
2010-05-21 9:02 ` [PATCH 04/10] perf-record: Remove -M Peter Zijlstra
2010-05-21 11:28 ` [tip:perf/core] " tip-bot for Peter Zijlstra
2010-05-21 9:02 ` [PATCH 05/10] perf-record: Share per-cpu buffers Peter Zijlstra
2010-05-21 9:44 ` Frederic Weisbecker
2010-05-21 10:03 ` Peter Zijlstra
2010-05-21 11:29 ` [tip:perf/core] " tip-bot for Peter Zijlstra
2010-05-21 9:02 ` [PATCH 06/10] perf: Fix wakeup storm for RO mmap()s Peter Zijlstra
2010-05-21 11:29 ` [tip:perf/core] " tip-bot for Peter Zijlstra
2010-05-21 9:02 ` [PATCH 07/10] perf: Optimize perf_output_copy Peter Zijlstra
2010-05-21 11:29 ` [tip:perf/core] perf: Optimize perf_output_copy() tip-bot for Peter Zijlstra
2010-05-21 9:02 ` [PATCH 08/10] perf: Optimize the !vmalloc backed buffer Peter Zijlstra
2010-05-21 11:29 ` [tip:perf/core] " tip-bot for Peter Zijlstra
2010-05-21 9:02 ` [PATCH 09/10] perf: Remove more fastpath code Peter Zijlstra
2010-05-21 11:15 ` Steven Rostedt
2010-05-21 11:18 ` Peter Zijlstra
2010-05-21 11:30 ` [tip:perf/core] perf: Remove more code from the fastpath tip-bot for Peter Zijlstra
2010-05-21 9:02 ` [PATCH 10/10] perf: Optimize perf_tp_event_match Peter Zijlstra
2010-05-21 11:30 ` [tip:perf/core] perf: Optimize perf_tp_event_match() tip-bot for Peter Zijlstra