* [PATCH 1/4] ptrace: Add variant of profile_pc for passing in pc
@ 2011-09-15 22:56 Andi Kleen
2011-09-15 22:56 ` [PATCH 2/4] perf_events: Pass the event to perf_instruction_pointer Andi Kleen
` (2 more replies)
0 siblings, 3 replies; 7+ messages in thread
From: Andi Kleen @ 2011-09-15 22:56 UTC (permalink / raw)
To: a.p.zijlstra; +Cc: acme, linux-kernel, Andi Kleen
From: Andi Kleen <ak@linux.intel.com>
Add a variant of profile_pc() that allows passing in the
instruction pointer.
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
arch/x86/include/asm/ptrace.h | 1 +
arch/x86/kernel/time.c | 9 ++++++---
2 files changed, 7 insertions(+), 3 deletions(-)
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 3566454..0fafaac 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -139,6 +139,7 @@ struct cpuinfo_x86;
struct task_struct;
extern unsigned long profile_pc(struct pt_regs *regs);
+extern unsigned long __profile_pc(unsigned long pc, struct pt_regs *regs);
#define profile_pc profile_pc
extern unsigned long
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index 5a64d05..cd98120 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -26,10 +26,8 @@
DEFINE_VVAR(volatile unsigned long, jiffies) = INITIAL_JIFFIES;
#endif
-unsigned long profile_pc(struct pt_regs *regs)
+unsigned long __profile_pc(unsigned long pc, struct pt_regs *regs)
{
- unsigned long pc = instruction_pointer(regs);
-
if (!user_mode_vm(regs) && in_lock_functions(pc)) {
#ifdef CONFIG_FRAME_POINTER
return *(unsigned long *)(regs->bp + sizeof(long));
@@ -49,6 +47,11 @@ unsigned long profile_pc(struct pt_regs *regs)
}
return pc;
}
+
+unsigned long profile_pc(struct pt_regs *regs)
+{
+ return __profile_pc(instruction_pointer(regs), regs);
+}
EXPORT_SYMBOL(profile_pc);
/*
--
1.7.4.4
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 2/4] perf_events: Pass the event to perf_instruction_pointer
2011-09-15 22:56 [PATCH 1/4] ptrace: Add variant of profile_pc for passing in pc Andi Kleen
@ 2011-09-15 22:56 ` Andi Kleen
2011-09-15 23:16 ` Thomas Gleixner
2011-09-15 22:56 ` [PATCH 3/4] perf_events: Support a lock_parent event flag Andi Kleen
2011-09-15 22:56 ` [PATCH 4/4] perf tools: Add -l flag to top/record to account to parent Andi Kleen
2 siblings, 1 reply; 7+ messages in thread
From: Andi Kleen @ 2011-09-15 22:56 UTC (permalink / raw)
To: a.p.zijlstra; +Cc: acme, linux-kernel, Andi Kleen
From: Andi Kleen <ak@linux.intel.com>
Used in next patch.
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
arch/powerpc/include/asm/perf_event_server.h | 3 ++-
arch/powerpc/kernel/perf_event.c | 3 ++-
arch/x86/include/asm/perf_event.h | 3 ++-
arch/x86/kernel/cpu/perf_event.c | 3 ++-
include/linux/perf_event.h | 2 +-
kernel/events/core.c | 2 +-
6 files changed, 10 insertions(+), 6 deletions(-)
diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h
index 8f1df12..ea82ec3 100644
--- a/arch/powerpc/include/asm/perf_event_server.h
+++ b/arch/powerpc/include/asm/perf_event_server.h
@@ -59,7 +59,8 @@ extern int register_power_pmu(struct power_pmu *);
struct pt_regs;
extern unsigned long perf_misc_flags(struct pt_regs *regs);
-extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
+extern unsigned long perf_instruction_pointer(struct pt_regs *regs,
+ struct perf_event *event);
#define PERF_EVENT_INDEX_OFFSET 1
diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c
index 10a140f..9067e0a 100644
--- a/arch/powerpc/kernel/perf_event.c
+++ b/arch/powerpc/kernel/perf_event.c
@@ -1281,7 +1281,8 @@ unsigned long perf_misc_flags(struct pt_regs *regs)
* Called from generic code to get the instruction pointer
* for an event_id.
*/
-unsigned long perf_instruction_pointer(struct pt_regs *regs)
+unsigned long perf_instruction_pointer(struct pt_regs *regs,
+ struct perf_event *event)
{
unsigned long ip;
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 094fb30..790479f 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -137,7 +137,8 @@ extern void perf_events_lapic_init(void);
#define PERF_EFLAGS_EXACT (1UL << 3)
struct pt_regs;
-extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
+extern unsigned long perf_instruction_pointer(struct pt_regs *regs,
+ struct perf_event *event);
extern unsigned long perf_misc_flags(struct pt_regs *regs);
#define perf_misc_flags(regs) perf_misc_flags(regs)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index cfa62ec..373a614 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1923,7 +1923,8 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
}
}
-unsigned long perf_instruction_pointer(struct pt_regs *regs)
+unsigned long perf_instruction_pointer(struct pt_regs *regs,
+ struct perf_event *event)
{
unsigned long ip;
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index c816075..f8b93ec 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1129,7 +1129,7 @@ extern void perf_bp_event(struct perf_event *event, void *data);
#ifndef perf_misc_flags
# define perf_misc_flags(regs) \
(user_mode(regs) ? PERF_RECORD_MISC_USER : PERF_RECORD_MISC_KERNEL)
-# define perf_instruction_pointer(regs) instruction_pointer(regs)
+# define perf_instruction_pointer(regs,event) instruction_pointer(regs)
#endif
extern int perf_output_begin(struct perf_output_handle *handle,
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 0f85778..7f5c966 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4004,7 +4004,7 @@ void perf_prepare_sample(struct perf_event_header *header,
__perf_event_header__init_id(header, data, event);
if (sample_type & PERF_SAMPLE_IP)
- data->ip = perf_instruction_pointer(regs);
+ data->ip = perf_instruction_pointer(regs, event);
if (sample_type & PERF_SAMPLE_CALLCHAIN) {
int size = 1;
--
1.7.4.4
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 3/4] perf_events: Support a lock_parent event flag
2011-09-15 22:56 [PATCH 1/4] ptrace: Add variant of profile_pc for passing in pc Andi Kleen
2011-09-15 22:56 ` [PATCH 2/4] perf_events: Pass the event to perf_instruction_pointer Andi Kleen
@ 2011-09-15 22:56 ` Andi Kleen
2011-09-20 9:16 ` Peter Zijlstra
2011-09-15 22:56 ` [PATCH 4/4] perf tools: Add -l flag to top/record to account to parent Andi Kleen
2 siblings, 1 reply; 7+ messages in thread
From: Andi Kleen @ 2011-09-15 22:56 UTC (permalink / raw)
To: a.p.zijlstra; +Cc: acme, linux-kernel, Andi Kleen
From: Andi Kleen <ak@linux.intel.com>
Add a new lock_parent flag to the event attribute. When it is set
account spinlocks to the parent. This is similar to how oprofile
behaves on x86. This just reuses the oprofile code for this.
The main advantage is that it allows to make more sense of locking
problems without requiring full callgraphs.
Right now only implemented on x86.
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
arch/x86/kernel/cpu/perf_event.c | 3 +++
include/linux/perf_event.h | 3 ++-
2 files changed, 5 insertions(+), 1 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 373a614..f2caa6e 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1933,6 +1933,9 @@ unsigned long perf_instruction_pointer(struct pt_regs *regs,
else
ip = instruction_pointer(regs);
+ if (event->attr.lock_parent)
+ ip = __profile_pc(ip, regs);
+
return ip;
}
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index f8b93ec..51da085 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -219,8 +219,9 @@ struct perf_event_attr {
precise_ip : 2, /* skid constraint */
mmap_data : 1, /* non-exec mmap data */
sample_id_all : 1, /* sample_type all events */
+ lock_parent : 1, /* count locks in parent */
- __reserved_1 : 45;
+ __reserved_1 : 44;
union {
__u32 wakeup_events; /* wakeup every n events */
--
1.7.4.4
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 4/4] perf tools: Add -l flag to top/record to account to parent
2011-09-15 22:56 [PATCH 1/4] ptrace: Add variant of profile_pc for passing in pc Andi Kleen
2011-09-15 22:56 ` [PATCH 2/4] perf_events: Pass the event to perf_instruction_pointer Andi Kleen
2011-09-15 22:56 ` [PATCH 3/4] perf_events: Support a lock_parent event flag Andi Kleen
@ 2011-09-15 22:56 ` Andi Kleen
2 siblings, 0 replies; 7+ messages in thread
From: Andi Kleen @ 2011-09-15 22:56 UTC (permalink / raw)
To: a.p.zijlstra; +Cc: acme, linux-kernel, Andi Kleen
From: Andi Kleen <ak@linux.intel.com>
Add a new -l flag to top and record to account kernel locks
to the parent.
The main beneficiary is top because it doesn't support callgraphs.
This allows to use perf top to identify locking problems:
you can see the actual caller now instead of just "__spin_lock"
For record it can be a lower overhead alternative to -g
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
tools/perf/Documentation/perf-report.txt | 4 ++++
tools/perf/Documentation/perf-top.txt | 4 ++++
tools/perf/builtin-record.c | 6 ++++++
tools/perf/builtin-top.c | 6 ++++++
4 files changed, 20 insertions(+), 0 deletions(-)
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 04253c0..e78c9d4 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -60,6 +60,10 @@ OPTIONS
--parent=<regex>::
regex filter to identify parent, see: '--sort parent'
+-l::
+--lock-parent::
+ account kernel locks to parent function.
+
-x::
--exclude-other::
Only display entries with parent-match.
diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
index f6eb1cd..a0b0e16 100644
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt
@@ -70,6 +70,10 @@ Default is to monitor all CPUS.
--mmap-pages=<pages>::
Number of mmapped data pages.
+-l::
+--lock-parent::
+ Account kernel locks to parent function.
+
-p <pid>::
--pid=<pid>::
Profile events on existing Process ID.
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 6b0519f..8cc2585 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -68,6 +68,7 @@ static struct perf_evlist *evsel_list;
static long samples = 0;
static u64 bytes_written = 0;
+static bool lock_parent = false;
static int file_new = 1;
static off_t post_processing_offset;
@@ -161,6 +162,9 @@ static void config_attr(struct perf_evsel *evsel, struct perf_evlist *evlist)
struct perf_event_attr *attr = &evsel->attr;
int track = !evsel->idx; /* only the first counter needs these */
+ if (lock_parent)
+ attr->lock_parent = 1;
+
attr->inherit = !no_inherit;
attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
PERF_FORMAT_TOTAL_TIME_RUNNING |
@@ -751,6 +755,8 @@ const struct option record_options[] = {
"output file name"),
OPT_BOOLEAN('i', "no-inherit", &no_inherit,
"child tasks do not inherit counters"),
+ OPT_BOOLEAN('l', "lock-parent", &lock_parent,
+ "account kernel locks to parent function"),
OPT_UINTEGER('F', "freq", &user_freq, "profile at this frequency"),
OPT_UINTEGER('m', "mmap-pages", &mmap_pages, "number of mmap data pages"),
OPT_BOOLEAN(0, "group", &group,
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index a43433f..2ac73b0 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -95,6 +95,7 @@ static struct winsize winsize;
static const char *sym_filter = NULL;
struct sym_entry *sym_filter_entry_sched = NULL;
static int sym_pcnt_filter = 5;
+static bool lock_parent = false;
/*
* Source functions
@@ -861,6 +862,9 @@ static void start_counters(struct perf_evlist *evlist)
attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID;
+ if (lock_parent)
+ attr->lock_parent = 1;
+
if (top.freq) {
attr->sample_type |= PERF_SAMPLE_PERIOD;
attr->freq = 1;
@@ -1005,6 +1009,8 @@ static const struct option options[] = {
"file", "vmlinux pathname"),
OPT_BOOLEAN('K', "hide_kernel_symbols", &top.hide_kernel_symbols,
"hide kernel symbols"),
+ OPT_BOOLEAN('l', "lock-parent", &lock_parent,
+ "Account kernel locks to parent function"),
OPT_UINTEGER('m', "mmap-pages", &mmap_pages, "number of mmap data pages"),
OPT_INTEGER('r', "realtime", &realtime_prio,
"collect data with this RT SCHED_FIFO priority"),
--
1.7.4.4
^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [PATCH 2/4] perf_events: Pass the event to perf_instruction_pointer
2011-09-15 22:56 ` [PATCH 2/4] perf_events: Pass the event to perf_instruction_pointer Andi Kleen
@ 2011-09-15 23:16 ` Thomas Gleixner
0 siblings, 0 replies; 7+ messages in thread
From: Thomas Gleixner @ 2011-09-15 23:16 UTC (permalink / raw)
To: Andi Kleen; +Cc: a.p.zijlstra, acme, linux-kernel, Andi Kleen
On Thu, 15 Sep 2011, Andi Kleen wrote:
> From: Andi Kleen <ak@linux.intel.com>
>
> Used in next patch.
What for?
Just get it: We want to have reasonable changelogs and not a puzzle
game!
Is it really that hard ?
Thanks,
tglx
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH 3/4] perf_events: Support a lock_parent event flag
2011-09-15 22:56 ` [PATCH 3/4] perf_events: Support a lock_parent event flag Andi Kleen
@ 2011-09-20 9:16 ` Peter Zijlstra
2011-09-20 17:51 ` Andi Kleen
0 siblings, 1 reply; 7+ messages in thread
From: Peter Zijlstra @ 2011-09-20 9:16 UTC (permalink / raw)
To: Andi Kleen; +Cc: acme, linux-kernel, Andi Kleen
On Thu, 2011-09-15 at 15:56 -0700, Andi Kleen wrote:
> From: Andi Kleen <ak@linux.intel.com>
>
> Add a new lock_parent flag to the event attribute. When it is set
> account spinlocks to the parent. This is similar to how oprofile
> behaves on x86. This just reuses the oprofile code for this.
>
> The main advantage is that it allows to make more sense of locking
> problems without requiring full callgraphs.
>
> Right now only implemented on x86.
>
> Signed-off-by: Andi Kleen <ak@linux.intel.com>
> ---
> arch/x86/kernel/cpu/perf_event.c | 3 +++
> include/linux/perf_event.h | 3 ++-
> 2 files changed, 5 insertions(+), 1 deletions(-)
>
> diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
> index 373a614..f2caa6e 100644
> --- a/arch/x86/kernel/cpu/perf_event.c
> +++ b/arch/x86/kernel/cpu/perf_event.c
> @@ -1933,6 +1933,9 @@ unsigned long perf_instruction_pointer(struct pt_regs *regs,
> else
> ip = instruction_pointer(regs);
>
> + if (event->attr.lock_parent)
> + ip = __profile_pc(ip, regs);
> +
> return ip;
> }
>
> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
> index f8b93ec..51da085 100644
> --- a/include/linux/perf_event.h
> +++ b/include/linux/perf_event.h
> @@ -219,8 +219,9 @@ struct perf_event_attr {
> precise_ip : 2, /* skid constraint */
> mmap_data : 1, /* non-exec mmap data */
> sample_id_all : 1, /* sample_type all events */
> + lock_parent : 1, /* count locks in parent */
>
This all just sucks horridly. So this name tells us we need inherited
counters so that whenever we encounter a lock it will be accounted in
the parent process, or whatever -- which doesn't make any sense.
Furthermore, the sole reason you want this is because you don't want
callchains, supposedly because they're too expensive, but then you don't
say that.
How about you provide means of limiting the callchain depth, and then
frob the in_lock_function() and unwind 1 crap in userspace?
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH 3/4] perf_events: Support a lock_parent event flag
2011-09-20 9:16 ` Peter Zijlstra
@ 2011-09-20 17:51 ` Andi Kleen
0 siblings, 0 replies; 7+ messages in thread
From: Andi Kleen @ 2011-09-20 17:51 UTC (permalink / raw)
To: Peter Zijlstra; +Cc: Andi Kleen, acme, linux-kernel
> This all just sucks horridly. So this name tells us we need inherited
> counters so that whenever we encounter a lock it will be accounted in
> the parent process, or whatever -- which doesn't make any sense.
Thanks for the kind words. So what name do you suggest?
account_in_caller_of_lock ?
i_write_a_novel_in_every_variable_name?
> Furthermore, the sole reason you want this is because you don't want
> callchains, supposedly because they're too expensive, but then you don't
> say that.
perf top doesn't support callchains. And yes it's a lot cheaper too, but
that wasn't
the main motivation. perf top is just useless to diagnose any lock problems
currently.
I say that in the changelog for the user space.
> How about you provide means of limiting the callchain depth, and then
> frob the in_lock_function() and unwind 1 crap in userspace?
Is that equivalent? The normal call chains don't work without frame
pointers.
profile_pc does.
-Andi
^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2011-09-20 17:51 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2011-09-15 22:56 [PATCH 1/4] ptrace: Add variant of profile_pc for passing in pc Andi Kleen
2011-09-15 22:56 ` [PATCH 2/4] perf_events: Pass the event to perf_instruction_pointer Andi Kleen
2011-09-15 23:16 ` Thomas Gleixner
2011-09-15 22:56 ` [PATCH 3/4] perf_events: Support a lock_parent event flag Andi Kleen
2011-09-20 9:16 ` Peter Zijlstra
2011-09-20 17:51 ` Andi Kleen
2011-09-15 22:56 ` [PATCH 4/4] perf tools: Add -l flag to top/record to account to parent Andi Kleen
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).