public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2] Support perf script -F brstack,dso and brstacksym,dso
@ 2017-06-12 22:51 Mark Santaniello
  2017-06-12 22:51 ` [PATCH 2/2] Support perf script -F brstackoff,dso Mark Santaniello
  0 siblings, 1 reply; 7+ messages in thread
From: Mark Santaniello @ 2017-06-12 22:51 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Alexander Shishkin
  Cc: linux-kernel, Mark Santaniello

Perf script can report the dso for "addr" and "ip" fields.
This adds the same support for the "brstack" and "brstacksym" fields.
This can be helpful for AutoFDO: we can ignore LBR entries unless the
source and target address are both in the target module we are about to
build.

I built a small test akin to "while(1) { do_nothing(); }" where the
do_nothing function is loaded from a dso.  I sampled the execution with
perf record -b.  Using the new perf script functionality I can easily find
cases where there was a transition from one dso to another:

$ ./perf record -a -b -- sleep 5
[ perf record: Woken up 55 times to write data ]
[ perf record: Captured and wrote 18.815 MB perf.data (43593 samples) ]

$ ./perf script -F brstack,dso | sed 's/\/0 /\/0\n/g' \
> | grep burncpu | grep dso.so | head -n 1
0x7f967139b6aa(/tmp/burncpu/dso.so)/0x4006b1(/tmp/burncpu/exe)/P/-/-/0

$ ./perf script -F brstacksym,dso | sed 's/\/0 /\/0\n/g' \
> | grep burncpu | grep dso.so | head -n 1
do_nothing+0x5(/tmp/burncpu/dso.so)/main+0x44(/tmp/burncpu/exe)/P/-/-/0

Signed-off-by: Mark Santaniello <marksan@fb.com>
---
 tools/perf/builtin-script.c | 61 ++++++++++++++++++++++++++++++++++++---------
 1 file changed, 49 insertions(+), 12 deletions(-)

diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 4761b0d..6a7033b 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -298,10 +298,11 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,
 		       "selected.\n");
 		return -EINVAL;
 	}
-	if (PRINT_FIELD(DSO) && !PRINT_FIELD(IP) && !PRINT_FIELD(ADDR)) {
-		pr_err("Display of DSO requested but neither sample IP nor "
-			   "sample address\nis selected. Hence, no addresses to convert "
-		       "to DSO.\n");
+	if (PRINT_FIELD(DSO) && !PRINT_FIELD(IP) && !PRINT_FIELD(ADDR) &&
+	    !PRINT_FIELD(BRSTACK) && !PRINT_FIELD(BRSTACKSYM)) {
+		pr_err("Display of DSO requested but none of sample IP, sample address, "
+		       "brstack\nor brstacksym are selected. Hence, no addresses to "
+		       "convert to DSO.\n");
 		return -EINVAL;
 	}
 	if (PRINT_FIELD(SRCLINE) && !PRINT_FIELD(IP)) {
@@ -514,18 +515,43 @@ mispred_str(struct branch_entry *br)
 	return br->flags.predicted ? 'P' : 'M';
 }
 
-static void print_sample_brstack(struct perf_sample *sample)
+static void print_sample_brstack(struct perf_sample *sample,
+				 struct thread *thread,
+				 struct perf_event_attr *attr)
 {
 	struct branch_stack *br = sample->branch_stack;
-	u64 i;
+	struct addr_location alf, alt;
+	u64 i, from, to;
 
 	if (!(br && br->nr))
 		return;
 
 	for (i = 0; i < br->nr; i++) {
-		printf(" 0x%"PRIx64"/0x%"PRIx64"/%c/%c/%c/%d ",
-			br->entries[i].from,
-			br->entries[i].to,
+		from = br->entries[i].from;
+		to   = br->entries[i].to;
+
+		if (PRINT_FIELD(DSO)) {
+			memset(&alf, 0, sizeof(alf));
+			memset(&alt, 0, sizeof(alt));
+			thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, from, &alf);
+			thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, to, &alt);
+		}
+
+		printf("0x%"PRIx64, from);
+		if (PRINT_FIELD(DSO)) {
+			printf("(");
+			map__fprintf_dsoname(alf.map, stdout);
+			printf(")");
+		}
+
+		printf("/0x%"PRIx64, to);
+		if (PRINT_FIELD(DSO)) {
+			printf("(");
+			map__fprintf_dsoname(alt.map, stdout);
+			printf(")");
+		}
+
+		printf("/%c/%c/%c/%d ",
 			mispred_str( br->entries + i),
 			br->entries[i].flags.in_tx? 'X' : '-',
 			br->entries[i].flags.abort? 'A' : '-',
@@ -534,7 +560,8 @@ static void print_sample_brstack(struct perf_sample *sample)
 }
 
 static void print_sample_brstacksym(struct perf_sample *sample,
-				    struct thread *thread)
+				    struct thread *thread,
+				    struct perf_event_attr *attr)
 {
 	struct branch_stack *br = sample->branch_stack;
 	struct addr_location alf, alt;
@@ -559,8 +586,18 @@ static void print_sample_brstacksym(struct perf_sample *sample,
 			alt.sym = map__find_symbol(alt.map, alt.addr);
 
 		symbol__fprintf_symname_offs(alf.sym, &alf, stdout);
+		if (PRINT_FIELD(DSO)) {
+			printf("(");
+			map__fprintf_dsoname(alf.map, stdout);
+			printf(")");
+		}
 		putchar('/');
 		symbol__fprintf_symname_offs(alt.sym, &alt, stdout);
+		if (PRINT_FIELD(DSO)) {
+			printf("(");
+			map__fprintf_dsoname(alt.map, stdout);
+			printf(")");
+		}
 		printf("/%c/%c/%c/%d ",
 			mispred_str( br->entries + i),
 			br->entries[i].flags.in_tx? 'X' : '-',
@@ -1187,9 +1224,9 @@ static void process_event(struct perf_script *script,
 		print_sample_iregs(sample, attr);
 
 	if (PRINT_FIELD(BRSTACK))
-		print_sample_brstack(sample);
+		print_sample_brstack(sample, thread, attr);
 	else if (PRINT_FIELD(BRSTACKSYM))
-		print_sample_brstacksym(sample, thread);
+		print_sample_brstacksym(sample, thread, attr);
 
 	if (perf_evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT))
 		print_sample_bpf_output(sample);
-- 
2.9.3

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 2/2] Support perf script -F brstackoff,dso
  2017-06-12 22:51 [PATCH 1/2] Support perf script -F brstack,dso and brstacksym,dso Mark Santaniello
@ 2017-06-12 22:51 ` Mark Santaniello
  2017-06-12 23:36   ` Arnaldo Carvalho de Melo
  0 siblings, 1 reply; 7+ messages in thread
From: Mark Santaniello @ 2017-06-12 22:51 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Alexander Shishkin
  Cc: linux-kernel, Mark Santaniello

The idea here is to make AutoFDO easier in cloud environment with ASLR.
It's easiest to show how this is useful by example. I built a small test
akin to "while(1) { do_nothing(); }" where the do_nothing function is
loaded from a dso.  I sampled the execution with perf record -b.

Using the existing "brstack,dso", we get absolute addresses that are
affected by ASLR, and could be different on different hosts. The address
does not uniquely identify a branch/target in the binary:
$ ./perf script -F brstack,dso | sed 's/\/0 /\/0\n/g' \
> | grep burncpu | grep dso.so | head -n 1
0x7f967139b6aa(/tmp/burncpu/dso.so)/0x4006b1(/tmp/burncpu/exe)/P/-/-/0

Using the existing "brstacksym,dso" is a little better, because the
symbol plus offset and dso name *does* uniquely identify a branch/target
in the binary.  Ultimately, however, AutoFDO wants a simple offset into
the binary, so we'd have to undo all the work perf did to symbolize in
the first place:
$ ./perf script -F brstacksym,dso | sed 's/\/0 /\/0\n/g' \
> | grep burncpu | grep dso.so | head -n 1
do_nothing+0x5(/tmp/burncpu/dso.so)/main+0x44(/tmp/burncpu/exe)/P/-/-/0

With the new "brstackoff,dso" we get what we need: a simple offset into a
specific dso/binary that uniquely identifies a branch/target:
$ ./perf script -F brstackoff,dso | sed 's/\/0 /\/0\n/g' \
> | grep burncpu | grep dso.so | head -n 1
0x6aa(/tmp/burncpu/dso.so)/0x4006b1(/tmp/burncpu/exe)/P/-/-/0

Note this patch depends on commit 9d0ec0748cd9.

Signed-off-by: Mark Santaniello <marksan@fb.com>
---
 tools/perf/builtin-script.c | 56 +++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 52 insertions(+), 4 deletions(-)

diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 6a7033b..1effc64 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -85,6 +85,7 @@ enum perf_output_field {
 	PERF_OUTPUT_INSN	    = 1U << 21,
 	PERF_OUTPUT_INSNLEN	    = 1U << 22,
 	PERF_OUTPUT_BRSTACKINSN	    = 1U << 23,
+	PERF_OUTPUT_BRSTACKOFF	    = 1U << 24,
 };
 
 struct output_option {
@@ -115,6 +116,7 @@ struct output_option {
 	{.str = "insn", .field = PERF_OUTPUT_INSN},
 	{.str = "insnlen", .field = PERF_OUTPUT_INSNLEN},
 	{.str = "brstackinsn", .field = PERF_OUTPUT_BRSTACKINSN},
+	{.str = "brstackoff", .field = PERF_OUTPUT_BRSTACKOFF},
 };
 
 /* default set to maintain compatibility with current format */
@@ -299,10 +301,9 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,
 		return -EINVAL;
 	}
 	if (PRINT_FIELD(DSO) && !PRINT_FIELD(IP) && !PRINT_FIELD(ADDR) &&
-	    !PRINT_FIELD(BRSTACK) && !PRINT_FIELD(BRSTACKSYM)) {
-		pr_err("Display of DSO requested but none of sample IP, sample address, "
-		       "brstack\nor brstacksym are selected. Hence, no addresses to "
-		       "convert to DSO.\n");
+	    !PRINT_FIELD(BRSTACK) && !PRINT_FIELD(BRSTACKSYM) && !PRINT_FIELD(BRSTACKOFF)) {
+		pr_err("Display of DSO requested but no address to convert.  Select\n"
+		       "sample IP, sample address, brstack, brstacksym, or brstackoff.\n");
 		return -EINVAL;
 	}
 	if (PRINT_FIELD(SRCLINE) && !PRINT_FIELD(IP)) {
@@ -606,6 +607,51 @@ static void print_sample_brstacksym(struct perf_sample *sample,
 	}
 }
 
+static void print_sample_brstackoff(struct perf_sample *sample,
+				    struct thread *thread,
+				    struct perf_event_attr *attr)
+{
+	struct branch_stack *br = sample->branch_stack;
+	struct addr_location alf, alt;
+	u64 i, from, to;
+
+	if (!(br && br->nr))
+		return;
+
+	for (i = 0; i < br->nr; i++) {
+
+		memset(&alf, 0, sizeof(alf));
+		memset(&alt, 0, sizeof(alt));
+		from = br->entries[i].from;
+		to   = br->entries[i].to;
+
+		thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, from, &alf);
+		if (alf.map && !alf.map->dso->adjust_symbols)
+			from = map__map_ip(alf.map, from);
+
+		thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, to, &alt);
+		if (alt.map && !alt.map->dso->adjust_symbols)
+			to = map__map_ip(alt.map, to);
+
+		printf("0x%"PRIx64, from);
+		if (PRINT_FIELD(DSO)) {
+			printf("(");
+			map__fprintf_dsoname(alf.map, stdout);
+			printf(")");
+		}
+		printf("/0x%"PRIx64, to);
+		if (PRINT_FIELD(DSO)) {
+			printf("(");
+			map__fprintf_dsoname(alt.map, stdout);
+			printf(")");
+		}
+		printf("/%c/%c/%c/%d ",
+			mispred_str(br->entries + i),
+			br->entries[i].flags.in_tx ? 'X' : '-',
+			br->entries[i].flags.abort ? 'A' : '-',
+			br->entries[i].flags.cycles);
+	}
+}
 #define MAXBB 16384UL
 
 static int grab_bb(u8 *buffer, u64 start, u64 end,
@@ -1227,6 +1273,8 @@ static void process_event(struct perf_script *script,
 		print_sample_brstack(sample, thread, attr);
 	else if (PRINT_FIELD(BRSTACKSYM))
 		print_sample_brstacksym(sample, thread, attr);
+	else if (PRINT_FIELD(BRSTACKOFF))
+		print_sample_brstackoff(sample, thread, attr);
 
 	if (perf_evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT))
 		print_sample_bpf_output(sample);
-- 
2.9.3

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH 2/2] Support perf script -F brstackoff,dso
  2017-06-12 22:51 ` [PATCH 2/2] Support perf script -F brstackoff,dso Mark Santaniello
@ 2017-06-12 23:36   ` Arnaldo Carvalho de Melo
  2017-06-13  3:53     ` Mark Santaniello
  0 siblings, 1 reply; 7+ messages in thread
From: Arnaldo Carvalho de Melo @ 2017-06-12 23:36 UTC (permalink / raw)
  To: Mark Santaniello
  Cc: Peter Zijlstra, Ingo Molnar, Alexander Shishkin, linux-kernel

Em Mon, Jun 12, 2017 at 03:51:13PM -0700, Mark Santaniello escreveu:
> The idea here is to make AutoFDO easier in cloud environment with ASLR.
> It's easiest to show how this is useful by example. I built a small test
> akin to "while(1) { do_nothing(); }" where the do_nothing function is
> loaded from a dso.  I sampled the execution with perf record -b.
> 
> Using the existing "brstack,dso", we get absolute addresses that are
> affected by ASLR, and could be different on different hosts. The address
> does not uniquely identify a branch/target in the binary:
> $ ./perf script -F brstack,dso | sed 's/\/0 /\/0\n/g' \
> > | grep burncpu | grep dso.so | head -n 1
> 0x7f967139b6aa(/tmp/burncpu/dso.so)/0x4006b1(/tmp/burncpu/exe)/P/-/-/0
> 
> Using the existing "brstacksym,dso" is a little better, because the
> symbol plus offset and dso name *does* uniquely identify a branch/target
> in the binary.  Ultimately, however, AutoFDO wants a simple offset into
> the binary, so we'd have to undo all the work perf did to symbolize in
> the first place:
> $ ./perf script -F brstacksym,dso | sed 's/\/0 /\/0\n/g' \
> > | grep burncpu | grep dso.so | head -n 1
> do_nothing+0x5(/tmp/burncpu/dso.so)/main+0x44(/tmp/burncpu/exe)/P/-/-/0
> 
> With the new "brstackoff,dso" we get what we need: a simple offset into a
> specific dso/binary that uniquely identifies a branch/target:
> $ ./perf script -F brstackoff,dso | sed 's/\/0 /\/0\n/g' \
> > | grep burncpu | grep dso.so | head -n 1
> 0x6aa(/tmp/burncpu/dso.so)/0x4006b1(/tmp/burncpu/exe)/P/-/-/0
> 
> Note this patch depends on commit 9d0ec0748cd9.

Are you refererring to patch 1/2 in this series?

[acme@jouet linux]$ git show 9d0ec0748cd9
fatal: ambiguous argument '9d0ec0748cd9': unknown revision or path not in the working tree.
[acme@jouet linux]$

Can you please provide the test proggie source code?

Seems simple enough, but to save time for people trying to test your
patch, it would be good to have it available.

- Arnaldo
 
> Signed-off-by: Mark Santaniello <marksan@fb.com>
> ---
>  tools/perf/builtin-script.c | 56 +++++++++++++++++++++++++++++++++++++++++----
>  1 file changed, 52 insertions(+), 4 deletions(-)
> 
> diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
> index 6a7033b..1effc64 100644
> --- a/tools/perf/builtin-script.c
> +++ b/tools/perf/builtin-script.c
> @@ -85,6 +85,7 @@ enum perf_output_field {
>  	PERF_OUTPUT_INSN	    = 1U << 21,
>  	PERF_OUTPUT_INSNLEN	    = 1U << 22,
>  	PERF_OUTPUT_BRSTACKINSN	    = 1U << 23,
> +	PERF_OUTPUT_BRSTACKOFF	    = 1U << 24,
>  };
>  
>  struct output_option {
> @@ -115,6 +116,7 @@ struct output_option {
>  	{.str = "insn", .field = PERF_OUTPUT_INSN},
>  	{.str = "insnlen", .field = PERF_OUTPUT_INSNLEN},
>  	{.str = "brstackinsn", .field = PERF_OUTPUT_BRSTACKINSN},
> +	{.str = "brstackoff", .field = PERF_OUTPUT_BRSTACKOFF},
>  };
>  
>  /* default set to maintain compatibility with current format */
> @@ -299,10 +301,9 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,
>  		return -EINVAL;
>  	}
>  	if (PRINT_FIELD(DSO) && !PRINT_FIELD(IP) && !PRINT_FIELD(ADDR) &&
> -	    !PRINT_FIELD(BRSTACK) && !PRINT_FIELD(BRSTACKSYM)) {
> -		pr_err("Display of DSO requested but none of sample IP, sample address, "
> -		       "brstack\nor brstacksym are selected. Hence, no addresses to "
> -		       "convert to DSO.\n");
> +	    !PRINT_FIELD(BRSTACK) && !PRINT_FIELD(BRSTACKSYM) && !PRINT_FIELD(BRSTACKOFF)) {
> +		pr_err("Display of DSO requested but no address to convert.  Select\n"
> +		       "sample IP, sample address, brstack, brstacksym, or brstackoff.\n");
>  		return -EINVAL;
>  	}
>  	if (PRINT_FIELD(SRCLINE) && !PRINT_FIELD(IP)) {
> @@ -606,6 +607,51 @@ static void print_sample_brstacksym(struct perf_sample *sample,
>  	}
>  }
>  
> +static void print_sample_brstackoff(struct perf_sample *sample,
> +				    struct thread *thread,
> +				    struct perf_event_attr *attr)
> +{
> +	struct branch_stack *br = sample->branch_stack;
> +	struct addr_location alf, alt;
> +	u64 i, from, to;
> +
> +	if (!(br && br->nr))
> +		return;
> +
> +	for (i = 0; i < br->nr; i++) {
> +
> +		memset(&alf, 0, sizeof(alf));
> +		memset(&alt, 0, sizeof(alt));
> +		from = br->entries[i].from;
> +		to   = br->entries[i].to;
> +
> +		thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, from, &alf);
> +		if (alf.map && !alf.map->dso->adjust_symbols)
> +			from = map__map_ip(alf.map, from);
> +
> +		thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, to, &alt);
> +		if (alt.map && !alt.map->dso->adjust_symbols)
> +			to = map__map_ip(alt.map, to);
> +
> +		printf("0x%"PRIx64, from);
> +		if (PRINT_FIELD(DSO)) {
> +			printf("(");
> +			map__fprintf_dsoname(alf.map, stdout);
> +			printf(")");
> +		}
> +		printf("/0x%"PRIx64, to);
> +		if (PRINT_FIELD(DSO)) {
> +			printf("(");
> +			map__fprintf_dsoname(alt.map, stdout);
> +			printf(")");
> +		}
> +		printf("/%c/%c/%c/%d ",
> +			mispred_str(br->entries + i),
> +			br->entries[i].flags.in_tx ? 'X' : '-',
> +			br->entries[i].flags.abort ? 'A' : '-',
> +			br->entries[i].flags.cycles);
> +	}
> +}
>  #define MAXBB 16384UL
>  
>  static int grab_bb(u8 *buffer, u64 start, u64 end,
> @@ -1227,6 +1273,8 @@ static void process_event(struct perf_script *script,
>  		print_sample_brstack(sample, thread, attr);
>  	else if (PRINT_FIELD(BRSTACKSYM))
>  		print_sample_brstacksym(sample, thread, attr);
> +	else if (PRINT_FIELD(BRSTACKOFF))
> +		print_sample_brstackoff(sample, thread, attr);
>  
>  	if (perf_evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT))
>  		print_sample_bpf_output(sample);
> -- 
> 2.9.3

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 2/2] Support perf script -F brstackoff,dso
  2017-06-12 23:36   ` Arnaldo Carvalho de Melo
@ 2017-06-13  3:53     ` Mark Santaniello
  0 siblings, 0 replies; 7+ messages in thread
From: Mark Santaniello @ 2017-06-13  3:53 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo
  Cc: Peter Zijlstra, Ingo Molnar, Alexander Shishkin,
	linux-kernel@vger.kernel.org

On 6/12/17, 4:36 PM, "Arnaldo Carvalho de Melo" <acme@kernel.org> wrote:

    Em Mon, Jun 12, 2017 at 03:51:13PM -0700, Mark Santaniello escreveu:
    > The idea here is to make AutoFDO easier in cloud environment with ASLR.
    > It's easiest to show how this is useful by example. I built a small test
    > akin to "while(1) { do_nothing(); }" where the do_nothing function is
    > loaded from a dso.  I sampled the execution with perf record -b.
    > 
    > Using the existing "brstack,dso", we get absolute addresses that are
    > affected by ASLR, and could be different on different hosts. The address
    > does not uniquely identify a branch/target in the binary:
    > $ ./perf script -F brstack,dso | sed 's/\/0 /\/0\n/g' \
    > > | grep burncpu | grep dso.so | head -n 1
    > 0x7f967139b6aa(/tmp/burncpu/dso.so)/0x4006b1(/tmp/burncpu/exe)/P/-/-/0
    > 
    > Using the existing "brstacksym,dso" is a little better, because the
    > symbol plus offset and dso name *does* uniquely identify a branch/target
    > in the binary.  Ultimately, however, AutoFDO wants a simple offset into
    > the binary, so we'd have to undo all the work perf did to symbolize in
    > the first place:
    > $ ./perf script -F brstacksym,dso | sed 's/\/0 /\/0\n/g' \
    > > | grep burncpu | grep dso.so | head -n 1
    > do_nothing+0x5(/tmp/burncpu/dso.so)/main+0x44(/tmp/burncpu/exe)/P/-/-/0
    > 
    > With the new "brstackoff,dso" we get what we need: a simple offset into a
    > specific dso/binary that uniquely identifies a branch/target:
    > $ ./perf script -F brstackoff,dso | sed 's/\/0 /\/0\n/g' \
    > > | grep burncpu | grep dso.so | head -n 1
    > 0x6aa(/tmp/burncpu/dso.so)/0x4006b1(/tmp/burncpu/exe)/P/-/-/0
    > 
    > Note this patch depends on commit 9d0ec0748cd9.
    
    Are you refererring to patch 1/2 in this series?
    
    [acme@jouet linux]$ git show 9d0ec0748cd9
    fatal: ambiguous argument '9d0ec0748cd9': unknown revision or path not in the working tree.
    [acme@jouet linux]$
    
    Can you please provide the test proggie source code?
    
    Seems simple enough, but to save time for people trying to test your
    patch, it would be good to have it available.
    
    - Arnaldo

Hi Arnaldo,

Sorry for the missteps – I’m new to this process.  I am indeed referring to patch 1/2 in this series.
I didn’t realize the revision hash was only meaningful locally.

Below is the little program I am using to test this.

Thanks,
Mark


~/src/burncpu$ cat burncpu.cpp
#include <dlfcn.h>

int main() {
  void* handle = dlopen("./dso.so", RTLD_LAZY);
  if (!handle) return -1;

  typedef void (*fp)();
  fp do_nothing = (fp) dlsym(handle, "do_nothing");

  while(1) {
    do_nothing();
  }
}

~/src/burncpu$ cat dso.cpp
extern "C" void do_nothing() {}

~/src/burncpu$ cat build.sh
#!/bin/bash
g++ -shared dso.cpp -o dso.so
g++ burncpu.cpp -o burncpu -ldl



     
    > Signed-off-by: Mark Santaniello <marksan@fb.com>
    > ---
    >  tools/perf/builtin-script.c | 56 +++++++++++++++++++++++++++++++++++++++++----
    >  1 file changed, 52 insertions(+), 4 deletions(-)
    > 
    > diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
    > index 6a7033b..1effc64 100644
    > --- a/tools/perf/builtin-script.c
    > +++ b/tools/perf/builtin-script.c
    > @@ -85,6 +85,7 @@ enum perf_output_field {
    >  	PERF_OUTPUT_INSN	    = 1U << 21,
    >  	PERF_OUTPUT_INSNLEN	    = 1U << 22,
    >  	PERF_OUTPUT_BRSTACKINSN	    = 1U << 23,
    > +	PERF_OUTPUT_BRSTACKOFF	    = 1U << 24,
    >  };
    >  
    >  struct output_option {
    > @@ -115,6 +116,7 @@ struct output_option {
    >  	{.str = "insn", .field = PERF_OUTPUT_INSN},
    >  	{.str = "insnlen", .field = PERF_OUTPUT_INSNLEN},
    >  	{.str = "brstackinsn", .field = PERF_OUTPUT_BRSTACKINSN},
    > +	{.str = "brstackoff", .field = PERF_OUTPUT_BRSTACKOFF},
    >  };
    >  
    >  /* default set to maintain compatibility with current format */
    > @@ -299,10 +301,9 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,
    >  		return -EINVAL;
    >  	}
    >  	if (PRINT_FIELD(DSO) && !PRINT_FIELD(IP) && !PRINT_FIELD(ADDR) &&
    > -	    !PRINT_FIELD(BRSTACK) && !PRINT_FIELD(BRSTACKSYM)) {
    > -		pr_err("Display of DSO requested but none of sample IP, sample address, "
    > -		       "brstack\nor brstacksym are selected. Hence, no addresses to "
    > -		       "convert to DSO.\n");
    > +	    !PRINT_FIELD(BRSTACK) && !PRINT_FIELD(BRSTACKSYM) && !PRINT_FIELD(BRSTACKOFF)) {
    > +		pr_err("Display of DSO requested but no address to convert.  Select\n"
    > +		       "sample IP, sample address, brstack, brstacksym, or brstackoff.\n");
    >  		return -EINVAL;
    >  	}
    >  	if (PRINT_FIELD(SRCLINE) && !PRINT_FIELD(IP)) {
    > @@ -606,6 +607,51 @@ static void print_sample_brstacksym(struct perf_sample *sample,
    >  	}
    >  }
    >  
    > +static void print_sample_brstackoff(struct perf_sample *sample,
    > +				    struct thread *thread,
    > +				    struct perf_event_attr *attr)
    > +{
    > +	struct branch_stack *br = sample->branch_stack;
    > +	struct addr_location alf, alt;
    > +	u64 i, from, to;
    > +
    > +	if (!(br && br->nr))
    > +		return;
    > +
    > +	for (i = 0; i < br->nr; i++) {
    > +
    > +		memset(&alf, 0, sizeof(alf));
    > +		memset(&alt, 0, sizeof(alt));
    > +		from = br->entries[i].from;
    > +		to   = br->entries[i].to;
    > +
    > +		thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, from, &alf);
    > +		if (alf.map && !alf.map->dso->adjust_symbols)
    > +			from = map__map_ip(alf.map, from);
    > +
    > +		thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, to, &alt);
    > +		if (alt.map && !alt.map->dso->adjust_symbols)
    > +			to = map__map_ip(alt.map, to);
    > +
    > +		printf("0x%"PRIx64, from);
    > +		if (PRINT_FIELD(DSO)) {
    > +			printf("(");
    > +			map__fprintf_dsoname(alf.map, stdout);
    > +			printf(")");
    > +		}
    > +		printf("/0x%"PRIx64, to);
    > +		if (PRINT_FIELD(DSO)) {
    > +			printf("(");
    > +			map__fprintf_dsoname(alt.map, stdout);
    > +			printf(")");
    > +		}
    > +		printf("/%c/%c/%c/%d ",
    > +			mispred_str(br->entries + i),
    > +			br->entries[i].flags.in_tx ? 'X' : '-',
    > +			br->entries[i].flags.abort ? 'A' : '-',
    > +			br->entries[i].flags.cycles);
    > +	}
    > +}
    >  #define MAXBB 16384UL
    >  
    >  static int grab_bb(u8 *buffer, u64 start, u64 end,
    > @@ -1227,6 +1273,8 @@ static void process_event(struct perf_script *script,
    >  		print_sample_brstack(sample, thread, attr);
    >  	else if (PRINT_FIELD(BRSTACKSYM))
    >  		print_sample_brstacksym(sample, thread, attr);
    > +	else if (PRINT_FIELD(BRSTACKOFF))
    > +		print_sample_brstackoff(sample, thread, attr);
    >  
    >  	if (perf_evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT))
    >  		print_sample_bpf_output(sample);
    > -- 
    > 2.9.3
    

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 2/2] Support perf script -F brstackoff,dso
  2017-06-19 16:38 [PATCH 1/2] Support perf script -F brstack,dso and brstacksym,dso Mark Santaniello
@ 2017-06-19 16:38 ` Mark Santaniello
  2017-06-19 18:12   ` Arnaldo Carvalho de Melo
  0 siblings, 1 reply; 7+ messages in thread
From: Mark Santaniello @ 2017-06-19 16:38 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Alexander Shishkin
  Cc: linux-kernel, Mark Santaniello

The idea here is to make AutoFDO easier in cloud environment with ASLR.
It's easiest to show how this is useful by example. I built a small test
akin to "while(1) { do_nothing(); }" where the do_nothing function is
loaded from a dso:

  ~/src/burncpu$ cat burncpu.cpp
  #include <dlfcn.h>

  int main() {
    void* handle = dlopen("./dso.so", RTLD_LAZY);
    if (!handle) return -1;

    typedef void (*fp)();
    fp do_nothing = (fp) dlsym(handle, "do_nothing");

    while(1) {
      do_nothing();
    }
  }

  ~/src/burncpu$ cat dso.cpp
  extern "C" void do_nothing() {}

  ~/src/burncpu$ cat build.sh
  #!/bin/bash
  g++ -shared dso.cpp -o dso.so
  g++ burncpu.cpp -o burncpu -ldl

I sampled the execution of this program with perf record -b.

Using the existing "brstack,dso", we get absolute addresses that are
affected by ASLR, and could be different on different hosts. The address
does not uniquely identify a branch/target in the binary:
  $ ./perf script -F brstack,dso | sed 's/\/0 /\/0\n/g' \
  > | grep burncpu | grep dso.so | head -n 1
  0x7f967139b6aa(/tmp/burncpu/dso.so)/0x4006b1(/tmp/burncpu/exe)/P/-/-/0

Using the existing "brstacksym,dso" is a little better, because the
symbol plus offset and dso name *does* uniquely identify a branch/target
in the binary.  Ultimately, however, AutoFDO wants a simple offset into
the binary, so we'd have to undo all the work perf did to symbolize in
the first place:
  $ ./perf script -F brstacksym,dso | sed 's/\/0 /\/0\n/g' \
  > | grep burncpu | grep dso.so | head -n 1
  do_nothing+0x5(/tmp/burncpu/dso.so)/main+0x44(/tmp/burncpu/exe)/P/-/-/0

With the new "brstackoff,dso" we get what we need: a simple offset into a
specific dso/binary that uniquely identifies a branch/target:
  $ ./perf script -F brstackoff,dso | sed 's/\/0 /\/0\n/g' \
  > | grep burncpu | grep dso.so | head -n 1
  0x6aa(/tmp/burncpu/dso.so)/0x4006b1(/tmp/burncpu/exe)/P/-/-/0

Signed-off-by: Mark Santaniello <marksan@fb.com>
---
 tools/perf/builtin-script.c | 56 +++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 52 insertions(+), 4 deletions(-)

diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 6a7033b..1effc64 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -85,6 +85,7 @@ enum perf_output_field {
 	PERF_OUTPUT_INSN	    = 1U << 21,
 	PERF_OUTPUT_INSNLEN	    = 1U << 22,
 	PERF_OUTPUT_BRSTACKINSN	    = 1U << 23,
+	PERF_OUTPUT_BRSTACKOFF	    = 1U << 24,
 };
 
 struct output_option {
@@ -115,6 +116,7 @@ struct output_option {
 	{.str = "insn", .field = PERF_OUTPUT_INSN},
 	{.str = "insnlen", .field = PERF_OUTPUT_INSNLEN},
 	{.str = "brstackinsn", .field = PERF_OUTPUT_BRSTACKINSN},
+	{.str = "brstackoff", .field = PERF_OUTPUT_BRSTACKOFF},
 };
 
 /* default set to maintain compatibility with current format */
@@ -299,10 +301,9 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,
 		return -EINVAL;
 	}
 	if (PRINT_FIELD(DSO) && !PRINT_FIELD(IP) && !PRINT_FIELD(ADDR) &&
-	    !PRINT_FIELD(BRSTACK) && !PRINT_FIELD(BRSTACKSYM)) {
-		pr_err("Display of DSO requested but none of sample IP, sample address, "
-		       "brstack\nor brstacksym are selected. Hence, no addresses to "
-		       "convert to DSO.\n");
+	    !PRINT_FIELD(BRSTACK) && !PRINT_FIELD(BRSTACKSYM) && !PRINT_FIELD(BRSTACKOFF)) {
+		pr_err("Display of DSO requested but no address to convert.  Select\n"
+		       "sample IP, sample address, brstack, brstacksym, or brstackoff.\n");
 		return -EINVAL;
 	}
 	if (PRINT_FIELD(SRCLINE) && !PRINT_FIELD(IP)) {
@@ -606,6 +607,51 @@ static void print_sample_brstacksym(struct perf_sample *sample,
 	}
 }
 
+static void print_sample_brstackoff(struct perf_sample *sample,
+				    struct thread *thread,
+				    struct perf_event_attr *attr)
+{
+	struct branch_stack *br = sample->branch_stack;
+	struct addr_location alf, alt;
+	u64 i, from, to;
+
+	if (!(br && br->nr))
+		return;
+
+	for (i = 0; i < br->nr; i++) {
+
+		memset(&alf, 0, sizeof(alf));
+		memset(&alt, 0, sizeof(alt));
+		from = br->entries[i].from;
+		to   = br->entries[i].to;
+
+		thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, from, &alf);
+		if (alf.map && !alf.map->dso->adjust_symbols)
+			from = map__map_ip(alf.map, from);
+
+		thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, to, &alt);
+		if (alt.map && !alt.map->dso->adjust_symbols)
+			to = map__map_ip(alt.map, to);
+
+		printf("0x%"PRIx64, from);
+		if (PRINT_FIELD(DSO)) {
+			printf("(");
+			map__fprintf_dsoname(alf.map, stdout);
+			printf(")");
+		}
+		printf("/0x%"PRIx64, to);
+		if (PRINT_FIELD(DSO)) {
+			printf("(");
+			map__fprintf_dsoname(alt.map, stdout);
+			printf(")");
+		}
+		printf("/%c/%c/%c/%d ",
+			mispred_str(br->entries + i),
+			br->entries[i].flags.in_tx ? 'X' : '-',
+			br->entries[i].flags.abort ? 'A' : '-',
+			br->entries[i].flags.cycles);
+	}
+}
 #define MAXBB 16384UL
 
 static int grab_bb(u8 *buffer, u64 start, u64 end,
@@ -1227,6 +1273,8 @@ static void process_event(struct perf_script *script,
 		print_sample_brstack(sample, thread, attr);
 	else if (PRINT_FIELD(BRSTACKSYM))
 		print_sample_brstacksym(sample, thread, attr);
+	else if (PRINT_FIELD(BRSTACKOFF))
+		print_sample_brstackoff(sample, thread, attr);
 
 	if (perf_evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT))
 		print_sample_bpf_output(sample);
-- 
2.9.3

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH 2/2] Support perf script -F brstackoff,dso
  2017-06-19 16:38 ` [PATCH 2/2] Support perf script -F brstackoff,dso Mark Santaniello
@ 2017-06-19 18:12   ` Arnaldo Carvalho de Melo
  2017-06-20 15:10     ` Mark Santaniello
  0 siblings, 1 reply; 7+ messages in thread
From: Arnaldo Carvalho de Melo @ 2017-06-19 18:12 UTC (permalink / raw)
  To: Mark Santaniello
  Cc: Peter Zijlstra, Ingo Molnar, Alexander Shishkin, linux-kernel

Em Mon, Jun 19, 2017 at 09:38:25AM -0700, Mark Santaniello escreveu:
> With the new "brstackoff,dso" we get what we need: a simple offset into a
> specific dso/binary that uniquely identifies a branch/target:

You forgot to update tools/perf/Documentation/perf-script.txt about
'brstackoff', I added a line using the above explanation, please check
later and improve/expand it.

Thanks,

- Arnaldo

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 2/2] Support perf script -F brstackoff,dso
  2017-06-19 18:12   ` Arnaldo Carvalho de Melo
@ 2017-06-20 15:10     ` Mark Santaniello
  0 siblings, 0 replies; 7+ messages in thread
From: Mark Santaniello @ 2017-06-20 15:10 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo
  Cc: Peter Zijlstra, Ingo Molnar, Alexander Shishkin,
	linux-kernel@vger.kernel.org

On 6/19/17, 11:12 AM, "Arnaldo Carvalho de Melo" <acme@kernel.org> wrote:

    Em Mon, Jun 19, 2017 at 09:38:25AM -0700, Mark Santaniello escreveu:
    > With the new "brstackoff,dso" we get what we need: a simple offset into a
    > specific dso/binary that uniquely identifies a branch/target:
    
    You forgot to update tools/perf/Documentation/perf-script.txt about
    'brstackoff', I added a line using the above explanation, please check
    later and improve/expand it.
    
    Thanks,
    
    - Arnaldo

Arnaldo,

Thanks very much.  Will do.

-Mark    

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2017-06-20 15:11 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2017-06-12 22:51 [PATCH 1/2] Support perf script -F brstack,dso and brstacksym,dso Mark Santaniello
2017-06-12 22:51 ` [PATCH 2/2] Support perf script -F brstackoff,dso Mark Santaniello
2017-06-12 23:36   ` Arnaldo Carvalho de Melo
2017-06-13  3:53     ` Mark Santaniello
  -- strict thread matches above, loose matches on Subject: below --
2017-06-19 16:38 [PATCH 1/2] Support perf script -F brstack,dso and brstacksym,dso Mark Santaniello
2017-06-19 16:38 ` [PATCH 2/2] Support perf script -F brstackoff,dso Mark Santaniello
2017-06-19 18:12   ` Arnaldo Carvalho de Melo
2017-06-20 15:10     ` Mark Santaniello

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox