linux-perf-users.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/2] perf, capstone: Support 32bit code under 64bit OS
@ 2024-02-27 23:48 Andi Kleen
  2024-02-27 23:48 ` [PATCH 2/2] perf, capstone: Support capstone for -F +brstackinsn Andi Kleen
                   ` (2 more replies)
  0 siblings, 3 replies; 9+ messages in thread
From: Andi Kleen @ 2024-02-27 23:48 UTC (permalink / raw)
  To: linux-perf-users; +Cc: changbin.du, adrian.hunter, Andi Kleen

Use the DSO to determine whether an IP belongs to 32bit or 64bit code, and
use that to configure capstone with the correct mode. This allows 32bit code
to be disassembled correctly under a 64bit OS.

% cat > loop.c
volatile int var;
int main(void)
{
	int i;
	for (i = 0; i < 100000; i++)
		var++;
}
% gcc -m32 -o loop loop.c
% perf record -e cycles:u ./loop
% perf script -F +disasm
            loop   82665 1833176.618023:          1 cycles:u:          f7eed500 _start+0x0 (/usr/lib/ld-linux.so.2)             movl %esp, %eax
            loop   82665 1833176.618029:          1 cycles:u:          f7eed500 _start+0x0 (/usr/lib/ld-linux.so.2)             movl %esp, %eax
            loop   82665 1833176.618031:          7 cycles:u:          f7eed500 _start+0x0 (/usr/lib/ld-linux.so.2)             movl %esp, %eax
            loop   82665 1833176.618034:         91 cycles:u:          f7eed500 _start+0x0 (/usr/lib/ld-linux.so.2)             movl %esp, %eax
            loop   82665 1833176.618036:       1242 cycles:u:          f7eed500 _start+0x0 (/usr/lib/ld-linux.so.2)             movl %esp, %eax

Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 tools/perf/util/print_insn.c | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/tools/perf/util/print_insn.c b/tools/perf/util/print_insn.c
index 459e0e93d7b1..bd7a95e64ce5 100644
--- a/tools/perf/util/print_insn.c
+++ b/tools/perf/util/print_insn.c
@@ -12,6 +12,8 @@
 #include "machine.h"
 #include "thread.h"
 #include "print_insn.h"
+#include "map.h"
+#include "dso.h"
 
 size_t sample__fprintf_insn_raw(struct perf_sample *sample, FILE *fp)
 {
@@ -28,12 +30,12 @@ size_t sample__fprintf_insn_raw(struct perf_sample *sample, FILE *fp)
 #ifdef HAVE_LIBCAPSTONE_SUPPORT
 #include <capstone/capstone.h>
 
-static int capstone_init(struct machine *machine, csh *cs_handle)
+static int capstone_init(struct machine *machine, csh *cs_handle, bool is64)
 {
 	cs_arch arch;
 	cs_mode mode;
 
-	if (machine__is(machine, "x86_64")) {
+	if (machine__is(machine, "x86_64") && is64) {
 		arch = CS_ARCH_X86;
 		mode = CS_MODE_64;
 	} else if (machine__normalized_is(machine, "x86")) {
@@ -101,9 +103,21 @@ size_t sample__fprintf_insn_asm(struct perf_sample *sample, struct thread *threa
 	size_t count;
 	size_t printed = 0;
 	int ret;
+	struct addr_location al;
+	bool is64bit = machine__is(machine, "x86_64");
+	struct dso *dso;
+
+	addr_location__init(&al);
+	if (thread__find_map(thread, sample->cpumode, sample->ip, &al) &&
+		(dso = map__dso(al.map)) != NULL &&
+		(dso->data.status != DSO_DATA_STATUS_ERROR)) {
+		map__load(al.map);
+		is64bit = dso->is_64_bit;
+	}
+	addr_location__exit(&al);
 
 	/* TODO: Try to initiate capstone only once but need a proper place. */
-	ret = capstone_init(machine, &cs_handle);
+	ret = capstone_init(machine, &cs_handle, is64bit);
 	if (ret < 0) {
 		/* fallback */
 		return sample__fprintf_insn_raw(sample, fp);
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 2/2] perf, capstone: Support capstone for -F +brstackinsn
  2024-02-27 23:48 [PATCH 1/2] perf, capstone: Support 32bit code under 64bit OS Andi Kleen
@ 2024-02-27 23:48 ` Andi Kleen
  2024-02-28 11:05   ` Changbin Du
  2024-02-28 11:00 ` [PATCH 1/2] perf, capstone: Support 32bit code under 64bit OS Changbin Du
  2024-02-28 11:42 ` Adrian Hunter
  2 siblings, 1 reply; 9+ messages in thread
From: Andi Kleen @ 2024-02-27 23:48 UTC (permalink / raw)
  To: linux-perf-users; +Cc: changbin.du, adrian.hunter, Andi Kleen

Support capstone output for the -F +brstackinsn branch dump.
It is only enabled when -F +disasm is also specified.
This was previously possible with --xed; now it is also available
to users who don't have xed, via the builtin capstone support.

Before:

perf record -b emacs -Q --batch '()'
perf script -F +brstackinsn
...
          emacs   55778 1814366.755945:     151564 cycles:P:      7f0ab2d17192 intel_check_word.constprop.0+0x162 (/usr/lib64/ld-linux-x86-64.s>        intel_check_word.constprop.0+237:
        00007f0ab2d1711d        insn: 75 e6                     # PRED 3 cycles [3]
        00007f0ab2d17105        insn: 73 51
        00007f0ab2d17107        insn: 48 89 c1
        00007f0ab2d1710a        insn: 48 39 ca
        00007f0ab2d1710d        insn: 73 96
        00007f0ab2d1710f        insn: 48 8d 04 11
        00007f0ab2d17113        insn: 48 d1 e8
        00007f0ab2d17116        insn: 49 8d 34 c1
        00007f0ab2d1711a        insn: 44 3a 06
        00007f0ab2d1711d        insn: 75 e6                     # PRED 3 cycles [6] 3.00 IPC
        00007f0ab2d17105        insn: 73 51                     # PRED 1 cycles [7] 1.00 IPC
        00007f0ab2d17158        insn: 48 8d 50 01
        00007f0ab2d1715c        insn: eb 92                     # PRED 1 cycles [8] 2.00 IPC
        00007f0ab2d170f0        insn: 48 39 ca
        00007f0ab2d170f3        insn: 73 b0                     # PRED 1 cycles [9] 2.00 IPC

After (perf must be compiled with capstone):

perf script -F +brstackinsn,+disasm

...
           emacs   55778 1814366.755945:     151564 cycles:P:      7f0ab2d17192 intel_check_word.constprop.0+0x162 (/usr/lib64/ld-linux-x86-64.s>        intel_check_word.constprop.0+237:
        00007f0ab2d1711d        jne intel_check_word.constprop.0+0xd5   # PRED 3 cycles [3]
        00007f0ab2d17105        jae intel_check_word.constprop.0+0x128
        00007f0ab2d17107        movq %rax, %rcx
        00007f0ab2d1710a        cmpq %rcx, %rdx
        00007f0ab2d1710d        jae intel_check_word.constprop.0+0x75
        00007f0ab2d1710f        leaq (%rcx, %rdx), %rax
        00007f0ab2d17113        shrq $1, %rax
        00007f0ab2d17116        leaq (%r9, %rax, 8), %rsi
        00007f0ab2d1711a        cmpb (%rsi), %r8b
        00007f0ab2d1711d        jne intel_check_word.constprop.0+0xd5   # PRED 3 cycles [6] 3.00 IPC
        00007f0ab2d17105        jae intel_check_word.constprop.0+0x128  # PRED 1 cycles [7] 1.00 IPC
        00007f0ab2d17158        leaq 1(%rax), %rdx
        00007f0ab2d1715c        jmp intel_check_word.constprop.0+0xc0   # PRED 1 cycles [8] 2.00 IPC
        00007f0ab2d170f0        cmpq %rcx, %rdx
        00007f0ab2d170f3        jae intel_check_word.constprop.0+0x75   # PRED 1 cycles [9] 2.00 IPC

Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 tools/perf/builtin-script.c  | 23 +++++++++++++---
 tools/perf/util/dump-insn.h  |  1 +
 tools/perf/util/print_insn.c | 52 ++++++++++++++++++++++++++++++++++++
 tools/perf/util/print_insn.h |  3 +++
 4 files changed, 75 insertions(+), 4 deletions(-)

diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 37088cc0ff1b..f18bcf61be8b 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -1162,6 +1162,20 @@ static int print_srccode(struct thread *thread, u8 cpumode, uint64_t addr)
 	return ret;
 }
 
+static const char *any_dump_insn(struct perf_event_attr *attr,
+			struct perf_insn *x, uint64_t ip,
+			u8 *inbuf, int inlen, int *lenp)
+{
+#ifdef HAVE_LIBCAPSTONE_SUPPORT
+	if (PRINT_FIELD(DISASM)) {
+		const char *p = cs_dump_insn(x, ip, inbuf, inlen, lenp);
+		if (p)
+			return p;
+	}
+#endif
+	return dump_insn(x, ip, inbuf, inlen, lenp);
+}
+
 static int ip__fprintf_jump(uint64_t ip, struct branch_entry *en,
 			    struct perf_insn *x, u8 *inbuf, int len,
 			    int insn, FILE *fp, int *total_cycles,
@@ -1170,7 +1184,7 @@ static int ip__fprintf_jump(uint64_t ip, struct branch_entry *en,
 {
 	int ilen = 0;
 	int printed = fprintf(fp, "\t%016" PRIx64 "\t%-30s\t", ip,
-			      dump_insn(x, ip, inbuf, len, &ilen));
+			      any_dump_insn(attr, x, ip, inbuf, len, &ilen));
 
 	if (PRINT_FIELD(BRSTACKINSNLEN))
 		printed += fprintf(fp, "ilen: %d\t", ilen);
@@ -1262,6 +1276,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
 		nr = max_blocks + 1;
 
 	x.thread = thread;
+	x.machine = machine;
 	x.cpu = sample->cpu;
 
 	printed += fprintf(fp, "%c", '\n');
@@ -1313,7 +1328,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
 			} else {
 				ilen = 0;
 				printed += fprintf(fp, "\t%016" PRIx64 "\t%s", ip,
-						   dump_insn(&x, ip, buffer + off, len - off, &ilen));
+						   any_dump_insn(attr, &x, ip, buffer + off, len - off, &ilen));
 				if (PRINT_FIELD(BRSTACKINSNLEN))
 					printed += fprintf(fp, "\tilen: %d", ilen);
 				printed += fprintf(fp, "\n");
@@ -1361,7 +1376,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
 			goto out;
 		ilen = 0;
 		printed += fprintf(fp, "\t%016" PRIx64 "\t%s", sample->ip,
-			dump_insn(&x, sample->ip, buffer, len, &ilen));
+			any_dump_insn(attr, &x, sample->ip, buffer, len, &ilen));
 		if (PRINT_FIELD(BRSTACKINSNLEN))
 			printed += fprintf(fp, "\tilen: %d", ilen);
 		printed += fprintf(fp, "\n");
@@ -1372,7 +1387,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
 	for (off = 0; off <= end - start; off += ilen) {
 		ilen = 0;
 		printed += fprintf(fp, "\t%016" PRIx64 "\t%s", start + off,
-				   dump_insn(&x, start + off, buffer + off, len - off, &ilen));
+				   any_dump_insn(attr, &x, start + off, buffer + off, len - off, &ilen));
 		if (PRINT_FIELD(BRSTACKINSNLEN))
 			printed += fprintf(fp, "\tilen: %d", ilen);
 		printed += fprintf(fp, "\n");
diff --git a/tools/perf/util/dump-insn.h b/tools/perf/util/dump-insn.h
index 650125061530..4a7797dd6d09 100644
--- a/tools/perf/util/dump-insn.h
+++ b/tools/perf/util/dump-insn.h
@@ -11,6 +11,7 @@ struct thread;
 struct perf_insn {
 	/* Initialized by callers: */
 	struct thread *thread;
+	struct machine *machine;
 	u8	      cpumode;
 	bool	      is64bit;
 	int	      cpu;
diff --git a/tools/perf/util/print_insn.c b/tools/perf/util/print_insn.c
index bd7a95e64ce5..35785ab22c07 100644
--- a/tools/perf/util/print_insn.c
+++ b/tools/perf/util/print_insn.c
@@ -12,6 +12,7 @@
 #include "machine.h"
 #include "thread.h"
 #include "print_insn.h"
+#include "dump-insn.h"
 #include "map.h"
 #include "dso.h"
 
@@ -71,6 +72,57 @@ static int capstone_init(struct machine *machine, csh *cs_handle, bool is64)
 	return 0;
 }
 
+static void dump_insn_x86(struct thread *thread, cs_insn *insn, struct perf_insn *x)
+{
+	struct addr_location al;
+	bool printed = false;
+
+	if (insn->detail && insn->detail->x86.op_count == 1) {
+		cs_x86_op *op = &insn->detail->x86.operands[0];
+
+		addr_location__init(&al);
+		if (op->type == X86_OP_IMM &&
+		    thread__find_symbol(thread, x->cpumode, op->imm, &al) &&
+		    al.sym &&
+		    al.addr < al.sym->end) {
+			snprintf(x->out, sizeof(x->out), "%s %s+%#" PRIx64 " [%#" PRIx64 "]", insn[0].mnemonic,
+					al.sym->name, al.addr - al.sym->start, op->imm);
+			printed = true;
+		}
+		addr_location__exit(&al);
+	}
+
+	if (!printed)
+		snprintf(x->out, sizeof(x->out), "%s %s", insn[0].mnemonic, insn[0].op_str);
+}
+
+const char *cs_dump_insn(struct perf_insn *x, uint64_t ip,
+			 u8 *inbuf, int inlen, int *lenp)
+{
+	int ret;
+	int count;
+	cs_insn *insn;
+	csh cs_handle;
+
+	ret = capstone_init(x->machine, &cs_handle, x->is64bit);
+	if (ret < 0)
+		return NULL;
+
+	count = cs_disasm(cs_handle, (uint8_t *)inbuf, inlen, ip, 1, &insn);
+	if (count > 0) {
+		if (machine__normalized_is(x->machine, "x86"))
+			dump_insn_x86(x->thread, &insn[0], x);
+		else
+			snprintf(x->out, sizeof(x->out), "%s %s",
+					insn[0].mnemonic, insn[0].op_str);
+		*lenp = insn->size;
+		cs_free(insn, count);
+	} else {
+		return NULL;
+	}
+	return x->out;
+}
+
 static size_t print_insn_x86(struct perf_sample *sample, struct thread *thread,
 			     cs_insn *insn, FILE *fp)
 {
diff --git a/tools/perf/util/print_insn.h b/tools/perf/util/print_insn.h
index 465bdcfcc2fd..135d78322f71 100644
--- a/tools/perf/util/print_insn.h
+++ b/tools/perf/util/print_insn.h
@@ -8,9 +8,12 @@
 struct perf_sample;
 struct thread;
 struct machine;
+struct perf_insn;
 
 size_t sample__fprintf_insn_asm(struct perf_sample *sample, struct thread *thread,
 				struct machine *machine, FILE *fp);
 size_t sample__fprintf_insn_raw(struct perf_sample *sample, FILE *fp);
+const char *cs_dump_insn(struct perf_insn *x, uint64_t ip,
+                         u8 *inbuf, int inlen, int *lenp);
 
 #endif /* PERF_PRINT_INSN_H */
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/2] perf, capstone: Support 32bit code under 64bit OS
  2024-02-27 23:48 [PATCH 1/2] perf, capstone: Support 32bit code under 64bit OS Andi Kleen
  2024-02-27 23:48 ` [PATCH 2/2] perf, capstone: Support capstone for -F +brstackinsn Andi Kleen
@ 2024-02-28 11:00 ` Changbin Du
  2024-02-28 23:29   ` Andi Kleen
  2024-02-28 11:42 ` Adrian Hunter
  2 siblings, 1 reply; 9+ messages in thread
From: Changbin Du @ 2024-02-28 11:00 UTC (permalink / raw)
  To: Andi Kleen; +Cc: linux-perf-users, changbin.du, adrian.hunter

On Tue, Feb 27, 2024 at 03:48:04PM -0800, Andi Kleen wrote:
> Use the DSO to resolve whether an IP is 32bit or 64bit and use that to
> configure capstone to the correct mode. This allows to correctly
> disassemble 32bit code under a 64bit OS.
> 
> % cat > loop.c
> volatile int var;
> int main(void)
> {
> 	int i;
> 	for (i = 0; i < 100000; i++)
> 		var++;
> }
> % gcc -m32 -o loop loop.c
> % perf record -e cycles:u ./loop
> % perf script -F +disasm
>             loop   82665 1833176.618023:          1 cycles:u:          f7eed500 _start+0x0 (/usr/lib/ld-linux.so.2)             movl %esp, %eax
>             loop   82665 1833176.618029:          1 cycles:u:          f7eed500 _start+0x0 (/usr/lib/ld-linux.so.2)             movl %esp, %eax
>             loop   82665 1833176.618031:          7 cycles:u:          f7eed500 _start+0x0 (/usr/lib/ld-linux.so.2)             movl %esp, %eax
>             loop   82665 1833176.618034:         91 cycles:u:          f7eed500 _start+0x0 (/usr/lib/ld-linux.so.2)             movl %esp, %eax
>             loop   82665 1833176.618036:       1242 cycles:u:          f7eed500 _start+0x0 (/usr/lib/ld-linux.so.2)             movl %esp, %eax
> 
> Signed-off-by: Andi Kleen <ak@linux.intel.com>
> ---
>  tools/perf/util/print_insn.c | 20 +++++++++++++++++---
>  1 file changed, 17 insertions(+), 3 deletions(-)
> 
> diff --git a/tools/perf/util/print_insn.c b/tools/perf/util/print_insn.c
> index 459e0e93d7b1..bd7a95e64ce5 100644
> --- a/tools/perf/util/print_insn.c
> +++ b/tools/perf/util/print_insn.c
> @@ -12,6 +12,8 @@
>  #include "machine.h"
>  #include "thread.h"
>  #include "print_insn.h"
> +#include "map.h"
> +#include "dso.h"
>  
>  size_t sample__fprintf_insn_raw(struct perf_sample *sample, FILE *fp)
>  {
> @@ -28,12 +30,12 @@ size_t sample__fprintf_insn_raw(struct perf_sample *sample, FILE *fp)
>  #ifdef HAVE_LIBCAPSTONE_SUPPORT
>  #include <capstone/capstone.h>
>  
> -static int capstone_init(struct machine *machine, csh *cs_handle)
> +static int capstone_init(struct machine *machine, csh *cs_handle, bool is64)
>  {
>  	cs_arch arch;
>  	cs_mode mode;
>  
> -	if (machine__is(machine, "x86_64")) {
> +	if (machine__is(machine, "x86_64") && is64) {
>  		arch = CS_ARCH_X86;
>  		mode = CS_MODE_64;
>  	} else if (machine__normalized_is(machine, "x86")) {
> @@ -101,9 +103,21 @@ size_t sample__fprintf_insn_asm(struct perf_sample *sample, struct thread *threa
>  	size_t count;
>  	size_t printed = 0;
>  	int ret;
> +	struct addr_location al;
> +	bool is64bit = machine__is(machine, "x86_64");
> +	struct dso *dso;
> +
> +	addr_location__init(&al);
> +	if (thread__find_map(thread, sample->cpumode, sample->ip, &al) &&
> +		(dso = map__dso(al.map)) != NULL &&
> +		(dso->data.status != DSO_DATA_STATUS_ERROR)) {
> +		map__load(al.map);
> +		is64bit = dso->is_64_bit;
> +	}
> +	addr_location__exit(&al);
>
This could be extracted into a standalone function, and it should apply to arm64
as well.

>  	/* TODO: Try to initiate capstone only once but need a proper place. */
> -	ret = capstone_init(machine, &cs_handle);
> +	ret = capstone_init(machine, &cs_handle, is64bit);
>  	if (ret < 0) {
>  		/* fallback */
>  		return sample__fprintf_insn_raw(sample, fp);
> -- 
> 2.43.0
> 

-- 
Cheers,
Changbin Du

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] perf, capstone: Support capstone for -F +brstackinsn
  2024-02-27 23:48 ` [PATCH 2/2] perf, capstone: Support capstone for -F +brstackinsn Andi Kleen
@ 2024-02-28 11:05   ` Changbin Du
  2024-02-28 23:26     ` Andi Kleen
  0 siblings, 1 reply; 9+ messages in thread
From: Changbin Du @ 2024-02-28 11:05 UTC (permalink / raw)
  To: Andi Kleen; +Cc: linux-perf-users, changbin.du, adrian.hunter

On Tue, Feb 27, 2024 at 03:48:05PM -0800, Andi Kleen wrote:
> Support capstone output for the -F +brstackinsn branch dump.
> It is only enabled when -F +disasm is specified.
> This was possible before with --xed, but now also allow
> it for users that don't have xed using the builtin capstone support.
With this change, 'disasm' acts as a flag rather than as a field.

> Before:
> 
> perf record -b emacs -Q --batch '()'
> perf script -F +brstackinsn
> ...
>           emacs   55778 1814366.755945:     151564 cycles:P:      7f0ab2d17192 intel_check_word.constprop.0+0x162 (/usr/lib64/ld-linux-x86-64.s>        intel_check_word.constprop.0+237:
>         00007f0ab2d1711d        insn: 75 e6                     # PRED 3 cycles [3]
>         00007f0ab2d17105        insn: 73 51
>         00007f0ab2d17107        insn: 48 89 c1
>         00007f0ab2d1710a        insn: 48 39 ca
>         00007f0ab2d1710d        insn: 73 96
>         00007f0ab2d1710f        insn: 48 8d 04 11
>         00007f0ab2d17113        insn: 48 d1 e8
>         00007f0ab2d17116        insn: 49 8d 34 c1
>         00007f0ab2d1711a        insn: 44 3a 06
>         00007f0ab2d1711d        insn: 75 e6                     # PRED 3 cycles [6] 3.00 IPC
>         00007f0ab2d17105        insn: 73 51                     # PRED 1 cycles [7] 1.00 IPC
>         00007f0ab2d17158        insn: 48 8d 50 01
>         00007f0ab2d1715c        insn: eb 92                     # PRED 1 cycles [8] 2.00 IPC
>         00007f0ab2d170f0        insn: 48 39 ca
>         00007f0ab2d170f3        insn: 73 b0                     # PRED 1 cycles [9] 2.00 IPC
> 
> After (perf must be compiled with capstone):
> 
> perf script -F +brstackinsn,+disasm
> 
> ...
>            emacs   55778 1814366.755945:     151564 cycles:P:      7f0ab2d17192 intel_check_word.constprop.0+0x162 (/usr/lib64/ld-linux-x86-64.s>        intel_check_word.constprop.0+237:
>         00007f0ab2d1711d        jne intel_check_word.constprop.0+0xd5   # PRED 3 cycles [3]
>         00007f0ab2d17105        jae intel_check_word.constprop.0+0x128
>         00007f0ab2d17107        movq %rax, %rcx
>         00007f0ab2d1710a        cmpq %rcx, %rdx
>         00007f0ab2d1710d        jae intel_check_word.constprop.0+0x75
>         00007f0ab2d1710f        leaq (%rcx, %rdx), %rax
>         00007f0ab2d17113        shrq $1, %rax
>         00007f0ab2d17116        leaq (%r9, %rax, 8), %rsi
>         00007f0ab2d1711a        cmpb (%rsi), %r8b
>         00007f0ab2d1711d        jne intel_check_word.constprop.0+0xd5   # PRED 3 cycles [6] 3.00 IPC
>         00007f0ab2d17105        jae intel_check_word.constprop.0+0x128  # PRED 1 cycles [7] 1.00 IPC
>         00007f0ab2d17158        leaq 1(%rax), %rdx
>         00007f0ab2d1715c        jmp intel_check_word.constprop.0+0xc0   # PRED 1 cycles [8] 2.00 IPC
>         00007f0ab2d170f0        cmpq %rcx, %rdx
>         00007f0ab2d170f3        jae intel_check_word.constprop.0+0x75   # PRED 1 cycles [9] 2.00 IPC
> 
> Signed-off-by: Andi Kleen <ak@linux.intel.com>
> ---
>  tools/perf/builtin-script.c  | 23 +++++++++++++---
>  tools/perf/util/dump-insn.h  |  1 +
>  tools/perf/util/print_insn.c | 52 ++++++++++++++++++++++++++++++++++++
>  tools/perf/util/print_insn.h |  3 +++
>  4 files changed, 75 insertions(+), 4 deletions(-)
> 
> diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
> index 37088cc0ff1b..f18bcf61be8b 100644
> --- a/tools/perf/builtin-script.c
> +++ b/tools/perf/builtin-script.c
> @@ -1162,6 +1162,20 @@ static int print_srccode(struct thread *thread, u8 cpumode, uint64_t addr)
>  	return ret;
>  }
>  
> +static const char *any_dump_insn(struct perf_event_attr *attr,
> +			struct perf_insn *x, uint64_t ip,
> +			u8 *inbuf, int inlen, int *lenp)
> +{
> +#ifdef HAVE_LIBCAPSTONE_SUPPORT
> +	if (PRINT_FIELD(DISASM)) {
> +		const char *p = cs_dump_insn(x, ip, inbuf, inlen, lenp);
> +		if (p)
> +			return p;
> +	}
> +#endif
> +	return dump_insn(x, ip, inbuf, inlen, lenp);
> +}
> +
>  static int ip__fprintf_jump(uint64_t ip, struct branch_entry *en,
>  			    struct perf_insn *x, u8 *inbuf, int len,
>  			    int insn, FILE *fp, int *total_cycles,
> @@ -1170,7 +1184,7 @@ static int ip__fprintf_jump(uint64_t ip, struct branch_entry *en,
>  {
>  	int ilen = 0;
>  	int printed = fprintf(fp, "\t%016" PRIx64 "\t%-30s\t", ip,
> -			      dump_insn(x, ip, inbuf, len, &ilen));
> +			      any_dump_insn(attr, x, ip, inbuf, len, &ilen));
>  
>  	if (PRINT_FIELD(BRSTACKINSNLEN))
>  		printed += fprintf(fp, "ilen: %d\t", ilen);
> @@ -1262,6 +1276,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
>  		nr = max_blocks + 1;
>  
>  	x.thread = thread;
> +	x.machine = machine;
>  	x.cpu = sample->cpu;
>  
>  	printed += fprintf(fp, "%c", '\n');
> @@ -1313,7 +1328,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
>  			} else {
>  				ilen = 0;
>  				printed += fprintf(fp, "\t%016" PRIx64 "\t%s", ip,
> -						   dump_insn(&x, ip, buffer + off, len - off, &ilen));
> +						   any_dump_insn(attr, &x, ip, buffer + off, len - off, &ilen));
>  				if (PRINT_FIELD(BRSTACKINSNLEN))
>  					printed += fprintf(fp, "\tilen: %d", ilen);
>  				printed += fprintf(fp, "\n");
> @@ -1361,7 +1376,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
>  			goto out;
>  		ilen = 0;
>  		printed += fprintf(fp, "\t%016" PRIx64 "\t%s", sample->ip,
> -			dump_insn(&x, sample->ip, buffer, len, &ilen));
> +			any_dump_insn(attr, &x, sample->ip, buffer, len, &ilen));
>  		if (PRINT_FIELD(BRSTACKINSNLEN))
>  			printed += fprintf(fp, "\tilen: %d", ilen);
>  		printed += fprintf(fp, "\n");
> @@ -1372,7 +1387,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
>  	for (off = 0; off <= end - start; off += ilen) {
>  		ilen = 0;
>  		printed += fprintf(fp, "\t%016" PRIx64 "\t%s", start + off,
> -				   dump_insn(&x, start + off, buffer + off, len - off, &ilen));
> +				   any_dump_insn(attr, &x, start + off, buffer + off, len - off, &ilen));
>  		if (PRINT_FIELD(BRSTACKINSNLEN))
>  			printed += fprintf(fp, "\tilen: %d", ilen);
>  		printed += fprintf(fp, "\n");
> diff --git a/tools/perf/util/dump-insn.h b/tools/perf/util/dump-insn.h
> index 650125061530..4a7797dd6d09 100644
> --- a/tools/perf/util/dump-insn.h
> +++ b/tools/perf/util/dump-insn.h
> @@ -11,6 +11,7 @@ struct thread;
>  struct perf_insn {
>  	/* Initialized by callers: */
>  	struct thread *thread;
> +	struct machine *machine;
>  	u8	      cpumode;
>  	bool	      is64bit;
>  	int	      cpu;
> diff --git a/tools/perf/util/print_insn.c b/tools/perf/util/print_insn.c
> index bd7a95e64ce5..35785ab22c07 100644
> --- a/tools/perf/util/print_insn.c
> +++ b/tools/perf/util/print_insn.c
> @@ -12,6 +12,7 @@
>  #include "machine.h"
>  #include "thread.h"
>  #include "print_insn.h"
> +#include "dump-insn.h"
>  #include "map.h"
>  #include "dso.h"
>  
> @@ -71,6 +72,57 @@ static int capstone_init(struct machine *machine, csh *cs_handle, bool is64)
>  	return 0;
>  }
>  
> +static void dump_insn_x86(struct thread *thread, cs_insn *insn, struct perf_insn *x)
> +{
> +	struct addr_location al;
> +	bool printed = false;
> +
> +	if (insn->detail && insn->detail->x86.op_count == 1) {
> +		cs_x86_op *op = &insn->detail->x86.operands[0];
> +
> +		addr_location__init(&al);
> +		if (op->type == X86_OP_IMM &&
> +		    thread__find_symbol(thread, x->cpumode, op->imm, &al) &&
> +		    al.sym &&
> +		    al.addr < al.sym->end) {
> +			snprintf(x->out, sizeof(x->out), "%s %s+%#" PRIx64 " [%#" PRIx64 "]", insn[0].mnemonic,
> +					al.sym->name, al.addr - al.sym->start, op->imm);
> +			printed = true;
> +		}
> +		addr_location__exit(&al);
> +	}
> +
> +	if (!printed)
> +		snprintf(x->out, sizeof(x->out), "%s %s", insn[0].mnemonic, insn[0].op_str);
> +}
> +
> +const char *cs_dump_insn(struct perf_insn *x, uint64_t ip,
> +			 u8 *inbuf, int inlen, int *lenp)
> +{
> +	int ret;
> +	int count;
> +	cs_insn *insn;
> +	csh cs_handle;
> +
> +	ret = capstone_init(x->machine, &cs_handle, x->is64bit);
> +	if (ret < 0)
> +		return NULL;
> +
> +	count = cs_disasm(cs_handle, (uint8_t *)inbuf, inlen, ip, 1, &insn);
> +	if (count > 0) {
> +		if (machine__normalized_is(x->machine, "x86"))
> +			dump_insn_x86(x->thread, &insn[0], x);
> +		else
> +			snprintf(x->out, sizeof(x->out), "%s %s",
> +					insn[0].mnemonic, insn[0].op_str);
> +		*lenp = insn->size;
> +		cs_free(insn, count);
> +	} else {
> +		return NULL;
> +	}
> +	return x->out;
> +}
Most of the code above is duplicated. The only difference between dumping and printing is
the output target, so they could share common code.

> +
>  static size_t print_insn_x86(struct perf_sample *sample, struct thread *thread,
>  			     cs_insn *insn, FILE *fp)
>  {
> diff --git a/tools/perf/util/print_insn.h b/tools/perf/util/print_insn.h
> index 465bdcfcc2fd..135d78322f71 100644
> --- a/tools/perf/util/print_insn.h
> +++ b/tools/perf/util/print_insn.h
> @@ -8,9 +8,12 @@
>  struct perf_sample;
>  struct thread;
>  struct machine;
> +struct perf_insn;
>  
>  size_t sample__fprintf_insn_asm(struct perf_sample *sample, struct thread *thread,
>  				struct machine *machine, FILE *fp);
>  size_t sample__fprintf_insn_raw(struct perf_sample *sample, FILE *fp);
> +const char *cs_dump_insn(struct perf_insn *x, uint64_t ip,
> +                         u8 *inbuf, int inlen, int *lenp);
>  
>  #endif /* PERF_PRINT_INSN_H */
> -- 
> 2.43.0
> 

-- 
Cheers,
Changbin Du

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/2] perf, capstone: Support 32bit code under 64bit OS
  2024-02-27 23:48 [PATCH 1/2] perf, capstone: Support 32bit code under 64bit OS Andi Kleen
  2024-02-27 23:48 ` [PATCH 2/2] perf, capstone: Support capstone for -F +brstackinsn Andi Kleen
  2024-02-28 11:00 ` [PATCH 1/2] perf, capstone: Support 32bit code under 64bit OS Changbin Du
@ 2024-02-28 11:42 ` Adrian Hunter
  2024-02-28 23:30   ` Andi Kleen
  2 siblings, 1 reply; 9+ messages in thread
From: Adrian Hunter @ 2024-02-28 11:42 UTC (permalink / raw)
  To: Andi Kleen, linux-perf-users; +Cc: changbin.du

On 28/02/24 01:48, Andi Kleen wrote:
> Use the DSO to resolve whether an IP is 32bit or 64bit and use that to
> configure capstone to the correct mode. This allows to correctly
> disassemble 32bit code under a 64bit OS.
> 
> % cat > loop.c
> volatile int var;
> int main(void)
> {
> 	int i;
> 	for (i = 0; i < 100000; i++)
> 		var++;
> }
> % gcc -m32 -o loop loop.c
> % perf record -e cycles:u ./loop
> % perf script -F +disasm
>             loop   82665 1833176.618023:          1 cycles:u:          f7eed500 _start+0x0 (/usr/lib/ld-linux.so.2)             movl %esp, %eax
>             loop   82665 1833176.618029:          1 cycles:u:          f7eed500 _start+0x0 (/usr/lib/ld-linux.so.2)             movl %esp, %eax
>             loop   82665 1833176.618031:          7 cycles:u:          f7eed500 _start+0x0 (/usr/lib/ld-linux.so.2)             movl %esp, %eax
>             loop   82665 1833176.618034:         91 cycles:u:          f7eed500 _start+0x0 (/usr/lib/ld-linux.so.2)             movl %esp, %eax
>             loop   82665 1833176.618036:       1242 cycles:u:          f7eed500 _start+0x0 (/usr/lib/ld-linux.so.2)             movl %esp, %eax
> 
> Signed-off-by: Andi Kleen <ak@linux.intel.com>
> ---
>  tools/perf/util/print_insn.c | 20 +++++++++++++++++---
>  1 file changed, 17 insertions(+), 3 deletions(-)
> 
> diff --git a/tools/perf/util/print_insn.c b/tools/perf/util/print_insn.c
> index 459e0e93d7b1..bd7a95e64ce5 100644
> --- a/tools/perf/util/print_insn.c
> +++ b/tools/perf/util/print_insn.c
> @@ -12,6 +12,8 @@
>  #include "machine.h"
>  #include "thread.h"
>  #include "print_insn.h"
> +#include "map.h"
> +#include "dso.h"
>  
>  size_t sample__fprintf_insn_raw(struct perf_sample *sample, FILE *fp)
>  {
> @@ -28,12 +30,12 @@ size_t sample__fprintf_insn_raw(struct perf_sample *sample, FILE *fp)
>  #ifdef HAVE_LIBCAPSTONE_SUPPORT
>  #include <capstone/capstone.h>
>  
> -static int capstone_init(struct machine *machine, csh *cs_handle)
> +static int capstone_init(struct machine *machine, csh *cs_handle, bool is64)
>  {
>  	cs_arch arch;
>  	cs_mode mode;
>  
> -	if (machine__is(machine, "x86_64")) {
> +	if (machine__is(machine, "x86_64") && is64) {
>  		arch = CS_ARCH_X86;
>  		mode = CS_MODE_64;
>  	} else if (machine__normalized_is(machine, "x86")) {
> @@ -101,9 +103,21 @@ size_t sample__fprintf_insn_asm(struct perf_sample *sample, struct thread *threa
>  	size_t count;
>  	size_t printed = 0;
>  	int ret;
> +	struct addr_location al;
> +	bool is64bit = machine__is(machine, "x86_64");
> +	struct dso *dso;
> +
> +	addr_location__init(&al);
> +	if (thread__find_map(thread, sample->cpumode, sample->ip, &al) &&
> +		(dso = map__dso(al.map)) != NULL &&
> +		(dso->data.status != DSO_DATA_STATUS_ERROR)) {
> +		map__load(al.map);
> +		is64bit = dso->is_64_bit;
> +	}
> +	addr_location__exit(&al);

Maybe 'al' could be passed down through perf_sample__fprintf_insn()

>  
>  	/* TODO: Try to initiate capstone only once but need a proper place. */
> -	ret = capstone_init(machine, &cs_handle);
> +	ret = capstone_init(machine, &cs_handle, is64bit);
>  	if (ret < 0) {
>  		/* fallback */
>  		return sample__fprintf_insn_raw(sample, fp);


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] perf, capstone: Support capstone for -F +brstackinsn
  2024-02-28 11:05   ` Changbin Du
@ 2024-02-28 23:26     ` Andi Kleen
  0 siblings, 0 replies; 9+ messages in thread
From: Andi Kleen @ 2024-02-28 23:26 UTC (permalink / raw)
  To: Changbin Du; +Cc: linux-perf-users, adrian.hunter

On Wed, Feb 28, 2024 at 07:05:34PM +0800, Changbin Du wrote:
> On Tue, Feb 27, 2024 at 03:48:05PM -0800, Andi Kleen wrote:
> > Support capstone output for the -F +brstackinsn branch dump.
> > It is only enabled when -F +disasm is specified.
> > This was possible before with --xed, but now also allow
> > it for users that don't have xed using the builtin capstone support.
> > By this, 'disasm' acts as a flag but not a field any more.

Yes that was intentional. 

You would prefer to define brstackdisasm instead?

-Andi

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/2] perf, capstone: Support 32bit code under 64bit OS
  2024-02-28 11:00 ` [PATCH 1/2] perf, capstone: Support 32bit code under 64bit OS Changbin Du
@ 2024-02-28 23:29   ` Andi Kleen
  0 siblings, 0 replies; 9+ messages in thread
From: Andi Kleen @ 2024-02-28 23:29 UTC (permalink / raw)
  To: Changbin Du; +Cc: linux-perf-users, adrian.hunter

On Wed, Feb 28, 2024 at 07:00:06PM +0800, Changbin Du wrote:
> On Tue, Feb 27, 2024 at 03:48:04PM -0800, Andi Kleen wrote:
> > Use the DSO to resolve whether an IP is 32bit or 64bit and use that to
> > configure capstone to the correct mode. This allows to correctly
> > disassemble 32bit code under a 64bit OS.
> > 
> > % cat > loop.c
> > volatile int var;
> > int main(void)
> > {
> > 	int i;
> > 	for (i = 0; i < 100000; i++)
> > 		var++;
> > }
> > % gcc -m32 -o loop loop.c
> > % perf record -e cycles:u ./loop
> > % perf script -F +disasm
> >             loop   82665 1833176.618023:          1 cycles:u:          f7eed500 _start+0x0 (/usr/lib/ld-linux.so.2)             movl %esp, %eax
> >             loop   82665 1833176.618029:          1 cycles:u:          f7eed500 _start+0x0 (/usr/lib/ld-linux.so.2)             movl %esp, %eax
> >             loop   82665 1833176.618031:          7 cycles:u:          f7eed500 _start+0x0 (/usr/lib/ld-linux.so.2)             movl %esp, %eax
> >             loop   82665 1833176.618034:         91 cycles:u:          f7eed500 _start+0x0 (/usr/lib/ld-linux.so.2)             movl %esp, %eax
> >             loop   82665 1833176.618036:       1242 cycles:u:          f7eed500 _start+0x0 (/usr/lib/ld-linux.so.2)             movl %esp, %eax
> > 
> > Signed-off-by: Andi Kleen <ak@linux.intel.com>
> > ---
> >  tools/perf/util/print_insn.c | 20 +++++++++++++++++---
> >  1 file changed, 17 insertions(+), 3 deletions(-)
> > 
> > diff --git a/tools/perf/util/print_insn.c b/tools/perf/util/print_insn.c
> > index 459e0e93d7b1..bd7a95e64ce5 100644
> > --- a/tools/perf/util/print_insn.c
> > +++ b/tools/perf/util/print_insn.c
> > @@ -12,6 +12,8 @@
> >  #include "machine.h"
> >  #include "thread.h"
> >  #include "print_insn.h"
> > +#include "map.h"
> > +#include "dso.h"
> >  
> >  size_t sample__fprintf_insn_raw(struct perf_sample *sample, FILE *fp)
> >  {
> > @@ -28,12 +30,12 @@ size_t sample__fprintf_insn_raw(struct perf_sample *sample, FILE *fp)
> >  #ifdef HAVE_LIBCAPSTONE_SUPPORT
> >  #include <capstone/capstone.h>
> >  
> > -static int capstone_init(struct machine *machine, csh *cs_handle)
> > +static int capstone_init(struct machine *machine, csh *cs_handle, bool is64)
> >  {
> >  	cs_arch arch;
> >  	cs_mode mode;
> >  
> > -	if (machine__is(machine, "x86_64")) {
> > +	if (machine__is(machine, "x86_64") && is64) {
> >  		arch = CS_ARCH_X86;
> >  		mode = CS_MODE_64;
> >  	} else if (machine__normalized_is(machine, "x86")) {
> > @@ -101,9 +103,21 @@ size_t sample__fprintf_insn_asm(struct perf_sample *sample, struct thread *threa
> >  	size_t count;
> >  	size_t printed = 0;
> >  	int ret;
> > +	struct addr_location al;
> > +	bool is64bit = machine__is(machine, "x86_64");
> > +	struct dso *dso;
> > +
> > +	addr_location__init(&al);
> > +	if (thread__find_map(thread, sample->cpumode, sample->ip, &al) &&
> > +		(dso = map__dso(al.map)) != NULL &&
> > +		(dso->data.status != DSO_DATA_STATUS_ERROR)) {
> > +		map__load(al.map);
> > +		is64bit = dso->is_64_bit;
> > +	}
> > +	addr_location__exit(&al);
> >
> This could be extracted as a standalone function. And this should apply to arm64
> also.

It should work for ARM64 too, at least for the sample case which it
supports. I haven't tested it however because I don't have such a
system.

-Andi

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/2] perf, capstone: Support 32bit code under 64bit OS
  2024-02-28 11:42 ` Adrian Hunter
@ 2024-02-28 23:30   ` Andi Kleen
  2024-02-29  7:02     ` Adrian Hunter
  0 siblings, 1 reply; 9+ messages in thread
From: Andi Kleen @ 2024-02-28 23:30 UTC (permalink / raw)
  To: Adrian Hunter; +Cc: linux-perf-users, changbin.du

> > +	bool is64bit = machine__is(machine, "x86_64");
> > +	struct dso *dso;
> > +
> > +	addr_location__init(&al);
> > +	if (thread__find_map(thread, sample->cpumode, sample->ip, &al) &&
> > +		(dso = map__dso(al.map)) != NULL &&
> > +		(dso->data.status != DSO_DATA_STATUS_ERROR)) {
> > +		map__load(al.map);
> > +		is64bit = dso->is_64_bit;
> > +	}
> > +	addr_location__exit(&al);
> 
> Maybe 'al' could be passed down through perf_sample__fprintf_insn()
> 

These are different addresses -- one is for the IP and the other is for
the immediate. It seems cleaner to support separate ones.

-Andi

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/2] perf, capstone: Support 32bit code under 64bit OS
  2024-02-28 23:30   ` Andi Kleen
@ 2024-02-29  7:02     ` Adrian Hunter
  0 siblings, 0 replies; 9+ messages in thread
From: Adrian Hunter @ 2024-02-29  7:02 UTC (permalink / raw)
  To: Andi Kleen; +Cc: linux-perf-users, changbin.du

On 29/02/24 01:30, Andi Kleen wrote:
>>> +	bool is64bit = machine__is(machine, "x86_64");
>>> +	struct dso *dso;
>>> +
>>> +	addr_location__init(&al);
>>> +	if (thread__find_map(thread, sample->cpumode, sample->ip, &al) &&
>>> +		(dso = map__dso(al.map)) != NULL &&
>>> +		(dso->data.status != DSO_DATA_STATUS_ERROR)) {
>>> +		map__load(al.map);
>>> +		is64bit = dso->is_64_bit;
>>> +	}
>>> +	addr_location__exit(&al);
>>
>> Maybe 'al' could be passed down through perf_sample__fprintf_insn()
>>
> 
> These are different addresses -- one is for the IP and the other is for
> the immediate. It seems cleaner to support separate ones.

They are both sample->ip


^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2024-02-29  7:02 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz follow: Atom feed)
-- links below jump to the message on this page --
2024-02-27 23:48 [PATCH 1/2] perf, capstone: Support 32bit code under 64bit OS Andi Kleen
2024-02-27 23:48 ` [PATCH 2/2] perf, capstone: Support capstone for -F +brstackinsn Andi Kleen
2024-02-28 11:05   ` Changbin Du
2024-02-28 23:26     ` Andi Kleen
2024-02-28 11:00 ` [PATCH 1/2] perf, capstone: Support 32bit code under 64bit OS Changbin Du
2024-02-28 23:29   ` Andi Kleen
2024-02-28 11:42 ` Adrian Hunter
2024-02-28 23:30   ` Andi Kleen
2024-02-29  7:02     ` Adrian Hunter

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).