* [PATCH v4 1/2] perf, capstone: Support 32bit code under 64bit OS
@ 2024-03-08 17:06 Andi Kleen
2024-03-08 17:06 ` [PATCH v4 2/2] perf, script, capstone: Add support for -F +brstackdisasm Andi Kleen
` (2 more replies)
0 siblings, 3 replies; 12+ messages in thread
From: Andi Kleen @ 2024-03-08 17:06 UTC (permalink / raw)
To: linux-perf-users; +Cc: adrian.hunter, changbin.du, Andi Kleen
Use the DSO to resolve whether an IP is 32bit or 64bit and use that to
configure capstone to the correct mode. This allows 32bit code to be
disassembled correctly under a 64bit OS.
% cat > loop.c
volatile int var;
int main(void)
{
int i;
for (i = 0; i < 100000; i++)
var++;
}
% gcc -m32 -o loop loop.c
% perf record -e cycles:u ./loop
% perf script -F +disasm
loop 82665 1833176.618023: 1 cycles:u: f7eed500 _start+0x0 (/usr/lib/ld-linux.so.2) movl %esp, %eax
loop 82665 1833176.618029: 1 cycles:u: f7eed500 _start+0x0 (/usr/lib/ld-linux.so.2) movl %esp, %eax
loop 82665 1833176.618031: 7 cycles:u: f7eed500 _start+0x0 (/usr/lib/ld-linux.so.2) movl %esp, %eax
loop 82665 1833176.618034: 91 cycles:u: f7eed500 _start+0x0 (/usr/lib/ld-linux.so.2) movl %esp, %eax
loop 82665 1833176.618036: 1242 cycles:u: f7eed500 _start+0x0 (/usr/lib/ld-linux.so.2) movl %esp, %eax
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
v2: Factor out DSO lookup into separate function
v3: Pass down al
v4: Simplify is64bitip
---
tools/perf/builtin-script.c | 9 +++++----
tools/perf/util/print_insn.c | 24 ++++++++++++++++++++----
tools/perf/util/print_insn.h | 2 +-
3 files changed, 26 insertions(+), 9 deletions(-)
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 37088cc0ff1b..0299b1ed8744 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -1517,7 +1517,8 @@ void script_fetch_insn(struct perf_sample *sample, struct thread *thread,
static int perf_sample__fprintf_insn(struct perf_sample *sample,
struct perf_event_attr *attr,
struct thread *thread,
- struct machine *machine, FILE *fp)
+ struct machine *machine, FILE *fp,
+ struct addr_location *al)
{
int printed = 0;
@@ -1531,7 +1532,7 @@ static int perf_sample__fprintf_insn(struct perf_sample *sample,
}
if (PRINT_FIELD(DISASM) && sample->insn_len) {
printed += fprintf(fp, "\t\t");
- printed += sample__fprintf_insn_asm(sample, thread, machine, fp);
+ printed += sample__fprintf_insn_asm(sample, thread, machine, fp, al);
}
if (PRINT_FIELD(BRSTACKINSN) || PRINT_FIELD(BRSTACKINSNLEN))
printed += perf_sample__fprintf_brstackinsn(sample, thread, attr, machine, fp);
@@ -1606,7 +1607,7 @@ static int perf_sample__fprintf_bts(struct perf_sample *sample,
if (print_srcline_last)
printed += map__fprintf_srcline(al->map, al->addr, "\n ", fp);
- printed += perf_sample__fprintf_insn(sample, attr, thread, machine, fp);
+ printed += perf_sample__fprintf_insn(sample, attr, thread, machine, fp, al);
printed += fprintf(fp, "\n");
if (PRINT_FIELD(SRCCODE)) {
int ret = map__fprintf_srccode(al->map, al->addr, stdout,
@@ -2259,7 +2260,7 @@ static void process_event(struct perf_script *script,
if (evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT))
perf_sample__fprintf_bpf_output(sample, fp);
- perf_sample__fprintf_insn(sample, attr, thread, machine, fp);
+ perf_sample__fprintf_insn(sample, attr, thread, machine, fp, al);
if (PRINT_FIELD(PHYS_ADDR))
fprintf(fp, "%16" PRIx64, sample->phys_addr);
diff --git a/tools/perf/util/print_insn.c b/tools/perf/util/print_insn.c
index 459e0e93d7b1..8e4e3cffd677 100644
--- a/tools/perf/util/print_insn.c
+++ b/tools/perf/util/print_insn.c
@@ -12,6 +12,8 @@
#include "machine.h"
#include "thread.h"
#include "print_insn.h"
+#include "map.h"
+#include "dso.h"
size_t sample__fprintf_insn_raw(struct perf_sample *sample, FILE *fp)
{
@@ -28,12 +30,12 @@ size_t sample__fprintf_insn_raw(struct perf_sample *sample, FILE *fp)
#ifdef HAVE_LIBCAPSTONE_SUPPORT
#include <capstone/capstone.h>
-static int capstone_init(struct machine *machine, csh *cs_handle)
+static int capstone_init(struct machine *machine, csh *cs_handle, bool is64)
{
cs_arch arch;
cs_mode mode;
- if (machine__is(machine, "x86_64")) {
+ if (machine__is(machine, "x86_64") && is64) {
arch = CS_ARCH_X86;
mode = CS_MODE_64;
} else if (machine__normalized_is(machine, "x86")) {
@@ -93,17 +95,31 @@ static size_t print_insn_x86(struct perf_sample *sample, struct thread *thread,
return printed;
}
+static bool is64bitip(struct machine *machine, struct addr_location *al)
+{
+ const struct dso *dso = al->map ? map__dso(al->map) : NULL;
+
+ if (dso)
+ return dso->is_64_bit;
+
+ return machine__is(machine, "x86_64") ||
+ machine__normalized_is(machine, "arm64") ||
+ machine__normalized_is(machine, "s390");
+}
+
size_t sample__fprintf_insn_asm(struct perf_sample *sample, struct thread *thread,
- struct machine *machine, FILE *fp)
+ struct machine *machine, FILE *fp,
+ struct addr_location *al)
{
csh cs_handle;
cs_insn *insn;
size_t count;
size_t printed = 0;
int ret;
+ bool is64bit = is64bitip(machine, al);
/* TODO: Try to initiate capstone only once but need a proper place. */
- ret = capstone_init(machine, &cs_handle);
+ ret = capstone_init(machine, &cs_handle, is64bit);
if (ret < 0) {
/* fallback */
return sample__fprintf_insn_raw(sample, fp);
diff --git a/tools/perf/util/print_insn.h b/tools/perf/util/print_insn.h
index 465bdcfcc2fd..6447dd41b543 100644
--- a/tools/perf/util/print_insn.h
+++ b/tools/perf/util/print_insn.h
@@ -10,7 +10,7 @@ struct thread;
struct machine;
size_t sample__fprintf_insn_asm(struct perf_sample *sample, struct thread *thread,
- struct machine *machine, FILE *fp);
+ struct machine *machine, FILE *fp, struct addr_location *al);
size_t sample__fprintf_insn_raw(struct perf_sample *sample, FILE *fp);
#endif /* PERF_PRINT_INSN_H */
--
2.43.0
^ permalink raw reply related [flat|nested] 12+ messages in thread* [PATCH v4 2/2] perf, script, capstone: Add support for -F +brstackdisasm 2024-03-08 17:06 [PATCH v4 1/2] perf, capstone: Support 32bit code under 64bit OS Andi Kleen @ 2024-03-08 17:06 ` Andi Kleen 2024-03-15 12:19 ` Adrian Hunter 2024-03-22 10:21 ` [PATCH v4 2/2] perf, script, capstone: Add support for -F +brstackdisasm Adrian Hunter 2024-03-15 12:19 ` [PATCH v4 1/2] perf, capstone: Support 32bit code under 64bit OS Adrian Hunter 2024-03-22 10:21 ` Adrian Hunter 2 siblings, 2 replies; 12+ messages in thread From: Andi Kleen @ 2024-03-08 17:06 UTC (permalink / raw) To: linux-perf-users; +Cc: adrian.hunter, changbin.du, Andi Kleen Support capstone output for the -F +brstackinsn branch dump. The new output is enabled with the new field brstackdisasm. This was possible before with --xed, but now it is also available to users that don't have xed, using the builtin capstone support. Before: perf record -b emacs -Q --batch '()' perf script -F +brstackinsn ... emacs 55778 1814366.755945: 151564 cycles:P: 7f0ab2d17192 intel_check_word.constprop.0+0x162 (/usr/lib64/ld-linux-x86-64.s> intel_check_word.constprop.0+237: 00007f0ab2d1711d insn: 75 e6 # PRED 3 cycles [3] 00007f0ab2d17105 insn: 73 51 00007f0ab2d17107 insn: 48 89 c1 00007f0ab2d1710a insn: 48 39 ca 00007f0ab2d1710d insn: 73 96 00007f0ab2d1710f insn: 48 8d 04 11 00007f0ab2d17113 insn: 48 d1 e8 00007f0ab2d17116 insn: 49 8d 34 c1 00007f0ab2d1711a insn: 44 3a 06 00007f0ab2d1711d insn: 75 e6 # PRED 3 cycles [6] 3.00 IPC 00007f0ab2d17105 insn: 73 51 # PRED 1 cycles [7] 1.00 IPC 00007f0ab2d17158 insn: 48 8d 50 01 00007f0ab2d1715c insn: eb 92 # PRED 1 cycles [8] 2.00 IPC 00007f0ab2d170f0 insn: 48 39 ca 00007f0ab2d170f3 insn: 73 b0 # PRED 1 cycles [9] 2.00 IPC After (perf must be compiled with capstone): perf script -F +brstackdisasm ... 
emacs 55778 1814366.755945: 151564 cycles:P: 7f0ab2d17192 intel_check_word.constprop.0+0x162 (/usr/lib64/ld-linux-x86-64.s> intel_check_word.constprop.0+237: 00007f0ab2d1711d jne intel_check_word.constprop.0+0xd5 # PRED 3 cycles [3] 00007f0ab2d17105 jae intel_check_word.constprop.0+0x128 00007f0ab2d17107 movq %rax, %rcx 00007f0ab2d1710a cmpq %rcx, %rdx 00007f0ab2d1710d jae intel_check_word.constprop.0+0x75 00007f0ab2d1710f leaq (%rcx, %rdx), %rax 00007f0ab2d17113 shrq $1, %rax 00007f0ab2d17116 leaq (%r9, %rax, 8), %rsi 00007f0ab2d1711a cmpb (%rsi), %r8b 00007f0ab2d1711d jne intel_check_word.constprop.0+0xd5 # PRED 3 cycles [6] 3.00 IPC 00007f0ab2d17105 jae intel_check_word.constprop.0+0x128 # PRED 1 cycles [7] 1.00 IPC 00007f0ab2d17158 leaq 1(%rax), %rdx 00007f0ab2d1715c jmp intel_check_word.constprop.0+0xc0 # PRED 1 cycles [8] 2.00 IPC 00007f0ab2d170f0 cmpq %rcx, %rdx 00007f0ab2d170f3 jae intel_check_word.constprop.0+0x75 # PRED 1 cycles [9] 2.00 IPC Signed-off-by: Andi Kleen <ak@linux.intel.com> --- v2: Use brstackdisasm instead of keying of disasm --- tools/perf/Documentation/perf-script.txt | 7 +++- tools/perf/builtin-script.c | 32 +++++++++++---- tools/perf/util/dump-insn.h | 1 + tools/perf/util/print_insn.c | 52 ++++++++++++++++++++++++ tools/perf/util/print_insn.h | 3 ++ 5 files changed, 86 insertions(+), 9 deletions(-) diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 005e51df855e..ff086ef05a0c 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -132,9 +132,9 @@ OPTIONS Comma separated list of fields to print. 
Options are: comm, tid, pid, time, cpu, event, trace, ip, sym, dso, dsoff, addr, symoff, srcline, period, iregs, uregs, brstack, brstacksym, flags, bpf-output, - brstackinsn, brstackinsnlen, brstackoff, callindent, insn, disasm, + brstackinsn, brstackinsnlen, brstackdisasm, brstackoff, callindent, insn, disasm, insnlen, synth, phys_addr, metric, misc, srccode, ipc, data_page_size, - code_page_size, ins_lat, machine_pid, vcpu, cgroup, retire_lat. + code_page_size, ins_lat, machine_pid, vcpu, cgroup, retire_lat, Field list can be prepended with the type, trace, sw or hw, to indicate to which event type the field list applies. @@ -257,6 +257,9 @@ OPTIONS can’t know the next sequential instruction after an unconditional branch unless you calculate that based on its length. + brstackdisasm acts like brstackinsn, but will print disassembled instructions if + perf is built with the capstone library. + The brstackoff field will print an offset into a specific dso/binary. With the metric option perf script can compute metrics for diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 0299b1ed8744..db18d2c54c59 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -136,6 +136,7 @@ enum perf_output_field { PERF_OUTPUT_RETIRE_LAT = 1ULL << 40, PERF_OUTPUT_DSOFF = 1ULL << 41, PERF_OUTPUT_DISASM = 1ULL << 42, + PERF_OUTPUT_BRSTACKDISASM = 1ULL << 43, }; struct perf_script { @@ -210,6 +211,7 @@ struct output_option { {.str = "vcpu", .field = PERF_OUTPUT_VCPU}, {.str = "cgroup", .field = PERF_OUTPUT_CGROUP}, {.str = "retire_lat", .field = PERF_OUTPUT_RETIRE_LAT}, + {.str = "brstackdisasm", .field = PERF_OUTPUT_BRSTACKDISASM}, }; enum { @@ -510,7 +512,8 @@ static int evsel__check_attr(struct evsel *evsel, struct perf_session *session) "selected. 
Hence, no address to lookup the source line number.\n"); return -EINVAL; } - if ((PRINT_FIELD(BRSTACKINSN) || PRINT_FIELD(BRSTACKINSNLEN)) && !allow_user_set && + if ((PRINT_FIELD(BRSTACKINSN) || PRINT_FIELD(BRSTACKINSNLEN) || PRINT_FIELD(BRSTACKDISASM)) + && !allow_user_set && !(evlist__combined_branch_type(session->evlist) & PERF_SAMPLE_BRANCH_ANY)) { pr_err("Display of branch stack assembler requested, but non all-branch filter set\n" "Hint: run 'perf record -b ...'\n"); @@ -1162,6 +1165,20 @@ static int print_srccode(struct thread *thread, u8 cpumode, uint64_t addr) return ret; } +static const char *any_dump_insn(struct perf_event_attr *attr, + struct perf_insn *x, uint64_t ip, + u8 *inbuf, int inlen, int *lenp) +{ +#ifdef HAVE_LIBCAPSTONE_SUPPORT + if (PRINT_FIELD(BRSTACKDISASM)) { + const char *p = cs_dump_insn(x, ip, inbuf, inlen, lenp); + if (p) + return p; + } +#endif + return dump_insn(x, ip, inbuf, inlen, lenp); +} + static int ip__fprintf_jump(uint64_t ip, struct branch_entry *en, struct perf_insn *x, u8 *inbuf, int len, int insn, FILE *fp, int *total_cycles, @@ -1170,7 +1187,7 @@ static int ip__fprintf_jump(uint64_t ip, struct branch_entry *en, { int ilen = 0; int printed = fprintf(fp, "\t%016" PRIx64 "\t%-30s\t", ip, - dump_insn(x, ip, inbuf, len, &ilen)); + any_dump_insn(attr, x, ip, inbuf, len, &ilen)); if (PRINT_FIELD(BRSTACKINSNLEN)) printed += fprintf(fp, "ilen: %d\t", ilen); @@ -1262,6 +1279,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample, nr = max_blocks + 1; x.thread = thread; + x.machine = machine; x.cpu = sample->cpu; printed += fprintf(fp, "%c", '\n'); @@ -1313,7 +1331,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample, } else { ilen = 0; printed += fprintf(fp, "\t%016" PRIx64 "\t%s", ip, - dump_insn(&x, ip, buffer + off, len - off, &ilen)); + any_dump_insn(attr, &x, ip, buffer + off, len - off, &ilen)); if (PRINT_FIELD(BRSTACKINSNLEN)) printed += fprintf(fp, "\tilen: %d", ilen); printed 
+= fprintf(fp, "\n"); @@ -1361,7 +1379,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample, goto out; ilen = 0; printed += fprintf(fp, "\t%016" PRIx64 "\t%s", sample->ip, - dump_insn(&x, sample->ip, buffer, len, &ilen)); + any_dump_insn(attr, &x, sample->ip, buffer, len, &ilen)); if (PRINT_FIELD(BRSTACKINSNLEN)) printed += fprintf(fp, "\tilen: %d", ilen); printed += fprintf(fp, "\n"); @@ -1372,7 +1390,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample, for (off = 0; off <= end - start; off += ilen) { ilen = 0; printed += fprintf(fp, "\t%016" PRIx64 "\t%s", start + off, - dump_insn(&x, start + off, buffer + off, len - off, &ilen)); + any_dump_insn(attr, &x, start + off, buffer + off, len - off, &ilen)); if (PRINT_FIELD(BRSTACKINSNLEN)) printed += fprintf(fp, "\tilen: %d", ilen); printed += fprintf(fp, "\n"); @@ -1534,7 +1552,7 @@ static int perf_sample__fprintf_insn(struct perf_sample *sample, printed += fprintf(fp, "\t\t"); printed += sample__fprintf_insn_asm(sample, thread, machine, fp, al); } - if (PRINT_FIELD(BRSTACKINSN) || PRINT_FIELD(BRSTACKINSNLEN)) + if (PRINT_FIELD(BRSTACKINSN) || PRINT_FIELD(BRSTACKINSNLEN) || PRINT_FIELD(BRSTACKDISASM)) printed += perf_sample__fprintf_brstackinsn(sample, thread, attr, machine, fp); return printed; @@ -3940,7 +3958,7 @@ int cmd_script(int argc, const char **argv) "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,dsoff," "addr,symoff,srcline,period,iregs,uregs,brstack," "brstacksym,flags,data_src,weight,bpf-output,brstackinsn," - "brstackinsnlen,brstackoff,callindent,insn,disasm,insnlen,synth," + "brstackinsnlen,brstackdisasm,brstackoff,callindent,insn,disasm,insnlen,synth," "phys_addr,metric,misc,srccode,ipc,tod,data_page_size," "code_page_size,ins_lat,machine_pid,vcpu,cgroup,retire_lat", parse_output_fields), diff --git a/tools/perf/util/dump-insn.h b/tools/perf/util/dump-insn.h index 650125061530..4a7797dd6d09 100644 --- a/tools/perf/util/dump-insn.h +++ 
b/tools/perf/util/dump-insn.h @@ -11,6 +11,7 @@ struct thread; struct perf_insn { /* Initialized by callers: */ struct thread *thread; + struct machine *machine; u8 cpumode; bool is64bit; int cpu; diff --git a/tools/perf/util/print_insn.c b/tools/perf/util/print_insn.c index 8e4e3cffd677..bca4449f0fa8 100644 --- a/tools/perf/util/print_insn.c +++ b/tools/perf/util/print_insn.c @@ -12,6 +12,7 @@ #include "machine.h" #include "thread.h" #include "print_insn.h" +#include "dump-insn.h" #include "map.h" #include "dso.h" @@ -71,6 +72,57 @@ static int capstone_init(struct machine *machine, csh *cs_handle, bool is64) return 0; } +static void dump_insn_x86(struct thread *thread, cs_insn *insn, struct perf_insn *x) +{ + struct addr_location al; + bool printed = false; + + if (insn->detail && insn->detail->x86.op_count == 1) { + cs_x86_op *op = &insn->detail->x86.operands[0]; + + addr_location__init(&al); + if (op->type == X86_OP_IMM && + thread__find_symbol(thread, x->cpumode, op->imm, &al) && + al.sym && + al.addr < al.sym->end) { + snprintf(x->out, sizeof(x->out), "%s %s+%#" PRIx64 " [%#" PRIx64 "]", insn[0].mnemonic, + al.sym->name, al.addr - al.sym->start, op->imm); + printed = true; + } + addr_location__exit(&al); + } + + if (!printed) + snprintf(x->out, sizeof(x->out), "%s %s", insn[0].mnemonic, insn[0].op_str); +} + +const char *cs_dump_insn(struct perf_insn *x, uint64_t ip, + u8 *inbuf, int inlen, int *lenp) +{ + int ret; + int count; + cs_insn *insn; + csh cs_handle; + + ret = capstone_init(x->machine, &cs_handle, x->is64bit); + if (ret < 0) + return NULL; + + count = cs_disasm(cs_handle, (uint8_t *)inbuf, inlen, ip, 1, &insn); + if (count > 0) { + if (machine__normalized_is(x->machine, "x86")) + dump_insn_x86(x->thread, &insn[0], x); + else + snprintf(x->out, sizeof(x->out), "%s %s", + insn[0].mnemonic, insn[0].op_str); + *lenp = insn->size; + cs_free(insn, count); + } else { + return NULL; + } + return x->out; +} + static size_t print_insn_x86(struct perf_sample 
*sample, struct thread *thread, cs_insn *insn, FILE *fp) { diff --git a/tools/perf/util/print_insn.h b/tools/perf/util/print_insn.h index 6447dd41b543..c2a6391a45ce 100644 --- a/tools/perf/util/print_insn.h +++ b/tools/perf/util/print_insn.h @@ -8,9 +8,12 @@ struct perf_sample; struct thread; struct machine; +struct perf_insn; size_t sample__fprintf_insn_asm(struct perf_sample *sample, struct thread *thread, struct machine *machine, FILE *fp, struct addr_location *al); size_t sample__fprintf_insn_raw(struct perf_sample *sample, FILE *fp); +const char *cs_dump_insn(struct perf_insn *x, uint64_t ip, + u8 *inbuf, int inlen, int *lenp); #endif /* PERF_PRINT_INSN_H */ -- 2.43.0 ^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [PATCH v4 2/2] perf, script, capstone: Add support for -F +brstackdisasm 2024-03-08 17:06 ` [PATCH v4 2/2] perf, script, capstone: Add support for -F +brstackdisasm Andi Kleen @ 2024-03-15 12:19 ` Adrian Hunter 2024-03-18 22:06 ` Andi Kleen 2024-03-22 10:21 ` [PATCH v4 2/2] perf, script, capstone: Add support for -F +brstackdisasm Adrian Hunter 1 sibling, 1 reply; 12+ messages in thread From: Adrian Hunter @ 2024-03-15 12:19 UTC (permalink / raw) To: Andi Kleen, linux-perf-users; +Cc: changbin.du On 8/03/24 19:06, Andi Kleen wrote: > Support capstone output for the -F +brstackinsn branch dump. > The new output is enabled with the new field brstackdisasm > This was possible before with --xed, but now also allow > it for users that don't have xed using the builtin capstone support. > > Before: > > perf record -b emacs -Q --batch '()' > perf script -F +brstackinsn > ... > emacs 55778 1814366.755945: 151564 cycles:P: 7f0ab2d17192 intel_check_word.constprop.0+0x162 (/usr/lib64/ld-linux-x86-64.s> intel_check_word.constprop.0+237: > 00007f0ab2d1711d insn: 75 e6 # PRED 3 cycles [3] > 00007f0ab2d17105 insn: 73 51 > 00007f0ab2d17107 insn: 48 89 c1 > 00007f0ab2d1710a insn: 48 39 ca > 00007f0ab2d1710d insn: 73 96 > 00007f0ab2d1710f insn: 48 8d 04 11 > 00007f0ab2d17113 insn: 48 d1 e8 > 00007f0ab2d17116 insn: 49 8d 34 c1 > 00007f0ab2d1711a insn: 44 3a 06 > 00007f0ab2d1711d insn: 75 e6 # PRED 3 cycles [6] 3.00 IPC > 00007f0ab2d17105 insn: 73 51 # PRED 1 cycles [7] 1.00 IPC > 00007f0ab2d17158 insn: 48 8d 50 01 > 00007f0ab2d1715c insn: eb 92 # PRED 1 cycles [8] 2.00 IPC > 00007f0ab2d170f0 insn: 48 39 ca > 00007f0ab2d170f3 insn: 73 b0 # PRED 1 cycles [9] 2.00 IPC > > After (perf must be compiled with capstone): > > perf script -F +brstackdisasm > > ... 
> emacs 55778 1814366.755945: 151564 cycles:P: 7f0ab2d17192 intel_check_word.constprop.0+0x162 (/usr/lib64/ld-linux-x86-64.s> intel_check_word.constprop.0+237: > 00007f0ab2d1711d jne intel_check_word.constprop.0+0xd5 # PRED 3 cycles [3] > 00007f0ab2d17105 jae intel_check_word.constprop.0+0x128 > 00007f0ab2d17107 movq %rax, %rcx > 00007f0ab2d1710a cmpq %rcx, %rdx > 00007f0ab2d1710d jae intel_check_word.constprop.0+0x75 > 00007f0ab2d1710f leaq (%rcx, %rdx), %rax > 00007f0ab2d17113 shrq $1, %rax > 00007f0ab2d17116 leaq (%r9, %rax, 8), %rsi > 00007f0ab2d1711a cmpb (%rsi), %r8b > 00007f0ab2d1711d jne intel_check_word.constprop.0+0xd5 # PRED 3 cycles [6] 3.00 IPC > 00007f0ab2d17105 jae intel_check_word.constprop.0+0x128 # PRED 1 cycles [7] 1.00 IPC > 00007f0ab2d17158 leaq 1(%rax), %rdx > 00007f0ab2d1715c jmp intel_check_word.constprop.0+0xc0 # PRED 1 cycles [8] 2.00 IPC > 00007f0ab2d170f0 cmpq %rcx, %rdx > 00007f0ab2d170f3 jae intel_check_word.constprop.0+0x75 # PRED 1 cycles [9] 2.00 IPC > > Signed-off-by: Andi Kleen <ak@linux.intel.com> > > --- > > v2: Use brstackdisasm instead of keying of disasm > --- > tools/perf/Documentation/perf-script.txt | 7 +++- > tools/perf/builtin-script.c | 32 +++++++++++---- > tools/perf/util/dump-insn.h | 1 + > tools/perf/util/print_insn.c | 52 ++++++++++++++++++++++++ > tools/perf/util/print_insn.h | 3 ++ > 5 files changed, 86 insertions(+), 9 deletions(-) > > diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt > index 005e51df855e..ff086ef05a0c 100644 > --- a/tools/perf/Documentation/perf-script.txt > +++ b/tools/perf/Documentation/perf-script.txt > @@ -132,9 +132,9 @@ OPTIONS > Comma separated list of fields to print. 
Options are: > comm, tid, pid, time, cpu, event, trace, ip, sym, dso, dsoff, addr, symoff, > srcline, period, iregs, uregs, brstack, brstacksym, flags, bpf-output, > - brstackinsn, brstackinsnlen, brstackoff, callindent, insn, disasm, > + brstackinsn, brstackinsnlen, brstackdisasm, brstackoff, callindent, insn, disasm, > insnlen, synth, phys_addr, metric, misc, srccode, ipc, data_page_size, > - code_page_size, ins_lat, machine_pid, vcpu, cgroup, retire_lat. > + code_page_size, ins_lat, machine_pid, vcpu, cgroup, retire_lat, > > Field list can be prepended with the type, trace, sw or hw, > to indicate to which event type the field list applies. > @@ -257,6 +257,9 @@ OPTIONS > can’t know the next sequential instruction after an unconditional branch unless > you calculate that based on its length. > > + brstackdisasm acts like brstackinsn, but will print disassembled instructions if > + perf is built with the capstone library. > + > The brstackoff field will print an offset into a specific dso/binary. > > With the metric option perf script can compute metrics for > diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c > index 0299b1ed8744..db18d2c54c59 100644 > --- a/tools/perf/builtin-script.c > +++ b/tools/perf/builtin-script.c > @@ -136,6 +136,7 @@ enum perf_output_field { > PERF_OUTPUT_RETIRE_LAT = 1ULL << 40, > PERF_OUTPUT_DSOFF = 1ULL << 41, > PERF_OUTPUT_DISASM = 1ULL << 42, > + PERF_OUTPUT_BRSTACKDISASM = 1ULL << 43, > }; > > struct perf_script { > @@ -210,6 +211,7 @@ struct output_option { > {.str = "vcpu", .field = PERF_OUTPUT_VCPU}, > {.str = "cgroup", .field = PERF_OUTPUT_CGROUP}, > {.str = "retire_lat", .field = PERF_OUTPUT_RETIRE_LAT}, > + {.str = "brstackdisasm", .field = PERF_OUTPUT_BRSTACKDISASM}, > }; > > enum { > @@ -510,7 +512,8 @@ static int evsel__check_attr(struct evsel *evsel, struct perf_session *session) > "selected. 
Hence, no address to lookup the source line number.\n"); > return -EINVAL; > } > - if ((PRINT_FIELD(BRSTACKINSN) || PRINT_FIELD(BRSTACKINSNLEN)) && !allow_user_set && > + if ((PRINT_FIELD(BRSTACKINSN) || PRINT_FIELD(BRSTACKINSNLEN) || PRINT_FIELD(BRSTACKDISASM)) > + && !allow_user_set && > !(evlist__combined_branch_type(session->evlist) & PERF_SAMPLE_BRANCH_ANY)) { > pr_err("Display of branch stack assembler requested, but non all-branch filter set\n" > "Hint: run 'perf record -b ...'\n"); > @@ -1162,6 +1165,20 @@ static int print_srccode(struct thread *thread, u8 cpumode, uint64_t addr) > return ret; > } > > +static const char *any_dump_insn(struct perf_event_attr *attr, attr is __maybe_unused > + struct perf_insn *x, uint64_t ip, > + u8 *inbuf, int inlen, int *lenp) > +{ > +#ifdef HAVE_LIBCAPSTONE_SUPPORT > + if (PRINT_FIELD(BRSTACKDISASM)) { > + const char *p = cs_dump_insn(x, ip, inbuf, inlen, lenp); > + if (p) > + return p; > + } > +#endif > + return dump_insn(x, ip, inbuf, inlen, lenp); > +} > + > static int ip__fprintf_jump(uint64_t ip, struct branch_entry *en, > struct perf_insn *x, u8 *inbuf, int len, > int insn, FILE *fp, int *total_cycles, > @@ -1170,7 +1187,7 @@ static int ip__fprintf_jump(uint64_t ip, struct branch_entry *en, > { > int ilen = 0; > int printed = fprintf(fp, "\t%016" PRIx64 "\t%-30s\t", ip, > - dump_insn(x, ip, inbuf, len, &ilen)); > + any_dump_insn(attr, x, ip, inbuf, len, &ilen)); > > if (PRINT_FIELD(BRSTACKINSNLEN)) > printed += fprintf(fp, "ilen: %d\t", ilen); > @@ -1262,6 +1279,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample, > nr = max_blocks + 1; > > x.thread = thread; > + x.machine = machine; > x.cpu = sample->cpu; > > printed += fprintf(fp, "%c", '\n'); > @@ -1313,7 +1331,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample, > } else { > ilen = 0; > printed += fprintf(fp, "\t%016" PRIx64 "\t%s", ip, > - dump_insn(&x, ip, buffer + off, len - off, &ilen)); > + any_dump_insn(attr, 
&x, ip, buffer + off, len - off, &ilen)); > if (PRINT_FIELD(BRSTACKINSNLEN)) > printed += fprintf(fp, "\tilen: %d", ilen); > printed += fprintf(fp, "\n"); > @@ -1361,7 +1379,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample, > goto out; > ilen = 0; > printed += fprintf(fp, "\t%016" PRIx64 "\t%s", sample->ip, > - dump_insn(&x, sample->ip, buffer, len, &ilen)); > + any_dump_insn(attr, &x, sample->ip, buffer, len, &ilen)); > if (PRINT_FIELD(BRSTACKINSNLEN)) > printed += fprintf(fp, "\tilen: %d", ilen); > printed += fprintf(fp, "\n"); > @@ -1372,7 +1390,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample, > for (off = 0; off <= end - start; off += ilen) { > ilen = 0; > printed += fprintf(fp, "\t%016" PRIx64 "\t%s", start + off, > - dump_insn(&x, start + off, buffer + off, len - off, &ilen)); > + any_dump_insn(attr, &x, start + off, buffer + off, len - off, &ilen)); > if (PRINT_FIELD(BRSTACKINSNLEN)) > printed += fprintf(fp, "\tilen: %d", ilen); > printed += fprintf(fp, "\n"); > @@ -1534,7 +1552,7 @@ static int perf_sample__fprintf_insn(struct perf_sample *sample, > printed += fprintf(fp, "\t\t"); > printed += sample__fprintf_insn_asm(sample, thread, machine, fp, al); > } > - if (PRINT_FIELD(BRSTACKINSN) || PRINT_FIELD(BRSTACKINSNLEN)) > + if (PRINT_FIELD(BRSTACKINSN) || PRINT_FIELD(BRSTACKINSNLEN) || PRINT_FIELD(BRSTACKDISASM)) > printed += perf_sample__fprintf_brstackinsn(sample, thread, attr, machine, fp); > > return printed; > @@ -3940,7 +3958,7 @@ int cmd_script(int argc, const char **argv) > "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,dsoff," > "addr,symoff,srcline,period,iregs,uregs,brstack," > "brstacksym,flags,data_src,weight,bpf-output,brstackinsn," > - "brstackinsnlen,brstackoff,callindent,insn,disasm,insnlen,synth," > + "brstackinsnlen,brstackdisasm,brstackoff,callindent,insn,disasm,insnlen,synth," > "phys_addr,metric,misc,srccode,ipc,tod,data_page_size," > 
"code_page_size,ins_lat,machine_pid,vcpu,cgroup,retire_lat", > parse_output_fields), > diff --git a/tools/perf/util/dump-insn.h b/tools/perf/util/dump-insn.h > index 650125061530..4a7797dd6d09 100644 > --- a/tools/perf/util/dump-insn.h > +++ b/tools/perf/util/dump-insn.h > @@ -11,6 +11,7 @@ struct thread; > struct perf_insn { > /* Initialized by callers: */ > struct thread *thread; > + struct machine *machine; > u8 cpumode; > bool is64bit; > int cpu; > diff --git a/tools/perf/util/print_insn.c b/tools/perf/util/print_insn.c > index 8e4e3cffd677..bca4449f0fa8 100644 > --- a/tools/perf/util/print_insn.c > +++ b/tools/perf/util/print_insn.c > @@ -12,6 +12,7 @@ > #include "machine.h" > #include "thread.h" > #include "print_insn.h" > +#include "dump-insn.h" > #include "map.h" > #include "dso.h" > > @@ -71,6 +72,57 @@ static int capstone_init(struct machine *machine, csh *cs_handle, bool is64) > return 0; > } > > +static void dump_insn_x86(struct thread *thread, cs_insn *insn, struct perf_insn *x) > +{ > + struct addr_location al; > + bool printed = false; > + > + if (insn->detail && insn->detail->x86.op_count == 1) { > + cs_x86_op *op = &insn->detail->x86.operands[0]; > + > + addr_location__init(&al); > + if (op->type == X86_OP_IMM && > + thread__find_symbol(thread, x->cpumode, op->imm, &al) && > + al.sym && > + al.addr < al.sym->end) { > + snprintf(x->out, sizeof(x->out), "%s %s+%#" PRIx64 " [%#" PRIx64 "]", insn[0].mnemonic, > + al.sym->name, al.addr - al.sym->start, op->imm); > + printed = true; > + } > + addr_location__exit(&al); > + } > + > + if (!printed) > + snprintf(x->out, sizeof(x->out), "%s %s", insn[0].mnemonic, insn[0].op_str); > +} > + > +const char *cs_dump_insn(struct perf_insn *x, uint64_t ip, > + u8 *inbuf, int inlen, int *lenp) > +{ > + int ret; > + int count; > + cs_insn *insn; > + csh cs_handle; > + > + ret = capstone_init(x->machine, &cs_handle, x->is64bit); > + if (ret < 0) > + return NULL; > + > + count = cs_disasm(cs_handle, (uint8_t *)inbuf, 
inlen, ip, 1, &insn); > + if (count > 0) { > + if (machine__normalized_is(x->machine, "x86")) > + dump_insn_x86(x->thread, &insn[0], x); > + else > + snprintf(x->out, sizeof(x->out), "%s %s", > + insn[0].mnemonic, insn[0].op_str); > + *lenp = insn->size; > + cs_free(insn, count); > + } else { > + return NULL; > + } > + return x->out; > +} > + Better to factor out a function that does not depend on "sample" e.g. see fprintf_insn_asm() below. static size_t print_insn_x86(struct thread *thread, u8 cpumode, cs_insn *insn, FILE *fp) { struct addr_location al; size_t printed = 0; if (insn->detail && insn->detail->x86.op_count == 1) { cs_x86_op *op = &insn->detail->x86.operands[0]; addr_location__init(&al); if (op->type == X86_OP_IMM && thread__find_symbol(thread, cpumode, op->imm, &al)) { printed += fprintf(fp, "%s ", insn[0].mnemonic); printed += symbol__fprintf_symname_offs(al.sym, &al, fp); addr_location__exit(&al); return printed; } addr_location__exit(&al); } printed += fprintf(fp, "%s %s", insn[0].mnemonic, insn[0].op_str); return printed; } static bool is64bitip(struct machine *machine, struct addr_location *al) { const struct dso *dso = al->map ? map__dso(al->map) : NULL; if (dso) return dso->is_64_bit; return machine__is(machine, "x86_64") || machine__normalized_is(machine, "arm64") || machine__normalized_is(machine, "s390"); } static ssize_t fprintf_insn_asm(struct machine *machine, struct thread *thread, u8 cpumode, bool is64bit, const uint8_t *code, size_t code_size, uint64_t ip, int *lenp, FILE *fp) { size_t printed = 0; cs_insn *insn; csh cs_handle; size_t count; int ret; /* TODO: Try to initiate capstone only once but need a proper place. 
*/ ret = capstone_init(machine, &cs_handle, is64bit); if (ret < 0) return ret; count = cs_disasm(cs_handle, code, code_size, ip, 1, &insn); if (count > 0) { if (machine__normalized_is(machine, "x86")) printed += print_insn_x86(thread, cpumode, &insn[0], fp); else printed += fprintf(fp, "%s %s", insn[0].mnemonic, insn[0].op_str); if (lenp) *lenp = insn->size; cs_free(insn, count); } else { printed += fprintf(fp, "illegal instruction"); } cs_close(&cs_handle); return printed; } size_t sample__fprintf_insn_asm(struct perf_sample *sample, struct thread *thread, struct machine *machine, FILE *fp, struct addr_location *al) { bool is64bit = is64bitip(machine, al); ssize_t printed; printed = fprintf_insn_asm(machine, thread, sample->cpumode, is64bit, (uint8_t *)sample->insn, sample->insn_len, sample->ip, NULL, fp); if (printed < 0) return sample__fprintf_insn_raw(sample, fp); return printed; } > static size_t print_insn_x86(struct perf_sample *sample, struct thread *thread, > cs_insn *insn, FILE *fp) > { > diff --git a/tools/perf/util/print_insn.h b/tools/perf/util/print_insn.h > index 6447dd41b543..c2a6391a45ce 100644 > --- a/tools/perf/util/print_insn.h > +++ b/tools/perf/util/print_insn.h > @@ -8,9 +8,12 @@ > struct perf_sample; > struct thread; > struct machine; > +struct perf_insn; > > size_t sample__fprintf_insn_asm(struct perf_sample *sample, struct thread *thread, > struct machine *machine, FILE *fp, struct addr_location *al); > size_t sample__fprintf_insn_raw(struct perf_sample *sample, FILE *fp); > +const char *cs_dump_insn(struct perf_insn *x, uint64_t ip, > + u8 *inbuf, int inlen, int *lenp); > > #endif /* PERF_PRINT_INSN_H */ ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v4 2/2] perf, script, capstone: Add support for -F +brstackdisasm 2024-03-15 12:19 ` Adrian Hunter @ 2024-03-18 22:06 ` Andi Kleen 2024-03-19 6:52 ` Adrian Hunter 0 siblings, 1 reply; 12+ messages in thread From: Andi Kleen @ 2024-03-18 22:06 UTC (permalink / raw) To: Adrian Hunter; +Cc: linux-perf-users, changbin.du > Better to factor out a function that does not depend on "sample" > e.g. see fprintf_insn_asm() below. This doesn't work because it completely ignores the need of the cs_dump_insn caller for the path that I actually need for my feature, which requires returning a string. I didn't apply it. I did the other fixes. Please no more pointless refactors. -Andi ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v4 2/2] perf, script, capstone: Add support for -F +brstackdisasm 2024-03-18 22:06 ` Andi Kleen @ 2024-03-19 6:52 ` Adrian Hunter 2024-03-20 0:35 ` Andi Kleen 0 siblings, 1 reply; 12+ messages in thread From: Adrian Hunter @ 2024-03-19 6:52 UTC (permalink / raw) To: Andi Kleen; +Cc: linux-perf-users, changbin.du On 19/03/24 00:06, Andi Kleen wrote: >> Better to factor out a function that does not depend on "sample" >> e.g. see fprintf_insn_asm() below. > > this doesn't work because it completely ignores the need of the > cs_dump_insn caller for the path that i actually need for my feature, > which requires to return a string. I didn't apply it. I would probably change the call sites because they already have a file descriptor, but output to memory is doable: const char *cs_dump_insn(struct perf_insn *x, uint64_t ip, u8 *inbuf, int inlen, int *lenp) { FILE *fp = fmemopen(x->out, sizeof(x->out), "w+"); const char *ret = NULL; ssize_t printed; if (!fp) return NULL; printed = fprintf_insn_asm(x->machine, x->thread, x->cpumode, x->is64bit, (uint8_t *)inbuf, inlen, ip, lenp, fp); if (printed < 0 || fflush(fp)) goto out_close; ret = x->out; out_close: fclose(fp); return ret; } ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v4 2/2] perf, script, capstone: Add support for -F +brstackdisasm 2024-03-19 6:52 ` Adrian Hunter @ 2024-03-20 0:35 ` Andi Kleen 2024-03-22 10:21 ` [PATCH] perf script: Consolidate capstone print functions Adrian Hunter 0 siblings, 1 reply; 12+ messages in thread From: Andi Kleen @ 2024-03-20 0:35 UTC (permalink / raw) To: Adrian Hunter; +Cc: linux-perf-users, changbin.du On Tue, Mar 19, 2024 at 08:52:33AM +0200, Adrian Hunter wrote: > On 19/03/24 00:06, Andi Kleen wrote: > >> Better to factor out a function that does not depend on "sample" > >> e.g. see fprintf_insn_asm() below. > > > > this doesn't work because it completely ignores the need of the > > cs_dump_insn caller for the path that i actually need for my feature, > > which requires to return a string. I didn't apply it. > > I would probably change the call sites because they already have a > file descriptor, but output to memory is doable: > FILE *fp = fmemopen(x->out, sizeof(x->out), "w+"); I considered using this at some point, but I'm sure there is some non-glibc libc that people build perf with that doesn't have fmemopen, so I didn't. Can we just use my version for now, and if you prefer more refactoring, please submit a follow-on cleanup patch? -andi ^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH] perf script: Consolidate capstone print functions 2024-03-20 0:35 ` Andi Kleen @ 2024-03-22 10:21 ` Adrian Hunter 2024-04-08 20:35 ` Arnaldo Carvalho de Melo 0 siblings, 1 reply; 12+ messages in thread From: Adrian Hunter @ 2024-03-22 10:21 UTC (permalink / raw) To: Arnaldo Carvalho de Melo Cc: Jiri Olsa, Namhyung Kim, Ian Rogers, Andi Kleen, changbin.du, linux-kernel, linux-perf-users Consolidate capstone print functions, to reduce duplication. Amend call sites to use a file pointer for output, which is consistent with most perf tools print functions. Add print_opts with an option to print also the hex value of a resolved symbol+offset. Signed-off-by: Adrian Hunter <adrian.hunter@intel.com> --- On 20/03/24 02:35, Andi Kleen wrote: > On Tue, Mar 19, 2024 at 08:52:33AM +0200, Adrian Hunter wrote: >> On 19/03/24 00:06, Andi Kleen wrote: >>>> Better to factor out a function that does not depend on "sample" >>>> e.g. see fprintf_insn_asm() below. >>> >>> this doesn't work because it completely ignores the need of the >>> cs_dump_insn caller for the path that i actually need for my feature, >>> which requires to return a string. I didn't apply it. >> >> I would probably change the call sites because they already have a >> file descriptor, but output to memory is doable: > >> FILE *fp = fmemopen(x->out, sizeof(x->out), "w+"); > > I considered using this at some point, but I'm sure there is some non glibc, > that people build perf with, that doesn't have fmemopen, so I didn't. fmemopen() is POSIX since 2008 > > Can we just use my version for now and if you prefer more refactor > please submit a follow on cleanup patch? Sure, here is a follow on cleanup patch. 
tools/perf/builtin-script.c | 43 ++++++++++----- tools/perf/util/print_insn.c | 103 ++++++++++++----------------------- tools/perf/util/print_insn.h | 7 ++- 3 files changed, 67 insertions(+), 86 deletions(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 59933bd52e0f..6384acf8dad7 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -1165,18 +1165,29 @@ static int print_srccode(struct thread *thread, u8 cpumode, uint64_t addr) return ret; } -static const char *any_dump_insn(struct perf_event_attr *attr __maybe_unused, - struct perf_insn *x, uint64_t ip, - u8 *inbuf, int inlen, int *lenp) +static int any_dump_insn(struct perf_event_attr *attr __maybe_unused, + struct perf_insn *x, uint64_t ip, + u8 *inbuf, int inlen, int *lenp, + FILE *fp) { #ifdef HAVE_LIBCAPSTONE_SUPPORT if (PRINT_FIELD(BRSTACKDISASM)) { - const char *p = cs_dump_insn(x, ip, inbuf, inlen, lenp); - if (p) - return p; + int printed = fprintf_insn_asm(x->machine, x->thread, x->cpumode, x->is64bit, + (uint8_t *)inbuf, inlen, ip, lenp, + PRINT_INSN_IMM_HEX, fp); + + if (printed > 0) + return printed; } #endif - return dump_insn(x, ip, inbuf, inlen, lenp); + return fprintf(fp, "%s", dump_insn(x, ip, inbuf, inlen, lenp)); +} + +static int add_padding(FILE *fp, int printed, int padding) +{ + if (printed >= 0 && printed < padding) + printed += fprintf(fp, "%*s", padding - printed, ""); + return printed; } static int ip__fprintf_jump(uint64_t ip, struct branch_entry *en, @@ -1186,8 +1197,10 @@ static int ip__fprintf_jump(uint64_t ip, struct branch_entry *en, struct thread *thread) { int ilen = 0; - int printed = fprintf(fp, "\t%016" PRIx64 "\t%-30s\t", ip, - any_dump_insn(attr, x, ip, inbuf, len, &ilen)); + int printed = fprintf(fp, "\t%016" PRIx64 "\t", ip); + + printed += add_padding(fp, any_dump_insn(attr, x, ip, inbuf, len, &ilen, fp), 30); + printed += fprintf(fp, "\t"); if (PRINT_FIELD(BRSTACKINSNLEN)) printed += fprintf(fp, "ilen: %d\t", ilen); 
@@ -1330,8 +1343,8 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample, break; } else { ilen = 0; - printed += fprintf(fp, "\t%016" PRIx64 "\t%s", ip, - any_dump_insn(attr, &x, ip, buffer + off, len - off, &ilen)); + printed += fprintf(fp, "\t%016" PRIx64 "\t", ip); + printed += any_dump_insn(attr, &x, ip, buffer + off, len - off, &ilen, fp); if (PRINT_FIELD(BRSTACKINSNLEN)) printed += fprintf(fp, "\tilen: %d", ilen); printed += fprintf(fp, "\n"); @@ -1378,8 +1391,8 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample, if (len <= 0) goto out; ilen = 0; - printed += fprintf(fp, "\t%016" PRIx64 "\t%s", sample->ip, - any_dump_insn(attr, &x, sample->ip, buffer, len, &ilen)); + printed += fprintf(fp, "\t%016" PRIx64 "\t", sample->ip); + printed += any_dump_insn(attr, &x, sample->ip, buffer, len, &ilen, fp); if (PRINT_FIELD(BRSTACKINSNLEN)) printed += fprintf(fp, "\tilen: %d", ilen); printed += fprintf(fp, "\n"); @@ -1389,8 +1402,8 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample, } for (off = 0; off <= end - start; off += ilen) { ilen = 0; - printed += fprintf(fp, "\t%016" PRIx64 "\t%s", start + off, - any_dump_insn(attr, &x, start + off, buffer + off, len - off, &ilen)); + printed += fprintf(fp, "\t%016" PRIx64 "\t", start + off); + printed += any_dump_insn(attr, &x, start + off, buffer + off, len - off, &ilen, fp); if (PRINT_FIELD(BRSTACKINSNLEN)) printed += fprintf(fp, "\tilen: %d", ilen); printed += fprintf(fp, "\n"); diff --git a/tools/perf/util/print_insn.c b/tools/perf/util/print_insn.c index 8825330d435f..931a2c5293c9 100644 --- a/tools/perf/util/print_insn.c +++ b/tools/perf/util/print_insn.c @@ -72,59 +72,8 @@ static int capstone_init(struct machine *machine, csh *cs_handle, bool is64) return 0; } -static void dump_insn_x86(struct thread *thread, cs_insn *insn, struct perf_insn *x) -{ - struct addr_location al; - bool printed = false; - - if (insn->detail && insn->detail->x86.op_count == 1) { 
- cs_x86_op *op = &insn->detail->x86.operands[0]; - - addr_location__init(&al); - if (op->type == X86_OP_IMM && - thread__find_symbol(thread, x->cpumode, op->imm, &al) && - al.sym && - al.addr < al.sym->end) { - snprintf(x->out, sizeof(x->out), "%s %s+%#" PRIx64 " [%#" PRIx64 "]", insn[0].mnemonic, - al.sym->name, al.addr - al.sym->start, op->imm); - printed = true; - } - addr_location__exit(&al); - } - - if (!printed) - snprintf(x->out, sizeof(x->out), "%s %s", insn[0].mnemonic, insn[0].op_str); -} - -const char *cs_dump_insn(struct perf_insn *x, uint64_t ip, - u8 *inbuf, int inlen, int *lenp) -{ - int ret; - int count; - cs_insn *insn; - csh cs_handle; - - ret = capstone_init(x->machine, &cs_handle, x->is64bit); - if (ret < 0) - return NULL; - - count = cs_disasm(cs_handle, (uint8_t *)inbuf, inlen, ip, 1, &insn); - if (count > 0) { - if (machine__normalized_is(x->machine, "x86")) - dump_insn_x86(x->thread, &insn[0], x); - else - snprintf(x->out, sizeof(x->out), "%s %s", - insn[0].mnemonic, insn[0].op_str); - *lenp = insn->size; - cs_free(insn, count); - } else { - return NULL; - } - return x->out; -} - -static size_t print_insn_x86(struct perf_sample *sample, struct thread *thread, - cs_insn *insn, FILE *fp) +static size_t print_insn_x86(struct thread *thread, u8 cpumode, cs_insn *insn, + int print_opts, FILE *fp) { struct addr_location al; size_t printed = 0; @@ -134,9 +83,11 @@ static size_t print_insn_x86(struct perf_sample *sample, struct thread *thread, addr_location__init(&al); if (op->type == X86_OP_IMM && - thread__find_symbol(thread, sample->cpumode, op->imm, &al)) { + thread__find_symbol(thread, cpumode, op->imm, &al)) { printed += fprintf(fp, "%s ", insn[0].mnemonic); printed += symbol__fprintf_symname_offs(al.sym, &al, fp); + if (print_opts & PRINT_INSN_IMM_HEX) + printed += fprintf(fp, " [%#" PRIx64 "]", op->imm); addr_location__exit(&al); return printed; } @@ -159,39 +110,53 @@ static bool is64bitip(struct machine *machine, struct addr_location *al) 
machine__normalized_is(machine, "s390"); } -size_t sample__fprintf_insn_asm(struct perf_sample *sample, struct thread *thread, - struct machine *machine, FILE *fp, - struct addr_location *al) +ssize_t fprintf_insn_asm(struct machine *machine, struct thread *thread, u8 cpumode, + bool is64bit, const uint8_t *code, size_t code_size, + uint64_t ip, int *lenp, int print_opts, FILE *fp) { - csh cs_handle; + size_t printed; cs_insn *insn; + csh cs_handle; size_t count; - size_t printed = 0; int ret; - bool is64bit = is64bitip(machine, al); /* TODO: Try to initiate capstone only once but need a proper place. */ ret = capstone_init(machine, &cs_handle, is64bit); - if (ret < 0) { - /* fallback */ - return sample__fprintf_insn_raw(sample, fp); - } + if (ret < 0) + return ret; - count = cs_disasm(cs_handle, (uint8_t *)sample->insn, sample->insn_len, - sample->ip, 1, &insn); + count = cs_disasm(cs_handle, code, code_size, ip, 1, &insn); if (count > 0) { if (machine__normalized_is(machine, "x86")) - printed += print_insn_x86(sample, thread, &insn[0], fp); + printed = print_insn_x86(thread, cpumode, &insn[0], print_opts, fp); else - printed += fprintf(fp, "%s %s", insn[0].mnemonic, insn[0].op_str); + printed = fprintf(fp, "%s %s", insn[0].mnemonic, insn[0].op_str); + if (lenp) + *lenp = insn->size; cs_free(insn, count); } else { - printed += fprintf(fp, "illegal instruction"); + printed = -1; } cs_close(&cs_handle); return printed; } + +size_t sample__fprintf_insn_asm(struct perf_sample *sample, struct thread *thread, + struct machine *machine, FILE *fp, + struct addr_location *al) +{ + bool is64bit = is64bitip(machine, al); + ssize_t printed; + + printed = fprintf_insn_asm(machine, thread, sample->cpumode, is64bit, + (uint8_t *)sample->insn, sample->insn_len, + sample->ip, NULL, 0, fp); + if (printed < 0) + return sample__fprintf_insn_raw(sample, fp); + + return printed; +} #else size_t sample__fprintf_insn_asm(struct perf_sample *sample __maybe_unused, struct thread *thread 
__maybe_unused, diff --git a/tools/perf/util/print_insn.h b/tools/perf/util/print_insn.h index c2a6391a45ce..07d11af3fc1c 100644 --- a/tools/perf/util/print_insn.h +++ b/tools/perf/util/print_insn.h @@ -10,10 +10,13 @@ struct thread; struct machine; struct perf_insn; +#define PRINT_INSN_IMM_HEX (1<<0) + size_t sample__fprintf_insn_asm(struct perf_sample *sample, struct thread *thread, struct machine *machine, FILE *fp, struct addr_location *al); size_t sample__fprintf_insn_raw(struct perf_sample *sample, FILE *fp); -const char *cs_dump_insn(struct perf_insn *x, uint64_t ip, - u8 *inbuf, int inlen, int *lenp); +ssize_t fprintf_insn_asm(struct machine *machine, struct thread *thread, u8 cpumode, + bool is64bit, const uint8_t *code, size_t code_size, + uint64_t ip, int *lenp, int print_opts, FILE *fp); #endif /* PERF_PRINT_INSN_H */ -- 2.34.1 ^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [PATCH] perf script: Consolidate capstone print functions 2024-03-22 10:21 ` [PATCH] perf script: Consolidate capstone print functions Adrian Hunter @ 2024-04-08 20:35 ` Arnaldo Carvalho de Melo 0 siblings, 0 replies; 12+ messages in thread From: Arnaldo Carvalho de Melo @ 2024-04-08 20:35 UTC (permalink / raw) To: Adrian Hunter Cc: Jiri Olsa, Namhyung Kim, Ian Rogers, Andi Kleen, changbin.du, linux-kernel, linux-perf-users On Fri, Mar 22, 2024 at 12:21:58PM +0200, Adrian Hunter wrote: > Consolidate capstone print functions, to reduce duplication. Amend call > sites to use a file pointer for output, which is consistent with most > perf tools print functions. Add print_opts with an option to print also > the hex value of a resolved symbol+offset. > > Signed-off-by: Adrian Hunter <adrian.hunter@intel.com> > --- I'll fix this: 38 11.53 ubuntu:18.04 : FAIL gcc version 7.5.0 (Ubuntu 7.5.0-3ubuntu1~18.04) util/print_insn.c: In function 'print_insn_x86': util/print_insn.c:90:35: error: expected ')' before 'PRIx64' printed += fprintf(fp, " [%#" PRIx64 "]", op->imm); ^~~~~~ util/print_insn.c:90:32: error: conversion lacks type at end of format [-Werror=format=] printed += fprintf(fp, " [%#" PRIx64 "]", op->imm); ^ cc1: all warnings being treated as errors /git/perf-6.8.0/tools/build/Makefile.build:158: recipe for target 'util' failed make[3]: *** [util] Error 2 > > On 20/03/24 02:35, Andi Kleen wrote: > > On Tue, Mar 19, 2024 at 08:52:33AM +0200, Adrian Hunter wrote: > >> On 19/03/24 00:06, Andi Kleen wrote: > >>>> Better to factor out a function that does not depend on "sample" > >>>> e.g. see fprintf_insn_asm() below. > >>> > >>> this doesn't work because it completely ignores the need of the > >>> cs_dump_insn caller for the path that i actually need for my feature, > >>> which requires to return a string. I didn't apply it. 
> >> > >> I would probably change the call sites because they already have a > >> file descriptor, but output to memory is doable: > > > >> FILE *fp = fmemopen(x->out, sizeof(x->out), "w+"); > > > > I considered using this at some point, but I'm sure there is some non glibc, > > that people build perf with, that doesn't have fmemopen, so I didn't. > > fmemopen() is POSIX since 2008 > > > > > Can we just use my version for now and if you prefer more refactor > > please submit a follow on cleanup patch? > > Sure, here is a follow on cleanup patch. > > > tools/perf/builtin-script.c | 43 ++++++++++----- > tools/perf/util/print_insn.c | 103 ++++++++++++----------------------- > tools/perf/util/print_insn.h | 7 ++- > 3 files changed, 67 insertions(+), 86 deletions(-) > > diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c > index 59933bd52e0f..6384acf8dad7 100644 > --- a/tools/perf/builtin-script.c > +++ b/tools/perf/builtin-script.c > @@ -1165,18 +1165,29 @@ static int print_srccode(struct thread *thread, u8 cpumode, uint64_t addr) > return ret; > } > > -static const char *any_dump_insn(struct perf_event_attr *attr __maybe_unused, > - struct perf_insn *x, uint64_t ip, > - u8 *inbuf, int inlen, int *lenp) > +static int any_dump_insn(struct perf_event_attr *attr __maybe_unused, > + struct perf_insn *x, uint64_t ip, > + u8 *inbuf, int inlen, int *lenp, > + FILE *fp) > { > #ifdef HAVE_LIBCAPSTONE_SUPPORT > if (PRINT_FIELD(BRSTACKDISASM)) { > - const char *p = cs_dump_insn(x, ip, inbuf, inlen, lenp); > - if (p) > - return p; > + int printed = fprintf_insn_asm(x->machine, x->thread, x->cpumode, x->is64bit, > + (uint8_t *)inbuf, inlen, ip, lenp, > + PRINT_INSN_IMM_HEX, fp); > + > + if (printed > 0) > + return printed; > } > #endif > - return dump_insn(x, ip, inbuf, inlen, lenp); > + return fprintf(fp, "%s", dump_insn(x, ip, inbuf, inlen, lenp)); > +} > + > +static int add_padding(FILE *fp, int printed, int padding) > +{ > + if (printed >= 0 && printed < 
padding) > + printed += fprintf(fp, "%*s", padding - printed, ""); > + return printed; > } > > static int ip__fprintf_jump(uint64_t ip, struct branch_entry *en, > @@ -1186,8 +1197,10 @@ static int ip__fprintf_jump(uint64_t ip, struct branch_entry *en, > struct thread *thread) > { > int ilen = 0; > - int printed = fprintf(fp, "\t%016" PRIx64 "\t%-30s\t", ip, > - any_dump_insn(attr, x, ip, inbuf, len, &ilen)); > + int printed = fprintf(fp, "\t%016" PRIx64 "\t", ip); > + > + printed += add_padding(fp, any_dump_insn(attr, x, ip, inbuf, len, &ilen, fp), 30); > + printed += fprintf(fp, "\t"); > > if (PRINT_FIELD(BRSTACKINSNLEN)) > printed += fprintf(fp, "ilen: %d\t", ilen); > @@ -1330,8 +1343,8 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample, > break; > } else { > ilen = 0; > - printed += fprintf(fp, "\t%016" PRIx64 "\t%s", ip, > - any_dump_insn(attr, &x, ip, buffer + off, len - off, &ilen)); > + printed += fprintf(fp, "\t%016" PRIx64 "\t", ip); > + printed += any_dump_insn(attr, &x, ip, buffer + off, len - off, &ilen, fp); > if (PRINT_FIELD(BRSTACKINSNLEN)) > printed += fprintf(fp, "\tilen: %d", ilen); > printed += fprintf(fp, "\n"); > @@ -1378,8 +1391,8 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample, > if (len <= 0) > goto out; > ilen = 0; > - printed += fprintf(fp, "\t%016" PRIx64 "\t%s", sample->ip, > - any_dump_insn(attr, &x, sample->ip, buffer, len, &ilen)); > + printed += fprintf(fp, "\t%016" PRIx64 "\t", sample->ip); > + printed += any_dump_insn(attr, &x, sample->ip, buffer, len, &ilen, fp); > if (PRINT_FIELD(BRSTACKINSNLEN)) > printed += fprintf(fp, "\tilen: %d", ilen); > printed += fprintf(fp, "\n"); > @@ -1389,8 +1402,8 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample, > } > for (off = 0; off <= end - start; off += ilen) { > ilen = 0; > - printed += fprintf(fp, "\t%016" PRIx64 "\t%s", start + off, > - any_dump_insn(attr, &x, start + off, buffer + off, len - off, &ilen)); > + printed 
+= fprintf(fp, "\t%016" PRIx64 "\t", start + off); > + printed += any_dump_insn(attr, &x, start + off, buffer + off, len - off, &ilen, fp); > if (PRINT_FIELD(BRSTACKINSNLEN)) > printed += fprintf(fp, "\tilen: %d", ilen); > printed += fprintf(fp, "\n"); > diff --git a/tools/perf/util/print_insn.c b/tools/perf/util/print_insn.c > index 8825330d435f..931a2c5293c9 100644 > --- a/tools/perf/util/print_insn.c > +++ b/tools/perf/util/print_insn.c > @@ -72,59 +72,8 @@ static int capstone_init(struct machine *machine, csh *cs_handle, bool is64) > return 0; > } > > -static void dump_insn_x86(struct thread *thread, cs_insn *insn, struct perf_insn *x) > -{ > - struct addr_location al; > - bool printed = false; > - > - if (insn->detail && insn->detail->x86.op_count == 1) { > - cs_x86_op *op = &insn->detail->x86.operands[0]; > - > - addr_location__init(&al); > - if (op->type == X86_OP_IMM && > - thread__find_symbol(thread, x->cpumode, op->imm, &al) && > - al.sym && > - al.addr < al.sym->end) { > - snprintf(x->out, sizeof(x->out), "%s %s+%#" PRIx64 " [%#" PRIx64 "]", insn[0].mnemonic, > - al.sym->name, al.addr - al.sym->start, op->imm); > - printed = true; > - } > - addr_location__exit(&al); > - } > - > - if (!printed) > - snprintf(x->out, sizeof(x->out), "%s %s", insn[0].mnemonic, insn[0].op_str); > -} > - > -const char *cs_dump_insn(struct perf_insn *x, uint64_t ip, > - u8 *inbuf, int inlen, int *lenp) > -{ > - int ret; > - int count; > - cs_insn *insn; > - csh cs_handle; > - > - ret = capstone_init(x->machine, &cs_handle, x->is64bit); > - if (ret < 0) > - return NULL; > - > - count = cs_disasm(cs_handle, (uint8_t *)inbuf, inlen, ip, 1, &insn); > - if (count > 0) { > - if (machine__normalized_is(x->machine, "x86")) > - dump_insn_x86(x->thread, &insn[0], x); > - else > - snprintf(x->out, sizeof(x->out), "%s %s", > - insn[0].mnemonic, insn[0].op_str); > - *lenp = insn->size; > - cs_free(insn, count); > - } else { > - return NULL; > - } > - return x->out; > -} > - > -static size_t 
print_insn_x86(struct perf_sample *sample, struct thread *thread, > - cs_insn *insn, FILE *fp) > +static size_t print_insn_x86(struct thread *thread, u8 cpumode, cs_insn *insn, > + int print_opts, FILE *fp) > { > struct addr_location al; > size_t printed = 0; > @@ -134,9 +83,11 @@ static size_t print_insn_x86(struct perf_sample *sample, struct thread *thread, > > addr_location__init(&al); > if (op->type == X86_OP_IMM && > - thread__find_symbol(thread, sample->cpumode, op->imm, &al)) { > + thread__find_symbol(thread, cpumode, op->imm, &al)) { > printed += fprintf(fp, "%s ", insn[0].mnemonic); > printed += symbol__fprintf_symname_offs(al.sym, &al, fp); > + if (print_opts & PRINT_INSN_IMM_HEX) > + printed += fprintf(fp, " [%#" PRIx64 "]", op->imm); > addr_location__exit(&al); > return printed; > } > @@ -159,39 +110,53 @@ static bool is64bitip(struct machine *machine, struct addr_location *al) > machine__normalized_is(machine, "s390"); > } > > -size_t sample__fprintf_insn_asm(struct perf_sample *sample, struct thread *thread, > - struct machine *machine, FILE *fp, > - struct addr_location *al) > +ssize_t fprintf_insn_asm(struct machine *machine, struct thread *thread, u8 cpumode, > + bool is64bit, const uint8_t *code, size_t code_size, > + uint64_t ip, int *lenp, int print_opts, FILE *fp) > { > - csh cs_handle; > + size_t printed; > cs_insn *insn; > + csh cs_handle; > size_t count; > - size_t printed = 0; > int ret; > - bool is64bit = is64bitip(machine, al); > > /* TODO: Try to initiate capstone only once but need a proper place. 
*/ > ret = capstone_init(machine, &cs_handle, is64bit); > - if (ret < 0) { > - /* fallback */ > - return sample__fprintf_insn_raw(sample, fp); > - } > + if (ret < 0) > + return ret; > > - count = cs_disasm(cs_handle, (uint8_t *)sample->insn, sample->insn_len, > - sample->ip, 1, &insn); > + count = cs_disasm(cs_handle, code, code_size, ip, 1, &insn); > if (count > 0) { > if (machine__normalized_is(machine, "x86")) > - printed += print_insn_x86(sample, thread, &insn[0], fp); > + printed = print_insn_x86(thread, cpumode, &insn[0], print_opts, fp); > else > - printed += fprintf(fp, "%s %s", insn[0].mnemonic, insn[0].op_str); > + printed = fprintf(fp, "%s %s", insn[0].mnemonic, insn[0].op_str); > + if (lenp) > + *lenp = insn->size; > cs_free(insn, count); > } else { > - printed += fprintf(fp, "illegal instruction"); > + printed = -1; > } > > cs_close(&cs_handle); > return printed; > } > + > +size_t sample__fprintf_insn_asm(struct perf_sample *sample, struct thread *thread, > + struct machine *machine, FILE *fp, > + struct addr_location *al) > +{ > + bool is64bit = is64bitip(machine, al); > + ssize_t printed; > + > + printed = fprintf_insn_asm(machine, thread, sample->cpumode, is64bit, > + (uint8_t *)sample->insn, sample->insn_len, > + sample->ip, NULL, 0, fp); > + if (printed < 0) > + return sample__fprintf_insn_raw(sample, fp); > + > + return printed; > +} > #else > size_t sample__fprintf_insn_asm(struct perf_sample *sample __maybe_unused, > struct thread *thread __maybe_unused, > diff --git a/tools/perf/util/print_insn.h b/tools/perf/util/print_insn.h > index c2a6391a45ce..07d11af3fc1c 100644 > --- a/tools/perf/util/print_insn.h > +++ b/tools/perf/util/print_insn.h > @@ -10,10 +10,13 @@ struct thread; > struct machine; > struct perf_insn; > > +#define PRINT_INSN_IMM_HEX (1<<0) > + > size_t sample__fprintf_insn_asm(struct perf_sample *sample, struct thread *thread, > struct machine *machine, FILE *fp, struct addr_location *al); > size_t sample__fprintf_insn_raw(struct 
perf_sample *sample, FILE *fp); > -const char *cs_dump_insn(struct perf_insn *x, uint64_t ip, > - u8 *inbuf, int inlen, int *lenp); > +ssize_t fprintf_insn_asm(struct machine *machine, struct thread *thread, u8 cpumode, > + bool is64bit, const uint8_t *code, size_t code_size, > + uint64_t ip, int *lenp, int print_opts, FILE *fp); > > #endif /* PERF_PRINT_INSN_H */ > -- > 2.34.1 ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v4 2/2] perf, script, capstone: Add support for -F +brstackdisasm 2024-03-08 17:06 ` [PATCH v4 2/2] perf, script, capstone: Add support for -F +brstackdisasm Andi Kleen 2024-03-15 12:19 ` Adrian Hunter @ 2024-03-22 10:21 ` Adrian Hunter 1 sibling, 0 replies; 12+ messages in thread From: Adrian Hunter @ 2024-03-22 10:21 UTC (permalink / raw) To: Andi Kleen Cc: changbin.du, Jiri Olsa, Arnaldo Carvalho de Melo, Ian Rogers, Namhyung Kim, linux-perf-users On 8/03/24 19:06, Andi Kleen wrote: > Support capstone output for the -F +brstackinsn branch dump. > The new output is enabled with the new field brstackdisasm > This was possible before with --xed, but now also allow > it for users that don't have xed using the builtin capstone support. > > Before: > > perf record -b emacs -Q --batch '()' > perf script -F +brstackinsn > ... > emacs 55778 1814366.755945: 151564 cycles:P: 7f0ab2d17192 intel_check_word.constprop.0+0x162 (/usr/lib64/ld-linux-x86-64.s> intel_check_word.constprop.0+237: > 00007f0ab2d1711d insn: 75 e6 # PRED 3 cycles [3] > 00007f0ab2d17105 insn: 73 51 > 00007f0ab2d17107 insn: 48 89 c1 > 00007f0ab2d1710a insn: 48 39 ca > 00007f0ab2d1710d insn: 73 96 > 00007f0ab2d1710f insn: 48 8d 04 11 > 00007f0ab2d17113 insn: 48 d1 e8 > 00007f0ab2d17116 insn: 49 8d 34 c1 > 00007f0ab2d1711a insn: 44 3a 06 > 00007f0ab2d1711d insn: 75 e6 # PRED 3 cycles [6] 3.00 IPC > 00007f0ab2d17105 insn: 73 51 # PRED 1 cycles [7] 1.00 IPC > 00007f0ab2d17158 insn: 48 8d 50 01 > 00007f0ab2d1715c insn: eb 92 # PRED 1 cycles [8] 2.00 IPC > 00007f0ab2d170f0 insn: 48 39 ca > 00007f0ab2d170f3 insn: 73 b0 # PRED 1 cycles [9] 2.00 IPC > > After (perf must be compiled with capstone): > > perf script -F +brstackdisasm > > ... 
> emacs 55778 1814366.755945: 151564 cycles:P: 7f0ab2d17192 intel_check_word.constprop.0+0x162 (/usr/lib64/ld-linux-x86-64.s> intel_check_word.constprop.0+237: > 00007f0ab2d1711d jne intel_check_word.constprop.0+0xd5 # PRED 3 cycles [3] > 00007f0ab2d17105 jae intel_check_word.constprop.0+0x128 > 00007f0ab2d17107 movq %rax, %rcx > 00007f0ab2d1710a cmpq %rcx, %rdx > 00007f0ab2d1710d jae intel_check_word.constprop.0+0x75 > 00007f0ab2d1710f leaq (%rcx, %rdx), %rax > 00007f0ab2d17113 shrq $1, %rax > 00007f0ab2d17116 leaq (%r9, %rax, 8), %rsi > 00007f0ab2d1711a cmpb (%rsi), %r8b > 00007f0ab2d1711d jne intel_check_word.constprop.0+0xd5 # PRED 3 cycles [6] 3.00 IPC > 00007f0ab2d17105 jae intel_check_word.constprop.0+0x128 # PRED 1 cycles [7] 1.00 IPC > 00007f0ab2d17158 leaq 1(%rax), %rdx > 00007f0ab2d1715c jmp intel_check_word.constprop.0+0xc0 # PRED 1 cycles [8] 2.00 IPC > 00007f0ab2d170f0 cmpq %rcx, %rdx > 00007f0ab2d170f3 jae intel_check_word.constprop.0+0x75 # PRED 1 cycles [9] 2.00 IPC > > Signed-off-by: Andi Kleen <ak@linux.intel.com> Comma in subject is slightly odd, and needs fixes to build with NO_CAPSTONE=1 diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index db18d2c54c59..59933bd52e0f 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -1165,7 +1165,7 @@ static int print_srccode(struct thread *thread, u8 cpumode, uint64_t addr) return ret; } -static const char *any_dump_insn(struct perf_event_attr *attr, +static const char *any_dump_insn(struct perf_event_attr *attr __maybe_unused, struct perf_insn *x, uint64_t ip, u8 *inbuf, int inlen, int *lenp) { diff --git a/tools/perf/util/print_insn.c b/tools/perf/util/print_insn.c index bca4449f0fa8..8825330d435f 100644 --- a/tools/perf/util/print_insn.c +++ b/tools/perf/util/print_insn.c @@ -196,7 +196,8 @@ size_t sample__fprintf_insn_asm(struct perf_sample *sample, struct thread *threa size_t sample__fprintf_insn_asm(struct perf_sample *sample __maybe_unused, struct 
thread *thread __maybe_unused, struct machine *machine __maybe_unused, - FILE *fp __maybe_unused) + FILE *fp __maybe_unused, + struct addr_location *al __maybe_unused) { return 0; } Otherwise: Reviewed-by: Adrian Hunter <adrian.hunter@intel.com> > > --- > > v2: Use brstackdisasm instead of keying of disasm > --- > tools/perf/Documentation/perf-script.txt | 7 +++- > tools/perf/builtin-script.c | 32 +++++++++++---- > tools/perf/util/dump-insn.h | 1 + > tools/perf/util/print_insn.c | 52 ++++++++++++++++++++++++ > tools/perf/util/print_insn.h | 3 ++ > 5 files changed, 86 insertions(+), 9 deletions(-) > > diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt > index 005e51df855e..ff086ef05a0c 100644 > --- a/tools/perf/Documentation/perf-script.txt > +++ b/tools/perf/Documentation/perf-script.txt > @@ -132,9 +132,9 @@ OPTIONS > Comma separated list of fields to print. Options are: > comm, tid, pid, time, cpu, event, trace, ip, sym, dso, dsoff, addr, symoff, > srcline, period, iregs, uregs, brstack, brstacksym, flags, bpf-output, > - brstackinsn, brstackinsnlen, brstackoff, callindent, insn, disasm, > + brstackinsn, brstackinsnlen, brstackdisasm, brstackoff, callindent, insn, disasm, > insnlen, synth, phys_addr, metric, misc, srccode, ipc, data_page_size, > - code_page_size, ins_lat, machine_pid, vcpu, cgroup, retire_lat. > + code_page_size, ins_lat, machine_pid, vcpu, cgroup, retire_lat, > > Field list can be prepended with the type, trace, sw or hw, > to indicate to which event type the field list applies. > @@ -257,6 +257,9 @@ OPTIONS > can’t know the next sequential instruction after an unconditional branch unless > you calculate that based on its length. > > + brstackdisasm acts like brstackinsn, but will print disassembled instructions if > + perf is built with the capstone library. > + > The brstackoff field will print an offset into a specific dso/binary. 
> > With the metric option perf script can compute metrics for > diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c > index 0299b1ed8744..db18d2c54c59 100644 > --- a/tools/perf/builtin-script.c > +++ b/tools/perf/builtin-script.c > @@ -136,6 +136,7 @@ enum perf_output_field { > PERF_OUTPUT_RETIRE_LAT = 1ULL << 40, > PERF_OUTPUT_DSOFF = 1ULL << 41, > PERF_OUTPUT_DISASM = 1ULL << 42, > + PERF_OUTPUT_BRSTACKDISASM = 1ULL << 43, > }; > > struct perf_script { > @@ -210,6 +211,7 @@ struct output_option { > {.str = "vcpu", .field = PERF_OUTPUT_VCPU}, > {.str = "cgroup", .field = PERF_OUTPUT_CGROUP}, > {.str = "retire_lat", .field = PERF_OUTPUT_RETIRE_LAT}, > + {.str = "brstackdisasm", .field = PERF_OUTPUT_BRSTACKDISASM}, > }; > > enum { > @@ -510,7 +512,8 @@ static int evsel__check_attr(struct evsel *evsel, struct perf_session *session) > "selected. Hence, no address to lookup the source line number.\n"); > return -EINVAL; > } > - if ((PRINT_FIELD(BRSTACKINSN) || PRINT_FIELD(BRSTACKINSNLEN)) && !allow_user_set && > + if ((PRINT_FIELD(BRSTACKINSN) || PRINT_FIELD(BRSTACKINSNLEN) || PRINT_FIELD(BRSTACKDISASM)) > + && !allow_user_set && > !(evlist__combined_branch_type(session->evlist) & PERF_SAMPLE_BRANCH_ANY)) { > pr_err("Display of branch stack assembler requested, but non all-branch filter set\n" > "Hint: run 'perf record -b ...'\n"); > @@ -1162,6 +1165,20 @@ static int print_srccode(struct thread *thread, u8 cpumode, uint64_t addr) > return ret; > } > > +static const char *any_dump_insn(struct perf_event_attr *attr, > + struct perf_insn *x, uint64_t ip, > + u8 *inbuf, int inlen, int *lenp) > +{ > +#ifdef HAVE_LIBCAPSTONE_SUPPORT > + if (PRINT_FIELD(BRSTACKDISASM)) { > + const char *p = cs_dump_insn(x, ip, inbuf, inlen, lenp); > + if (p) > + return p; > + } > +#endif > + return dump_insn(x, ip, inbuf, inlen, lenp); > +} > + > static int ip__fprintf_jump(uint64_t ip, struct branch_entry *en, > struct perf_insn *x, u8 *inbuf, int len, > int insn, FILE 
*fp, int *total_cycles, > @@ -1170,7 +1187,7 @@ static int ip__fprintf_jump(uint64_t ip, struct branch_entry *en, > { > int ilen = 0; > int printed = fprintf(fp, "\t%016" PRIx64 "\t%-30s\t", ip, > - dump_insn(x, ip, inbuf, len, &ilen)); > + any_dump_insn(attr, x, ip, inbuf, len, &ilen)); > > if (PRINT_FIELD(BRSTACKINSNLEN)) > printed += fprintf(fp, "ilen: %d\t", ilen); > @@ -1262,6 +1279,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample, > nr = max_blocks + 1; > > x.thread = thread; > + x.machine = machine; > x.cpu = sample->cpu; > > printed += fprintf(fp, "%c", '\n'); > @@ -1313,7 +1331,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample, > } else { > ilen = 0; > printed += fprintf(fp, "\t%016" PRIx64 "\t%s", ip, > - dump_insn(&x, ip, buffer + off, len - off, &ilen)); > + any_dump_insn(attr, &x, ip, buffer + off, len - off, &ilen)); > if (PRINT_FIELD(BRSTACKINSNLEN)) > printed += fprintf(fp, "\tilen: %d", ilen); > printed += fprintf(fp, "\n"); > @@ -1361,7 +1379,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample, > goto out; > ilen = 0; > printed += fprintf(fp, "\t%016" PRIx64 "\t%s", sample->ip, > - dump_insn(&x, sample->ip, buffer, len, &ilen)); > + any_dump_insn(attr, &x, sample->ip, buffer, len, &ilen)); > if (PRINT_FIELD(BRSTACKINSNLEN)) > printed += fprintf(fp, "\tilen: %d", ilen); > printed += fprintf(fp, "\n"); > @@ -1372,7 +1390,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample, > for (off = 0; off <= end - start; off += ilen) { > ilen = 0; > printed += fprintf(fp, "\t%016" PRIx64 "\t%s", start + off, > - dump_insn(&x, start + off, buffer + off, len - off, &ilen)); > + any_dump_insn(attr, &x, start + off, buffer + off, len - off, &ilen)); > if (PRINT_FIELD(BRSTACKINSNLEN)) > printed += fprintf(fp, "\tilen: %d", ilen); > printed += fprintf(fp, "\n"); > @@ -1534,7 +1552,7 @@ static int perf_sample__fprintf_insn(struct perf_sample *sample, > printed += 
fprintf(fp, "\t\t"); > printed += sample__fprintf_insn_asm(sample, thread, machine, fp, al); > } > - if (PRINT_FIELD(BRSTACKINSN) || PRINT_FIELD(BRSTACKINSNLEN)) > + if (PRINT_FIELD(BRSTACKINSN) || PRINT_FIELD(BRSTACKINSNLEN) || PRINT_FIELD(BRSTACKDISASM)) > printed += perf_sample__fprintf_brstackinsn(sample, thread, attr, machine, fp); > > return printed; > @@ -3940,7 +3958,7 @@ int cmd_script(int argc, const char **argv) > "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,dsoff," > "addr,symoff,srcline,period,iregs,uregs,brstack," > "brstacksym,flags,data_src,weight,bpf-output,brstackinsn," > - "brstackinsnlen,brstackoff,callindent,insn,disasm,insnlen,synth," > + "brstackinsnlen,brstackdisasm,brstackoff,callindent,insn,disasm,insnlen,synth," > "phys_addr,metric,misc,srccode,ipc,tod,data_page_size," > "code_page_size,ins_lat,machine_pid,vcpu,cgroup,retire_lat", > parse_output_fields), > diff --git a/tools/perf/util/dump-insn.h b/tools/perf/util/dump-insn.h > index 650125061530..4a7797dd6d09 100644 > --- a/tools/perf/util/dump-insn.h > +++ b/tools/perf/util/dump-insn.h > @@ -11,6 +11,7 @@ struct thread; > struct perf_insn { > /* Initialized by callers: */ > struct thread *thread; > + struct machine *machine; > u8 cpumode; > bool is64bit; > int cpu; > diff --git a/tools/perf/util/print_insn.c b/tools/perf/util/print_insn.c > index 8e4e3cffd677..bca4449f0fa8 100644 > --- a/tools/perf/util/print_insn.c > +++ b/tools/perf/util/print_insn.c > @@ -12,6 +12,7 @@ > #include "machine.h" > #include "thread.h" > #include "print_insn.h" > +#include "dump-insn.h" > #include "map.h" > #include "dso.h" > > @@ -71,6 +72,57 @@ static int capstone_init(struct machine *machine, csh *cs_handle, bool is64) > return 0; > } > > +static void dump_insn_x86(struct thread *thread, cs_insn *insn, struct perf_insn *x) > +{ > + struct addr_location al; > + bool printed = false; > + > + if (insn->detail && insn->detail->x86.op_count == 1) { > + cs_x86_op *op = &insn->detail->x86.operands[0]; 
> + > + addr_location__init(&al); > + if (op->type == X86_OP_IMM && > + thread__find_symbol(thread, x->cpumode, op->imm, &al) && > + al.sym && > + al.addr < al.sym->end) { > + snprintf(x->out, sizeof(x->out), "%s %s+%#" PRIx64 " [%#" PRIx64 "]", insn[0].mnemonic, > + al.sym->name, al.addr - al.sym->start, op->imm); > + printed = true; > + } > + addr_location__exit(&al); > + } > + > + if (!printed) > + snprintf(x->out, sizeof(x->out), "%s %s", insn[0].mnemonic, insn[0].op_str); > +} > + > +const char *cs_dump_insn(struct perf_insn *x, uint64_t ip, > + u8 *inbuf, int inlen, int *lenp) > +{ > + int ret; > + int count; > + cs_insn *insn; > + csh cs_handle; > + > + ret = capstone_init(x->machine, &cs_handle, x->is64bit); > + if (ret < 0) > + return NULL; > + > + count = cs_disasm(cs_handle, (uint8_t *)inbuf, inlen, ip, 1, &insn); > + if (count > 0) { > + if (machine__normalized_is(x->machine, "x86")) > + dump_insn_x86(x->thread, &insn[0], x); > + else > + snprintf(x->out, sizeof(x->out), "%s %s", > + insn[0].mnemonic, insn[0].op_str); > + *lenp = insn->size; > + cs_free(insn, count); > + } else { > + return NULL; > + } > + return x->out; > +} > + > static size_t print_insn_x86(struct perf_sample *sample, struct thread *thread, > cs_insn *insn, FILE *fp) > { > diff --git a/tools/perf/util/print_insn.h b/tools/perf/util/print_insn.h > index 6447dd41b543..c2a6391a45ce 100644 > --- a/tools/perf/util/print_insn.h > +++ b/tools/perf/util/print_insn.h > @@ -8,9 +8,12 @@ > struct perf_sample; > struct thread; > struct machine; > +struct perf_insn; > > size_t sample__fprintf_insn_asm(struct perf_sample *sample, struct thread *thread, > struct machine *machine, FILE *fp, struct addr_location *al); > size_t sample__fprintf_insn_raw(struct perf_sample *sample, FILE *fp); > +const char *cs_dump_insn(struct perf_insn *x, uint64_t ip, > + u8 *inbuf, int inlen, int *lenp); > > #endif /* PERF_PRINT_INSN_H */ ^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [PATCH v4 1/2] perf, capstone: Support 32bit code under 64bit OS 2024-03-08 17:06 [PATCH v4 1/2] perf, capstone: Support 32bit code under 64bit OS Andi Kleen 2024-03-08 17:06 ` [PATCH v4 2/2] perf, script, capstone: Add support for -F +brstackdisasm Andi Kleen @ 2024-03-15 12:19 ` Adrian Hunter 2024-03-22 10:21 ` Adrian Hunter 2 siblings, 0 replies; 12+ messages in thread From: Adrian Hunter @ 2024-03-15 12:19 UTC (permalink / raw) To: Andi Kleen, linux-perf-users; +Cc: changbin.du On 8/03/24 19:06, Andi Kleen wrote: > Use the DSO to resolve whether an IP is 32bit or 64bit and use that to > configure capstone to the correct mode. This allows to correctly > disassemble 32bit code under a 64bit OS. > > % cat > loop.c > volatile int var; > int main(void) > { > int i; > for (i = 0; i < 100000; i++) > var++; > } > % gcc -m32 -o loop loop.c > % perf record -e cycles:u ./loop > % perf script -F +disasm > loop 82665 1833176.618023: 1 cycles:u: f7eed500 _start+0x0 (/usr/lib/ld-linux.so.2) movl %esp, %eax > loop 82665 1833176.618029: 1 cycles:u: f7eed500 _start+0x0 (/usr/lib/ld-linux.so.2) movl %esp, %eax > loop 82665 1833176.618031: 7 cycles:u: f7eed500 _start+0x0 (/usr/lib/ld-linux.so.2) movl %esp, %eax > loop 82665 1833176.618034: 91 cycles:u: f7eed500 _start+0x0 (/usr/lib/ld-linux.so.2) movl %esp, %eax > loop 82665 1833176.618036: 1242 cycles:u: f7eed500 _start+0x0 (/usr/lib/ld-linux.so.2) movl %esp, %eax > > Signed-off-by: Andi Kleen <ak@linux.intel.com> > > --- > > v2: Factor out DSO lookup into separate function > v3: Pass down al > v4: Simplify is64bitip > --- > tools/perf/builtin-script.c | 9 +++++---- > tools/perf/util/print_insn.c | 24 ++++++++++++++++++++---- > tools/perf/util/print_insn.h | 2 +- > 3 files changed, 26 insertions(+), 9 deletions(-) > > diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c > index 37088cc0ff1b..0299b1ed8744 100644 > --- a/tools/perf/builtin-script.c > +++ b/tools/perf/builtin-script.c > @@ -1517,7 +1517,8 @@ 
void script_fetch_insn(struct perf_sample *sample, struct thread *thread, > static int perf_sample__fprintf_insn(struct perf_sample *sample, > struct perf_event_attr *attr, > struct thread *thread, > - struct machine *machine, FILE *fp) > + struct machine *machine, FILE *fp, > + struct addr_location *al) perf's fprintf functions usually have the "FILE *fp" as the last parameter, so better to add "struct addr_location *al" as second-to-last. > { > int printed = 0; > > @@ -1531,7 +1532,7 @@ static int perf_sample__fprintf_insn(struct perf_sample *sample, > } > if (PRINT_FIELD(DISASM) && sample->insn_len) { > printed += fprintf(fp, "\t\t"); > - printed += sample__fprintf_insn_asm(sample, thread, machine, fp); > + printed += sample__fprintf_insn_asm(sample, thread, machine, fp, al); > } > if (PRINT_FIELD(BRSTACKINSN) || PRINT_FIELD(BRSTACKINSNLEN)) > printed += perf_sample__fprintf_brstackinsn(sample, thread, attr, machine, fp); > @@ -1606,7 +1607,7 @@ static int perf_sample__fprintf_bts(struct perf_sample *sample, > if (print_srcline_last) > printed += map__fprintf_srcline(al->map, al->addr, "\n ", fp); > > - printed += perf_sample__fprintf_insn(sample, attr, thread, machine, fp); > + printed += perf_sample__fprintf_insn(sample, attr, thread, machine, fp, al); > printed += fprintf(fp, "\n"); > if (PRINT_FIELD(SRCCODE)) { > int ret = map__fprintf_srccode(al->map, al->addr, stdout, > @@ -2259,7 +2260,7 @@ static void process_event(struct perf_script *script, > > if (evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT)) > perf_sample__fprintf_bpf_output(sample, fp); > - perf_sample__fprintf_insn(sample, attr, thread, machine, fp); > + perf_sample__fprintf_insn(sample, attr, thread, machine, fp, al); > > if (PRINT_FIELD(PHYS_ADDR)) > fprintf(fp, "%16" PRIx64, sample->phys_addr); > diff --git a/tools/perf/util/print_insn.c b/tools/perf/util/print_insn.c > index 459e0e93d7b1..8e4e3cffd677 100644 > --- a/tools/perf/util/print_insn.c > +++ b/tools/perf/util/print_insn.c > 
@@ -12,6 +12,8 @@ > #include "machine.h" > #include "thread.h" > #include "print_insn.h" > +#include "map.h" > +#include "dso.h" > > size_t sample__fprintf_insn_raw(struct perf_sample *sample, FILE *fp) > { > @@ -28,12 +30,12 @@ size_t sample__fprintf_insn_raw(struct perf_sample *sample, FILE *fp) > #ifdef HAVE_LIBCAPSTONE_SUPPORT > #include <capstone/capstone.h> > > -static int capstone_init(struct machine *machine, csh *cs_handle) > +static int capstone_init(struct machine *machine, csh *cs_handle, bool is64) > { > cs_arch arch; > cs_mode mode; > > - if (machine__is(machine, "x86_64")) { > + if (machine__is(machine, "x86_64") && is64) { > arch = CS_ARCH_X86; > mode = CS_MODE_64; > } else if (machine__normalized_is(machine, "x86")) { > @@ -93,17 +95,31 @@ static size_t print_insn_x86(struct perf_sample *sample, struct thread *thread, > return printed; > } > > +static bool is64bitip(struct machine *machine, struct addr_location *al) > +{ > + const struct dso *dso = al->map ? map__dso(al->map) : NULL; > + > + if (dso) > + return dso->is_64_bit; > + > + return machine__is(machine, "x86_64") || > + machine__normalized_is(machine, "arm64") || > + machine__normalized_is(machine, "s390"); > +} > + > size_t sample__fprintf_insn_asm(struct perf_sample *sample, struct thread *thread, > - struct machine *machine, FILE *fp) > + struct machine *machine, FILE *fp, > + struct addr_location *al) > { > csh cs_handle; > cs_insn *insn; > size_t count; > size_t printed = 0; > int ret; > + bool is64bit = is64bitip(machine, al); > > /* TODO: Try to initiate capstone only once but need a proper place. 
*/ > - ret = capstone_init(machine, &cs_handle); > + ret = capstone_init(machine, &cs_handle, is64bit); > if (ret < 0) { > /* fallback */ > return sample__fprintf_insn_raw(sample, fp); sample__fprintf_insn_asm() stub needs changing too > diff --git a/tools/perf/util/print_insn.h b/tools/perf/util/print_insn.h > index 465bdcfcc2fd..6447dd41b543 100644 > --- a/tools/perf/util/print_insn.h > +++ b/tools/perf/util/print_insn.h > @@ -10,7 +10,7 @@ struct thread; > struct machine; > > size_t sample__fprintf_insn_asm(struct perf_sample *sample, struct thread *thread, > - struct machine *machine, FILE *fp); > + struct machine *machine, FILE *fp, struct addr_location *al); > size_t sample__fprintf_insn_raw(struct perf_sample *sample, FILE *fp); > > #endif /* PERF_PRINT_INSN_H */ ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v4 1/2] perf, capstone: Support 32bit code under 64bit OS 2024-03-08 17:06 [PATCH v4 1/2] perf, capstone: Support 32bit code under 64bit OS Andi Kleen 2024-03-08 17:06 ` [PATCH v4 2/2] perf, script, capstone: Add support for -F +brstackdisasm Andi Kleen 2024-03-15 12:19 ` [PATCH v4 1/2] perf, capstone: Support 32bit code under 64bit OS Adrian Hunter @ 2024-03-22 10:21 ` Adrian Hunter 2024-03-22 10:47 ` Thomas Richter 2 siblings, 1 reply; 12+ messages in thread From: Adrian Hunter @ 2024-03-22 10:21 UTC (permalink / raw) To: Andi Kleen Cc: changbin.du, Arnaldo Carvalho de Melo, Ian Rogers, Namhyung Kim, Jiri Olsa, linux-perf-users On 8/03/24 19:06, Andi Kleen wrote: > Use the DSO to resolve whether an IP is 32bit or 64bit and use that to > configure capstone to the correct mode. This allows to correctly > disassemble 32bit code under a 64bit OS. > > % cat > loop.c > volatile int var; > int main(void) > { > int i; > for (i = 0; i < 100000; i++) > var++; > } > % gcc -m32 -o loop loop.c > % perf record -e cycles:u ./loop > % perf script -F +disasm > loop 82665 1833176.618023: 1 cycles:u: f7eed500 _start+0x0 (/usr/lib/ld-linux.so.2) movl %esp, %eax > loop 82665 1833176.618029: 1 cycles:u: f7eed500 _start+0x0 (/usr/lib/ld-linux.so.2) movl %esp, %eax > loop 82665 1833176.618031: 7 cycles:u: f7eed500 _start+0x0 (/usr/lib/ld-linux.so.2) movl %esp, %eax > loop 82665 1833176.618034: 91 cycles:u: f7eed500 _start+0x0 (/usr/lib/ld-linux.so.2) movl %esp, %eax > loop 82665 1833176.618036: 1242 cycles:u: f7eed500 _start+0x0 (/usr/lib/ld-linux.so.2) movl %esp, %eax > > Signed-off-by: Andi Kleen <ak@linux.intel.com> Comma in subject is slightly odd, otherwise: Reviewed-by: Adrian Hunter <adrian.hunter@intel.com> > > --- > > v2: Factor out DSO lookup into separate function > v3: Pass down al > v4: Simplify is64bitip > --- > tools/perf/builtin-script.c | 9 +++++---- > tools/perf/util/print_insn.c | 24 ++++++++++++++++++++---- > tools/perf/util/print_insn.h | 2 +- > 3 files 
changed, 26 insertions(+), 9 deletions(-) > > diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c > index 37088cc0ff1b..0299b1ed8744 100644 > --- a/tools/perf/builtin-script.c > +++ b/tools/perf/builtin-script.c > @@ -1517,7 +1517,8 @@ void script_fetch_insn(struct perf_sample *sample, struct thread *thread, > static int perf_sample__fprintf_insn(struct perf_sample *sample, > struct perf_event_attr *attr, > struct thread *thread, > - struct machine *machine, FILE *fp) > + struct machine *machine, FILE *fp, > + struct addr_location *al) > { > int printed = 0; > > @@ -1531,7 +1532,7 @@ static int perf_sample__fprintf_insn(struct perf_sample *sample, > } > if (PRINT_FIELD(DISASM) && sample->insn_len) { > printed += fprintf(fp, "\t\t"); > - printed += sample__fprintf_insn_asm(sample, thread, machine, fp); > + printed += sample__fprintf_insn_asm(sample, thread, machine, fp, al); > } > if (PRINT_FIELD(BRSTACKINSN) || PRINT_FIELD(BRSTACKINSNLEN)) > printed += perf_sample__fprintf_brstackinsn(sample, thread, attr, machine, fp); > @@ -1606,7 +1607,7 @@ static int perf_sample__fprintf_bts(struct perf_sample *sample, > if (print_srcline_last) > printed += map__fprintf_srcline(al->map, al->addr, "\n ", fp); > > - printed += perf_sample__fprintf_insn(sample, attr, thread, machine, fp); > + printed += perf_sample__fprintf_insn(sample, attr, thread, machine, fp, al); > printed += fprintf(fp, "\n"); > if (PRINT_FIELD(SRCCODE)) { > int ret = map__fprintf_srccode(al->map, al->addr, stdout, > @@ -2259,7 +2260,7 @@ static void process_event(struct perf_script *script, > > if (evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT)) > perf_sample__fprintf_bpf_output(sample, fp); > - perf_sample__fprintf_insn(sample, attr, thread, machine, fp); > + perf_sample__fprintf_insn(sample, attr, thread, machine, fp, al); > > if (PRINT_FIELD(PHYS_ADDR)) > fprintf(fp, "%16" PRIx64, sample->phys_addr); > diff --git a/tools/perf/util/print_insn.c b/tools/perf/util/print_insn.c > 
index 459e0e93d7b1..8e4e3cffd677 100644 > --- a/tools/perf/util/print_insn.c > +++ b/tools/perf/util/print_insn.c > @@ -12,6 +12,8 @@ > #include "machine.h" > #include "thread.h" > #include "print_insn.h" > +#include "map.h" > +#include "dso.h" > > size_t sample__fprintf_insn_raw(struct perf_sample *sample, FILE *fp) > { > @@ -28,12 +30,12 @@ size_t sample__fprintf_insn_raw(struct perf_sample *sample, FILE *fp) > #ifdef HAVE_LIBCAPSTONE_SUPPORT > #include <capstone/capstone.h> > > -static int capstone_init(struct machine *machine, csh *cs_handle) > +static int capstone_init(struct machine *machine, csh *cs_handle, bool is64) > { > cs_arch arch; > cs_mode mode; > > - if (machine__is(machine, "x86_64")) { > + if (machine__is(machine, "x86_64") && is64) { > arch = CS_ARCH_X86; > mode = CS_MODE_64; > } else if (machine__normalized_is(machine, "x86")) { > @@ -93,17 +95,31 @@ static size_t print_insn_x86(struct perf_sample *sample, struct thread *thread, > return printed; > } > > +static bool is64bitip(struct machine *machine, struct addr_location *al) > +{ > + const struct dso *dso = al->map ? map__dso(al->map) : NULL; > + > + if (dso) > + return dso->is_64_bit; > + > + return machine__is(machine, "x86_64") || > + machine__normalized_is(machine, "arm64") || > + machine__normalized_is(machine, "s390"); > +} > + > size_t sample__fprintf_insn_asm(struct perf_sample *sample, struct thread *thread, > - struct machine *machine, FILE *fp) > + struct machine *machine, FILE *fp, > + struct addr_location *al) > { > csh cs_handle; > cs_insn *insn; > size_t count; > size_t printed = 0; > int ret; > + bool is64bit = is64bitip(machine, al); > > /* TODO: Try to initiate capstone only once but need a proper place. 
*/ > - ret = capstone_init(machine, &cs_handle); > + ret = capstone_init(machine, &cs_handle, is64bit); > if (ret < 0) { > /* fallback */ > return sample__fprintf_insn_raw(sample, fp); > diff --git a/tools/perf/util/print_insn.h b/tools/perf/util/print_insn.h > index 465bdcfcc2fd..6447dd41b543 100644 > --- a/tools/perf/util/print_insn.h > +++ b/tools/perf/util/print_insn.h > @@ -10,7 +10,7 @@ struct thread; > struct machine; > > size_t sample__fprintf_insn_asm(struct perf_sample *sample, struct thread *thread, > - struct machine *machine, FILE *fp); > + struct machine *machine, FILE *fp, struct addr_location *al); > size_t sample__fprintf_insn_raw(struct perf_sample *sample, FILE *fp); > > #endif /* PERF_PRINT_INSN_H */ ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v4 1/2] perf, capstone: Support 32bit code under 64bit OS 2024-03-22 10:21 ` Adrian Hunter @ 2024-03-22 10:47 ` Thomas Richter 0 siblings, 0 replies; 12+ messages in thread From: Thomas Richter @ 2024-03-22 10:47 UTC (permalink / raw) To: Adrian Hunter, Andi Kleen Cc: changbin.du, Arnaldo Carvalho de Melo, Ian Rogers, Namhyung Kim, Jiri Olsa, linux-perf-users On 3/22/24 11:21, Adrian Hunter wrote: > On 8/03/24 19:06, Andi Kleen wrote: >> Use the DSO to resolve whether an IP is 32bit or 64bit and use that to >> configure capstone to the correct mode. This allows to correctly >> disassemble 32bit code under a 64bit OS. >> >> % cat > loop.c >> volatile int var; >> int main(void) >> { >> int i; >> for (i = 0; i < 100000; i++) >> var++; >> } >> % gcc -m32 -o loop loop.c >> % perf record -e cycles:u ./loop >> % perf script -F +disasm >> loop 82665 1833176.618023: 1 cycles:u: f7eed500 _start+0x0 (/usr/lib/ld-linux.so.2) movl %esp, %eax >> loop 82665 1833176.618029: 1 cycles:u: f7eed500 _start+0x0 (/usr/lib/ld-linux.so.2) movl %esp, %eax >> loop 82665 1833176.618031: 7 cycles:u: f7eed500 _start+0x0 (/usr/lib/ld-linux.so.2) movl %esp, %eax >> loop 82665 1833176.618034: 91 cycles:u: f7eed500 _start+0x0 (/usr/lib/ld-linux.so.2) movl %esp, %eax >> loop 82665 1833176.618036: 1242 cycles:u: f7eed500 _start+0x0 (/usr/lib/ld-linux.so.2) movl %esp, %eax >> >> Signed-off-by: Andi Kleen <ak@linux.intel.com> > > Comma in subject is slightly odd, otherwise: > > Reviewed-by: Adrian Hunter <adrian.hunter@intel.com> > >> >> --- >> >> v2: Factor out DSO lookup into separate function >> v3: Pass down al >> v4: Simplify is64bitip >> --- >> tools/perf/builtin-script.c | 9 +++++---- >> tools/perf/util/print_insn.c | 24 ++++++++++++++++++++---- >> tools/perf/util/print_insn.h | 2 +- >> 3 files changed, 26 insertions(+), 9 deletions(-) >> >> diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c >> index 37088cc0ff1b..0299b1ed8744 100644 >> --- 
a/tools/perf/builtin-script.c >> +++ b/tools/perf/builtin-script.c >> @@ -1517,7 +1517,8 @@ void script_fetch_insn(struct perf_sample *sample, struct thread *thread, >> static int perf_sample__fprintf_insn(struct perf_sample *sample, >> struct perf_event_attr *attr, >> struct thread *thread, >> - struct machine *machine, FILE *fp) >> + struct machine *machine, FILE *fp, >> + struct addr_location *al) >> { >> int printed = 0; >> >> @@ -1531,7 +1532,7 @@ static int perf_sample__fprintf_insn(struct perf_sample *sample, >> } >> if (PRINT_FIELD(DISASM) && sample->insn_len) { >> printed += fprintf(fp, "\t\t"); >> - printed += sample__fprintf_insn_asm(sample, thread, machine, fp); >> + printed += sample__fprintf_insn_asm(sample, thread, machine, fp, al); >> } >> if (PRINT_FIELD(BRSTACKINSN) || PRINT_FIELD(BRSTACKINSNLEN)) >> printed += perf_sample__fprintf_brstackinsn(sample, thread, attr, machine, fp); >> @@ -1606,7 +1607,7 @@ static int perf_sample__fprintf_bts(struct perf_sample *sample, >> if (print_srcline_last) >> printed += map__fprintf_srcline(al->map, al->addr, "\n ", fp); >> >> - printed += perf_sample__fprintf_insn(sample, attr, thread, machine, fp); >> + printed += perf_sample__fprintf_insn(sample, attr, thread, machine, fp, al); >> printed += fprintf(fp, "\n"); >> if (PRINT_FIELD(SRCCODE)) { >> int ret = map__fprintf_srccode(al->map, al->addr, stdout, >> @@ -2259,7 +2260,7 @@ static void process_event(struct perf_script *script, >> >> if (evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT)) >> perf_sample__fprintf_bpf_output(sample, fp); >> - perf_sample__fprintf_insn(sample, attr, thread, machine, fp); >> + perf_sample__fprintf_insn(sample, attr, thread, machine, fp, al); >> >> if (PRINT_FIELD(PHYS_ADDR)) >> fprintf(fp, "%16" PRIx64, sample->phys_addr); >> diff --git a/tools/perf/util/print_insn.c b/tools/perf/util/print_insn.c >> index 459e0e93d7b1..8e4e3cffd677 100644 >> --- a/tools/perf/util/print_insn.c >> +++ b/tools/perf/util/print_insn.c >> @@ 
-12,6 +12,8 @@ >> #include "machine.h" >> #include "thread.h" >> #include "print_insn.h" >> +#include "map.h" >> +#include "dso.h" >> >> size_t sample__fprintf_insn_raw(struct perf_sample *sample, FILE *fp) >> { >> @@ -28,12 +30,12 @@ size_t sample__fprintf_insn_raw(struct perf_sample *sample, FILE *fp) >> #ifdef HAVE_LIBCAPSTONE_SUPPORT >> #include <capstone/capstone.h> >> >> -static int capstone_init(struct machine *machine, csh *cs_handle) >> +static int capstone_init(struct machine *machine, csh *cs_handle, bool is64) >> { >> cs_arch arch; >> cs_mode mode; >> >> - if (machine__is(machine, "x86_64")) { >> + if (machine__is(machine, "x86_64") && is64) { >> arch = CS_ARCH_X86; >> mode = CS_MODE_64; >> } else if (machine__normalized_is(machine, "x86")) { >> @@ -93,17 +95,31 @@ static size_t print_insn_x86(struct perf_sample *sample, struct thread *thread, >> return printed; >> } >> >> +static bool is64bitip(struct machine *machine, struct addr_location *al) >> +{ >> + const struct dso *dso = al->map ? map__dso(al->map) : NULL; >> + >> + if (dso) >> + return dso->is_64_bit; >> + >> + return machine__is(machine, "x86_64") || >> + machine__normalized_is(machine, "arm64") || >> + machine__normalized_is(machine, "s390"); >> +} >> + >> size_t sample__fprintf_insn_asm(struct perf_sample *sample, struct thread *thread, >> - struct machine *machine, FILE *fp) >> + struct machine *machine, FILE *fp, >> + struct addr_location *al) >> { >> csh cs_handle; >> cs_insn *insn; >> size_t count; >> size_t printed = 0; >> int ret; >> + bool is64bit = is64bitip(machine, al); >> >> /* TODO: Try to initiate capstone only once but need a proper place. 
*/ >> - ret = capstone_init(machine, &cs_handle); >> + ret = capstone_init(machine, &cs_handle, is64bit); >> if (ret < 0) { >> /* fallback */ >> return sample__fprintf_insn_raw(sample, fp); >> diff --git a/tools/perf/util/print_insn.h b/tools/perf/util/print_insn.h >> index 465bdcfcc2fd..6447dd41b543 100644 >> --- a/tools/perf/util/print_insn.h >> +++ b/tools/perf/util/print_insn.h >> @@ -10,7 +10,7 @@ struct thread; >> struct machine; >> >> size_t sample__fprintf_insn_asm(struct perf_sample *sample, struct thread *thread, >> - struct machine *machine, FILE *fp); >> + struct machine *machine, FILE *fp, struct addr_location *al); >> size_t sample__fprintf_insn_raw(struct perf_sample *sample, FILE *fp); >> >> #endif /* PERF_PRINT_INSN_H */ > > Acked-by: Thomas Richter <tmricht@linux.ibm.com> -- Thomas Richter, Dept 3303, IBM s390 Linux Development, Boeblingen, Germany -- IBM Deutschland Research & Development GmbH Vorsitzender des Aufsichtsrats: Wolfgang Wendt Geschäftsführung: David Faller Sitz der Gesellschaft: Böblingen / Registergericht: Amtsgericht Stuttgart, HRB 243294 ^ permalink raw reply [flat|nested] 12+ messages in thread
end of thread, other threads:[~2024-04-08 20:35 UTC | newest] Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2024-03-08 17:06 [PATCH v4 1/2] perf, capstone: Support 32bit code under 64bit OS Andi Kleen 2024-03-08 17:06 ` [PATCH v4 2/2] perf, script, capstone: Add support for -F +brstackdisasm Andi Kleen 2024-03-15 12:19 ` Adrian Hunter 2024-03-18 22:06 ` Andi Kleen 2024-03-19 6:52 ` Adrian Hunter 2024-03-20 0:35 ` Andi Kleen 2024-03-22 10:21 ` [PATCH] perf script: Consolidate capstone print functions Adrian Hunter 2024-04-08 20:35 ` Arnaldo Carvalho de Melo 2024-03-22 10:21 ` [PATCH v4 2/2] perf, script, capstone: Add support for -F +brstackdisasm Adrian Hunter 2024-03-15 12:19 ` [PATCH v4 1/2] perf, capstone: Support 32bit code under 64bit OS Adrian Hunter 2024-03-22 10:21 ` Adrian Hunter 2024-03-22 10:47 ` Thomas Richter
This is a public inbox; see the mirroring instructions for how to clone and mirror all data and code used for this inbox, as well as URLs for NNTP newsgroup(s).