* [PATCH 01/86] perf annotate: Check for fused instructions
2017-07-19 13:55 [GIT PULL 00/86] perf/core improvements and fixes Arnaldo Carvalho de Melo
@ 2017-07-19 13:55 ` Arnaldo Carvalho de Melo
2017-07-19 13:55 ` [PATCH 02/86] perf annotate: Implement visual marker for macro fusion Arnaldo Carvalho de Melo
` (7 subsequent siblings)
8 siblings, 0 replies; 11+ messages in thread
From: Arnaldo Carvalho de Melo @ 2017-07-19 13:55 UTC (permalink / raw)
To: Ingo Molnar
Cc: linux-kernel, linux-perf-users, Jin Yao, Alexander Shishkin,
Andi Kleen, Jiri Olsa, Kan Liang, Peter Zijlstra,
Arnaldo Carvalho de Melo
From: Jin Yao <yao.jin@linux.intel.com>
Macro fusion merges two instructions into a single micro-op. Intel Core
platforms perform this hardware optimization under limited
circumstances.
For example, CMP + JCC can be "fused" and executed/retired together.
With sampling, this can result in the sample sometimes being on the
JCC and sometimes on the CMP. So the two instructions of a fused pair
should be considered together.
On Nehalem, fused instruction pairs:
cmp/test + jcc.
On other, newer CPUs:
cmp/test/add/sub/and/inc/dec + jcc.
This patch adds an x86-specific function which checks if 2 instructions
are in a "fused" pair. For non-x86 arch, the function is just NULL.
Changelog:
v4: Move the CPU model checking to symbol__disassemble and save the CPU
family/model in the arch structure.
This avoids checking every time a jump arrow is printed.
v3: Add checking for Nehalem (CMP, TEST). For other newer Intel CPUs
just check it by default (CMP, TEST, ADD, SUB, AND, INC, DEC).
v2: Remove the original weak function. Arnaldo points out that doing it
as a weak function that will be overridden by the host arch doesn't
work. So now it's implemented as an arch-specific function.
Committer fix:
Do not access evsel->evlist->env->cpuid directly, as ->env can be NULL.
Introduce perf_evsel__env_cpuid(), just like perf_evsel__env_arch(),
which is also used in this function call.
The original patch was segfaulting 'perf top' + annotation.
This essentially disables the fused instructions augmentation in
'perf top'; the right thing is to get the cpuid from the running kernel,
which is left for a later patch.
Signed-off-by: Yao Jin <yao.jin@linux.intel.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1499403995-19857-2-git-send-email-yao.jin@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
tools/perf/arch/x86/annotate/instructions.c | 46 +++++++++++++++++++++++++++++
tools/perf/builtin-top.c | 2 +-
tools/perf/ui/browsers/annotate.c | 4 ++-
tools/perf/ui/gtk/annotate.c | 2 +-
tools/perf/util/annotate.c | 22 ++++++++++++--
tools/perf/util/annotate.h | 3 +-
tools/perf/util/evsel.c | 7 +++++
tools/perf/util/evsel.h | 1 +
8 files changed, 81 insertions(+), 6 deletions(-)
diff --git a/tools/perf/arch/x86/annotate/instructions.c b/tools/perf/arch/x86/annotate/instructions.c
index c1625f256df3..d84b72063a30 100644
--- a/tools/perf/arch/x86/annotate/instructions.c
+++ b/tools/perf/arch/x86/annotate/instructions.c
@@ -76,3 +76,49 @@ static struct ins x86__instructions[] = {
{ .name = "xbeginq", .ops = &jump_ops, },
{ .name = "retq", .ops = &ret_ops, },
};
+
+static bool x86__ins_is_fused(struct arch *arch, const char *ins1,
+ const char *ins2)
+{
+ if (arch->family != 6 || arch->model < 0x1e || strstr(ins2, "jmp"))
+ return false;
+
+ if (arch->model == 0x1e) {
+ /* Nehalem */
+ if ((strstr(ins1, "cmp") && !strstr(ins1, "xchg")) ||
+ strstr(ins1, "test")) {
+ return true;
+ }
+ } else {
+ /* Newer platform */
+ if ((strstr(ins1, "cmp") && !strstr(ins1, "xchg")) ||
+ strstr(ins1, "test") ||
+ strstr(ins1, "add") ||
+ strstr(ins1, "sub") ||
+ strstr(ins1, "and") ||
+ strstr(ins1, "inc") ||
+ strstr(ins1, "dec")) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static int x86__cpuid_parse(struct arch *arch, char *cpuid)
+{
+ unsigned int family, model, stepping;
+ int ret;
+
+ /*
+ * cpuid = "GenuineIntel,family,model,stepping"
+ */
+ ret = sscanf(cpuid, "%*[^,],%u,%u,%u", &family, &model, &stepping);
+ if (ret == 3) {
+ arch->family = family;
+ arch->model = model;
+ return 0;
+ }
+
+ return -1;
+}
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 6052376634c0..022486dc67f5 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -134,7 +134,7 @@ static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he)
return err;
}
- err = symbol__disassemble(sym, map, NULL, 0, NULL);
+ err = symbol__disassemble(sym, map, NULL, 0, NULL, NULL);
if (err == 0) {
out_assign:
top->sym_filter_entry = he;
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index 27f41f28dcb4..c4336138b673 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -9,6 +9,7 @@
#include "../../util/symbol.h"
#include "../../util/evsel.h"
#include "../../util/config.h"
+#include "../../util/evlist.h"
#include <inttypes.h>
#include <pthread.h>
#include <linux/kernel.h>
@@ -1074,7 +1075,8 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map,
}
err = symbol__disassemble(sym, map, perf_evsel__env_arch(evsel),
- sizeof_bdl, &browser.arch);
+ sizeof_bdl, &browser.arch,
+ perf_evsel__env_cpuid(evsel));
if (err) {
char msg[BUFSIZ];
symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg));
diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c
index d903fd493416..87e3760624f2 100644
--- a/tools/perf/ui/gtk/annotate.c
+++ b/tools/perf/ui/gtk/annotate.c
@@ -169,7 +169,7 @@ static int symbol__gtk_annotate(struct symbol *sym, struct map *map,
return -1;
err = symbol__disassemble(sym, map, perf_evsel__env_arch(evsel),
- 0, NULL);
+ 0, NULL, NULL);
if (err) {
char msg[BUFSIZ];
symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg));
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index be1caabb9290..8748ebb3f932 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -47,7 +47,12 @@ struct arch {
bool sorted_instructions;
bool initialized;
void *priv;
+ unsigned int model;
+ unsigned int family;
int (*init)(struct arch *arch);
+ bool (*ins_is_fused)(struct arch *arch, const char *ins1,
+ const char *ins2);
+ int (*cpuid_parse)(struct arch *arch, char *cpuid);
struct {
char comment_char;
char skip_functions_char;
@@ -129,6 +134,8 @@ static struct arch architectures[] = {
.name = "x86",
.instructions = x86__instructions,
.nr_instructions = ARRAY_SIZE(x86__instructions),
+ .ins_is_fused = x86__ins_is_fused,
+ .cpuid_parse = x86__cpuid_parse,
.objdump = {
.comment_char = '#',
},
@@ -171,6 +178,14 @@ int ins__scnprintf(struct ins *ins, char *bf, size_t size,
return ins__raw_scnprintf(ins, bf, size, ops);
}
+bool ins__is_fused(struct arch *arch, const char *ins1, const char *ins2)
+{
+ if (!arch || !arch->ins_is_fused)
+ return false;
+
+ return arch->ins_is_fused(arch, ins1, ins2);
+}
+
static int call__parse(struct arch *arch, struct ins_operands *ops, struct map *map)
{
char *endptr, *tok, *name;
@@ -1381,7 +1396,7 @@ static const char *annotate__norm_arch(const char *arch_name)
int symbol__disassemble(struct symbol *sym, struct map *map,
const char *arch_name, size_t privsize,
- struct arch **parch)
+ struct arch **parch, char *cpuid)
{
struct dso *dso = map->dso;
char command[PATH_MAX * 2];
@@ -1418,6 +1433,9 @@ int symbol__disassemble(struct symbol *sym, struct map *map,
}
}
+ if (arch->cpuid_parse && cpuid)
+ arch->cpuid_parse(arch, cpuid);
+
pr_debug("%s: filename=%s, sym=%s, start=%#" PRIx64 ", end=%#" PRIx64 "\n", __func__,
symfs_filename, sym->name, map->unmap_ip(map, sym->start),
map->unmap_ip(map, sym->end));
@@ -1907,7 +1925,7 @@ int symbol__tty_annotate(struct symbol *sym, struct map *map,
u64 len;
if (symbol__disassemble(sym, map, perf_evsel__env_arch(evsel),
- 0, NULL) < 0)
+ 0, NULL, NULL) < 0)
return -1;
len = symbol__size(sym);
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index 21055034aedd..72d72728a0fc 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -53,6 +53,7 @@ bool ins__is_jump(const struct ins *ins);
bool ins__is_call(const struct ins *ins);
bool ins__is_ret(const struct ins *ins);
int ins__scnprintf(struct ins *ins, char *bf, size_t size, struct ins_operands *ops);
+bool ins__is_fused(struct arch *arch, const char *ins1, const char *ins2);
struct annotation;
@@ -160,7 +161,7 @@ void symbol__annotate_zero_histograms(struct symbol *sym);
int symbol__disassemble(struct symbol *sym, struct map *map,
const char *arch_name, size_t privsize,
- struct arch **parch);
+ struct arch **parch, char *cpuid);
enum symbol_disassemble_errno {
SYMBOL_ANNOTATE_ERRNO__SUCCESS = 0,
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 413f74df08de..0e4cd6092564 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -2610,3 +2610,10 @@ char *perf_evsel__env_arch(struct perf_evsel *evsel)
return evsel->evlist->env->arch;
return NULL;
}
+
+char *perf_evsel__env_cpuid(struct perf_evsel *evsel)
+{
+ if (evsel && evsel->evlist && evsel->evlist->env)
+ return evsel->evlist->env->cpuid;
+ return NULL;
+}
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index d101695c482c..219ad0cdb9f4 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -436,5 +436,6 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
attr__fprintf_f attr__fprintf, void *priv);
char *perf_evsel__env_arch(struct perf_evsel *evsel);
+char *perf_evsel__env_cpuid(struct perf_evsel *evsel);
#endif /* __PERF_EVSEL_H */
--
2.9.4
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH 02/86] perf annotate: Implement visual marker for macro fusion
2017-07-19 13:55 [GIT PULL 00/86] perf/core improvements and fixes Arnaldo Carvalho de Melo
2017-07-19 13:55 ` [PATCH 01/86] perf annotate: Check for fused instructions Arnaldo Carvalho de Melo
@ 2017-07-19 13:55 ` Arnaldo Carvalho de Melo
2017-07-19 13:55 ` [PATCH 03/86] perf trace: Remove F_ from some of the fcntl command strings Arnaldo Carvalho de Melo
` (6 subsequent siblings)
8 siblings, 0 replies; 11+ messages in thread
From: Arnaldo Carvalho de Melo @ 2017-07-19 13:55 UTC (permalink / raw)
To: Ingo Molnar
Cc: linux-kernel, linux-perf-users, Jin Yao, Alexander Shishkin,
Andi Kleen, Jiri Olsa, Kan Liang, Peter Zijlstra,
Arnaldo Carvalho de Melo
From: Jin Yao <yao.jin@linux.intel.com>
To mark fused instructions clearly, this patch draws a line before the
first instruction of the pair and joins it with the arrow from the jump
to its target.
For example, when "je" is selected in annotate view, the line before
cmpl is displayed and joins the arrow of "je".
│ ┌──cmpl $0x0,argp_program_version_hook
81.93 │ ├──je 20
│ │ lock cmpxchg %esi,0x38a9a4(%rip)
│ │↓ jne 29
│ │↓ jmp 43
11.47 │20:└─→cmpxch %esi,0x38a999(%rip)
That means the cmpl+je is a fused instruction pair and they should be
considered together.
Changelog:
v3: Use Arnaldo's fix to improve the arrow origin rendering. To get the
evsel->evlist->env->cpuid, save the evsel in annotate_browser.
v2: new function "ins__is_fused" to check if the instructions are fused.
Signed-off-by: Yao Jin <yao.jin@linux.intel.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1499403995-19857-3-git-send-email-yao.jin@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
tools/perf/ui/browser.c | 29 +++++++++++++++++++++++++++++
tools/perf/ui/browser.h | 2 ++
tools/perf/ui/browsers/annotate.c | 26 ++++++++++++++++++++++++++
tools/perf/util/annotate.c | 5 +++++
tools/perf/util/annotate.h | 1 +
5 files changed, 63 insertions(+)
diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c
index 83874b0e266c..f73f3f13e01d 100644
--- a/tools/perf/ui/browser.c
+++ b/tools/perf/ui/browser.c
@@ -738,6 +738,35 @@ void __ui_browser__line_arrow(struct ui_browser *browser, unsigned int column,
__ui_browser__line_arrow_down(browser, column, start, end);
}
+void ui_browser__mark_fused(struct ui_browser *browser, unsigned int column,
+ unsigned int row, bool arrow_down)
+{
+ unsigned int end_row;
+
+ if (row >= browser->top_idx)
+ end_row = row - browser->top_idx;
+ else
+ return;
+
+ SLsmg_set_char_set(1);
+
+ if (arrow_down) {
+ ui_browser__gotorc(browser, end_row, column - 1);
+ SLsmg_write_char(SLSMG_ULCORN_CHAR);
+ ui_browser__gotorc(browser, end_row, column);
+ SLsmg_draw_hline(2);
+ ui_browser__gotorc(browser, end_row + 1, column - 1);
+ SLsmg_write_char(SLSMG_LTEE_CHAR);
+ } else {
+ ui_browser__gotorc(browser, end_row, column - 1);
+ SLsmg_write_char(SLSMG_LTEE_CHAR);
+ ui_browser__gotorc(browser, end_row, column);
+ SLsmg_draw_hline(2);
+ }
+
+ SLsmg_set_char_set(0);
+}
+
void ui_browser__init(void)
{
int i = 0;
diff --git a/tools/perf/ui/browser.h b/tools/perf/ui/browser.h
index be3b70eb5fca..a12eff75638b 100644
--- a/tools/perf/ui/browser.h
+++ b/tools/perf/ui/browser.h
@@ -43,6 +43,8 @@ void ui_browser__printf(struct ui_browser *browser, const char *fmt, ...);
void ui_browser__write_graph(struct ui_browser *browser, int graph);
void __ui_browser__line_arrow(struct ui_browser *browser, unsigned int column,
u64 start, u64 end);
+void ui_browser__mark_fused(struct ui_browser *browser, unsigned int column,
+ unsigned int row, bool arrow_down);
void __ui_browser__show_title(struct ui_browser *browser, const char *title);
void ui_browser__show_title(struct ui_browser *browser, const char *title);
int ui_browser__show(struct ui_browser *browser, const char *title,
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index c4336138b673..8d3f6f53c122 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -273,6 +273,25 @@ static bool disasm_line__is_valid_jump(struct disasm_line *dl, struct symbol *sy
return true;
}
+static bool is_fused(struct annotate_browser *ab, struct disasm_line *cursor)
+{
+ struct disasm_line *pos = list_prev_entry(cursor, node);
+ const char *name;
+
+ if (!pos)
+ return false;
+
+ if (ins__is_lock(&pos->ins))
+ name = pos->ops.locked.ins.name;
+ else
+ name = pos->ins.name;
+
+ if (!name || !cursor->ins.name)
+ return false;
+
+ return ins__is_fused(ab->arch, name, cursor->ins.name);
+}
+
static void annotate_browser__draw_current_jump(struct ui_browser *browser)
{
struct annotate_browser *ab = container_of(browser, struct annotate_browser, b);
@@ -308,6 +327,13 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser)
ui_browser__set_color(browser, HE_COLORSET_JUMP_ARROWS);
__ui_browser__line_arrow(browser, pcnt_width + 2 + ab->addr_width,
from, to);
+
+ if (is_fused(ab, cursor)) {
+ ui_browser__mark_fused(browser,
+ pcnt_width + 3 + ab->addr_width,
+ from - 1,
+ to > from ? true : false);
+ }
}
static unsigned int annotate_browser__refresh(struct ui_browser *browser)
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 8748ebb3f932..ef434b53d849 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -517,6 +517,11 @@ bool ins__is_ret(const struct ins *ins)
return ins->ops == &ret_ops;
}
+bool ins__is_lock(const struct ins *ins)
+{
+ return ins->ops == &lock_ops;
+}
+
static int ins__key_cmp(const void *name, const void *insp)
{
const struct ins *ins = insp;
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index 72d72728a0fc..bac698d7cc6a 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -52,6 +52,7 @@ struct ins_ops {
bool ins__is_jump(const struct ins *ins);
bool ins__is_call(const struct ins *ins);
bool ins__is_ret(const struct ins *ins);
+bool ins__is_lock(const struct ins *ins);
int ins__scnprintf(struct ins *ins, char *bf, size_t size, struct ins_operands *ops);
bool ins__is_fused(struct arch *arch, const char *ins1, const char *ins2);
--
2.9.4
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH 03/86] perf trace: Remove F_ from some of the fcntl command strings
2017-07-19 13:55 [GIT PULL 00/86] perf/core improvements and fixes Arnaldo Carvalho de Melo
2017-07-19 13:55 ` [PATCH 01/86] perf annotate: Check for fused instructions Arnaldo Carvalho de Melo
2017-07-19 13:55 ` [PATCH 02/86] perf annotate: Implement visual marker for macro fusion Arnaldo Carvalho de Melo
@ 2017-07-19 13:55 ` Arnaldo Carvalho de Melo
2017-07-19 13:55 ` [PATCH 04/86] perf trace: Beautify linux specific fcntl commands Arnaldo Carvalho de Melo
` (5 subsequent siblings)
8 siblings, 0 replies; 11+ messages in thread
From: Arnaldo Carvalho de Melo @ 2017-07-19 13:55 UTC (permalink / raw)
To: Ingo Molnar
Cc: linux-kernel, linux-perf-users, Arnaldo Carvalho de Melo,
Adrian Hunter, David Ahern, Jiri Olsa, Namhyung Kim, Wang Nan
From: Arnaldo Carvalho de Melo <acme@redhat.com>
The initial ones already had the "F_" prefix stripped to make things
shorter; some hadn't, so do it now.
We do this to make the 'perf trace' output more compact. At some point
perhaps the best thing to do is to have the tool do this stripping
automatically, letting the user also decide if this is to be done or
not. For now, be consistent.
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/n/tip-2iot106xkl8rgb0hb8zm3gq5@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
tools/perf/builtin-trace.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 4b2a5d298197..cfe1858ed074 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -407,9 +407,9 @@ static DEFINE_STRARRAY(whences);
static const char *fcntl_cmds[] = {
"DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
- "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
- "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
- "F_GETOWNER_UIDS",
+ "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "GETLK64",
+ "SETLK64", "SETLKW64", "SETOWN_EX", "GETOWN_EX",
+ "GETOWNER_UIDS",
};
static DEFINE_STRARRAY(fcntl_cmds);
--
2.9.4
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH 04/86] perf trace: Beautify linux specific fcntl commands
2017-07-19 13:55 [GIT PULL 00/86] perf/core improvements and fixes Arnaldo Carvalho de Melo
` (2 preceding siblings ...)
2017-07-19 13:55 ` [PATCH 03/86] perf trace: Remove F_ from some of the fcntl command strings Arnaldo Carvalho de Melo
@ 2017-07-19 13:55 ` Arnaldo Carvalho de Melo
2017-07-19 13:55 ` [PATCH 05/86] tools: Update include/uapi/linux/fcntl.h copy from the kernel Arnaldo Carvalho de Melo
` (4 subsequent siblings)
8 siblings, 0 replies; 11+ messages in thread
From: Arnaldo Carvalho de Melo @ 2017-07-19 13:55 UTC (permalink / raw)
To: Ingo Molnar
Cc: linux-kernel, linux-perf-users, Arnaldo Carvalho de Melo,
Adrian Hunter, David Ahern, Jiri Olsa, Namhyung Kim, Wang Nan
From: Arnaldo Carvalho de Melo <acme@redhat.com>
We were only beautifying (transforming from an integer to its name) the
non-linux specific fcntl syscall cmd args, fix it:
Before:
# perf trace -e fcntl -p 2472
0.000 ( 0.017 ms): gnome-terminal/2472 fcntl(fd: 55, cmd: 1030) = 56
^C#
After:
# trace -e fcntl -p 2472
0.000 ( 0.015 ms): gnome-terminal/2472 fcntl(fd: 55, cmd: DUPFD_CLOEXEC) = 56
^C#
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/n/tip-zigsxruk4wbfn8iylboy9wzo@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
tools/perf/builtin-trace.c | 57 +++++++++++++++++++++++++++++++++++++++++++---
1 file changed, 54 insertions(+), 3 deletions(-)
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index cfe1858ed074..431ef70067ed 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -64,6 +64,10 @@
# define O_CLOEXEC 02000000
#endif
+#ifndef F_LINUX_SPECIFIC_BASE
+# define F_LINUX_SPECIFIC_BASE 1024
+#endif
+
struct trace {
struct perf_tool tool;
struct syscalltbl *sctbl;
@@ -317,6 +321,38 @@ static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
#define SCA_STRARRAY syscall_arg__scnprintf_strarray
+struct strarrays {
+ int nr_entries;
+ struct strarray **entries;
+};
+
+#define DEFINE_STRARRAYS(array) struct strarrays strarrays__##array = { \
+ .nr_entries = ARRAY_SIZE(array), \
+ .entries = array, \
+}
+
+static size_t syscall_arg__scnprintf_strarrays(char *bf, size_t size,
+ struct syscall_arg *arg)
+{
+ struct strarrays *sas = arg->parm;
+ int i;
+
+ for (i = 0; i < sas->nr_entries; ++i) {
+ struct strarray *sa = sas->entries[i];
+ int idx = arg->val - sa->offset;
+
+ if (idx >= 0 && idx < sa->nr_entries) {
+ if (sa->entries[idx] == NULL)
+ break;
+ return scnprintf(bf, size, "%s", sa->entries[idx]);
+ }
+ }
+
+ return scnprintf(bf, size, "%d", arg->val);
+}
+
+#define SCA_STRARRAYS syscall_arg__scnprintf_strarrays
+
#if defined(__i386__) || defined(__x86_64__)
/*
* FIXME: Make this available to all arches as soon as the ioctl beautifier
@@ -413,6 +449,20 @@ static const char *fcntl_cmds[] = {
};
static DEFINE_STRARRAY(fcntl_cmds);
+static const char *fcntl_linux_specific_cmds[] = {
+ "SETLEASE", "GETLEASE", "NOTIFY", [5] = "CANCELLK", "DUPFD_CLOEXEC",
+ "SETPIPE_SZ", "GETPIPE_SZ", "ADD_SEALS", "GET_SEALS",
+};
+
+static DEFINE_STRARRAY_OFFSET(fcntl_linux_specific_cmds, F_LINUX_SPECIFIC_BASE);
+
+static struct strarray *fcntl_cmds_arrays[] = {
+ &strarray__fcntl_cmds,
+ &strarray__fcntl_linux_specific_cmds,
+};
+
+static DEFINE_STRARRAYS(fcntl_cmds_arrays);
+
static const char *rlimit_resources[] = {
"CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
"MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
@@ -613,8 +663,8 @@ static struct syscall_fmt {
{ .name = "fchownat", .errmsg = true,
.arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
{ .name = "fcntl", .errmsg = true,
- .arg_scnprintf = { [1] = SCA_STRARRAY, /* cmd */ },
- .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
+ .arg_scnprintf = { [1] = SCA_STRARRAYS, /* cmd */ },
+ .arg_parm = { [1] = &strarrays__fcntl_cmds_arrays, /* cmd */ }, },
{ .name = "fdatasync", .errmsg = true, },
{ .name = "flock", .errmsg = true,
.arg_scnprintf = { [1] = SCA_FLOCK, /* cmd */ }, },
@@ -1356,7 +1406,8 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
*/
if (val == 0 &&
!(sc->arg_scnprintf &&
- sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
+ (sc->arg_scnprintf[arg.idx] == SCA_STRARRAY ||
+ sc->arg_scnprintf[arg.idx] == SCA_STRARRAYS) &&
sc->arg_parm[arg.idx]))
continue;
--
2.9.4
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH 05/86] tools: Update include/uapi/linux/fcntl.h copy from the kernel
2017-07-19 13:55 [GIT PULL 00/86] perf/core improvements and fixes Arnaldo Carvalho de Melo
` (3 preceding siblings ...)
2017-07-19 13:55 ` [PATCH 04/86] perf trace: Beautify linux specific fcntl commands Arnaldo Carvalho de Melo
@ 2017-07-19 13:55 ` Arnaldo Carvalho de Melo
2017-07-19 13:55 ` [PATCH 06/86] perf trace beauty: Export the strarrays scnprintf method Arnaldo Carvalho de Melo
` (3 subsequent siblings)
8 siblings, 0 replies; 11+ messages in thread
From: Arnaldo Carvalho de Melo @ 2017-07-19 13:55 UTC (permalink / raw)
To: Ingo Molnar
Cc: linux-kernel, linux-perf-users, Arnaldo Carvalho de Melo,
Adrian Hunter, David Ahern, Jens Axboe, Jiri Olsa,
Martin K . Petersen, Namhyung Kim, Wang Nan
From: Arnaldo Carvalho de Melo <acme@redhat.com>
To get the changes in the commit c75b1d9421f8 ("fs: add fcntl()
interface for setting/getting write life time hints").
Silencing this perf build warning:
Warning: include/uapi/linux/fcntl.h differs from kernel
We already beautify the fcntl cmd argument, so an upcoming cset will
update the 'cmd' strarray to cover these new commands.
The hints are in the 3rd arg, a pointer, so they are not yet supported in
'perf trace'. For that we need to copy it somehow, probably using eBPF; a
new attempt at doing that is planned.
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Martin K. Petersen <martin.petersen@oracle.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/n/tip-al471wzs3x48alql0tm3mnfa@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
tools/include/uapi/linux/fcntl.h | 21 +++++++++++++++++++++
1 file changed, 21 insertions(+)
diff --git a/tools/include/uapi/linux/fcntl.h b/tools/include/uapi/linux/fcntl.h
index 813afd6eee71..ec69d55bcec7 100644
--- a/tools/include/uapi/linux/fcntl.h
+++ b/tools/include/uapi/linux/fcntl.h
@@ -43,6 +43,27 @@
/* (1U << 31) is reserved for signed error codes */
/*
+ * Set/Get write life time hints. {GET,SET}_RW_HINT operate on the
+ * underlying inode, while {GET,SET}_FILE_RW_HINT operate only on
+ * the specific file.
+ */
+#define F_GET_RW_HINT (F_LINUX_SPECIFIC_BASE + 11)
+#define F_SET_RW_HINT (F_LINUX_SPECIFIC_BASE + 12)
+#define F_GET_FILE_RW_HINT (F_LINUX_SPECIFIC_BASE + 13)
+#define F_SET_FILE_RW_HINT (F_LINUX_SPECIFIC_BASE + 14)
+
+/*
+ * Valid hint values for F_{GET,SET}_RW_HINT. 0 is "not set", or can be
+ * used to clear any hints previously set.
+ */
+#define RWF_WRITE_LIFE_NOT_SET 0
+#define RWH_WRITE_LIFE_NONE 1
+#define RWH_WRITE_LIFE_SHORT 2
+#define RWH_WRITE_LIFE_MEDIUM 3
+#define RWH_WRITE_LIFE_LONG 4
+#define RWH_WRITE_LIFE_EXTREME 5
+
+/*
* Types of directory notifications that may be requested.
*/
#define DN_ACCESS 0x00000001 /* File accessed */
--
2.9.4
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH 06/86] perf trace beauty: Export the strarrays scnprintf method
2017-07-19 13:55 [GIT PULL 00/86] perf/core improvements and fixes Arnaldo Carvalho de Melo
` (4 preceding siblings ...)
2017-07-19 13:55 ` [PATCH 05/86] tools: Update include/uapi/linux/fcntl.h copy from the kernel Arnaldo Carvalho de Melo
@ 2017-07-19 13:55 ` Arnaldo Carvalho de Melo
2017-07-19 13:55 ` [PATCH 07/86] perf trace: Only build tools/perf/trace/beauty/ when building 'perf trace' Arnaldo Carvalho de Melo
` (2 subsequent siblings)
8 siblings, 0 replies; 11+ messages in thread
From: Arnaldo Carvalho de Melo @ 2017-07-19 13:55 UTC (permalink / raw)
To: Ingo Molnar
Cc: linux-kernel, linux-perf-users, Arnaldo Carvalho de Melo,
Adrian Hunter, David Ahern, Jiri Olsa, Namhyung Kim, Wang Nan
From: Arnaldo Carvalho de Melo <acme@redhat.com>
We'll call it from the fcntl cmd scnprintf method, which needs to look
at the cmd to mask the next fcntl argument when it is ignored.
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/n/tip-fzlvkhew5vbxefneuciihgbc@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
tools/perf/builtin-trace.c | 6 ++----
tools/perf/trace/beauty/beauty.h | 3 +++
2 files changed, 5 insertions(+), 4 deletions(-)
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 431ef70067ed..ef1b1d4ea007 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -331,8 +331,8 @@ struct strarrays {
.entries = array, \
}
-static size_t syscall_arg__scnprintf_strarrays(char *bf, size_t size,
- struct syscall_arg *arg)
+size_t syscall_arg__scnprintf_strarrays(char *bf, size_t size,
+ struct syscall_arg *arg)
{
struct strarrays *sas = arg->parm;
int i;
@@ -351,8 +351,6 @@ static size_t syscall_arg__scnprintf_strarrays(char *bf, size_t size,
return scnprintf(bf, size, "%d", arg->val);
}
-#define SCA_STRARRAYS syscall_arg__scnprintf_strarrays
-
#if defined(__i386__) || defined(__x86_64__)
/*
* FIXME: Make this available to all arches as soon as the ioctl beautifier
diff --git a/tools/perf/trace/beauty/beauty.h b/tools/perf/trace/beauty/beauty.h
index cf50be3f17a4..a6348073a6e9 100644
--- a/tools/perf/trace/beauty/beauty.h
+++ b/tools/perf/trace/beauty/beauty.h
@@ -15,6 +15,9 @@ struct syscall_arg {
u8 mask;
};
+size_t syscall_arg__scnprintf_strarrays(char *bf, size_t size, struct syscall_arg *arg);
+#define SCA_STRARRAYS syscall_arg__scnprintf_strarrays
+
size_t syscall_arg__scnprintf_statx_flags(char *bf, size_t size, struct syscall_arg *arg);
#define SCA_STATX_FLAGS syscall_arg__scnprintf_statx_flags
--
2.9.4
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH 07/86] perf trace: Only build tools/perf/trace/beauty/ when building 'perf trace'
2017-07-19 13:55 [GIT PULL 00/86] perf/core improvements and fixes Arnaldo Carvalho de Melo
` (5 preceding siblings ...)
2017-07-19 13:55 ` [PATCH 06/86] perf trace beauty: Export the strarrays scnprintf method Arnaldo Carvalho de Melo
@ 2017-07-19 13:55 ` Arnaldo Carvalho de Melo
2017-07-19 13:55 ` [PATCH 08/86] perf trace beauty: Mask ignored fcntl 'arg' parameter Arnaldo Carvalho de Melo
2017-07-19 13:56 ` [PATCH 09/86] perf trace beauty: Allow accessing syscall args values in a syscall arg formatter Arnaldo Carvalho de Melo
8 siblings, 0 replies; 11+ messages in thread
From: Arnaldo Carvalho de Melo @ 2017-07-19 13:55 UTC (permalink / raw)
To: Ingo Molnar
Cc: linux-kernel, linux-perf-users, Arnaldo Carvalho de Melo,
Adrian Hunter, David Ahern, Jiri Olsa, Namhyung Kim, Wang Nan
From: Arnaldo Carvalho de Melo <acme@redhat.com>
As it calls functions in builtin-trace.c.
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/n/tip-bt3lhw1rvy3jzbsp2fvvegb0@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
tools/perf/Build | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tools/perf/Build b/tools/perf/Build
index bd8eeb60533c..b48ca40fccf9 100644
--- a/tools/perf/Build
+++ b/tools/perf/Build
@@ -50,6 +50,6 @@ libperf-y += util/
libperf-y += arch/
libperf-y += ui/
libperf-y += scripts/
-libperf-y += trace/beauty/
+libperf-$(CONFIG_AUDIT) += trace/beauty/
gtk-y += ui/gtk/
--
2.9.4
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH 08/86] perf trace beauty: Mask ignored fcntl 'arg' parameter
2017-07-19 13:55 [GIT PULL 00/86] perf/core improvements and fixes Arnaldo Carvalho de Melo
` (6 preceding siblings ...)
2017-07-19 13:55 ` [PATCH 07/86] perf trace: Only build tools/perf/trace/beauty/ when building 'perf trace' Arnaldo Carvalho de Melo
@ 2017-07-19 13:55 ` Arnaldo Carvalho de Melo
2017-07-19 13:56 ` [PATCH 09/86] perf trace beauty: Allow accessing syscall args values in a syscall arg formatter Arnaldo Carvalho de Melo
8 siblings, 0 replies; 11+ messages in thread
From: Arnaldo Carvalho de Melo @ 2017-07-19 13:55 UTC (permalink / raw)
To: Ingo Molnar
Cc: linux-kernel, linux-perf-users, Arnaldo Carvalho de Melo,
Adrian Hunter, David Ahern, Jiri Olsa, Namhyung Kim, Wang Nan
From: Arnaldo Carvalho de Melo <acme@redhat.com>
A series of fcntl cmds ignore the third argument, so mask it.
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/n/tip-6vtl3zq1tauamrhm8o380ptn@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
tools/perf/builtin-trace.c | 2 +-
tools/perf/trace/beauty/Build | 1 +
tools/perf/trace/beauty/beauty.h | 3 +++
tools/perf/trace/beauty/fcntl.c | 23 +++++++++++++++++++++++
4 files changed, 28 insertions(+), 1 deletion(-)
create mode 100644 tools/perf/trace/beauty/fcntl.c
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index ef1b1d4ea007..b7f79dea3c44 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -661,7 +661,7 @@ static struct syscall_fmt {
{ .name = "fchownat", .errmsg = true,
.arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
{ .name = "fcntl", .errmsg = true,
- .arg_scnprintf = { [1] = SCA_STRARRAYS, /* cmd */ },
+ .arg_scnprintf = { [1] = SCA_FCNTL_CMD, /* cmd */ },
.arg_parm = { [1] = &strarrays__fcntl_cmds_arrays, /* cmd */ }, },
{ .name = "fdatasync", .errmsg = true, },
{ .name = "flock", .errmsg = true,
diff --git a/tools/perf/trace/beauty/Build b/tools/perf/trace/beauty/Build
index be95ac6ce845..c9e215b806f1 100644
--- a/tools/perf/trace/beauty/Build
+++ b/tools/perf/trace/beauty/Build
@@ -1 +1,2 @@
+libperf-y += fcntl.o
libperf-y += statx.o
diff --git a/tools/perf/trace/beauty/beauty.h b/tools/perf/trace/beauty/beauty.h
index a6348073a6e9..ce01079d8422 100644
--- a/tools/perf/trace/beauty/beauty.h
+++ b/tools/perf/trace/beauty/beauty.h
@@ -18,6 +18,9 @@ struct syscall_arg {
size_t syscall_arg__scnprintf_strarrays(char *bf, size_t size, struct syscall_arg *arg);
#define SCA_STRARRAYS syscall_arg__scnprintf_strarrays
+size_t syscall_arg__scnprintf_fcntl_cmd(char *bf, size_t size, struct syscall_arg *arg);
+#define SCA_FCNTL_CMD syscall_arg__scnprintf_fcntl_cmd
+
size_t syscall_arg__scnprintf_statx_flags(char *bf, size_t size, struct syscall_arg *arg);
#define SCA_STATX_FLAGS syscall_arg__scnprintf_statx_flags
diff --git a/tools/perf/trace/beauty/fcntl.c b/tools/perf/trace/beauty/fcntl.c
new file mode 100644
index 000000000000..7e4582c9308e
--- /dev/null
+++ b/tools/perf/trace/beauty/fcntl.c
@@ -0,0 +1,23 @@
+/*
+ * trace/beauty/fcntl.c
+ *
+ * Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * Released under the GPL v2. (and only v2, not any later version)
+ */
+
+#include "trace/beauty/beauty.h"
+#include <uapi/linux/fcntl.h>
+
+size_t syscall_arg__scnprintf_fcntl_cmd(char *bf, size_t size, struct syscall_arg *arg)
+{
+ /*
+ * Some commands ignore the third fcntl argument, "arg", so mask it
+ */
+ if (arg->val == F_GETFD || arg->val == F_GETFL ||
+ arg->val == F_GETOWN || arg->val == F_GET_SEALS ||
+ arg->val == F_GETLEASE || arg->val == F_GETSIG)
+ arg->mask |= (1 << 2);
+
+ return syscall_arg__scnprintf_strarrays(bf, size, arg);
+}
--
2.9.4
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH 09/86] perf trace beauty: Allow accessing syscall args values in a syscall arg formatter
2017-07-19 13:55 [GIT PULL 00/86] perf/core improvements and fixes Arnaldo Carvalho de Melo
` (7 preceding siblings ...)
2017-07-19 13:55 ` [PATCH 08/86] perf trace beauty: Mask ignored fcntl 'arg' parameter Arnaldo Carvalho de Melo
@ 2017-07-19 13:56 ` Arnaldo Carvalho de Melo
8 siblings, 0 replies; 11+ messages in thread
From: Arnaldo Carvalho de Melo @ 2017-07-19 13:56 UTC (permalink / raw)
To: Ingo Molnar
Cc: linux-kernel, linux-perf-users, Arnaldo Carvalho de Melo,
Adrian Hunter, David Ahern, Jiri Olsa, Namhyung Kim, Wang Nan
From: Arnaldo Carvalho de Melo <acme@redhat.com>
For instance, fcntl's upcoming 'arg' formatter needs to look at the
'cmd' value to decide how to format its value: sometimes it is a set of
file flags, sometimes an fd, a pointer to a structure, etc.
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/n/tip-2tw2jfaqm48dtw8a4addghze@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
tools/perf/builtin-trace.c | 23 ++++++++++++++++-------
tools/perf/trace/beauty/beauty.h | 13 +++++++++++++
2 files changed, 29 insertions(+), 7 deletions(-)
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index b7f79dea3c44..40bc0a326096 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -1369,19 +1369,32 @@ static int trace__validate_ev_qualifier(struct trace *trace)
* variable to read it. Most notably this avoids extended load instructions
* on unaligned addresses
*/
+static unsigned long __syscall_arg__val(unsigned char *args, u8 idx)
+{
+ unsigned long val;
+ unsigned char *p = args + sizeof(unsigned long) * idx;
+
+ memcpy(&val, p, sizeof(val));
+ return val;
+}
+
+unsigned long syscall_arg__val(struct syscall_arg *arg, u8 idx)
+{
+ return __syscall_arg__val(arg->args, idx);
+}
static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
unsigned char *args, struct trace *trace,
struct thread *thread)
{
size_t printed = 0;
- unsigned char *p;
unsigned long val;
if (sc->args != NULL) {
struct format_field *field;
u8 bit = 1;
struct syscall_arg arg = {
+ .args = args,
.idx = 0,
.mask = 0,
.trace = trace,
@@ -1393,9 +1406,7 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
if (arg.mask & bit)
continue;
- /* special care for unaligned accesses */
- p = args + sizeof(unsigned long) * arg.idx;
- memcpy(&val, p, sizeof(val));
+ val = syscall_arg__val(&arg, arg.idx);
/*
* Suppress this argument if its value is zero and
@@ -1431,9 +1442,7 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
int i = 0;
while (i < 6) {
- /* special care for unaligned accesses */
- p = args + sizeof(unsigned long) * i;
- memcpy(&val, p, sizeof(val));
+ val = __syscall_arg__val(args, i);
printed += scnprintf(bf + printed, size - printed,
"%sarg%d: %ld",
printed ? ", " : "", i, val);
diff --git a/tools/perf/trace/beauty/beauty.h b/tools/perf/trace/beauty/beauty.h
index ce01079d8422..6fbac0c8120d 100644
--- a/tools/perf/trace/beauty/beauty.h
+++ b/tools/perf/trace/beauty/beauty.h
@@ -6,8 +6,19 @@
struct trace;
struct thread;
+/**
+ * @val: value of syscall argument being formatted
+ * @args: All the args, use syscall_arg__val(arg, nth) to access one
+ * @thread: tid state (maps, pid, tid, etc)
+ * @trace: 'perf trace' internals: all threads, etc
+ * @parm: private area, may be an strarray, for instance
+ * @idx: syscall arg idx (is this the first?)
+ * @mask: a syscall arg may mask another arg, see syscall_arg__scnprintf_futex_op
+ */
+
struct syscall_arg {
unsigned long val;
+ unsigned char *args;
struct thread *thread;
struct trace *trace;
void *parm;
@@ -15,6 +26,8 @@ struct syscall_arg {
u8 mask;
};
+unsigned long syscall_arg__val(struct syscall_arg *arg, u8 idx);
+
size_t syscall_arg__scnprintf_strarrays(char *bf, size_t size, struct syscall_arg *arg);
#define SCA_STRARRAYS syscall_arg__scnprintf_strarrays
--
2.9.4
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH 01/86] perf annotate: Check for fused instructions
2017-07-19 14:28 [GIT PULL 00/86] perf/core improvements and fixes Arnaldo Carvalho de Melo
@ 2017-07-19 14:28 ` Arnaldo Carvalho de Melo
0 siblings, 0 replies; 11+ messages in thread
From: Arnaldo Carvalho de Melo @ 2017-07-19 14:28 UTC (permalink / raw)
To: Ingo Molnar
Cc: linux-kernel, linux-perf-users, Jin Yao, Alexander Shishkin,
Andi Kleen, Jiri Olsa, Kan Liang, Peter Zijlstra,
Arnaldo Carvalho de Melo
From: Jin Yao <yao.jin@linux.intel.com>
Macro fusion merges two instructions into a single micro-op. Intel core
platforms perform this hardware optimization under limited
circumstances.
For example, CMP + JCC can be "fused" and executed/retired together.
With sampling, this can result in the sample sometimes being on the
JCC and sometimes on the CMP. So the two instructions of a fused pair
can be considered together.
On Nehalem, fused instruction pairs:
cmp/test + jcc.
On other, newer CPUs:
cmp/test/add/sub/and/inc/dec + jcc.
This patch adds an x86-specific function which checks if 2 instructions
are in a "fused" pair. For non-x86 archs, the function pointer is just NULL.
Changelog:
v4: Move the CPU model checking to symbol__disassemble and save the CPU
family/model in arch structure.
It avoids checking every time a jump arrow is printed.
v3: Add checking for Nehalem (CMP, TEST). For other newer Intel CPUs
just check it by default (CMP, TEST, ADD, SUB, AND, INC, DEC).
v2: Remove the original weak function. Arnaldo points out that doing it
as a weak function that will be overridden by the host arch doesn't
work. So now it's implemented as an arch-specific function.
Committer fix:
Do not access evsel->evlist->env->cpuid, ->env can be null, introduce
perf_evsel__env_cpuid(), just like perf_evsel__env_arch(), also used in
this function call.
The original patch was segfaulting 'perf top' + annotation.
But this essentially disables the fused instructions augmentation in
'perf top'; the right thing is to get the cpuid from the running kernel,
which is left for a later patch.
Signed-off-by: Yao Jin <yao.jin@linux.intel.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1499403995-19857-2-git-send-email-yao.jin@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
tools/perf/arch/x86/annotate/instructions.c | 46 +++++++++++++++++++++++++++++
tools/perf/builtin-top.c | 2 +-
tools/perf/ui/browsers/annotate.c | 4 ++-
tools/perf/ui/gtk/annotate.c | 2 +-
tools/perf/util/annotate.c | 22 ++++++++++++--
tools/perf/util/annotate.h | 3 +-
tools/perf/util/evsel.c | 7 +++++
tools/perf/util/evsel.h | 1 +
8 files changed, 81 insertions(+), 6 deletions(-)
diff --git a/tools/perf/arch/x86/annotate/instructions.c b/tools/perf/arch/x86/annotate/instructions.c
index c1625f256df3..d84b72063a30 100644
--- a/tools/perf/arch/x86/annotate/instructions.c
+++ b/tools/perf/arch/x86/annotate/instructions.c
@@ -76,3 +76,49 @@ static struct ins x86__instructions[] = {
{ .name = "xbeginq", .ops = &jump_ops, },
{ .name = "retq", .ops = &ret_ops, },
};
+
+static bool x86__ins_is_fused(struct arch *arch, const char *ins1,
+ const char *ins2)
+{
+ if (arch->family != 6 || arch->model < 0x1e || strstr(ins2, "jmp"))
+ return false;
+
+ if (arch->model == 0x1e) {
+ /* Nehalem */
+ if ((strstr(ins1, "cmp") && !strstr(ins1, "xchg")) ||
+ strstr(ins1, "test")) {
+ return true;
+ }
+ } else {
+ /* Newer platform */
+ if ((strstr(ins1, "cmp") && !strstr(ins1, "xchg")) ||
+ strstr(ins1, "test") ||
+ strstr(ins1, "add") ||
+ strstr(ins1, "sub") ||
+ strstr(ins1, "and") ||
+ strstr(ins1, "inc") ||
+ strstr(ins1, "dec")) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static int x86__cpuid_parse(struct arch *arch, char *cpuid)
+{
+ unsigned int family, model, stepping;
+ int ret;
+
+ /*
+ * cpuid = "GenuineIntel,family,model,stepping"
+ */
+ ret = sscanf(cpuid, "%*[^,],%u,%u,%u", &family, &model, &stepping);
+ if (ret == 3) {
+ arch->family = family;
+ arch->model = model;
+ return 0;
+ }
+
+ return -1;
+}
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 6052376634c0..022486dc67f5 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -134,7 +134,7 @@ static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he)
return err;
}
- err = symbol__disassemble(sym, map, NULL, 0, NULL);
+ err = symbol__disassemble(sym, map, NULL, 0, NULL, NULL);
if (err == 0) {
out_assign:
top->sym_filter_entry = he;
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index 27f41f28dcb4..c4336138b673 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -9,6 +9,7 @@
#include "../../util/symbol.h"
#include "../../util/evsel.h"
#include "../../util/config.h"
+#include "../../util/evlist.h"
#include <inttypes.h>
#include <pthread.h>
#include <linux/kernel.h>
@@ -1074,7 +1075,8 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map,
}
err = symbol__disassemble(sym, map, perf_evsel__env_arch(evsel),
- sizeof_bdl, &browser.arch);
+ sizeof_bdl, &browser.arch,
+ perf_evsel__env_cpuid(evsel));
if (err) {
char msg[BUFSIZ];
symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg));
diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c
index d903fd493416..87e3760624f2 100644
--- a/tools/perf/ui/gtk/annotate.c
+++ b/tools/perf/ui/gtk/annotate.c
@@ -169,7 +169,7 @@ static int symbol__gtk_annotate(struct symbol *sym, struct map *map,
return -1;
err = symbol__disassemble(sym, map, perf_evsel__env_arch(evsel),
- 0, NULL);
+ 0, NULL, NULL);
if (err) {
char msg[BUFSIZ];
symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg));
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index be1caabb9290..8748ebb3f932 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -47,7 +47,12 @@ struct arch {
bool sorted_instructions;
bool initialized;
void *priv;
+ unsigned int model;
+ unsigned int family;
int (*init)(struct arch *arch);
+ bool (*ins_is_fused)(struct arch *arch, const char *ins1,
+ const char *ins2);
+ int (*cpuid_parse)(struct arch *arch, char *cpuid);
struct {
char comment_char;
char skip_functions_char;
@@ -129,6 +134,8 @@ static struct arch architectures[] = {
.name = "x86",
.instructions = x86__instructions,
.nr_instructions = ARRAY_SIZE(x86__instructions),
+ .ins_is_fused = x86__ins_is_fused,
+ .cpuid_parse = x86__cpuid_parse,
.objdump = {
.comment_char = '#',
},
@@ -171,6 +178,14 @@ int ins__scnprintf(struct ins *ins, char *bf, size_t size,
return ins__raw_scnprintf(ins, bf, size, ops);
}
+bool ins__is_fused(struct arch *arch, const char *ins1, const char *ins2)
+{
+ if (!arch || !arch->ins_is_fused)
+ return false;
+
+ return arch->ins_is_fused(arch, ins1, ins2);
+}
+
static int call__parse(struct arch *arch, struct ins_operands *ops, struct map *map)
{
char *endptr, *tok, *name;
@@ -1381,7 +1396,7 @@ static const char *annotate__norm_arch(const char *arch_name)
int symbol__disassemble(struct symbol *sym, struct map *map,
const char *arch_name, size_t privsize,
- struct arch **parch)
+ struct arch **parch, char *cpuid)
{
struct dso *dso = map->dso;
char command[PATH_MAX * 2];
@@ -1418,6 +1433,9 @@ int symbol__disassemble(struct symbol *sym, struct map *map,
}
}
+ if (arch->cpuid_parse && cpuid)
+ arch->cpuid_parse(arch, cpuid);
+
pr_debug("%s: filename=%s, sym=%s, start=%#" PRIx64 ", end=%#" PRIx64 "\n", __func__,
symfs_filename, sym->name, map->unmap_ip(map, sym->start),
map->unmap_ip(map, sym->end));
@@ -1907,7 +1925,7 @@ int symbol__tty_annotate(struct symbol *sym, struct map *map,
u64 len;
if (symbol__disassemble(sym, map, perf_evsel__env_arch(evsel),
- 0, NULL) < 0)
+ 0, NULL, NULL) < 0)
return -1;
len = symbol__size(sym);
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index 21055034aedd..72d72728a0fc 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -53,6 +53,7 @@ bool ins__is_jump(const struct ins *ins);
bool ins__is_call(const struct ins *ins);
bool ins__is_ret(const struct ins *ins);
int ins__scnprintf(struct ins *ins, char *bf, size_t size, struct ins_operands *ops);
+bool ins__is_fused(struct arch *arch, const char *ins1, const char *ins2);
struct annotation;
@@ -160,7 +161,7 @@ void symbol__annotate_zero_histograms(struct symbol *sym);
int symbol__disassemble(struct symbol *sym, struct map *map,
const char *arch_name, size_t privsize,
- struct arch **parch);
+ struct arch **parch, char *cpuid);
enum symbol_disassemble_errno {
SYMBOL_ANNOTATE_ERRNO__SUCCESS = 0,
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 413f74df08de..0e4cd6092564 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -2610,3 +2610,10 @@ char *perf_evsel__env_arch(struct perf_evsel *evsel)
return evsel->evlist->env->arch;
return NULL;
}
+
+char *perf_evsel__env_cpuid(struct perf_evsel *evsel)
+{
+ if (evsel && evsel->evlist && evsel->evlist->env)
+ return evsel->evlist->env->cpuid;
+ return NULL;
+}
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index d101695c482c..219ad0cdb9f4 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -436,5 +436,6 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
attr__fprintf_f attr__fprintf, void *priv);
char *perf_evsel__env_arch(struct perf_evsel *evsel);
+char *perf_evsel__env_cpuid(struct perf_evsel *evsel);
#endif /* __PERF_EVSEL_H */
--
2.9.4
^ permalink raw reply related [flat|nested] 11+ messages in thread