* [PATCH 0/4] perf trace: Add --system-summary option
@ 2025-01-14 21:22 Namhyung Kim
2025-01-14 21:22 ` [PATCH 1/4] perf trace: Allocate syscall stats only if summary is on Namhyung Kim
` (3 more replies)
0 siblings, 4 replies; 9+ messages in thread
From: Namhyung Kim @ 2025-01-14 21:22 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Ian Rogers, Kan Liang
Cc: Jiri Olsa, Adrian Hunter, Peter Zijlstra, Ingo Molnar, LKML,
linux-perf-users, Howard Chu
Hello,
I've realized that perf trace shows system call summary at the end for
each thread. But sometimes users want to see the global whole system
summary or statistics instead.
So I've added --system-summary option like below:
$ sudo ./perf trace -as --system-summary sleep 1
Summary of events:
total, 21580 events
syscall calls errors total min avg max stddev
(msec) (msec) (msec) (msec) (%)
--------------- -------- ------ -------- --------- --------- --------- ------
epoll_wait 1305 0 14716.712 0.000 11.277 551.529 8.87%
futex 1256 89 13331.197 0.000 10.614 733.722 15.49%
poll 669 0 6806.618 0.000 10.174 459.316 11.77%
ppoll 220 0 3968.797 0.000 18.040 516.775 25.35%
clock_nanosleep 1 0 1000.027 1000.027 1000.027 1000.027 0.00%
epoll_pwait 21 0 592.783 0.000 28.228 522.293 88.29%
nanosleep 16 0 60.515 0.000 3.782 10.123 33.33%
ioctl 510 0 4.284 0.001 0.008 0.182 8.84%
recvmsg 1434 775 3.497 0.001 0.002 0.174 6.37%
write 1393 0 2.854 0.001 0.002 0.017 1.79%
read 1063 100 2.236 0.000 0.002 0.083 5.11%
...
This patchset is on top of (but not necessarily depends on) my system
call sampling patch.
https://lore.kernel.org/r/20241219183400.350308-1-namhyung@kernel.org
Thanks,
Namhyung
Namhyung Kim (4):
perf trace: Allocate syscall stats only if summary is on
perf trace: Convert syscall_stats to hashmap
perf tools: Get rid of now-unused rb_resort.h
perf trace: Add --system-summary option
tools/perf/Documentation/perf-trace.txt | 3 +
tools/perf/builtin-trace.c | 222 +++++++++++++++++++-----
tools/perf/util/rb_resort.h | 146 ----------------
3 files changed, 178 insertions(+), 193 deletions(-)
delete mode 100644 tools/perf/util/rb_resort.h
--
2.48.0.rc2.279.g1de40edade-goog
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH 1/4] perf trace: Allocate syscall stats only if summary is on
2025-01-14 21:22 [PATCH 0/4] perf trace: Add --system-summary option Namhyung Kim
@ 2025-01-14 21:22 ` Namhyung Kim
2025-01-14 21:22 ` [PATCH 2/4] perf trace: Convert syscall_stats to hashmap Namhyung Kim
` (2 subsequent siblings)
3 siblings, 0 replies; 9+ messages in thread
From: Namhyung Kim @ 2025-01-14 21:22 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Ian Rogers, Kan Liang
Cc: Jiri Olsa, Adrian Hunter, Peter Zijlstra, Ingo Molnar, LKML,
linux-perf-users, Howard Chu
The syscall stats are used only when summary is requested. Let's avoid
unnecessary operations. Pass 'trace' pointer to check summary and give
output file together.
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
tools/perf/builtin-trace.c | 21 +++++++++++----------
1 file changed, 11 insertions(+), 10 deletions(-)
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 40dd2dd1320d37a9..0691e817c15b4136 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -1523,13 +1523,14 @@ struct thread_trace {
struct intlist *syscall_stats;
};
-static struct thread_trace *thread_trace__new(void)
+static struct thread_trace *thread_trace__new(struct trace *trace)
{
struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
if (ttrace) {
ttrace->files.max = -1;
- ttrace->syscall_stats = intlist__new(NULL);
+ if (trace->summary)
+ ttrace->syscall_stats = intlist__new(NULL);
}
return ttrace;
@@ -1551,7 +1552,7 @@ static void thread_trace__delete(void *pttrace)
free(ttrace);
}
-static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
+static struct thread_trace *thread__trace(struct thread *thread, struct trace *trace)
{
struct thread_trace *ttrace;
@@ -1559,7 +1560,7 @@ static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
goto fail;
if (thread__priv(thread) == NULL)
- thread__set_priv(thread, thread_trace__new());
+ thread__set_priv(thread, thread_trace__new(trace));
if (thread__priv(thread) == NULL)
goto fail;
@@ -1569,7 +1570,7 @@ static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
return ttrace;
fail:
- color_fprintf(fp, PERF_COLOR_RED,
+ color_fprintf(trace->output, PERF_COLOR_RED,
"WARNING: not enough memory, dropping samples!\n");
return NULL;
}
@@ -2623,7 +2624,7 @@ static int trace__sys_enter(struct trace *trace, struct evsel *evsel,
return -1;
thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
- ttrace = thread__trace(thread, trace->output);
+ ttrace = thread__trace(thread, trace);
if (ttrace == NULL)
goto out_put;
@@ -2700,7 +2701,7 @@ static int trace__fprintf_sys_enter(struct trace *trace, struct evsel *evsel,
return -1;
thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
- ttrace = thread__trace(thread, trace->output);
+ ttrace = thread__trace(thread, trace);
/*
* We need to get ttrace just to make sure it is there when syscall__scnprintf_args()
* and the rest of the beautifiers accessing it via struct syscall_arg touches it.
@@ -2772,7 +2773,7 @@ static int trace__sys_exit(struct trace *trace, struct evsel *evsel,
return -1;
thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
- ttrace = thread__trace(thread, trace->output);
+ ttrace = thread__trace(thread, trace);
if (ttrace == NULL)
goto out_put;
@@ -2961,7 +2962,7 @@ static int trace__sched_stat_runtime(struct trace *trace, struct evsel *evsel,
struct thread *thread = machine__findnew_thread(trace->host,
sample->pid,
sample->tid);
- struct thread_trace *ttrace = thread__trace(thread, trace->output);
+ struct thread_trace *ttrace = thread__trace(thread, trace);
if (ttrace == NULL)
goto out_dump;
@@ -3215,7 +3216,7 @@ static int trace__pgfault(struct trace *trace,
}
}
- ttrace = thread__trace(thread, trace->output);
+ ttrace = thread__trace(thread, trace);
if (ttrace == NULL)
goto out_put;
--
2.48.0.rc2.279.g1de40edade-goog
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH 2/4] perf trace: Convert syscall_stats to hashmap
2025-01-14 21:22 [PATCH 0/4] perf trace: Add --system-summary option Namhyung Kim
2025-01-14 21:22 ` [PATCH 1/4] perf trace: Allocate syscall stats only if summary is on Namhyung Kim
@ 2025-01-14 21:22 ` Namhyung Kim
2025-01-18 3:03 ` Howard Chu
2025-01-14 21:22 ` [PATCH 3/4] perf tools: Get rid of now-unused rb_resort.h Namhyung Kim
2025-01-14 21:22 ` [PATCH 4/4] perf trace: Add --system-summary option Namhyung Kim
3 siblings, 1 reply; 9+ messages in thread
From: Namhyung Kim @ 2025-01-14 21:22 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Ian Rogers, Kan Liang
Cc: Jiri Olsa, Adrian Hunter, Peter Zijlstra, Ingo Molnar, LKML,
linux-perf-users, Howard Chu
It was using a RBtree-based int-list as a hash and a custome resort
logic for that. As we have hashmap, let's convert to it and add a
sorting function using an array. It's also to prepare supporting
system-wide syscall stats.
No functional changes intended.
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
tools/perf/builtin-trace.c | 121 ++++++++++++++++++++++++++++---------
1 file changed, 92 insertions(+), 29 deletions(-)
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 0691e817c15b4136..ad7f7fcd0d80b9df 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -39,6 +39,7 @@
#include "util/synthetic-events.h"
#include "util/evlist.h"
#include "util/evswitch.h"
+#include "util/hashmap.h"
#include "util/mmap.h"
#include <subcmd/pager.h>
#include <subcmd/exec-cmd.h>
@@ -63,7 +64,6 @@
#include "print_binary.h"
#include "string2.h"
#include "syscalltbl.h"
-#include "rb_resort.h"
#include "../perf.h"
#include "trace_augment.h"
@@ -1520,17 +1520,55 @@ struct thread_trace {
struct file *table;
} files;
- struct intlist *syscall_stats;
+ struct hashmap *syscall_stats;
};
+static size_t id_hash(long key, void *ctx __maybe_unused)
+{
+ return key;
+}
+
+static bool id_equal(long key1, long key2, void *ctx __maybe_unused)
+{
+ return key1 == key2;
+}
+
+static struct hashmap *alloc_syscall_stats(void)
+{
+ struct hashmap *stats = hashmap__new(id_hash, id_equal, NULL);
+
+ /* just for simpler error check */
+ if (IS_ERR_OR_NULL(stats))
+ stats = NULL;
+ return stats;
+}
+
+static void delete_syscall_stats(struct hashmap *syscall_stats)
+{
+ struct hashmap_entry *pos;
+ size_t bkt;
+
+ if (syscall_stats == NULL)
+ return;
+
+ hashmap__for_each_entry(syscall_stats, pos, bkt)
+ free(pos->pvalue);
+ hashmap__free(syscall_stats);
+}
+
static struct thread_trace *thread_trace__new(struct trace *trace)
{
struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
if (ttrace) {
ttrace->files.max = -1;
- if (trace->summary)
- ttrace->syscall_stats = intlist__new(NULL);
+ if (trace->summary) {
+ ttrace->syscall_stats = alloc_syscall_stats();
+ if (ttrace->syscall_stats == NULL) {
+ free(ttrace);
+ ttrace = NULL;
+ }
+ }
}
return ttrace;
@@ -1545,7 +1583,7 @@ static void thread_trace__delete(void *pttrace)
if (!ttrace)
return;
- intlist__delete(ttrace->syscall_stats);
+ delete_syscall_stats(ttrace->syscall_stats);
ttrace->syscall_stats = NULL;
thread_trace__free_files(ttrace);
zfree(&ttrace->entry_str);
@@ -2464,22 +2502,19 @@ struct syscall_stats {
static void thread__update_stats(struct thread *thread, struct thread_trace *ttrace,
int id, struct perf_sample *sample, long err, bool errno_summary)
{
- struct int_node *inode;
- struct syscall_stats *stats;
+ struct syscall_stats *stats = NULL;
u64 duration = 0;
- inode = intlist__findnew(ttrace->syscall_stats, id);
- if (inode == NULL)
- return;
-
- stats = inode->priv;
- if (stats == NULL) {
+ if (!hashmap__find(ttrace->syscall_stats, id, &stats)) {
stats = zalloc(sizeof(*stats));
if (stats == NULL)
return;
init_stats(&stats->stats);
- inode->priv = stats;
+ if (hashmap__add(ttrace->syscall_stats, id, stats) < 0) {
+ free(stats);
+ return;
+ }
}
if (ttrace->entry_time && sample->time > ttrace->entry_time)
@@ -4618,18 +4653,45 @@ static size_t trace__fprintf_threads_header(FILE *fp)
return printed;
}
-DEFINE_RESORT_RB(syscall_stats, a->msecs > b->msecs,
+struct syscall_entry {
struct syscall_stats *stats;
double msecs;
int syscall;
-)
+};
+
+static int entry_cmp(const void *e1, const void *e2)
+{
+ const struct syscall_entry *entry1 = e1;
+ const struct syscall_entry *entry2 = e2;
+
+ return entry1->msecs > entry2->msecs ? -1 : 1;
+}
+
+static struct syscall_entry *thread__sort_stats(struct thread_trace *ttrace)
{
- struct int_node *source = rb_entry(nd, struct int_node, rb_node);
- struct syscall_stats *stats = source->priv;
+ struct syscall_entry *entry;
+ struct hashmap_entry *pos;
+ unsigned bkt, i, nr;
+
+ nr = ttrace->syscall_stats->sz;
+ entry = malloc(nr * sizeof(*entry));
+ if (entry == NULL)
+ return NULL;
- entry->syscall = source->i;
- entry->stats = stats;
- entry->msecs = stats ? (u64)stats->stats.n * (avg_stats(&stats->stats) / NSEC_PER_MSEC) : 0;
+ i = 0;
+ hashmap__for_each_entry(ttrace->syscall_stats, pos, bkt) {
+ struct syscall_stats *ss = pos->pvalue;
+ struct stats *st = &ss->stats;
+
+ entry[i].stats = ss;
+ entry[i].msecs = (u64)st->n * (avg_stats(st) / NSEC_PER_MSEC);
+ entry[i].syscall = pos->key;
+ i++;
+ }
+ assert(i == nr);
+
+ qsort(entry, nr, sizeof(*entry), entry_cmp);
+ return entry;
}
static size_t thread__dump_stats(struct thread_trace *ttrace,
@@ -4637,10 +4699,10 @@ static size_t thread__dump_stats(struct thread_trace *ttrace,
{
size_t printed = 0;
struct syscall *sc;
- struct rb_node *nd;
- DECLARE_RESORT_RB_INTLIST(syscall_stats, ttrace->syscall_stats);
+ struct syscall_entry *entries;
- if (syscall_stats == NULL)
+ entries = thread__sort_stats(ttrace);
+ if (entries == NULL)
return 0;
printed += fprintf(fp, "\n");
@@ -4649,8 +4711,9 @@ static size_t thread__dump_stats(struct thread_trace *ttrace,
printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n");
printed += fprintf(fp, " --------------- -------- ------ -------- --------- --------- --------- ------\n");
- resort_rb__for_each_entry(nd, syscall_stats) {
- struct syscall_stats *stats = syscall_stats_entry->stats;
+ for (size_t i = 0; i < ttrace->syscall_stats->sz; i++) {
+ struct syscall_entry *entry = &entries[i];
+ struct syscall_stats *stats = entry->stats;
if (stats) {
double min = (double)(stats->stats.min) / NSEC_PER_MSEC;
double max = (double)(stats->stats.max) / NSEC_PER_MSEC;
@@ -4661,10 +4724,10 @@ static size_t thread__dump_stats(struct thread_trace *ttrace,
pct = avg ? 100.0 * stddev_stats(&stats->stats) / avg : 0.0;
avg /= NSEC_PER_MSEC;
- sc = &trace->syscalls.table[syscall_stats_entry->syscall];
+ sc = &trace->syscalls.table[entry->syscall];
printed += fprintf(fp, " %-15s", sc->name);
printed += fprintf(fp, " %8" PRIu64 " %6" PRIu64 " %9.3f %9.3f %9.3f",
- n, stats->nr_failures, syscall_stats_entry->msecs, min, avg);
+ n, stats->nr_failures, entry->msecs, min, avg);
printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
if (trace->errno_summary && stats->nr_failures) {
@@ -4678,7 +4741,7 @@ static size_t thread__dump_stats(struct thread_trace *ttrace,
}
}
- resort_rb__delete(syscall_stats);
+ free(entries);
printed += fprintf(fp, "\n\n");
return printed;
--
2.48.0.rc2.279.g1de40edade-goog
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH 3/4] perf tools: Get rid of now-unused rb_resort.h
2025-01-14 21:22 [PATCH 0/4] perf trace: Add --system-summary option Namhyung Kim
2025-01-14 21:22 ` [PATCH 1/4] perf trace: Allocate syscall stats only if summary is on Namhyung Kim
2025-01-14 21:22 ` [PATCH 2/4] perf trace: Convert syscall_stats to hashmap Namhyung Kim
@ 2025-01-14 21:22 ` Namhyung Kim
2025-01-14 21:22 ` [PATCH 4/4] perf trace: Add --system-summary option Namhyung Kim
3 siblings, 0 replies; 9+ messages in thread
From: Namhyung Kim @ 2025-01-14 21:22 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Ian Rogers, Kan Liang
Cc: Jiri Olsa, Adrian Hunter, Peter Zijlstra, Ingo Molnar, LKML,
linux-perf-users, Howard Chu
It was only used in perf trace and it switched to use hashmap instead.
Let's delete the code.
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
tools/perf/util/rb_resort.h | 146 ------------------------------------
1 file changed, 146 deletions(-)
delete mode 100644 tools/perf/util/rb_resort.h
diff --git a/tools/perf/util/rb_resort.h b/tools/perf/util/rb_resort.h
deleted file mode 100644
index f4aaee33fe70dd81..0000000000000000
--- a/tools/perf/util/rb_resort.h
+++ /dev/null
@@ -1,146 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _PERF_RESORT_RB_H_
-#define _PERF_RESORT_RB_H_
-/*
- * Template for creating a class to resort an existing rb_tree according to
- * a new sort criteria, that must be present in the entries of the source
- * rb_tree.
- *
- * (c) 2016 Arnaldo Carvalho de Melo <acme@redhat.com>
- *
- * Quick example, resorting threads by its shortname:
- *
- * First define the prefix (threads) to be used for the functions and data
- * structures created, and provide an expression for the sorting, then the
- * fields to be present in each of the entries in the new, sorted, rb_tree.
- *
- * The body of the init function should collect the fields, maybe
- * pre-calculating them from multiple entries in the original 'entry' from
- * the rb_tree used as a source for the entries to be sorted:
-
-DEFINE_RB_RESORT_RB(threads, strcmp(a->thread->shortname,
- b->thread->shortname) < 0,
- struct thread *thread;
-)
-{
- entry->thread = rb_entry(nd, struct thread, rb_node);
-}
-
- * After this it is just a matter of instantiating it and iterating it,
- * for a few data structures with existing rb_trees, such as 'struct machine',
- * helpers are available to get the rb_root and the nr_entries:
-
- DECLARE_RESORT_RB_MACHINE_THREADS(threads, machine_ptr);
-
- * This will instantiate the new rb_tree and a cursor for it, that can be used as:
-
- struct rb_node *nd;
-
- resort_rb__for_each_entry(nd, threads) {
- struct thread *t = threads_entry;
- printf("%s: %d\n", t->shortname, t->tid);
- }
-
- * Then delete it:
-
- resort_rb__delete(threads);
-
- * The name of the data structures and functions will have a _sorted suffix
- * right before the method names, i.e. will look like:
- *
- * struct threads_sorted_entry {}
- * threads_sorted__insert()
- */
-
-#define DEFINE_RESORT_RB(__name, __comp, ...) \
-struct __name##_sorted_entry { \
- struct rb_node rb_node; \
- __VA_ARGS__ \
-}; \
-static void __name##_sorted__init_entry(struct rb_node *nd, \
- struct __name##_sorted_entry *entry); \
- \
-static int __name##_sorted__cmp(struct rb_node *nda, struct rb_node *ndb) \
-{ \
- struct __name##_sorted_entry *a, *b; \
- a = rb_entry(nda, struct __name##_sorted_entry, rb_node); \
- b = rb_entry(ndb, struct __name##_sorted_entry, rb_node); \
- return __comp; \
-} \
- \
-struct __name##_sorted { \
- struct rb_root entries; \
- struct __name##_sorted_entry nd[0]; \
-}; \
- \
-static void __name##_sorted__insert(struct __name##_sorted *sorted, \
- struct rb_node *sorted_nd) \
-{ \
- struct rb_node **p = &sorted->entries.rb_node, *parent = NULL; \
- while (*p != NULL) { \
- parent = *p; \
- if (__name##_sorted__cmp(sorted_nd, parent)) \
- p = &(*p)->rb_left; \
- else \
- p = &(*p)->rb_right; \
- } \
- rb_link_node(sorted_nd, parent, p); \
- rb_insert_color(sorted_nd, &sorted->entries); \
-} \
- \
-static void __name##_sorted__sort(struct __name##_sorted *sorted, \
- struct rb_root *entries) \
-{ \
- struct rb_node *nd; \
- unsigned int i = 0; \
- for (nd = rb_first(entries); nd; nd = rb_next(nd)) { \
- struct __name##_sorted_entry *snd = &sorted->nd[i++]; \
- __name##_sorted__init_entry(nd, snd); \
- __name##_sorted__insert(sorted, &snd->rb_node); \
- } \
-} \
- \
-static struct __name##_sorted *__name##_sorted__new(struct rb_root *entries, \
- int nr_entries) \
-{ \
- struct __name##_sorted *sorted; \
- sorted = malloc(sizeof(*sorted) + sizeof(sorted->nd[0]) * nr_entries); \
- if (sorted) { \
- sorted->entries = RB_ROOT; \
- __name##_sorted__sort(sorted, entries); \
- } \
- return sorted; \
-} \
- \
-static void __name##_sorted__delete(struct __name##_sorted *sorted) \
-{ \
- free(sorted); \
-} \
- \
-static void __name##_sorted__init_entry(struct rb_node *nd, \
- struct __name##_sorted_entry *entry)
-
-#define DECLARE_RESORT_RB(__name) \
-struct __name##_sorted_entry *__name##_entry; \
-struct __name##_sorted *__name = __name##_sorted__new
-
-#define resort_rb__for_each_entry(__nd, __name) \
- for (__nd = rb_first(&__name->entries); \
- __nd && (__name##_entry = rb_entry(__nd, struct __name##_sorted_entry, \
- rb_node)); \
- __nd = rb_next(__nd))
-
-#define resort_rb__delete(__name) \
- __name##_sorted__delete(__name), __name = NULL
-
-/*
- * Helpers for other classes that contains both an rbtree and the
- * number of entries in it:
- */
-
-/* For 'struct intlist' */
-#define DECLARE_RESORT_RB_INTLIST(__name, __ilist) \
- DECLARE_RESORT_RB(__name)(&__ilist->rblist.entries.rb_root, \
- __ilist->rblist.nr_entries)
-
-#endif /* _PERF_RESORT_RB_H_ */
--
2.48.0.rc2.279.g1de40edade-goog
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH 4/4] perf trace: Add --system-summary option
2025-01-14 21:22 [PATCH 0/4] perf trace: Add --system-summary option Namhyung Kim
` (2 preceding siblings ...)
2025-01-14 21:22 ` [PATCH 3/4] perf tools: Get rid of now-unused rb_resort.h Namhyung Kim
@ 2025-01-14 21:22 ` Namhyung Kim
2025-01-18 3:07 ` Howard Chu
3 siblings, 1 reply; 9+ messages in thread
From: Namhyung Kim @ 2025-01-14 21:22 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Ian Rogers, Kan Liang
Cc: Jiri Olsa, Adrian Hunter, Peter Zijlstra, Ingo Molnar, LKML,
linux-perf-users, Howard Chu
$ sudo ./perf trace -as --system-summary sleep 1
Summary of events:
total, 21580 events
syscall calls errors total min avg max stddev
(msec) (msec) (msec) (msec) (%)
--------------- -------- ------ -------- --------- --------- --------- ------
epoll_wait 1305 0 14716.712 0.000 11.277 551.529 8.87%
futex 1256 89 13331.197 0.000 10.614 733.722 15.49%
poll 669 0 6806.618 0.000 10.174 459.316 11.77%
ppoll 220 0 3968.797 0.000 18.040 516.775 25.35%
clock_nanosleep 1 0 1000.027 1000.027 1000.027 1000.027 0.00%
epoll_pwait 21 0 592.783 0.000 28.228 522.293 88.29%
nanosleep 16 0 60.515 0.000 3.782 10.123 33.33%
ioctl 510 0 4.284 0.001 0.008 0.182 8.84%
recvmsg 1434 775 3.497 0.001 0.002 0.174 6.37%
write 1393 0 2.854 0.001 0.002 0.017 1.79%
read 1063 100 2.236 0.000 0.002 0.083 5.11%
...
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
tools/perf/Documentation/perf-trace.txt | 3 +
tools/perf/builtin-trace.c | 100 +++++++++++++++++++-----
2 files changed, 85 insertions(+), 18 deletions(-)
diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt
index 2620c4f4601c8701..72f29f82649e320a 100644
--- a/tools/perf/Documentation/perf-trace.txt
+++ b/tools/perf/Documentation/perf-trace.txt
@@ -150,6 +150,9 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs.
To be used with -s or -S, to show stats for the errnos experienced by
syscalls, using only this option will trigger --summary.
+--system-summary::
+ To be used with -s or -S, to show system-wide summary instead of per-thread.
+
--tool_stats::
Show tool stats such as number of times fd->pathname was discovered thru
hooking the open syscall return + vfs_getname or via reading /proc/pid/fd, etc.
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index ad7f7fcd0d80b9df..00a20052f676847c 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -177,8 +177,10 @@ struct trace {
pid_t *entries;
struct bpf_map *map;
} filter_pids;
+ struct hashmap *syscall_stats;
double duration_filter;
double runtime_ms;
+ unsigned long pfmaj, pfmin;
struct {
u64 vfs_getname,
proc_getname;
@@ -198,6 +200,7 @@ struct trace {
bool summary;
bool summary_only;
bool errno_summary;
+ bool system_summary;
bool failure_only;
bool show_comm;
bool print_sample;
@@ -2500,18 +2503,23 @@ struct syscall_stats {
};
static void thread__update_stats(struct thread *thread, struct thread_trace *ttrace,
- int id, struct perf_sample *sample, long err, bool errno_summary)
+ int id, struct perf_sample *sample, long err,
+ struct trace *trace)
{
+ struct hashmap *syscall_stats = ttrace->syscall_stats;
struct syscall_stats *stats = NULL;
u64 duration = 0;
- if (!hashmap__find(ttrace->syscall_stats, id, &stats)) {
+ if (trace->system_summary)
+ syscall_stats = trace->syscall_stats;
+
+ if (!hashmap__find(syscall_stats, id, &stats)) {
stats = zalloc(sizeof(*stats));
if (stats == NULL)
return;
init_stats(&stats->stats);
- if (hashmap__add(ttrace->syscall_stats, id, stats) < 0) {
+ if (hashmap__add(syscall_stats, id, stats) < 0) {
free(stats);
return;
}
@@ -2525,7 +2533,7 @@ static void thread__update_stats(struct thread *thread, struct thread_trace *ttr
if (err < 0) {
++stats->nr_failures;
- if (!errno_summary)
+ if (!trace->errno_summary)
return;
err = -err;
@@ -2817,7 +2825,7 @@ static int trace__sys_exit(struct trace *trace, struct evsel *evsel,
ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
if (trace->summary)
- thread__update_stats(thread, ttrace, id, sample, ret, trace->errno_summary);
+ thread__update_stats(thread, ttrace, id, sample, ret, trace);
if (!trace->fd_path_disabled && sc->is_open && ret >= 0 && ttrace->filename.pending_open) {
trace__set_fd_pathname(thread, ret, ttrace->filename.name);
@@ -3255,10 +3263,13 @@ static int trace__pgfault(struct trace *trace,
if (ttrace == NULL)
goto out_put;
- if (evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
+ if (evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ) {
ttrace->pfmaj++;
- else
+ trace->pfmaj++;
+ } else {
ttrace->pfmin++;
+ trace->pfmin++;
+ }
if (trace->summary_only)
goto out;
@@ -3417,6 +3428,7 @@ static int trace__record(struct trace *trace, int argc, const char **argv)
}
static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
+static size_t trace__fprintf_system_summary(struct trace *trace, FILE *fp);
static bool evlist__add_vfs_getname(struct evlist *evlist)
{
@@ -4329,6 +4341,12 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
goto out_delete_evlist;
}
+ if (trace->system_summary) {
+ trace->syscall_stats = alloc_syscall_stats();
+ if (trace->syscall_stats == NULL)
+ goto out_delete_evlist;
+ }
+
evlist__config(evlist, &trace->opts, &callchain_param);
if (forks) {
@@ -4489,8 +4507,12 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
ordered_events__flush(&trace->oe.data, OE_FLUSH__FINAL);
if (!err) {
- if (trace->summary)
- trace__fprintf_thread_summary(trace, trace->output);
+ if (trace->summary) {
+ if (trace->system_summary)
+ trace__fprintf_system_summary(trace, trace->output);
+ else
+ trace__fprintf_thread_summary(trace, trace->output);
+ }
if (trace->show_tool_stats) {
fprintf(trace->output, "Stats:\n "
@@ -4502,6 +4524,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
}
out_delete_evlist:
+ delete_syscall_stats(trace->syscall_stats);
trace__symbols__exit(trace);
evlist__free_syscall_tp_fields(evlist);
evlist__delete(evlist);
@@ -4629,6 +4652,12 @@ static int trace__replay(struct trace *trace)
evsel->handler = trace__pgfault;
}
+ if (trace->system_summary) {
+ trace->syscall_stats = alloc_syscall_stats();
+ if (trace->syscall_stats == NULL)
+ goto out;
+ }
+
setup_pager();
err = perf_session__process_events(session);
@@ -4639,12 +4668,13 @@ static int trace__replay(struct trace *trace)
trace__fprintf_thread_summary(trace, trace->output);
out:
+ delete_syscall_stats(trace->syscall_stats);
perf_session__delete(session);
return err;
}
-static size_t trace__fprintf_threads_header(FILE *fp)
+static size_t trace__fprintf_summary_header(FILE *fp)
{
size_t printed;
@@ -4667,19 +4697,19 @@ static int entry_cmp(const void *e1, const void *e2)
return entry1->msecs > entry2->msecs ? -1 : 1;
}
-static struct syscall_entry *thread__sort_stats(struct thread_trace *ttrace)
+static struct syscall_entry *syscall__sort_stats(struct hashmap *syscall_stats)
{
struct syscall_entry *entry;
struct hashmap_entry *pos;
unsigned bkt, i, nr;
- nr = ttrace->syscall_stats->sz;
+ nr = syscall_stats->sz;
entry = malloc(nr * sizeof(*entry));
if (entry == NULL)
return NULL;
i = 0;
- hashmap__for_each_entry(ttrace->syscall_stats, pos, bkt) {
+ hashmap__for_each_entry(syscall_stats, pos, bkt) {
struct syscall_stats *ss = pos->pvalue;
struct stats *st = &ss->stats;
@@ -4694,14 +4724,14 @@ static struct syscall_entry *thread__sort_stats(struct thread_trace *ttrace)
return entry;
}
-static size_t thread__dump_stats(struct thread_trace *ttrace,
- struct trace *trace, FILE *fp)
+static size_t syscall__dump_stats(struct trace *trace, FILE *fp,
+ struct hashmap *syscall_stats)
{
size_t printed = 0;
struct syscall *sc;
struct syscall_entry *entries;
- entries = thread__sort_stats(ttrace);
+ entries = syscall__sort_stats(syscall_stats);
if (entries == NULL)
return 0;
@@ -4711,7 +4741,7 @@ static size_t thread__dump_stats(struct thread_trace *ttrace,
printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n");
printed += fprintf(fp, " --------------- -------- ------ -------- --------- --------- --------- ------\n");
- for (size_t i = 0; i < ttrace->syscall_stats->sz; i++) {
+ for (size_t i = 0; i < syscall_stats->sz; i++) {
struct syscall_entry *entry = &entries[i];
struct syscall_stats *stats = entry->stats;
if (stats) {
@@ -4747,6 +4777,17 @@ static size_t thread__dump_stats(struct thread_trace *ttrace,
return printed;
}
+static size_t thread__dump_stats(struct thread_trace *ttrace,
+ struct trace *trace, FILE *fp)
+{
+ return syscall__dump_stats(trace, fp, ttrace->syscall_stats);
+}
+
+static size_t system__dump_stats(struct trace *trace, FILE *fp)
+{
+ return syscall__dump_stats(trace, fp, trace->syscall_stats);
+}
+
static size_t trace__fprintf_thread(FILE *fp, struct thread *thread, struct trace *trace)
{
size_t printed = 0;
@@ -4800,7 +4841,7 @@ static int trace_nr_events_cmp(void *priv __maybe_unused,
static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
{
- size_t printed = trace__fprintf_threads_header(fp);
+ size_t printed = trace__fprintf_summary_header(fp);
LIST_HEAD(threads);
if (machine__thread_list(trace->host, &threads) == 0) {
@@ -4815,6 +4856,27 @@ static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
return printed;
}
+static size_t trace__fprintf_system_summary(struct trace *trace, FILE *fp)
+{
+ size_t printed = trace__fprintf_summary_header(fp);
+
+ printed += fprintf(fp, " total, ");
+ printed += fprintf(fp, "%lu events", trace->nr_events);
+
+ if (trace->pfmaj)
+ printed += fprintf(fp, ", %lu majfaults", trace->pfmaj);
+ if (trace->pfmin)
+ printed += fprintf(fp, ", %lu minfaults", trace->pfmin);
+ if (trace->sched)
+ printed += fprintf(fp, ", %.3f msec\n", trace->runtime_ms);
+ else if (fputc('\n', fp) != EOF)
+ ++printed;
+
+ printed += system__dump_stats(trace, fp);
+
+ return printed;
+}
+
static int trace__set_duration(const struct option *opt, const char *str,
int unset __maybe_unused)
{
@@ -5233,6 +5295,8 @@ int cmd_trace(int argc, const char **argv)
"Show all syscalls and summary with statistics"),
OPT_BOOLEAN(0, "errno-summary", &trace.errno_summary,
"Show errno stats per syscall, use with -s or -S"),
+ OPT_BOOLEAN(0, "system-summary", &trace.system_summary,
+ "Show system-wide summary instead of per-thread, use with -s or -S"),
OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
"Trace pagefaults", parse_pagefaults, "maj"),
OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
--
2.48.0.rc2.279.g1de40edade-goog
^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [PATCH 2/4] perf trace: Convert syscall_stats to hashmap
2025-01-14 21:22 ` [PATCH 2/4] perf trace: Convert syscall_stats to hashmap Namhyung Kim
@ 2025-01-18 3:03 ` Howard Chu
2025-01-23 23:37 ` Namhyung Kim
0 siblings, 1 reply; 9+ messages in thread
From: Howard Chu @ 2025-01-18 3:03 UTC (permalink / raw)
To: Namhyung Kim
Cc: Arnaldo Carvalho de Melo, Ian Rogers, Kan Liang, Jiri Olsa,
Adrian Hunter, Peter Zijlstra, Ingo Molnar, LKML,
linux-perf-users
Hello Namhyung,
Sorry for the delay caused by health issues. In my opinion, this
unlocks the true power of perf trace—a high-performance, buffered, and
multi-process (or system-wide) tracer.
On Tue, Jan 14, 2025 at 1:22 PM Namhyung Kim <namhyung@kernel.org> wrote:
>
> It was using a RBtree-based int-list as a hash and a custome resort
> logic for that. As we have hashmap, let's convert to it and add a
> sorting function using an array. It's also to prepare supporting
> system-wide syscall stats.
>
> No functional changes intended.
>
> Signed-off-by: Namhyung Kim <namhyung@kernel.org>
> ---
> tools/perf/builtin-trace.c | 121 ++++++++++++++++++++++++++++---------
> 1 file changed, 92 insertions(+), 29 deletions(-)
>
> diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
> index 0691e817c15b4136..ad7f7fcd0d80b9df 100644
> --- a/tools/perf/builtin-trace.c
> +++ b/tools/perf/builtin-trace.c
> @@ -39,6 +39,7 @@
> #include "util/synthetic-events.h"
> #include "util/evlist.h"
> #include "util/evswitch.h"
> +#include "util/hashmap.h"
> #include "util/mmap.h"
> #include <subcmd/pager.h>
> #include <subcmd/exec-cmd.h>
> @@ -63,7 +64,6 @@
> #include "print_binary.h"
> #include "string2.h"
> #include "syscalltbl.h"
> -#include "rb_resort.h"
> #include "../perf.h"
> #include "trace_augment.h"
>
> @@ -1520,17 +1520,55 @@ struct thread_trace {
> struct file *table;
> } files;
>
> - struct intlist *syscall_stats;
> + struct hashmap *syscall_stats;
> };
>
> +static size_t id_hash(long key, void *ctx __maybe_unused)
> +{
> + return key;
> +}
> +
> +static bool id_equal(long key1, long key2, void *ctx __maybe_unused)
> +{
> + return key1 == key2;
> +}
> +
> +static struct hashmap *alloc_syscall_stats(void)
> +{
> + struct hashmap *stats = hashmap__new(id_hash, id_equal, NULL);
> +
> + /* just for simpler error check */
> + if (IS_ERR_OR_NULL(stats))
I don't think stats will ever be NULL.
> + stats = NULL;
> + return stats;
> +}
> +
> +static void delete_syscall_stats(struct hashmap *syscall_stats)
> +{
> + struct hashmap_entry *pos;
> + size_t bkt;
> +
> + if (syscall_stats == NULL)
> + return;
> +
> + hashmap__for_each_entry(syscall_stats, pos, bkt)
> + free(pos->pvalue);
> + hashmap__free(syscall_stats);
> +}
> +
> static struct thread_trace *thread_trace__new(struct trace *trace)
> {
> struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
>
> if (ttrace) {
> ttrace->files.max = -1;
> - if (trace->summary)
> - ttrace->syscall_stats = intlist__new(NULL);
> + if (trace->summary) {
> + ttrace->syscall_stats = alloc_syscall_stats();
> + if (ttrace->syscall_stats == NULL) {
> + free(ttrace);
> + ttrace = NULL;
> + }
> + }
> }
>
> return ttrace;
> @@ -1545,7 +1583,7 @@ static void thread_trace__delete(void *pttrace)
> if (!ttrace)
> return;
>
> - intlist__delete(ttrace->syscall_stats);
> + delete_syscall_stats(ttrace->syscall_stats);
> ttrace->syscall_stats = NULL;
> thread_trace__free_files(ttrace);
> zfree(&ttrace->entry_str);
> @@ -2464,22 +2502,19 @@ struct syscall_stats {
> static void thread__update_stats(struct thread *thread, struct thread_trace *ttrace,
> int id, struct perf_sample *sample, long err, bool errno_summary)
> {
> - struct int_node *inode;
> - struct syscall_stats *stats;
> + struct syscall_stats *stats = NULL;
> u64 duration = 0;
>
> - inode = intlist__findnew(ttrace->syscall_stats, id);
> - if (inode == NULL)
> - return;
> -
> - stats = inode->priv;
> - if (stats == NULL) {
> + if (!hashmap__find(ttrace->syscall_stats, id, &stats)) {
> stats = zalloc(sizeof(*stats));
> if (stats == NULL)
> return;
>
> init_stats(&stats->stats);
> - inode->priv = stats;
> + if (hashmap__add(ttrace->syscall_stats, id, stats) < 0) {
> + free(stats);
> + return;
> + }
> }
>
> if (ttrace->entry_time && sample->time > ttrace->entry_time)
> @@ -4618,18 +4653,45 @@ static size_t trace__fprintf_threads_header(FILE *fp)
> return printed;
> }
>
> -DEFINE_RESORT_RB(syscall_stats, a->msecs > b->msecs,
> +struct syscall_entry {
> struct syscall_stats *stats;
> double msecs;
> int syscall;
> -)
> +};
> +
> +static int entry_cmp(const void *e1, const void *e2)
> +{
> + const struct syscall_entry *entry1 = e1;
> + const struct syscall_entry *entry2 = e2;
> +
> + return entry1->msecs > entry2->msecs ? -1 : 1;
> +}
> +
> +static struct syscall_entry *thread__sort_stats(struct thread_trace *ttrace)
> {
> - struct int_node *source = rb_entry(nd, struct int_node, rb_node);
> - struct syscall_stats *stats = source->priv;
> + struct syscall_entry *entry;
> + struct hashmap_entry *pos;
> + unsigned bkt, i, nr;
> +
> + nr = ttrace->syscall_stats->sz;
> + entry = malloc(nr * sizeof(*entry));
> + if (entry == NULL)
> + return NULL;
>
> - entry->syscall = source->i;
> - entry->stats = stats;
> - entry->msecs = stats ? (u64)stats->stats.n * (avg_stats(&stats->stats) / NSEC_PER_MSEC) : 0;
> + i = 0;
> + hashmap__for_each_entry(ttrace->syscall_stats, pos, bkt) {
> + struct syscall_stats *ss = pos->pvalue;
> + struct stats *st = &ss->stats;
> +
> + entry[i].stats = ss;
> + entry[i].msecs = (u64)st->n * (avg_stats(st) / NSEC_PER_MSEC);
> + entry[i].syscall = pos->key;
> + i++;
> + }
> + assert(i == nr);
> +
> + qsort(entry, nr, sizeof(*entry), entry_cmp);
> + return entry;
> }
>
> static size_t thread__dump_stats(struct thread_trace *ttrace,
> @@ -4637,10 +4699,10 @@ static size_t thread__dump_stats(struct thread_trace *ttrace,
> {
> size_t printed = 0;
> struct syscall *sc;
> - struct rb_node *nd;
> - DECLARE_RESORT_RB_INTLIST(syscall_stats, ttrace->syscall_stats);
> + struct syscall_entry *entries;
>
> - if (syscall_stats == NULL)
> + entries = thread__sort_stats(ttrace);
> + if (entries == NULL)
> return 0;
>
> printed += fprintf(fp, "\n");
> @@ -4649,8 +4711,9 @@ static size_t thread__dump_stats(struct thread_trace *ttrace,
> printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n");
> printed += fprintf(fp, " --------------- -------- ------ -------- --------- --------- --------- ------\n");
>
> - resort_rb__for_each_entry(nd, syscall_stats) {
> - struct syscall_stats *stats = syscall_stats_entry->stats;
> + for (size_t i = 0; i < ttrace->syscall_stats->sz; i++) {
> + struct syscall_entry *entry = &entries[i];
> + struct syscall_stats *stats = entry->stats;
Maybe add a newline here?
> if (stats) {
> double min = (double)(stats->stats.min) / NSEC_PER_MSEC;
> double max = (double)(stats->stats.max) / NSEC_PER_MSEC;
> @@ -4661,10 +4724,10 @@ static size_t thread__dump_stats(struct thread_trace *ttrace,
> pct = avg ? 100.0 * stddev_stats(&stats->stats) / avg : 0.0;
> avg /= NSEC_PER_MSEC;
>
> - sc = &trace->syscalls.table[syscall_stats_entry->syscall];
> + sc = &trace->syscalls.table[entry->syscall];
> printed += fprintf(fp, " %-15s", sc->name);
> printed += fprintf(fp, " %8" PRIu64 " %6" PRIu64 " %9.3f %9.3f %9.3f",
> - n, stats->nr_failures, syscall_stats_entry->msecs, min, avg);
> + n, stats->nr_failures, entry->msecs, min, avg);
> printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
>
> if (trace->errno_summary && stats->nr_failures) {
> @@ -4678,7 +4741,7 @@ static size_t thread__dump_stats(struct thread_trace *ttrace,
> }
> }
>
> - resort_rb__delete(syscall_stats);
> + free(entries);
> printed += fprintf(fp, "\n\n");
>
> return printed;
> --
> 2.48.0.rc2.279.g1de40edade-goog
>
Thanks,
Howard
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 4/4] perf trace: Add --system-summary option
2025-01-14 21:22 ` [PATCH 4/4] perf trace: Add --system-summary option Namhyung Kim
@ 2025-01-18 3:07 ` Howard Chu
2025-01-23 23:41 ` Namhyung Kim
0 siblings, 1 reply; 9+ messages in thread
From: Howard Chu @ 2025-01-18 3:07 UTC (permalink / raw)
To: Namhyung Kim
Cc: Arnaldo Carvalho de Melo, Ian Rogers, Kan Liang, Jiri Olsa,
Adrian Hunter, Peter Zijlstra, Ingo Molnar, LKML,
linux-perf-users
Hello Namhyung,
On Tue, Jan 14, 2025 at 1:23 PM Namhyung Kim <namhyung@kernel.org> wrote:
>
> $ sudo ./perf trace -as --system-summary sleep 1
>
> Summary of events:
>
> total, 21580 events
>
> syscall calls errors total min avg max stddev
> (msec) (msec) (msec) (msec) (%)
> --------------- -------- ------ -------- --------- --------- --------- ------
> epoll_wait 1305 0 14716.712 0.000 11.277 551.529 8.87%
> futex 1256 89 13331.197 0.000 10.614 733.722 15.49%
> poll 669 0 6806.618 0.000 10.174 459.316 11.77%
> ppoll 220 0 3968.797 0.000 18.040 516.775 25.35%
> clock_nanosleep 1 0 1000.027 1000.027 1000.027 1000.027 0.00%
> epoll_pwait 21 0 592.783 0.000 28.228 522.293 88.29%
> nanosleep 16 0 60.515 0.000 3.782 10.123 33.33%
> ioctl 510 0 4.284 0.001 0.008 0.182 8.84%
> recvmsg 1434 775 3.497 0.001 0.002 0.174 6.37%
> write 1393 0 2.854 0.001 0.002 0.017 1.79%
> read 1063 100 2.236 0.000 0.002 0.083 5.11%
> ...
>
> Signed-off-by: Namhyung Kim <namhyung@kernel.org>
> ---
> tools/perf/Documentation/perf-trace.txt | 3 +
> tools/perf/builtin-trace.c | 100 +++++++++++++++++++-----
> 2 files changed, 85 insertions(+), 18 deletions(-)
>
> diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt
> index 2620c4f4601c8701..72f29f82649e320a 100644
> --- a/tools/perf/Documentation/perf-trace.txt
> +++ b/tools/perf/Documentation/perf-trace.txt
> @@ -150,6 +150,9 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs.
> To be used with -s or -S, to show stats for the errnos experienced by
> syscalls, using only this option will trigger --summary.
>
> +--system-summary::
> + To be used with -s or -S, to show system-wide summary instead of per-thread.
Is system-summary a good option name and is it really system-wide? For
example, if I do:
perf $ sudo ./perf trace -s -p 2552
^C
Summary of events:
gmain (2581), 12 events, 2.7%
syscall calls errors total min avg
max stddev
(msec) (msec) (msec)
(msec) (%)
--------------- -------- ------ -------- --------- ---------
--------- ------
inotify_add_watch 5 5 0.071 0.009 0.014
0.022 16.74%
ppoll 1 0 0.000 0.000 0.000
0.000 0.00%
gnome-shell (2647), 20 events, 4.4%
syscall calls errors total min avg
max stddev
(msec) (msec) (msec)
(msec) (%)
--------------- -------- ------ -------- --------- ---------
--------- ------
ppoll 2 0 136.188 0.000 68.094
136.188 100.00%
read 4 2 0.080 0.008 0.020
0.038 35.33%
epoll_wait 2 0 0.044 0.011 0.022
0.032 47.61%
write 2 0 0.040 0.014 0.020
0.026 29.65%
gdbus (2583), 102 events, 22.6%
syscall calls errors total min avg
max stddev
(msec) (msec) (msec)
(msec) (%)
--------------- -------- ------ -------- --------- ---------
--------- ------
ppoll 13 0 1535.789 0.000 118.138
1396.542 90.61%
write 16 0 0.198 0.003 0.012
0.043 24.76%
recvmsg 6 0 0.117 0.005 0.020
0.036 27.08%
read 8 0 0.094 0.003 0.012
0.050 48.51%
sendmsg 2 0 0.071 0.010 0.035
0.061 71.55%
poll 6 0 0.058 0.003 0.010
0.020 27.31%
gnome-shell (2552), 299 events, 66.2%
syscall calls errors total min avg
max stddev
(msec) (msec) (msec)
(msec) (%)
--------------- -------- ------ -------- --------- ---------
--------- ------
ppoll 17 0 1531.485 0.000 90.087
1387.446 90.43%
recvmsg 36 35 0.504 0.003 0.014
0.082 18.35%
mprotect 37 0 0.401 0.005 0.011
0.028 8.54%
write 29 0 0.333 0.003 0.011
0.051 19.53%
read 16 0 0.151 0.003 0.009
0.031 22.48%
sendmsg 4 0 0.130 0.014 0.033
0.059 34.63%
ioctl 1 0 0.050 0.050 0.050
0.050 0.00%
poll 2 0 0.045 0.007 0.023
0.039 71.16%
epoll_wait 1 0 0.028 0.028 0.028
0.028 0.00%
close 2 0 0.017 0.006 0.009
0.012 34.67%
getpid 3 0 0.012 0.003 0.004
0.007 32.84%
fcntl 1 0 0.007 0.007 0.007
0.007 0.00%
The process 2552 has multiple threads, each with its own separate
summary, as the old behavior yields.
perf $ sudo ./perf trace -s -p 2552 --system-summary
^C
Summary of events:
total, 432 events
syscall calls errors total min avg
max stddev
(msec) (msec) (msec)
(msec) (%)
--------------- -------- ------ -------- --------- ---------
--------- ------
ppoll 33 0 524.678 0.000 15.899
160.177 50.77%
recvmsg 56 51 0.602 0.003 0.011
0.033 10.06%
ioctl 21 0 0.542 0.003 0.026
0.256 46.90%
write 36 0 0.432 0.002 0.012
0.033 12.46%
read 26 2 0.328 0.003 0.013
0.037 15.34%
sendmsg 6 0 0.209 0.012 0.035
0.081 30.35%
futex 9 1 0.111 0.000 0.012
0.036 32.24%
inotify_add_watch 5 5 0.073 0.009 0.015
0.024 18.49%
epoll_wait 3 0 0.049 0.008 0.016
0.033 50.05%
poll 4 0 0.039 0.004 0.010
0.017 29.67%
timerfd_settime 2 0 0.032 0.008 0.016
0.024 48.19%
getpid 8 0 0.032 0.003 0.004
0.007 13.33%
close 3 0 0.023 0.006 0.008
0.010 12.89%
fcntl 2 0 0.013 0.005 0.006
0.008 25.18%
kcmp 2 0 0.010 0.003 0.005
0.007 40.29%
And this is the summary that brings everything together. However, I
wouldn't consider it a 'system-wide' summary. Maybe call it
--summary-total, summary-collapsed... or just make it the new default
behavior?
> +
> --tool_stats::
> Show tool stats such as number of times fd->pathname was discovered thru
> hooking the open syscall return + vfs_getname or via reading /proc/pid/fd, etc.
> diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
> index ad7f7fcd0d80b9df..00a20052f676847c 100644
> --- a/tools/perf/builtin-trace.c
> +++ b/tools/perf/builtin-trace.c
> @@ -177,8 +177,10 @@ struct trace {
> pid_t *entries;
> struct bpf_map *map;
> } filter_pids;
> + struct hashmap *syscall_stats;
> double duration_filter;
> double runtime_ms;
> + unsigned long pfmaj, pfmin;
> struct {
> u64 vfs_getname,
> proc_getname;
> @@ -198,6 +200,7 @@ struct trace {
> bool summary;
> bool summary_only;
> bool errno_summary;
> + bool system_summary;
> bool failure_only;
> bool show_comm;
> bool print_sample;
> @@ -2500,18 +2503,23 @@ struct syscall_stats {
> };
>
> static void thread__update_stats(struct thread *thread, struct thread_trace *ttrace,
> - int id, struct perf_sample *sample, long err, bool errno_summary)
> + int id, struct perf_sample *sample, long err,
> + struct trace *trace)
> {
> + struct hashmap *syscall_stats = ttrace->syscall_stats;
> struct syscall_stats *stats = NULL;
> u64 duration = 0;
>
> - if (!hashmap__find(ttrace->syscall_stats, id, &stats)) {
> + if (trace->system_summary)
> + syscall_stats = trace->syscall_stats;
> +
> + if (!hashmap__find(syscall_stats, id, &stats)) {
> stats = zalloc(sizeof(*stats));
> if (stats == NULL)
> return;
>
> init_stats(&stats->stats);
> - if (hashmap__add(ttrace->syscall_stats, id, stats) < 0) {
> + if (hashmap__add(syscall_stats, id, stats) < 0) {
> free(stats);
> return;
> }
> @@ -2525,7 +2533,7 @@ static void thread__update_stats(struct thread *thread, struct thread_trace *ttr
> if (err < 0) {
> ++stats->nr_failures;
>
> - if (!errno_summary)
> + if (!trace->errno_summary)
> return;
>
> err = -err;
> @@ -2817,7 +2825,7 @@ static int trace__sys_exit(struct trace *trace, struct evsel *evsel,
> ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
>
> if (trace->summary)
> - thread__update_stats(thread, ttrace, id, sample, ret, trace->errno_summary);
> + thread__update_stats(thread, ttrace, id, sample, ret, trace);
>
> if (!trace->fd_path_disabled && sc->is_open && ret >= 0 && ttrace->filename.pending_open) {
> trace__set_fd_pathname(thread, ret, ttrace->filename.name);
> @@ -3255,10 +3263,13 @@ static int trace__pgfault(struct trace *trace,
> if (ttrace == NULL)
> goto out_put;
>
> - if (evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
> + if (evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ) {
> ttrace->pfmaj++;
> - else
> + trace->pfmaj++;
> + } else {
> ttrace->pfmin++;
> + trace->pfmin++;
> + }
>
> if (trace->summary_only)
> goto out;
> @@ -3417,6 +3428,7 @@ static int trace__record(struct trace *trace, int argc, const char **argv)
> }
>
> static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
> +static size_t trace__fprintf_system_summary(struct trace *trace, FILE *fp);
>
> static bool evlist__add_vfs_getname(struct evlist *evlist)
> {
> @@ -4329,6 +4341,12 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
> goto out_delete_evlist;
> }
>
> + if (trace->system_summary) {
> + trace->syscall_stats = alloc_syscall_stats();
> + if (trace->syscall_stats == NULL)
> + goto out_delete_evlist;
> + }
> +
> evlist__config(evlist, &trace->opts, &callchain_param);
>
> if (forks) {
> @@ -4489,8 +4507,12 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
> ordered_events__flush(&trace->oe.data, OE_FLUSH__FINAL);
>
> if (!err) {
> - if (trace->summary)
> - trace__fprintf_thread_summary(trace, trace->output);
> + if (trace->summary) {
> + if (trace->system_summary)
> + trace__fprintf_system_summary(trace, trace->output);
> + else
> + trace__fprintf_thread_summary(trace, trace->output);
> + }
>
> if (trace->show_tool_stats) {
> fprintf(trace->output, "Stats:\n "
> @@ -4502,6 +4524,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
> }
>
> out_delete_evlist:
> + delete_syscall_stats(trace->syscall_stats);
> trace__symbols__exit(trace);
> evlist__free_syscall_tp_fields(evlist);
> evlist__delete(evlist);
> @@ -4629,6 +4652,12 @@ static int trace__replay(struct trace *trace)
> evsel->handler = trace__pgfault;
> }
>
> + if (trace->system_summary) {
> + trace->syscall_stats = alloc_syscall_stats();
> + if (trace->syscall_stats == NULL)
> + goto out;
> + }
> +
> setup_pager();
>
> err = perf_session__process_events(session);
> @@ -4639,12 +4668,13 @@ static int trace__replay(struct trace *trace)
> trace__fprintf_thread_summary(trace, trace->output);
>
> out:
> + delete_syscall_stats(trace->syscall_stats);
> perf_session__delete(session);
>
> return err;
> }
>
> -static size_t trace__fprintf_threads_header(FILE *fp)
> +static size_t trace__fprintf_summary_header(FILE *fp)
> {
> size_t printed;
>
> @@ -4667,19 +4697,19 @@ static int entry_cmp(const void *e1, const void *e2)
> return entry1->msecs > entry2->msecs ? -1 : 1;
> }
>
> -static struct syscall_entry *thread__sort_stats(struct thread_trace *ttrace)
> +static struct syscall_entry *syscall__sort_stats(struct hashmap *syscall_stats)
> {
> struct syscall_entry *entry;
> struct hashmap_entry *pos;
> unsigned bkt, i, nr;
>
> - nr = ttrace->syscall_stats->sz;
> + nr = syscall_stats->sz;
> entry = malloc(nr * sizeof(*entry));
> if (entry == NULL)
> return NULL;
>
> i = 0;
> - hashmap__for_each_entry(ttrace->syscall_stats, pos, bkt) {
> + hashmap__for_each_entry(syscall_stats, pos, bkt) {
> struct syscall_stats *ss = pos->pvalue;
> struct stats *st = &ss->stats;
>
> @@ -4694,14 +4724,14 @@ static struct syscall_entry *thread__sort_stats(struct thread_trace *ttrace)
> return entry;
> }
>
> -static size_t thread__dump_stats(struct thread_trace *ttrace,
> - struct trace *trace, FILE *fp)
> +static size_t syscall__dump_stats(struct trace *trace, FILE *fp,
> + struct hashmap *syscall_stats)
> {
> size_t printed = 0;
> struct syscall *sc;
> struct syscall_entry *entries;
>
> - entries = thread__sort_stats(ttrace);
> + entries = syscall__sort_stats(syscall_stats);
> if (entries == NULL)
> return 0;
>
> @@ -4711,7 +4741,7 @@ static size_t thread__dump_stats(struct thread_trace *ttrace,
> printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n");
> printed += fprintf(fp, " --------------- -------- ------ -------- --------- --------- --------- ------\n");
>
> - for (size_t i = 0; i < ttrace->syscall_stats->sz; i++) {
> + for (size_t i = 0; i < syscall_stats->sz; i++) {
> struct syscall_entry *entry = &entries[i];
> struct syscall_stats *stats = entry->stats;
> if (stats) {
> @@ -4747,6 +4777,17 @@ static size_t thread__dump_stats(struct thread_trace *ttrace,
> return printed;
> }
>
> +static size_t thread__dump_stats(struct thread_trace *ttrace,
> + struct trace *trace, FILE *fp)
> +{
> + return syscall__dump_stats(trace, fp, ttrace->syscall_stats);
> +}
> +
> +static size_t system__dump_stats(struct trace *trace, FILE *fp)
> +{
> + return syscall__dump_stats(trace, fp, trace->syscall_stats);
> +}
Perhaps consider inlining these two functions?
> +
> static size_t trace__fprintf_thread(FILE *fp, struct thread *thread, struct trace *trace)
> {
> size_t printed = 0;
> @@ -4800,7 +4841,7 @@ static int trace_nr_events_cmp(void *priv __maybe_unused,
>
> static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
> {
> - size_t printed = trace__fprintf_threads_header(fp);
> + size_t printed = trace__fprintf_summary_header(fp);
> LIST_HEAD(threads);
>
> if (machine__thread_list(trace->host, &threads) == 0) {
> @@ -4815,6 +4856,27 @@ static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
> return printed;
> }
>
> +static size_t trace__fprintf_system_summary(struct trace *trace, FILE *fp)
> +{
> + size_t printed = trace__fprintf_summary_header(fp);
> +
> + printed += fprintf(fp, " total, ");
> + printed += fprintf(fp, "%lu events", trace->nr_events);
> +
> + if (trace->pfmaj)
> + printed += fprintf(fp, ", %lu majfaults", trace->pfmaj);
> + if (trace->pfmin)
> + printed += fprintf(fp, ", %lu minfaults", trace->pfmin);
> + if (trace->sched)
> + printed += fprintf(fp, ", %.3f msec\n", trace->runtime_ms);
> + else if (fputc('\n', fp) != EOF)
> + ++printed;
> +
> + printed += system__dump_stats(trace, fp);
> +
> + return printed;
> +}
> +
> static int trace__set_duration(const struct option *opt, const char *str,
> int unset __maybe_unused)
> {
> @@ -5233,6 +5295,8 @@ int cmd_trace(int argc, const char **argv)
> "Show all syscalls and summary with statistics"),
> OPT_BOOLEAN(0, "errno-summary", &trace.errno_summary,
> "Show errno stats per syscall, use with -s or -S"),
> + OPT_BOOLEAN(0, "system-summary", &trace.system_summary,
> + "Show system-wide summary instead of per-thread, use with -s or -S"),
> OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
> "Trace pagefaults", parse_pagefaults, "maj"),
> OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
> --
> 2.48.0.rc2.279.g1de40edade-goog
>
Thanks,
Howard
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 2/4] perf trace: Convert syscall_stats to hashmap
2025-01-18 3:03 ` Howard Chu
@ 2025-01-23 23:37 ` Namhyung Kim
0 siblings, 0 replies; 9+ messages in thread
From: Namhyung Kim @ 2025-01-23 23:37 UTC (permalink / raw)
To: Howard Chu
Cc: Arnaldo Carvalho de Melo, Ian Rogers, Kan Liang, Jiri Olsa,
Adrian Hunter, Peter Zijlstra, Ingo Molnar, LKML,
linux-perf-users
Hi Howard,
On Fri, Jan 17, 2025 at 07:03:13PM -0800, Howard Chu wrote:
> Hello Namhyung,
>
> Sorry for the delay caused by health issues. In my opinion, this
Sorry to hear that, I hope you are good now.
> unlocks the true power of perf trace—a high-performance, buffered, and
> multi-process (or system-wide) tracer.
>
> On Tue, Jan 14, 2025 at 1:22 PM Namhyung Kim <namhyung@kernel.org> wrote:
> >
> > It was using a RBtree-based int-list as a hash and a custome resort
> > logic for that. As we have hashmap, let's convert to it and add a
> > sorting function using an array. It's also to prepare supporting
> > system-wide syscall stats.
> >
> > No functional changes intended.
> >
> > Signed-off-by: Namhyung Kim <namhyung@kernel.org>
> > ---
> > tools/perf/builtin-trace.c | 121 ++++++++++++++++++++++++++++---------
> > 1 file changed, 92 insertions(+), 29 deletions(-)
> >
> > diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
> > index 0691e817c15b4136..ad7f7fcd0d80b9df 100644
> > --- a/tools/perf/builtin-trace.c
> > +++ b/tools/perf/builtin-trace.c
> > @@ -39,6 +39,7 @@
> > #include "util/synthetic-events.h"
> > #include "util/evlist.h"
> > #include "util/evswitch.h"
> > +#include "util/hashmap.h"
> > #include "util/mmap.h"
> > #include <subcmd/pager.h>
> > #include <subcmd/exec-cmd.h>
> > @@ -63,7 +64,6 @@
> > #include "print_binary.h"
> > #include "string2.h"
> > #include "syscalltbl.h"
> > -#include "rb_resort.h"
> > #include "../perf.h"
> > #include "trace_augment.h"
> >
> > @@ -1520,17 +1520,55 @@ struct thread_trace {
> > struct file *table;
> > } files;
> >
> > - struct intlist *syscall_stats;
> > + struct hashmap *syscall_stats;
> > };
> >
> > +static size_t id_hash(long key, void *ctx __maybe_unused)
> > +{
> > + return key;
> > +}
> > +
> > +static bool id_equal(long key1, long key2, void *ctx __maybe_unused)
> > +{
> > + return key1 == key2;
> > +}
> > +
> > +static struct hashmap *alloc_syscall_stats(void)
> > +{
> > + struct hashmap *stats = hashmap__new(id_hash, id_equal, NULL);
> > +
> > + /* just for simpler error check */
> > + if (IS_ERR_OR_NULL(stats))
>
> I don't think stats will ever be NULL.
Ok, will change.
>
> > + stats = NULL;
> > + return stats;
> > +}
> > +
> > +static void delete_syscall_stats(struct hashmap *syscall_stats)
> > +{
> > + struct hashmap_entry *pos;
> > + size_t bkt;
> > +
> > + if (syscall_stats == NULL)
> > + return;
> > +
> > + hashmap__for_each_entry(syscall_stats, pos, bkt)
> > + free(pos->pvalue);
> > + hashmap__free(syscall_stats);
> > +}
> > +
> > static struct thread_trace *thread_trace__new(struct trace *trace)
> > {
> > struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
> >
> > if (ttrace) {
> > ttrace->files.max = -1;
> > - if (trace->summary)
> > - ttrace->syscall_stats = intlist__new(NULL);
> > + if (trace->summary) {
> > + ttrace->syscall_stats = alloc_syscall_stats();
> > + if (ttrace->syscall_stats == NULL) {
> > + free(ttrace);
> > + ttrace = NULL;
> > + }
> > + }
> > }
> >
> > return ttrace;
> > @@ -1545,7 +1583,7 @@ static void thread_trace__delete(void *pttrace)
> > if (!ttrace)
> > return;
> >
> > - intlist__delete(ttrace->syscall_stats);
> > + delete_syscall_stats(ttrace->syscall_stats);
> > ttrace->syscall_stats = NULL;
> > thread_trace__free_files(ttrace);
> > zfree(&ttrace->entry_str);
> > @@ -2464,22 +2502,19 @@ struct syscall_stats {
> > static void thread__update_stats(struct thread *thread, struct thread_trace *ttrace,
> > int id, struct perf_sample *sample, long err, bool errno_summary)
> > {
> > - struct int_node *inode;
> > - struct syscall_stats *stats;
> > + struct syscall_stats *stats = NULL;
> > u64 duration = 0;
> >
> > - inode = intlist__findnew(ttrace->syscall_stats, id);
> > - if (inode == NULL)
> > - return;
> > -
> > - stats = inode->priv;
> > - if (stats == NULL) {
> > + if (!hashmap__find(ttrace->syscall_stats, id, &stats)) {
> > stats = zalloc(sizeof(*stats));
> > if (stats == NULL)
> > return;
> >
> > init_stats(&stats->stats);
> > - inode->priv = stats;
> > + if (hashmap__add(ttrace->syscall_stats, id, stats) < 0) {
> > + free(stats);
> > + return;
> > + }
> > }
> >
> > if (ttrace->entry_time && sample->time > ttrace->entry_time)
> > @@ -4618,18 +4653,45 @@ static size_t trace__fprintf_threads_header(FILE *fp)
> > return printed;
> > }
> >
> > -DEFINE_RESORT_RB(syscall_stats, a->msecs > b->msecs,
> > +struct syscall_entry {
> > struct syscall_stats *stats;
> > double msecs;
> > int syscall;
> > -)
> > +};
> > +
> > +static int entry_cmp(const void *e1, const void *e2)
> > +{
> > + const struct syscall_entry *entry1 = e1;
> > + const struct syscall_entry *entry2 = e2;
> > +
> > + return entry1->msecs > entry2->msecs ? -1 : 1;
> > +}
> > +
> > +static struct syscall_entry *thread__sort_stats(struct thread_trace *ttrace)
> > {
> > - struct int_node *source = rb_entry(nd, struct int_node, rb_node);
> > - struct syscall_stats *stats = source->priv;
> > + struct syscall_entry *entry;
> > + struct hashmap_entry *pos;
> > + unsigned bkt, i, nr;
> > +
> > + nr = ttrace->syscall_stats->sz;
> > + entry = malloc(nr * sizeof(*entry));
> > + if (entry == NULL)
> > + return NULL;
> >
> > - entry->syscall = source->i;
> > - entry->stats = stats;
> > - entry->msecs = stats ? (u64)stats->stats.n * (avg_stats(&stats->stats) / NSEC_PER_MSEC) : 0;
> > + i = 0;
> > + hashmap__for_each_entry(ttrace->syscall_stats, pos, bkt) {
> > + struct syscall_stats *ss = pos->pvalue;
> > + struct stats *st = &ss->stats;
> > +
> > + entry[i].stats = ss;
> > + entry[i].msecs = (u64)st->n * (avg_stats(st) / NSEC_PER_MSEC);
> > + entry[i].syscall = pos->key;
> > + i++;
> > + }
> > + assert(i == nr);
> > +
> > + qsort(entry, nr, sizeof(*entry), entry_cmp);
> > + return entry;
> > }
> >
> > static size_t thread__dump_stats(struct thread_trace *ttrace,
> > @@ -4637,10 +4699,10 @@ static size_t thread__dump_stats(struct thread_trace *ttrace,
> > {
> > size_t printed = 0;
> > struct syscall *sc;
> > - struct rb_node *nd;
> > - DECLARE_RESORT_RB_INTLIST(syscall_stats, ttrace->syscall_stats);
> > + struct syscall_entry *entries;
> >
> > - if (syscall_stats == NULL)
> > + entries = thread__sort_stats(ttrace);
> > + if (entries == NULL)
> > return 0;
> >
> > printed += fprintf(fp, "\n");
> > @@ -4649,8 +4711,9 @@ static size_t thread__dump_stats(struct thread_trace *ttrace,
> > printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n");
> > printed += fprintf(fp, " --------------- -------- ------ -------- --------- --------- --------- ------\n");
> >
> > - resort_rb__for_each_entry(nd, syscall_stats) {
> > - struct syscall_stats *stats = syscall_stats_entry->stats;
> > + for (size_t i = 0; i < ttrace->syscall_stats->sz; i++) {
> > + struct syscall_entry *entry = &entries[i];
> > + struct syscall_stats *stats = entry->stats;
>
> Maybe add a newline here?
Yep.
Thanks for your review!
Namhyung
>
> > if (stats) {
> > double min = (double)(stats->stats.min) / NSEC_PER_MSEC;
> > double max = (double)(stats->stats.max) / NSEC_PER_MSEC;
> > @@ -4661,10 +4724,10 @@ static size_t thread__dump_stats(struct thread_trace *ttrace,
> > pct = avg ? 100.0 * stddev_stats(&stats->stats) / avg : 0.0;
> > avg /= NSEC_PER_MSEC;
> >
> > - sc = &trace->syscalls.table[syscall_stats_entry->syscall];
> > + sc = &trace->syscalls.table[entry->syscall];
> > printed += fprintf(fp, " %-15s", sc->name);
> > printed += fprintf(fp, " %8" PRIu64 " %6" PRIu64 " %9.3f %9.3f %9.3f",
> > - n, stats->nr_failures, syscall_stats_entry->msecs, min, avg);
> > + n, stats->nr_failures, entry->msecs, min, avg);
> > printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
> >
> > if (trace->errno_summary && stats->nr_failures) {
> > @@ -4678,7 +4741,7 @@ static size_t thread__dump_stats(struct thread_trace *ttrace,
> > }
> > }
> >
> > - resort_rb__delete(syscall_stats);
> > + free(entries);
> > printed += fprintf(fp, "\n\n");
> >
> > return printed;
> > --
> > 2.48.0.rc2.279.g1de40edade-goog
> >
>
> Thanks,
> Howard
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 4/4] perf trace: Add --system-summary option
2025-01-18 3:07 ` Howard Chu
@ 2025-01-23 23:41 ` Namhyung Kim
0 siblings, 0 replies; 9+ messages in thread
From: Namhyung Kim @ 2025-01-23 23:41 UTC (permalink / raw)
To: Howard Chu
Cc: Arnaldo Carvalho de Melo, Ian Rogers, Kan Liang, Jiri Olsa,
Adrian Hunter, Peter Zijlstra, Ingo Molnar, LKML,
linux-perf-users
On Fri, Jan 17, 2025 at 07:07:32PM -0800, Howard Chu wrote:
> Hello Namhyung,
>
> On Tue, Jan 14, 2025 at 1:23 PM Namhyung Kim <namhyung@kernel.org> wrote:
> >
> > $ sudo ./perf trace -as --system-summary sleep 1
> >
> > Summary of events:
> >
> > total, 21580 events
> >
> > syscall calls errors total min avg max stddev
> > (msec) (msec) (msec) (msec) (%)
> > --------------- -------- ------ -------- --------- --------- --------- ------
> > epoll_wait 1305 0 14716.712 0.000 11.277 551.529 8.87%
> > futex 1256 89 13331.197 0.000 10.614 733.722 15.49%
> > poll 669 0 6806.618 0.000 10.174 459.316 11.77%
> > ppoll 220 0 3968.797 0.000 18.040 516.775 25.35%
> > clock_nanosleep 1 0 1000.027 1000.027 1000.027 1000.027 0.00%
> > epoll_pwait 21 0 592.783 0.000 28.228 522.293 88.29%
> > nanosleep 16 0 60.515 0.000 3.782 10.123 33.33%
> > ioctl 510 0 4.284 0.001 0.008 0.182 8.84%
> > recvmsg 1434 775 3.497 0.001 0.002 0.174 6.37%
> > write 1393 0 2.854 0.001 0.002 0.017 1.79%
> > read 1063 100 2.236 0.000 0.002 0.083 5.11%
> > ...
> >
> > Signed-off-by: Namhyung Kim <namhyung@kernel.org>
> > ---
> > tools/perf/Documentation/perf-trace.txt | 3 +
> > tools/perf/builtin-trace.c | 100 +++++++++++++++++++-----
> > 2 files changed, 85 insertions(+), 18 deletions(-)
> >
> > diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt
> > index 2620c4f4601c8701..72f29f82649e320a 100644
> > --- a/tools/perf/Documentation/perf-trace.txt
> > +++ b/tools/perf/Documentation/perf-trace.txt
> > @@ -150,6 +150,9 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs.
> > To be used with -s or -S, to show stats for the errnos experienced by
> > syscalls, using only this option will trigger --summary.
> >
> > +--system-summary::
> > + To be used with -s or -S, to show system-wide summary instead of per-thread.
>
> Is system-summary a good option name and is it really system-wide?
I'm afraid not. :)
> For example, if I do:
>
> perf $ sudo ./perf trace -s -p 2552
> ^C
> Summary of events:
>
> gmain (2581), 12 events, 2.7%
>
> syscall calls errors total min avg
> max stddev
> (msec) (msec) (msec)
> (msec) (%)
> --------------- -------- ------ -------- --------- ---------
> --------- ------
> inotify_add_watch 5 5 0.071 0.009 0.014
> 0.022 16.74%
> ppoll 1 0 0.000 0.000 0.000
> 0.000 0.00%
>
>
> gnome-shell (2647), 20 events, 4.4%
>
> syscall calls errors total min avg
> max stddev
> (msec) (msec) (msec)
> (msec) (%)
> --------------- -------- ------ -------- --------- ---------
> --------- ------
> ppoll 2 0 136.188 0.000 68.094
> 136.188 100.00%
> read 4 2 0.080 0.008 0.020
> 0.038 35.33%
> epoll_wait 2 0 0.044 0.011 0.022
> 0.032 47.61%
> write 2 0 0.040 0.014 0.020
> 0.026 29.65%
>
>
> gdbus (2583), 102 events, 22.6%
>
> syscall calls errors total min avg
> max stddev
> (msec) (msec) (msec)
> (msec) (%)
> --------------- -------- ------ -------- --------- ---------
> --------- ------
> ppoll 13 0 1535.789 0.000 118.138
> 1396.542 90.61%
> write 16 0 0.198 0.003 0.012
> 0.043 24.76%
> recvmsg 6 0 0.117 0.005 0.020
> 0.036 27.08%
> read 8 0 0.094 0.003 0.012
> 0.050 48.51%
> sendmsg 2 0 0.071 0.010 0.035
> 0.061 71.55%
> poll 6 0 0.058 0.003 0.010
> 0.020 27.31%
>
>
> gnome-shell (2552), 299 events, 66.2%
>
> syscall calls errors total min avg
> max stddev
> (msec) (msec) (msec)
> (msec) (%)
> --------------- -------- ------ -------- --------- ---------
> --------- ------
> ppoll 17 0 1531.485 0.000 90.087
> 1387.446 90.43%
> recvmsg 36 35 0.504 0.003 0.014
> 0.082 18.35%
> mprotect 37 0 0.401 0.005 0.011
> 0.028 8.54%
> write 29 0 0.333 0.003 0.011
> 0.051 19.53%
> read 16 0 0.151 0.003 0.009
> 0.031 22.48%
> sendmsg 4 0 0.130 0.014 0.033
> 0.059 34.63%
> ioctl 1 0 0.050 0.050 0.050
> 0.050 0.00%
> poll 2 0 0.045 0.007 0.023
> 0.039 71.16%
> epoll_wait 1 0 0.028 0.028 0.028
> 0.028 0.00%
> close 2 0 0.017 0.006 0.009
> 0.012 34.67%
> getpid 3 0 0.012 0.003 0.004
> 0.007 32.84%
> fcntl 1 0 0.007 0.007 0.007
> 0.007 0.00%
>
> The process 2552 has multiple threads, each with its own separate
> summary, as the old behavior yields.
>
> perf $ sudo ./perf trace -s -p 2552 --system-summary
> ^C
> Summary of events:
>
> total, 432 events
>
> syscall calls errors total min avg
> max stddev
> (msec) (msec) (msec)
> (msec) (%)
> --------------- -------- ------ -------- --------- ---------
> --------- ------
> ppoll 33 0 524.678 0.000 15.899
> 160.177 50.77%
> recvmsg 56 51 0.602 0.003 0.011
> 0.033 10.06%
> ioctl 21 0 0.542 0.003 0.026
> 0.256 46.90%
> write 36 0 0.432 0.002 0.012
> 0.033 12.46%
> read 26 2 0.328 0.003 0.013
> 0.037 15.34%
> sendmsg 6 0 0.209 0.012 0.035
> 0.081 30.35%
> futex 9 1 0.111 0.000 0.012
> 0.036 32.24%
> inotify_add_watch 5 5 0.073 0.009 0.015
> 0.024 18.49%
> epoll_wait 3 0 0.049 0.008 0.016
> 0.033 50.05%
> poll 4 0 0.039 0.004 0.010
> 0.017 29.67%
> timerfd_settime 2 0 0.032 0.008 0.016
> 0.024 48.19%
> getpid 8 0 0.032 0.003 0.004
> 0.007 13.33%
> close 3 0 0.023 0.006 0.008
> 0.010 12.89%
> fcntl 2 0 0.013 0.005 0.006
> 0.008 25.18%
> kcmp 2 0 0.010 0.003 0.005
> 0.007 40.29%
>
> And this is the summary that brings everything together. However, I
> wouldn't consider it a 'system-wide' summary. Maybe call it
> --summary-total, summary-collapsed... or just make it the new default
> behavior?
Maybe --summary-total. I'm not sure if it's ok to change the default.
Probably I need to add a config option at least.
>
> > +
> > --tool_stats::
> > Show tool stats such as number of times fd->pathname was discovered thru
> > hooking the open syscall return + vfs_getname or via reading /proc/pid/fd, etc.
> > diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
> > index ad7f7fcd0d80b9df..00a20052f676847c 100644
> > --- a/tools/perf/builtin-trace.c
> > +++ b/tools/perf/builtin-trace.c
> > @@ -177,8 +177,10 @@ struct trace {
> > pid_t *entries;
> > struct bpf_map *map;
> > } filter_pids;
> > + struct hashmap *syscall_stats;
> > double duration_filter;
> > double runtime_ms;
> > + unsigned long pfmaj, pfmin;
> > struct {
> > u64 vfs_getname,
> > proc_getname;
> > @@ -198,6 +200,7 @@ struct trace {
> > bool summary;
> > bool summary_only;
> > bool errno_summary;
> > + bool system_summary;
> > bool failure_only;
> > bool show_comm;
> > bool print_sample;
> > @@ -2500,18 +2503,23 @@ struct syscall_stats {
> > };
> >
> > static void thread__update_stats(struct thread *thread, struct thread_trace *ttrace,
> > - int id, struct perf_sample *sample, long err, bool errno_summary)
> > + int id, struct perf_sample *sample, long err,
> > + struct trace *trace)
> > {
> > + struct hashmap *syscall_stats = ttrace->syscall_stats;
> > struct syscall_stats *stats = NULL;
> > u64 duration = 0;
> >
> > - if (!hashmap__find(ttrace->syscall_stats, id, &stats)) {
> > + if (trace->system_summary)
> > + syscall_stats = trace->syscall_stats;
> > +
> > + if (!hashmap__find(syscall_stats, id, &stats)) {
> > stats = zalloc(sizeof(*stats));
> > if (stats == NULL)
> > return;
> >
> > init_stats(&stats->stats);
> > - if (hashmap__add(ttrace->syscall_stats, id, stats) < 0) {
> > + if (hashmap__add(syscall_stats, id, stats) < 0) {
> > free(stats);
> > return;
> > }
> > @@ -2525,7 +2533,7 @@ static void thread__update_stats(struct thread *thread, struct thread_trace *ttr
> > if (err < 0) {
> > ++stats->nr_failures;
> >
> > - if (!errno_summary)
> > + if (!trace->errno_summary)
> > return;
> >
> > err = -err;
> > @@ -2817,7 +2825,7 @@ static int trace__sys_exit(struct trace *trace, struct evsel *evsel,
> > ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
> >
> > if (trace->summary)
> > - thread__update_stats(thread, ttrace, id, sample, ret, trace->errno_summary);
> > + thread__update_stats(thread, ttrace, id, sample, ret, trace);
> >
> > if (!trace->fd_path_disabled && sc->is_open && ret >= 0 && ttrace->filename.pending_open) {
> > trace__set_fd_pathname(thread, ret, ttrace->filename.name);
> > @@ -3255,10 +3263,13 @@ static int trace__pgfault(struct trace *trace,
> > if (ttrace == NULL)
> > goto out_put;
> >
> > - if (evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
> > + if (evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ) {
> > ttrace->pfmaj++;
> > - else
> > + trace->pfmaj++;
> > + } else {
> > ttrace->pfmin++;
> > + trace->pfmin++;
> > + }
> >
> > if (trace->summary_only)
> > goto out;
> > @@ -3417,6 +3428,7 @@ static int trace__record(struct trace *trace, int argc, const char **argv)
> > }
> >
> > static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
> > +static size_t trace__fprintf_system_summary(struct trace *trace, FILE *fp);
> >
> > static bool evlist__add_vfs_getname(struct evlist *evlist)
> > {
> > @@ -4329,6 +4341,12 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
> > goto out_delete_evlist;
> > }
> >
> > + if (trace->system_summary) {
> > + trace->syscall_stats = alloc_syscall_stats();
> > + if (trace->syscall_stats == NULL)
> > + goto out_delete_evlist;
> > + }
> > +
> > evlist__config(evlist, &trace->opts, &callchain_param);
> >
> > if (forks) {
> > @@ -4489,8 +4507,12 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
> > ordered_events__flush(&trace->oe.data, OE_FLUSH__FINAL);
> >
> > if (!err) {
> > - if (trace->summary)
> > - trace__fprintf_thread_summary(trace, trace->output);
> > + if (trace->summary) {
> > + if (trace->system_summary)
> > + trace__fprintf_system_summary(trace, trace->output);
> > + else
> > + trace__fprintf_thread_summary(trace, trace->output);
> > + }
> >
> > if (trace->show_tool_stats) {
> > fprintf(trace->output, "Stats:\n "
> > @@ -4502,6 +4524,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
> > }
> >
> > out_delete_evlist:
> > + delete_syscall_stats(trace->syscall_stats);
> > trace__symbols__exit(trace);
> > evlist__free_syscall_tp_fields(evlist);
> > evlist__delete(evlist);
> > @@ -4629,6 +4652,12 @@ static int trace__replay(struct trace *trace)
> > evsel->handler = trace__pgfault;
> > }
> >
> > + if (trace->system_summary) {
> > + trace->syscall_stats = alloc_syscall_stats();
> > + if (trace->syscall_stats == NULL)
> > + goto out;
> > + }
> > +
> > setup_pager();
> >
> > err = perf_session__process_events(session);
> > @@ -4639,12 +4668,13 @@ static int trace__replay(struct trace *trace)
> > trace__fprintf_thread_summary(trace, trace->output);
> >
> > out:
> > + delete_syscall_stats(trace->syscall_stats);
> > perf_session__delete(session);
> >
> > return err;
> > }
> >
> > -static size_t trace__fprintf_threads_header(FILE *fp)
> > +static size_t trace__fprintf_summary_header(FILE *fp)
> > {
> > size_t printed;
> >
> > @@ -4667,19 +4697,19 @@ static int entry_cmp(const void *e1, const void *e2)
> > return entry1->msecs > entry2->msecs ? -1 : 1;
> > }
> >
> > -static struct syscall_entry *thread__sort_stats(struct thread_trace *ttrace)
> > +static struct syscall_entry *syscall__sort_stats(struct hashmap *syscall_stats)
> > {
> > struct syscall_entry *entry;
> > struct hashmap_entry *pos;
> > unsigned bkt, i, nr;
> >
> > - nr = ttrace->syscall_stats->sz;
> > + nr = syscall_stats->sz;
> > entry = malloc(nr * sizeof(*entry));
> > if (entry == NULL)
> > return NULL;
> >
> > i = 0;
> > - hashmap__for_each_entry(ttrace->syscall_stats, pos, bkt) {
> > + hashmap__for_each_entry(syscall_stats, pos, bkt) {
> > struct syscall_stats *ss = pos->pvalue;
> > struct stats *st = &ss->stats;
> >
> > @@ -4694,14 +4724,14 @@ static struct syscall_entry *thread__sort_stats(struct thread_trace *ttrace)
> > return entry;
> > }
> >
> > -static size_t thread__dump_stats(struct thread_trace *ttrace,
> > - struct trace *trace, FILE *fp)
> > +static size_t syscall__dump_stats(struct trace *trace, FILE *fp,
> > + struct hashmap *syscall_stats)
> > {
> > size_t printed = 0;
> > struct syscall *sc;
> > struct syscall_entry *entries;
> >
> > - entries = thread__sort_stats(ttrace);
> > + entries = syscall__sort_stats(syscall_stats);
> > if (entries == NULL)
> > return 0;
> >
> > @@ -4711,7 +4741,7 @@ static size_t thread__dump_stats(struct thread_trace *ttrace,
> > printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n");
> > printed += fprintf(fp, " --------------- -------- ------ -------- --------- --------- --------- ------\n");
> >
> > - for (size_t i = 0; i < ttrace->syscall_stats->sz; i++) {
> > + for (size_t i = 0; i < syscall_stats->sz; i++) {
> > struct syscall_entry *entry = &entries[i];
> > struct syscall_stats *stats = entry->stats;
> > if (stats) {
> > @@ -4747,6 +4777,17 @@ static size_t thread__dump_stats(struct thread_trace *ttrace,
> > return printed;
> > }
> >
> > +static size_t thread__dump_stats(struct thread_trace *ttrace,
> > + struct trace *trace, FILE *fp)
> > +{
> > + return syscall__dump_stats(trace, fp, ttrace->syscall_stats);
> > +}
> > +
> > +static size_t system__dump_stats(struct trace *trace, FILE *fp)
> > +{
> > + return syscall__dump_stats(trace, fp, trace->syscall_stats);
> > +}
>
> Perhaps consider inlining these two functions?
I think we can leave it to compiler to make decisions on inlining.
Thanks,
Namhyung
>
> > +
> > static size_t trace__fprintf_thread(FILE *fp, struct thread *thread, struct trace *trace)
> > {
> > size_t printed = 0;
> > @@ -4800,7 +4841,7 @@ static int trace_nr_events_cmp(void *priv __maybe_unused,
> >
> > static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
> > {
> > - size_t printed = trace__fprintf_threads_header(fp);
> > + size_t printed = trace__fprintf_summary_header(fp);
> > LIST_HEAD(threads);
> >
> > if (machine__thread_list(trace->host, &threads) == 0) {
> > @@ -4815,6 +4856,27 @@ static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
> > return printed;
> > }
> >
> > +static size_t trace__fprintf_system_summary(struct trace *trace, FILE *fp)
> > +{
> > + size_t printed = trace__fprintf_summary_header(fp);
> > +
> > + printed += fprintf(fp, " total, ");
> > + printed += fprintf(fp, "%lu events", trace->nr_events);
> > +
> > + if (trace->pfmaj)
> > + printed += fprintf(fp, ", %lu majfaults", trace->pfmaj);
> > + if (trace->pfmin)
> > + printed += fprintf(fp, ", %lu minfaults", trace->pfmin);
> > + if (trace->sched)
> > + printed += fprintf(fp, ", %.3f msec\n", trace->runtime_ms);
> > + else if (fputc('\n', fp) != EOF)
> > + ++printed;
> > +
> > + printed += system__dump_stats(trace, fp);
> > +
> > + return printed;
> > +}
> > +
> > static int trace__set_duration(const struct option *opt, const char *str,
> > int unset __maybe_unused)
> > {
> > @@ -5233,6 +5295,8 @@ int cmd_trace(int argc, const char **argv)
> > "Show all syscalls and summary with statistics"),
> > OPT_BOOLEAN(0, "errno-summary", &trace.errno_summary,
> > "Show errno stats per syscall, use with -s or -S"),
> > + OPT_BOOLEAN(0, "system-summary", &trace.system_summary,
> > + "Show system-wide summary instead of per-thread, use with -s or -S"),
> > OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
> > "Trace pagefaults", parse_pagefaults, "maj"),
> > OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
> > --
> > 2.48.0.rc2.279.g1de40edade-goog
> >
>
> Thanks,
> Howard
^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2025-01-23 23:41 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-01-14 21:22 [PATCH 0/4] perf trace: Add --system-summary option Namhyung Kim
2025-01-14 21:22 ` [PATCH 1/4] perf trace: Allocate syscall stats only if summary is on Namhyung Kim
2025-01-14 21:22 ` [PATCH 2/4] perf trace: Convert syscall_stats to hashmap Namhyung Kim
2025-01-18 3:03 ` Howard Chu
2025-01-23 23:37 ` Namhyung Kim
2025-01-14 21:22 ` [PATCH 3/4] perf tools: Get rid of now-unused rb_resort.h Namhyung Kim
2025-01-14 21:22 ` [PATCH 4/4] perf trace: Add --system-summary option Namhyung Kim
2025-01-18 3:07 ` Howard Chu
2025-01-23 23:41 ` Namhyung Kim
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox