[PATCH v4 1/2] perf trace: Implement syscall summary in BPF

linux-perf-users.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

* [PATCH v4 1/2] perf trace: Implement syscall summary in BPF
@ 2025-03-26  4:40 Namhyung Kim
  2025-03-26  4:40 ` [PATCH v4 2/2] perf test: Add perf trace summary test Namhyung Kim
  2025-03-29  1:46 ` [PATCH v4 1/2] perf trace: Implement syscall summary in BPF Howard Chu
  0 siblings, 2 replies; 10+ messages in thread
From: Namhyung Kim @ 2025-03-26  4:40 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo, Ian Rogers, Kan Liang
  Cc: Jiri Olsa, Adrian Hunter, Peter Zijlstra, Ingo Molnar, LKML,
	linux-perf-users, Song Liu, bpf, Howard Chu

When -s/--summary option is used, it doesn't need (augmented) arguments
of syscalls.  Let's skip the augmentation and load another small BPF
program to collect the statistics in the kernel instead of copying the
data to the ring-buffer to calculate the stats in userspace.  This will
be much more light-weight than the existing approach and remove any lost
events.

Let's add a new option --bpf-summary to control this behavior.  I cannot
make it default because there's no way to get e_machine in the BPF which
is needed for detecting different ABIs like 32-bit compat mode.

No functional changes intended except for no more LOST events. :)

  $ sudo ./perf trace -as --summary-mode=total --bpf-summary sleep 1

   Summary of events:

   total, 6194 events

     syscall            calls  errors  total       min       avg       max       stddev
                                       (msec)    (msec)    (msec)    (msec)        (%)
     --------------- --------  ------ -------- --------- --------- ---------     ------
     epoll_wait           561      0  4530.843     0.000     8.076   520.941     18.75%
     futex                693     45  4317.231     0.000     6.230   500.077     21.98%
     poll                 300      0  1040.109     0.000     3.467   120.928     17.02%
     clock_nanosleep        1      0  1000.172  1000.172  1000.172  1000.172      0.00%
     ppoll                360      0   872.386     0.001     2.423   253.275     41.91%
     epoll_pwait           14      0   384.349     0.001    27.453   380.002     98.79%
     pselect6              14      0   108.130     7.198     7.724     8.206      0.85%
     nanosleep             39      0    43.378     0.069     1.112    10.084     44.23%
     ...

Cc: Howard Chu <howardchu95@gmail.com>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
v4)
 * fix segfault on -S  (Howard)
 * correct some comments  (Howard)

v3)
 * support -S/--with-summary option too  (Howard)
 * make it work only with -a/--all-cpus  (Howard)
 * fix stddev calculation  (Howard)
 * add some comments about syscall_data  (Howard)

v2)
 * Rebased on top of Ian's e_machine changes
 * add --bpf-summary option
 * support per-thread summary
 * add stddev calculation  (Howard)

 tools/perf/Documentation/perf-trace.txt       |   6 +
 tools/perf/Makefile.perf                      |   2 +-
 tools/perf/builtin-trace.c                    |  54 ++-
 tools/perf/util/Build                         |   1 +
 tools/perf/util/bpf-trace-summary.c           | 347 ++++++++++++++++++
 .../perf/util/bpf_skel/syscall_summary.bpf.c  | 118 ++++++
 tools/perf/util/bpf_skel/syscall_summary.h    |  25 ++
 tools/perf/util/trace.h                       |  37 ++
 8 files changed, 577 insertions(+), 13 deletions(-)
 create mode 100644 tools/perf/util/bpf-trace-summary.c
 create mode 100644 tools/perf/util/bpf_skel/syscall_summary.bpf.c
 create mode 100644 tools/perf/util/bpf_skel/syscall_summary.h
 create mode 100644 tools/perf/util/trace.h

diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt
index 887dc37773d0f4d6..a8a0d8c33438fef7 100644
--- a/tools/perf/Documentation/perf-trace.txt
+++ b/tools/perf/Documentation/perf-trace.txt
@@ -251,6 +251,12 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs.
 	pretty-printing serves as a fallback to hand-crafted pretty printers, as the latter can
 	better pretty-print integer flags and struct pointers.
 
+--bpf-summary::
+	Collect system call statistics in BPF.  This is only for live mode and
+	works well with -s/--summary option where no argument information is
+	required.
+
+
 PAGEFAULTS
 ----------
 
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index d335151736eda370..4c5d093542409f88 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -1216,7 +1216,7 @@ SKELETONS += $(SKEL_OUT)/bperf_leader.skel.h $(SKEL_OUT)/bperf_follower.skel.h
 SKELETONS += $(SKEL_OUT)/bperf_cgroup.skel.h $(SKEL_OUT)/func_latency.skel.h
 SKELETONS += $(SKEL_OUT)/off_cpu.skel.h $(SKEL_OUT)/lock_contention.skel.h
 SKELETONS += $(SKEL_OUT)/kwork_trace.skel.h $(SKEL_OUT)/sample_filter.skel.h
-SKELETONS += $(SKEL_OUT)/kwork_top.skel.h
+SKELETONS += $(SKEL_OUT)/kwork_top.skel.h $(SKEL_OUT)/syscall_summary.skel.h
 SKELETONS += $(SKEL_OUT)/bench_uprobe.skel.h
 SKELETONS += $(SKEL_OUT)/augmented_raw_syscalls.skel.h
 
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index b9bdab52f5801c3a..3d0c0076884d34cb 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -55,6 +55,7 @@
 #include "util/thread_map.h"
 #include "util/stat.h"
 #include "util/tool.h"
+#include "util/trace.h"
 #include "util/util.h"
 #include "trace/beauty/beauty.h"
 #include "trace-event.h"
@@ -141,12 +142,6 @@ struct syscall_fmt {
 	bool	   hexret;
 };
 
-enum summary_mode {
-	SUMMARY__NONE = 0,
-	SUMMARY__BY_TOTAL,
-	SUMMARY__BY_THREAD,
-};
-
 struct trace {
 	struct perf_tool	tool;
 	struct {
@@ -205,7 +200,7 @@ struct trace {
 	} stats;
 	unsigned int		max_stack;
 	unsigned int		min_stack;
-	enum summary_mode	summary_mode;
+	enum trace_summary_mode	summary_mode;
 	int			raw_augmented_syscalls_args_size;
 	bool			raw_augmented_syscalls;
 	bool			fd_path_disabled;
@@ -234,6 +229,7 @@ struct trace {
 	bool			force;
 	bool			vfs_getname;
 	bool			force_btf;
+	bool			summary_bpf;
 	int			trace_pgfaults;
 	char			*perfconfig_events;
 	struct {
@@ -2608,6 +2604,9 @@ static void thread__update_stats(struct thread *thread, struct thread_trace *ttr
 	struct syscall_stats *stats = NULL;
 	u64 duration = 0;
 
+	if (trace->summary_bpf)
+		return;
+
 	if (trace->summary_mode == SUMMARY__BY_TOTAL)
 		syscall_stats = trace->syscall_stats;
 
@@ -4371,6 +4370,14 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
 
 	trace->live = true;
 
+	if (trace->summary_bpf) {
+		if (trace_prepare_bpf_summary(trace->summary_mode) < 0)
+			goto out_delete_evlist;
+
+		if (trace->summary_only)
+			goto create_maps;
+	}
+
 	if (!trace->raw_augmented_syscalls) {
 		if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
 			goto out_error_raw_syscalls;
@@ -4429,6 +4436,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
 	if (trace->cgroup)
 		evlist__set_default_cgroup(trace->evlist, trace->cgroup);
 
+create_maps:
 	err = evlist__create_maps(evlist, &trace->opts.target);
 	if (err < 0) {
 		fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
@@ -4441,7 +4449,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
 		goto out_delete_evlist;
 	}
 
-	if (trace->summary_mode == SUMMARY__BY_TOTAL) {
+	if (trace->summary_mode == SUMMARY__BY_TOTAL && !trace->summary_bpf) {
 		trace->syscall_stats = alloc_syscall_stats();
 		if (trace->syscall_stats == NULL)
 			goto out_delete_evlist;
@@ -4529,9 +4537,11 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
 	if (err < 0)
 		goto out_error_apply_filters;
 
-	err = evlist__mmap(evlist, trace->opts.mmap_pages);
-	if (err < 0)
-		goto out_error_mmap;
+	if (!trace->summary_only || !trace->summary_bpf) {
+		err = evlist__mmap(evlist, trace->opts.mmap_pages);
+		if (err < 0)
+			goto out_error_mmap;
+	}
 
 	if (!target__none(&trace->opts.target) && !trace->opts.target.initial_delay)
 		evlist__enable(evlist);
@@ -4544,6 +4554,9 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
 		evlist__enable(evlist);
 	}
 
+	if (trace->summary_bpf)
+		trace_start_bpf_summary();
+
 	trace->multiple_threads = perf_thread_map__pid(evlist->core.threads, 0) == -1 ||
 		perf_thread_map__nr(evlist->core.threads) > 1 ||
 		evlist__first(evlist)->core.attr.inherit;
@@ -4611,12 +4624,17 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
 
 	evlist__disable(evlist);
 
+	if (trace->summary_bpf)
+		trace_end_bpf_summary();
+
 	if (trace->sort_events)
 		ordered_events__flush(&trace->oe.data, OE_FLUSH__FINAL);
 
 	if (!err) {
 		if (trace->summary) {
-			if (trace->summary_mode == SUMMARY__BY_TOTAL)
+			if (trace->summary_bpf)
+				trace_print_bpf_summary(trace->output);
+			else if (trace->summary_mode == SUMMARY__BY_TOTAL)
 				trace__fprintf_total_summary(trace, trace->output);
 			else
 				trace__fprintf_thread_summary(trace, trace->output);
@@ -4632,6 +4650,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
 	}
 
 out_delete_evlist:
+	trace_cleanup_bpf_summary();
 	delete_syscall_stats(trace->syscall_stats);
 	trace__symbols__exit(trace);
 	evlist__free_syscall_tp_fields(evlist);
@@ -5467,6 +5486,7 @@ int cmd_trace(int argc, const char **argv)
 		     "start"),
 	OPT_BOOLEAN(0, "force-btf", &trace.force_btf, "Prefer btf_dump general pretty printer"
 		       "to customized ones"),
+	OPT_BOOLEAN(0, "bpf-summary", &trace.summary_bpf, "Summary syscall stats in BPF"),
 	OPTS_EVSWITCH(&trace.evswitch),
 	OPT_END()
 	};
@@ -5558,6 +5578,16 @@ int cmd_trace(int argc, const char **argv)
 		goto skip_augmentation;
 	}
 
+	if (trace.summary_bpf) {
+		if (!trace.opts.target.system_wide) {
+			/* TODO: Add filters in the BPF to support other targets. */
+			pr_err("Error: --bpf-summary only works for system-wide mode.\n");
+			goto out;
+		}
+		if (trace.summary_only)
+			goto skip_augmentation;
+	}
+
 	trace.skel = augmented_raw_syscalls_bpf__open();
 	if (!trace.skel) {
 		pr_debug("Failed to open augmented syscalls BPF skeleton");
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 946bce6628f37eb6..4311cf154d05304c 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -171,6 +171,7 @@ perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf_off_cpu.o
 perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf-filter.o
 perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf-filter-flex.o
 perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf-filter-bison.o
+perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf-trace-summary.o
 perf-util-$(CONFIG_PERF_BPF_SKEL) += btf.o
 
 ifeq ($(CONFIG_LIBTRACEEVENT),y)
diff --git a/tools/perf/util/bpf-trace-summary.c b/tools/perf/util/bpf-trace-summary.c
new file mode 100644
index 0000000000000000..114d8d9ed9b2d3f3
--- /dev/null
+++ b/tools/perf/util/bpf-trace-summary.c
@@ -0,0 +1,347 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <inttypes.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "dwarf-regs.h" /* for EM_HOST */
+#include "syscalltbl.h"
+#include "util/hashmap.h"
+#include "util/trace.h"
+#include "util/util.h"
+#include <bpf/bpf.h>
+#include <linux/time64.h>
+#include <tools/libc_compat.h> /* reallocarray */
+
+#include "bpf_skel/syscall_summary.h"
+#include "bpf_skel/syscall_summary.skel.h"
+
+
+static struct syscall_summary_bpf *skel;
+
+int trace_prepare_bpf_summary(enum trace_summary_mode mode)
+{
+	skel = syscall_summary_bpf__open();
+	if (skel == NULL) {
+		fprintf(stderr, "failed to open syscall summary bpf skeleton\n");
+		return -1;
+	}
+
+	if (mode == SUMMARY__BY_THREAD)
+		skel->rodata->aggr_mode = SYSCALL_AGGR_THREAD;
+	else
+		skel->rodata->aggr_mode = SYSCALL_AGGR_CPU;
+
+	if (syscall_summary_bpf__load(skel) < 0) {
+		fprintf(stderr, "failed to load syscall summary bpf skeleton\n");
+		return -1;
+	}
+
+	if (syscall_summary_bpf__attach(skel) < 0) {
+		fprintf(stderr, "failed to attach syscall summary bpf skeleton\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+void trace_start_bpf_summary(void)
+{
+	skel->bss->enabled = 1;
+}
+
+void trace_end_bpf_summary(void)
+{
+	skel->bss->enabled = 0;
+}
+
+struct syscall_node {
+	int syscall_nr;
+	struct syscall_stats stats;
+};
+
+static double rel_stddev(struct syscall_stats *stat)
+{
+	double variance, total = (double)stat->total_time;
+
+	if (stat->count < 2 || total == 0)
+		return 0;
+
+	/* stay in double: u64 total_time^2 wraps once the total passes ~4.29 sec */
+	variance = (double)stat->squared_sum - total * total / stat->count;
+	variance /= stat->count - 1;
+	if (variance <= 0) /* cancellation noise, sqrt() would give NaN */
+		return 0;
+	/* relative standard error of the mean, in percent */
+	return 100 * sqrt(variance / stat->count) / (total / stat->count);
+}
+
+/*
+ * The syscall_data is to maintain syscall stats ordered by total time.
+ * It supports different summary modes like per-thread or global.
+ *
+ * For per-thread stats, it uses a two-level data structure -
+ * syscall_data is keyed by TID and has an array of nodes, each of
+ * which represents one syscall for the thread.
+ *
+ * For global stats, it's still two-level technically but we don't need
+ * per-cpu analysis so it's keyed by the syscall number to combine stats
+ * from different CPUs.  Each syscall_data then has exactly one
+ * syscall_node, so it effectively works as a flat hierarchy.
+ */
+struct syscall_data {
+	int key; /* tid if AGGR_THREAD, syscall-nr if AGGR_CPU */
+	int nr_events;
+	int nr_nodes;
+	u64 total_time;
+	struct syscall_node *nodes;
+};
+
+static int datacmp(const void *a, const void *b)
+{
+	u64 ta = (*(const struct syscall_data * const *)a)->total_time;
+	u64 tb = (*(const struct syscall_data * const *)b)->total_time;
+
+	return (ta < tb) - (ta > tb); /* descending, 0 on ties as qsort() requires */
+}
+
+static int nodecmp(const void *a, const void *b)
+{
+	u64 ta = ((const struct syscall_node *)a)->stats.total_time;
+	u64 tb = ((const struct syscall_node *)b)->stats.total_time;
+
+	return (ta < tb) - (ta > tb); /* descending, 0 on ties as qsort() requires */
+}
+
+static size_t sc_node_hash(long key, void *ctx __maybe_unused)
+{
+	return key;
+}
+
+static bool sc_node_equal(long key1, long key2, void *ctx __maybe_unused)
+{
+	return key1 == key2;
+}
+
+static int print_common_stats(struct syscall_data *data, FILE *fp)
+{
+	int printed = 0;
+
+	for (int i = 0; i < data->nr_nodes; i++) {
+		struct syscall_node *node = &data->nodes[i];
+		struct syscall_stats *stat = &node->stats;
+		double total = (double)(stat->total_time) / NSEC_PER_MSEC;
+		double min = (double)(stat->min_time) / NSEC_PER_MSEC;
+		double max = (double)(stat->max_time) / NSEC_PER_MSEC;
+		double avg = total / stat->count;
+		const char *name;
+
+		/* TODO: support other ABIs */
+		name = syscalltbl__name(EM_HOST, node->syscall_nr);
+		if (name)
+			printed += fprintf(fp, "   %-15s", name);
+		else
+			printed += fprintf(fp, "   syscall:%-7d", node->syscall_nr);
+
+		printed += fprintf(fp, " %8u %6u %9.3f %9.3f %9.3f %9.3f %9.2f%%\n",
+				   stat->count, stat->error, total, min, avg, max,
+				   rel_stddev(stat));
+	}
+	return printed;
+}
+
+static int update_thread_stats(struct hashmap *hash, struct syscall_key *map_key,
+			       struct syscall_stats *map_data)
+{
+	struct syscall_data *data;
+	struct syscall_node *nodes;
+
+	if (!hashmap__find(hash, map_key->cpu_or_tid, &data)) {
+		data = zalloc(sizeof(*data));
+		if (data == NULL)
+			return -ENOMEM;
+
+		data->key = map_key->cpu_or_tid;
+		if (hashmap__add(hash, data->key, data) < 0) {
+			free(data);
+			return -ENOMEM;
+		}
+	}
+
+	/* update thread total stats */
+	data->nr_events += map_data->count;
+	data->total_time += map_data->total_time;
+
+	nodes = reallocarray(data->nodes, data->nr_nodes + 1, sizeof(*nodes));
+	if (nodes == NULL)
+		return -ENOMEM;
+
+	data->nodes = nodes;
+	nodes = &data->nodes[data->nr_nodes++];
+	nodes->syscall_nr = map_key->nr;
+
+	/* each thread has an entry for each syscall, just use the stat */
+	memcpy(&nodes->stats, map_data, sizeof(*map_data));
+	return 0;
+}
+
+static int print_thread_stat(struct syscall_data *data, FILE *fp)
+{
+	int printed = 0;
+
+	qsort(data->nodes, data->nr_nodes, sizeof(*data->nodes), nodecmp);
+
+	printed += fprintf(fp, " thread (%d), ", data->key);
+	printed += fprintf(fp, "%d events\n\n", data->nr_events);
+
+	printed += fprintf(fp, "   syscall            calls  errors  total       min       avg       max       stddev\n");
+	printed += fprintf(fp, "                                     (msec)    (msec)    (msec)    (msec)        (%%)\n");
+	printed += fprintf(fp, "   --------------- --------  ------ -------- --------- --------- ---------     ------\n");
+
+	printed += print_common_stats(data, fp);
+	printed += fprintf(fp, "\n\n");
+
+	return printed;
+}
+
+static int print_thread_stats(struct syscall_data **data, int nr_data, FILE *fp)
+{
+	int printed = 0;
+
+	for (int i = 0; i < nr_data; i++)
+		printed += print_thread_stat(data[i], fp);
+
+	return printed;
+}
+
+static int update_total_stats(struct hashmap *hash, struct syscall_key *map_key,
+			      struct syscall_stats *map_data)
+{
+	struct syscall_data *data;
+	struct syscall_stats *stat;
+
+	if (!hashmap__find(hash, map_key->nr, &data)) {
+		data = zalloc(sizeof(*data));
+		if (data == NULL)
+			return -ENOMEM;
+
+		data->nodes = zalloc(sizeof(*data->nodes));
+		if (data->nodes == NULL) {
+			free(data);
+			return -ENOMEM;
+		}
+
+		data->nr_nodes = 1;
+		data->key = map_key->nr;
+		data->nodes->syscall_nr = data->key;
+
+		if (hashmap__add(hash, data->key, data) < 0) {
+			free(data->nodes);
+			free(data);
+			return -ENOMEM;
+		}
+	}
+
+	/* update total stats for this syscall */
+	data->nr_events += map_data->count;
+	data->total_time += map_data->total_time;
+
+	/* This is sum of the same syscall from different CPUs */
+	stat = &data->nodes->stats;
+
+	stat->total_time += map_data->total_time;
+	stat->squared_sum += map_data->squared_sum;
+	stat->count += map_data->count;
+	stat->error += map_data->error;
+
+	if (stat->max_time < map_data->max_time)
+		stat->max_time = map_data->max_time;
+	if (stat->min_time > map_data->min_time || stat->min_time == 0)
+		stat->min_time = map_data->min_time;
+
+	return 0;
+}
+
+static int print_total_stats(struct syscall_data **data, int nr_data, FILE *fp)
+{
+	int printed = 0;
+	int nr_events = 0;
+
+	for (int i = 0; i < nr_data; i++)
+		nr_events += data[i]->nr_events;
+
+	printed += fprintf(fp, " total, %d events\n\n", nr_events);
+
+	printed += fprintf(fp, "   syscall            calls  errors  total       min       avg       max       stddev\n");
+	printed += fprintf(fp, "                                     (msec)    (msec)    (msec)    (msec)        (%%)\n");
+	printed += fprintf(fp, "   --------------- --------  ------ -------- --------- --------- ---------     ------\n");
+
+	for (int i = 0; i < nr_data; i++)
+		printed += print_common_stats(data[i], fp);
+
+	printed += fprintf(fp, "\n\n");
+	return printed;
+}
+
+/* Read the BPF stats map, aggregate per thread/syscall and print it to @fp. */
+int trace_print_bpf_summary(FILE *fp)
+{
+	struct bpf_map *map = skel->maps.syscall_stats_map;
+	struct syscall_key *prev_key, key;
+	struct syscall_data **data = NULL;
+	struct hashmap schash;
+	struct hashmap_entry *entry;
+	int nr_data, printed, i = 0;
+	size_t bkt;
+
+	hashmap__init(&schash, sc_node_hash, sc_node_equal, /*ctx=*/NULL);
+
+	printed = fprintf(fp, "\n Summary of events:\n\n");
+
+	/* get stats from the bpf map */
+	prev_key = NULL;
+	while (!bpf_map__get_next_key(map, prev_key, &key, sizeof(key))) {
+		struct syscall_stats stat;
+
+		if (!bpf_map__lookup_elem(map, &key, sizeof(key), &stat, sizeof(stat), 0)) {
+			if (skel->rodata->aggr_mode == SYSCALL_AGGR_THREAD)
+				update_thread_stats(&schash, &key, &stat);
+			else
+				update_total_stats(&schash, &key, &stat);
+		}
+
+		prev_key = &key;
+	}
+
+	nr_data = hashmap__size(&schash);
+	data = calloc(nr_data, sizeof(*data));
+	if (data == NULL)
+		goto out;
+
+	hashmap__for_each_entry(&schash, entry, bkt)
+		data[i++] = entry->pvalue;
+
+	qsort(data, nr_data, sizeof(*data), datacmp);
+
+	if (skel->rodata->aggr_mode == SYSCALL_AGGR_THREAD)
+		printed += print_thread_stats(data, nr_data, fp);
+	else
+		printed += print_total_stats(data, nr_data, fp);
+
+out:
+	/* release through the hash so nothing leaks when calloc() failed */
+	hashmap__for_each_entry(&schash, entry, bkt) {
+		struct syscall_data *sd = entry->pvalue;
+
+		free(sd->nodes);
+		free(sd);
+	}
+	free(data);
+	hashmap__clear(&schash);
+	return printed;
+}
+
+void trace_cleanup_bpf_summary(void)
+{
+	syscall_summary_bpf__destroy(skel);
+}
diff --git a/tools/perf/util/bpf_skel/syscall_summary.bpf.c b/tools/perf/util/bpf_skel/syscall_summary.bpf.c
new file mode 100644
index 0000000000000000..b25f53b3c1351392
--- /dev/null
+++ b/tools/perf/util/bpf_skel/syscall_summary.bpf.c
@@ -0,0 +1,118 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Trace raw_syscalls tracepoints to collect system call statistics.
+ */
+
+#include "vmlinux.h"
+#include "syscall_summary.h"
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+/* This is to calculate a delta between sys-enter and sys-exit for each thread */
+struct syscall_trace {
+	int nr; /* syscall number is only available at sys-enter */
+	int unused;
+	u64 timestamp;
+};
+
+#define MAX_ENTRIES	(128 * 1024)
+
+struct syscall_trace_map {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__type(key, int); /* tid */
+	__type(value, struct syscall_trace);
+	__uint(max_entries, MAX_ENTRIES);
+} syscall_trace_map SEC(".maps");
+
+struct syscall_stats_map {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__type(key, struct syscall_key);
+	__type(value, struct syscall_stats);
+	__uint(max_entries, MAX_ENTRIES);
+} syscall_stats_map SEC(".maps");
+
+int enabled; /* controlled from userspace */
+
+const volatile enum syscall_aggr_mode aggr_mode;
+
+static void update_stats(int cpu_or_tid, int nr, s64 duration, long ret)
+{
+	struct syscall_key key = { .cpu_or_tid = cpu_or_tid, .nr = nr, };
+	struct syscall_stats *stats;
+
+	stats = bpf_map_lookup_elem(&syscall_stats_map, &key);
+	if (stats == NULL) {
+		struct syscall_stats zero = {};
+
+		bpf_map_update_elem(&syscall_stats_map, &key, &zero, BPF_NOEXIST);
+		stats = bpf_map_lookup_elem(&syscall_stats_map, &key);
+		if (stats == NULL)
+			return;
+	}
+
+	__sync_fetch_and_add(&stats->count, 1);
+	if (ret < 0)
+		__sync_fetch_and_add(&stats->error, 1);
+
+	if (duration > 0) {
+		__sync_fetch_and_add(&stats->total_time, duration);
+		__sync_fetch_and_add(&stats->squared_sum, duration * duration);
+		if (stats->max_time < duration)
+			stats->max_time = duration;
+		if (stats->min_time > duration || stats->min_time == 0)
+			stats->min_time = duration;
+	}
+
+	return;
+}
+
+SEC("tp_btf/sys_enter")
+int sys_enter(u64 *ctx)
+{
+	int tid;
+	struct syscall_trace st;
+
+	if (!enabled)
+		return 0;
+
+	st.nr = ctx[1]; /* syscall number */
+	st.unused = 0;
+	st.timestamp = bpf_ktime_get_ns();
+
+	tid = bpf_get_current_pid_tgid();
+	bpf_map_update_elem(&syscall_trace_map, &tid, &st, BPF_ANY);
+
+	return 0;
+}
+
+SEC("tp_btf/sys_exit")
+int sys_exit(u64 *ctx)
+{
+	int tid;
+	int key;
+	long ret = ctx[1]; /* return value of the syscall */
+	struct syscall_trace *st;
+	s64 delta;
+
+	if (!enabled)
+		return 0;
+
+	tid = bpf_get_current_pid_tgid();
+	st = bpf_map_lookup_elem(&syscall_trace_map, &tid);
+	if (st == NULL)
+		return 0;
+
+	if (aggr_mode == SYSCALL_AGGR_THREAD)
+		key = tid;
+	else
+		key = bpf_get_smp_processor_id();
+
+	delta = bpf_ktime_get_ns() - st->timestamp;
+	update_stats(key, st->nr, delta, ret);
+
+	bpf_map_delete_elem(&syscall_trace_map, &tid);
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/perf/util/bpf_skel/syscall_summary.h b/tools/perf/util/bpf_skel/syscall_summary.h
new file mode 100644
index 0000000000000000..17f9ecba657088aa
--- /dev/null
+++ b/tools/perf/util/bpf_skel/syscall_summary.h
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Data structures shared between BPF and tools. */
+#ifndef UTIL_BPF_SKEL_SYSCALL_SUMMARY_H
+#define UTIL_BPF_SKEL_SYSCALL_SUMMARY_H
+
+enum syscall_aggr_mode {
+	SYSCALL_AGGR_THREAD,
+	SYSCALL_AGGR_CPU,
+};
+
+struct syscall_key {
+	int cpu_or_tid;
+	int nr;
+};
+
+struct syscall_stats {
+	u64 total_time;
+	u64 squared_sum;
+	u64 max_time;
+	u64 min_time;
+	u32 count;
+	u32 error;
+};
+
+#endif /* UTIL_BPF_SKEL_SYSCALL_SUMMARY_H */
diff --git a/tools/perf/util/trace.h b/tools/perf/util/trace.h
new file mode 100644
index 0000000000000000..ef8361ed12c4edc1
--- /dev/null
+++ b/tools/perf/util/trace.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef UTIL_TRACE_H
+#define UTIL_TRACE_H
+
+#include <stdio.h>  /* for FILE */
+
+enum trace_summary_mode {
+	SUMMARY__NONE = 0,
+	SUMMARY__BY_TOTAL,
+	SUMMARY__BY_THREAD,
+};
+
+#ifdef HAVE_BPF_SKEL
+
+int trace_prepare_bpf_summary(enum trace_summary_mode mode);
+void trace_start_bpf_summary(void);
+void trace_end_bpf_summary(void);
+int trace_print_bpf_summary(FILE *fp);
+void trace_cleanup_bpf_summary(void);
+
+#else /* !HAVE_BPF_SKEL */
+
+static inline int trace_prepare_bpf_summary(enum trace_summary_mode mode __maybe_unused)
+{
+	return -1;
+}
+static inline void trace_start_bpf_summary(void) {}
+static inline void trace_end_bpf_summary(void) {}
+static inline int trace_print_bpf_summary(FILE *fp __maybe_unused)
+{
+	return 0;
+}
+static inline void trace_cleanup_bpf_summary(void) {}
+
+#endif /* HAVE_BPF_SKEL */
+
+#endif /* UTIL_TRACE_H */
-- 
2.49.0.395.g12beb8f557-goog


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH v4 2/2] perf test: Add perf trace summary test
  2025-03-26  4:40 [PATCH v4 1/2] perf trace: Implement syscall summary in BPF Namhyung Kim
@ 2025-03-26  4:40 ` Namhyung Kim
  2025-03-29  1:48   ` Howard Chu
  2025-03-29  1:46 ` [PATCH v4 1/2] perf trace: Implement syscall summary in BPF Howard Chu
  1 sibling, 1 reply; 10+ messages in thread
From: Namhyung Kim @ 2025-03-26  4:40 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo, Ian Rogers, Kan Liang
  Cc: Jiri Olsa, Adrian Hunter, Peter Zijlstra, Ingo Molnar, LKML,
	linux-perf-users, Song Liu, bpf, Howard Chu

  $ sudo ./perf test -vv 'trace summary'
  109: perf trace summary:
  --- start ---
  test child forked, pid 3501572
  testing: perf trace -s -- true
  testing: perf trace -S -- true
  testing: perf trace -s --summary-mode=thread -- true
  testing: perf trace -S --summary-mode=total -- true
  testing: perf trace -as --summary-mode=thread --no-bpf-summary -- true
  testing: perf trace -as --summary-mode=total --no-bpf-summary -- true
  testing: perf trace -as --summary-mode=thread --bpf-summary -- true
  testing: perf trace -as --summary-mode=total --bpf-summary -- true
  testing: perf trace -aS --summary-mode=total --bpf-summary -- true
  ---- end(0) ----
  109: perf trace summary                                              : Ok

Cc: Howard Chu <howardchu95@gmail.com>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/perf/tests/shell/trace_summary.sh | 65 +++++++++++++++++++++++++
 1 file changed, 65 insertions(+)
 create mode 100755 tools/perf/tests/shell/trace_summary.sh

diff --git a/tools/perf/tests/shell/trace_summary.sh b/tools/perf/tests/shell/trace_summary.sh
new file mode 100755
index 0000000000000000..4d98cb212dd9de0b
--- /dev/null
+++ b/tools/perf/tests/shell/trace_summary.sh
@@ -0,0 +1,65 @@
+#!/bin/sh
+# perf trace summary
+# SPDX-License-Identifier: GPL-2.0
+
+# Check that perf trace works with various summary modes
+
+# shellcheck source=lib/probe.sh
+. "$(dirname $0)"/lib/probe.sh
+
+skip_if_no_perf_trace || exit 2
+[ "$(id -u)" = 0 ] || exit 2
+
+OUTPUT=$(mktemp /tmp/perf_trace_test.XXXXX)
+
+test_perf_trace() {
+    args=$1
+    workload="true"
+    search="^\s*(open|read|close).*[0-9]+%$"
+
+    echo "testing: perf trace ${args} -- ${workload}"
+    perf trace ${args} -- ${workload} >${OUTPUT} 2>&1
+    if [ $? -ne 0 ]; then
+        echo "Error: perf trace ${args} failed unexpectedly"
+        cat ${OUTPUT}
+        rm -f ${OUTPUT}
+        exit 1
+    fi
+
+    count=$(grep -E -c -m 3 "${search}" ${OUTPUT})
+    if [ "${count}" != "3" ]; then
+	echo "Error: cannot find enough pattern ${search} in the output"
+	cat ${OUTPUT}
+	rm -f ${OUTPUT}
+	exit 1
+    fi
+}
+
+# summary only for a process
+test_perf_trace "-s"
+
+# normal output with summary at the end
+test_perf_trace "-S"
+
+# summary only with an explicit summary mode
+test_perf_trace "-s --summary-mode=thread"
+
+# summary with normal output - total summary mode
+test_perf_trace "-S --summary-mode=total"
+
+# summary only for system wide - per-thread summary
+test_perf_trace "-as --summary-mode=thread --no-bpf-summary"
+
+# summary only for system wide - total summary mode
+test_perf_trace "-as --summary-mode=total --no-bpf-summary"
+
+# summary only for system wide - per-thread summary with BPF
+test_perf_trace "-as --summary-mode=thread --bpf-summary"
+
+# summary only for system wide - total summary mode with BPF
+test_perf_trace "-as --summary-mode=total --bpf-summary"
+
+# summary with normal output for system wide - total summary mode with BPF
+test_perf_trace "-aS --summary-mode=total --bpf-summary"
+
+rm -f ${OUTPUT}
-- 
2.49.0.395.g12beb8f557-goog


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH v4 1/2] perf trace: Implement syscall summary in BPF
  2025-03-26  4:40 [PATCH v4 1/2] perf trace: Implement syscall summary in BPF Namhyung Kim
  2025-03-26  4:40 ` [PATCH v4 2/2] perf test: Add perf trace summary test Namhyung Kim
@ 2025-03-29  1:46 ` Howard Chu
  2025-04-23 16:19   ` Arnaldo Carvalho de Melo
  2025-04-23 16:26   ` Arnaldo Carvalho de Melo
  1 sibling, 2 replies; 10+ messages in thread
From: Howard Chu @ 2025-03-29  1:46 UTC (permalink / raw)
  To: Namhyung Kim
  Cc: Arnaldo Carvalho de Melo, Ian Rogers, Kan Liang, Jiri Olsa,
	Adrian Hunter, Peter Zijlstra, Ingo Molnar, LKML,
	linux-perf-users, Song Liu, bpf

Hello Namhyung,

On Tue, Mar 25, 2025 at 9:40 PM Namhyung Kim <namhyung@kernel.org> wrote:
>
> When -s/--summary option is used, it doesn't need (augmented) arguments
> of syscalls.  Let's skip the augmentation and load another small BPF
> program to collect the statistics in the kernel instead of copying the
> data to the ring-buffer to calculate the stats in userspace.  This will
> be much more light-weight than the existing approach and remove any lost
> events.
>
> Let's add a new option --bpf-summary to control this behavior.  I cannot
> make it default because there's no way to get e_machine in the BPF which
> is needed for detecting different ABIs like 32-bit compat mode.
>
> No functional changes intended except for no more LOST events. :)
>
>   $ sudo ./perf trace -as --summary-mode=total --bpf-summary sleep 1
>
>    Summary of events:
>
>    total, 6194 events
>
>      syscall            calls  errors  total       min       avg       max       stddev
>                                        (msec)    (msec)    (msec)    (msec)        (%)
>      --------------- --------  ------ -------- --------- --------- ---------     ------
>      epoll_wait           561      0  4530.843     0.000     8.076   520.941     18.75%
>      futex                693     45  4317.231     0.000     6.230   500.077     21.98%
>      poll                 300      0  1040.109     0.000     3.467   120.928     17.02%
>      clock_nanosleep        1      0  1000.172  1000.172  1000.172  1000.172      0.00%
>      ppoll                360      0   872.386     0.001     2.423   253.275     41.91%
>      epoll_pwait           14      0   384.349     0.001    27.453   380.002     98.79%
>      pselect6              14      0   108.130     7.198     7.724     8.206      0.85%
>      nanosleep             39      0    43.378     0.069     1.112    10.084     44.23%
>      ...
>
> Cc: Howard Chu <howardchu95@gmail.com>
> Signed-off-by: Namhyung Kim <namhyung@kernel.org>
> ---
> v4)
>  * fix segfault on -S  (Howard)
>  * correct some comments  (Howard)

+ if (!hashmap__find(hash, map_key->nr, &data)) {

I think you should mention the hashmap's map_key->nr update, as this
change is actually important for the feature.

>
> v3)
>  * support -S/--with-summary option too  (Howard)
>  * make it work only with -a/--all-cpus  (Howard)
>  * fix stddev calculation  (Howard)
>  * add some comments about syscall_data  (Howard)
>
> v2)
>  * Rebased on top of Ian's e_machine changes
>  * add --bpf-summary option
>  * support per-thread summary
>  * add stddev calculation  (Howard)
>
>  tools/perf/Documentation/perf-trace.txt       |   6 +
>  tools/perf/Makefile.perf                      |   2 +-
>  tools/perf/builtin-trace.c                    |  54 ++-
>  tools/perf/util/Build                         |   1 +
>  tools/perf/util/bpf-trace-summary.c           | 347 ++++++++++++++++++
>  .../perf/util/bpf_skel/syscall_summary.bpf.c  | 118 ++++++
>  tools/perf/util/bpf_skel/syscall_summary.h    |  25 ++
>  tools/perf/util/trace.h                       |  37 ++
>  8 files changed, 577 insertions(+), 13 deletions(-)
>  create mode 100644 tools/perf/util/bpf-trace-summary.c
>  create mode 100644 tools/perf/util/bpf_skel/syscall_summary.bpf.c
>  create mode 100644 tools/perf/util/bpf_skel/syscall_summary.h
>  create mode 100644 tools/perf/util/trace.h
>
> diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt
> index 887dc37773d0f4d6..a8a0d8c33438fef7 100644
> --- a/tools/perf/Documentation/perf-trace.txt
> +++ b/tools/perf/Documentation/perf-trace.txt
> @@ -251,6 +251,12 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs.
>         pretty-printing serves as a fallback to hand-crafted pretty printers, as the latter can
>         better pretty-print integer flags and struct pointers.
>
> +--bpf-summary::
> +       Collect system call statistics in BPF.  This is only for live mode and
> +       works well with -s/--summary option where no argument information is
> +       required.

It works with -S as well, doesn't it?

Anyway, I don't mind adding these details later on, so

Reviewed-by: Howard Chu <howardchu95@gmail.com>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v4 2/2] perf test: Add perf trace summary test
  2025-03-26  4:40 ` [PATCH v4 2/2] perf test: Add perf trace summary test Namhyung Kim
@ 2025-03-29  1:48   ` Howard Chu
  0 siblings, 0 replies; 10+ messages in thread
From: Howard Chu @ 2025-03-29  1:48 UTC (permalink / raw)
  To: Namhyung Kim
  Cc: Arnaldo Carvalho de Melo, Ian Rogers, Kan Liang, Jiri Olsa,
	Adrian Hunter, Peter Zijlstra, Ingo Molnar, LKML,
	linux-perf-users, Song Liu, bpf

Hello,

On Tue, Mar 25, 2025 at 9:40 PM Namhyung Kim <namhyung@kernel.org> wrote:
>
>   $ sudo ./perf test -vv 'trace summary'
>   109: perf trace summary:
>   --- start ---
>   test child forked, pid 3501572
>   testing: perf trace -s -- true
>   testing: perf trace -S -- true
>   testing: perf trace -s --summary-mode=thread -- true
>   testing: perf trace -S --summary-mode=total -- true
>   testing: perf trace -as --summary-mode=thread --no-bpf-summary -- true
>   testing: perf trace -as --summary-mode=total --no-bpf-summary -- true
>   testing: perf trace -as --summary-mode=thread --bpf-summary -- true
>   testing: perf trace -as --summary-mode=total --bpf-summary -- true
>   testing: perf trace -aS --summary-mode=total --bpf-summary -- true
>   ---- end(0) ----
>   109: perf trace summary                                              : Ok
>
> Cc: Howard Chu <howardchu95@gmail.com>
> Signed-off-by: Namhyung Kim <namhyung@kernel.org>
> ---
>  tools/perf/tests/shell/trace_summary.sh | 65 +++++++++++++++++++++++++
>  1 file changed, 65 insertions(+)
>  create mode 100755 tools/perf/tests/shell/trace_summary.sh
>
> diff --git a/tools/perf/tests/shell/trace_summary.sh b/tools/perf/tests/shell/trace_summary.sh
> new file mode 100755
> index 0000000000000000..4d98cb212dd9de0b
> --- /dev/null
> +++ b/tools/perf/tests/shell/trace_summary.sh
> @@ -0,0 +1,65 @@
> +#!/bin/sh
> +# perf trace summary
> +# SPDX-License-Identifier: GPL-2.0
> +
> +# Check that perf trace works with various summary modes
> +
> +# shellcheck source=lib/probe.sh
> +. "$(dirname $0)"/lib/probe.sh
> +
> +skip_if_no_perf_trace || exit 2
> +[ "$(id -u)" = 0 ] || exit 2
> +
> +OUTPUT=$(mktemp /tmp/perf_trace_test.XXXXX)
> +
> +test_perf_trace() {
> +    args=$1
> +    workload="true"
> +    search="^\s*(open|read|close).*[0-9]+%$"
> +
> +    echo "testing: perf trace ${args} -- ${workload}"
> +    perf trace ${args} -- ${workload} >${OUTPUT} 2>&1
> +    if [ $? -ne 0 ]; then
> +        echo "Error: perf trace ${args} failed unexpectedly"
> +        cat ${OUTPUT}
> +        rm -f ${OUTPUT}
> +        exit 1
> +    fi
> +
> +    count=$(grep -E -c -m 3 "${search}" ${OUTPUT})
> +    if [ "${count}" != "3" ]; then
> +       echo "Error: cannot find enough pattern ${search} in the output"
> +       cat ${OUTPUT}
> +       rm -f ${OUTPUT}
> +       exit 1
> +    fi
> +}
> +
> +# summary only for a process
> +test_perf_trace "-s"
> +
> +# normal output with summary at the end
> +test_perf_trace "-S"
> +
> +# summary only with an explicit summary mode
> +test_perf_trace "-s --summary-mode=thread"
> +
> +# summary with normal output - total summary mode
> +test_perf_trace "-S --summary-mode=total"
> +
> +# summary only for system wide - per-thread summary
> +test_perf_trace "-as --summary-mode=thread --no-bpf-summary"
> +
> +# summary only for system wide - total summary mode
> +test_perf_trace "-as --summary-mode=total --no-bpf-summary"
> +
> +# summary only for system wide - per-thread summary with BPF
> +test_perf_trace "-as --summary-mode=thread --bpf-summary"
> +
> +# summary only for system wide - total summary mode with BPF
> +test_perf_trace "-as --summary-mode=total --bpf-summary"
> +
> +# summary with normal output for system wide - total summary mode with BPF
> +test_perf_trace "-aS --summary-mode=total --bpf-summary"
> +
> +rm -f ${OUTPUT}
> --
> 2.49.0.395.g12beb8f557-goog
>

Didn't quite get the combinatorial logic but it sure covers a lot :)

Reviewed-by: Howard Chu <howardchu95@gmail.com>

Thanks,
Howard

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v4 1/2] perf trace: Implement syscall summary in BPF
  2025-03-29  1:46 ` [PATCH v4 1/2] perf trace: Implement syscall summary in BPF Howard Chu
@ 2025-04-23 16:19   ` Arnaldo Carvalho de Melo
  2025-04-23 16:26   ` Arnaldo Carvalho de Melo
  1 sibling, 0 replies; 10+ messages in thread
From: Arnaldo Carvalho de Melo @ 2025-04-23 16:19 UTC (permalink / raw)
  To: Howard Chu
  Cc: Namhyung Kim, Ian Rogers, Kan Liang, Jiri Olsa, Adrian Hunter,
	Peter Zijlstra, Ingo Molnar, LKML, linux-perf-users, Song Liu,
	bpf

On Fri, Mar 28, 2025 at 06:46:36PM -0700, Howard Chu wrote:
> Hello Namhyung,
> 
> On Tue, Mar 25, 2025 at 9:40 PM Namhyung Kim <namhyung@kernel.org> wrote:
> >
> > When -s/--summary option is used, it doesn't need (augmented) arguments
> > of syscalls.  Let's skip the augmentation and load another small BPF
> > program to collect the statistics in the kernel instead of copying the
> > data to the ring-buffer to calculate the stats in userspace.  This will
> > be much more light-weight than the existing approach and remove any lost
> > events.
> >
> > Let's add a new option --bpf-summary to control this behavior.  I cannot
> > make it default because there's no way to get e_machine in the BPF which
> > is needed for detecting different ABIs like 32-bit compat mode.
> >
> > No functional changes intended except for no more LOST events. :)
> >
> >   $ sudo ./perf trace -as --summary-mode=total --bpf-summary sleep 1
> >
> >    Summary of events:
> >
> >    total, 6194 events
> >
> >      syscall            calls  errors  total       min       avg       max       stddev
> >                                        (msec)    (msec)    (msec)    (msec)        (%)
> >      --------------- --------  ------ -------- --------- --------- ---------     ------
> >      epoll_wait           561      0  4530.843     0.000     8.076   520.941     18.75%
> >      futex                693     45  4317.231     0.000     6.230   500.077     21.98%
> >      poll                 300      0  1040.109     0.000     3.467   120.928     17.02%
> >      clock_nanosleep        1      0  1000.172  1000.172  1000.172  1000.172      0.00%
> >      ppoll                360      0   872.386     0.001     2.423   253.275     41.91%
> >      epoll_pwait           14      0   384.349     0.001    27.453   380.002     98.79%
> >      pselect6              14      0   108.130     7.198     7.724     8.206      0.85%
> >      nanosleep             39      0    43.378     0.069     1.112    10.084     44.23%
> >      ...
> >
> > Cc: Howard Chu <howardchu95@gmail.com>
> > Signed-off-by: Namhyung Kim <namhyung@kernel.org>
> > ---
> > v4)
> >  * fix segfault on -S  (Howard)
> >  * correct some comments  (Howard)
> 
> + if (!hashmap__find(hash, map_key->nr, &data)) {
> 
> I think you should mention the hashmap's map_key->nr update, as this
> change is actually important for the feature.
> 
> >
> > v3)
> >  * support -S/--with-summary option too  (Howard)
> >  * make it work only with -a/--all-cpus  (Howard)
> >  * fix stddev calculation  (Howard)
> >  * add some comments about syscall_data  (Howard)
> >
> > v2)
> >  * Rebased on top of Ian's e_machine changes
> >  * add --bpf-summary option
> >  * support per-thread summary
> >  * add stddev calculation  (Howard)
> >
> >  tools/perf/Documentation/perf-trace.txt       |   6 +
> >  tools/perf/Makefile.perf                      |   2 +-
> >  tools/perf/builtin-trace.c                    |  54 ++-
> >  tools/perf/util/Build                         |   1 +
> >  tools/perf/util/bpf-trace-summary.c           | 347 ++++++++++++++++++
> >  .../perf/util/bpf_skel/syscall_summary.bpf.c  | 118 ++++++
> >  tools/perf/util/bpf_skel/syscall_summary.h    |  25 ++
> >  tools/perf/util/trace.h                       |  37 ++
> >  8 files changed, 577 insertions(+), 13 deletions(-)
> >  create mode 100644 tools/perf/util/bpf-trace-summary.c
> >  create mode 100644 tools/perf/util/bpf_skel/syscall_summary.bpf.c
> >  create mode 100644 tools/perf/util/bpf_skel/syscall_summary.h
> >  create mode 100644 tools/perf/util/trace.h
> >
> > diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt
> > index 887dc37773d0f4d6..a8a0d8c33438fef7 100644
> > --- a/tools/perf/Documentation/perf-trace.txt
> > +++ b/tools/perf/Documentation/perf-trace.txt
> > @@ -251,6 +251,12 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs.
> >         pretty-printing serves as a fallback to hand-crafted pretty printers, as the latter can
> >         better pretty-print integer flags and struct pointers.
> >
> > +--bpf-summary::
> > +       Collect system call statistics in BPF.  This is only for live mode and
> > +       works well with -s/--summary option where no argument information is
> > +       required.

> It works with -S as well, doesn't it?

Yes, I tested it:

root@number:~# perf trace -aS --summary-mode=total --bpf-summary sleep 0.000000001
     0.011 ( 0.008 ms): :146484/146484 execve(filename: "/home/acme/libexec/perf-core/sleep", argv: 0x7ffcdf2108f0, envp: 0x37fabf70) = -1 ENOENT (No such file or directory)
     0.021 ( 0.002 ms): :146484/146484 execve(filename: "/root/.local/bin/sleep", argv: 0x7ffcdf2108f0, envp: 0x37fabf70) = -1 ENOENT (No such file or directory)
     0.024 ( 0.002 ms): :146484/146484 execve(filename: "/root/bin/sleep", argv: 0x7ffcdf2108f0, envp: 0x37fabf70) = -1 ENOENT (No such file or directory)
     0.026 ( 0.002 ms): :146484/146484 execve(filename: "/usr/local/sbin/sleep", argv: 0x7ffcdf2108f0, envp: 0x37fabf70) = -1 ENOENT (No such file or directory)
     0.029 ( 0.001 ms): :146484/146484 execve(filename: "/usr/local/bin/sleep", argv: 0x7ffcdf2108f0, envp: 0x37fabf70) = -1 ENOENT (No such file or directory)
         ? (         ): sudo/115804  ... [continued]: ppoll())                                            = 1
     0.032 (         ): :146484/146484 execve(filename: "/usr/sbin/sleep", argv: 0x7ffcdf2108f0, envp: 0x37fabf70) ...
     0.146 ( 0.002 ms): sudo/115804 rt_sigaction(sig: TTIN, act: (struct sigaction){.sa_handler = (__sighandler_t)0x557bdbdec4c0,.sa_flags = (long unsigned int)67108864,.sa_restorer = (__sigrestore_t)0x7f50c6627bf0,}, oact: 0x7ffff79f7d80, sigsetsize: 8) = 0
     0.150 ( 0.003 ms): sudo/115804 read(fd: 9</dev/ptmx>, buf: 0x557be6008260, count: 65536)             = 297
     0.155 ( 0.001 ms): sudo/115804 rt_sigaction(sig: TTIN, act: (struct sigaction){.sa_handler = (__sighandler_t)0x1,.sa_flags = (long unsigned int)335544320,.sa_restorer = (__sigrestore_t)0x7f50c6627bf0,}, sigsetsize: 8) = 0
     0.158 ( 0.001 ms): sudo/115804 rt_sigprocmask(nset: 0x557bdbe1a6a0, oset: 0x7ffff79f7d70, sigsetsize: 8) = 0
     0.162 ( 0.001 ms): sudo/115804 rt_sigprocmask(how: SETMASK, nset: 0x7ffff79f7d70, sigsetsize: 8)     = 0
     0.165 ( 0.002 ms): sudo/115804 ppoll(ufds: 0x557be5f955b0, nfds: 5, sigsetsize: 8)                   = 2
     0.169 ( 0.001 ms): sudo/115804 rt_sigaction(sig: TTIN, act: (struct sigaction){.sa_handler = (__sighandler_t)0x557bdbdec4c0,.sa_flags = (long unsigned int)67108864,.sa_restorer = (__sigrestore_t)0x7f50c6627bf0,}, oact: 0x7ffff79f7d80, sigsetsize: 8) = 0
     0.171 ( 0.002 ms): sudo/115804 read(fd: 9</dev/ptmx>, buf: 0x557be6008389, count: 65239)             = 502
     0.175 ( 0.001 ms): sudo/115804 rt_sigaction(sig: TTIN, act: (struct sigaction){.sa_handler = (__sighandler_t)0x1,.sa_flags = (long unsigned int)335544320,.sa_restorer = (__sigrestore_t)0x7f50c6627bf0,}, sigsetsize: 8) = 0
     0.177 ( 0.001 ms): sudo/115804 rt_sigprocmask(nset: 0x557bdbe1a6a0, oset: 0x7ffff79f7d70, sigsetsize: 8) = 0
     0.179 ( 0.001 ms): sudo/115804 rt_sigprocmask(how: SETMASK, nset: 0x7ffff79f7d70, sigsetsize: 8)     = 0
     0.181 ( 0.001 ms): sudo/115804 rt_sigaction(sig: TTOU, act: (struct sigaction){.sa_handler = (__sighandler_t)0x557bdbdec4d0,.sa_flags = (long unsigned int)67108864,.sa_restorer = (__sigrestore_t)0x7f50c6627bf0,}, oact: 0x7ffff79f7d80, sigsetsize: 8) = 0
     0.183 ( 0.004 ms): sudo/115804 write(fd: 8</dev/tty>, buf:          ? (         ): :146484/, count: 799) = 799
     0.189 ( 0.001 ms): sudo/115804 rt_sigaction(sig: TTOU, act: (struct sigaction){.sa_handler = (__sighandler_t)0x1,.sa_flags = (long unsigned int)335544320,.sa_restorer = (__sigrestore_t)0x7f50c6627bf0,}, sigsetsize: 8) = 0
     0.193 ( 0.002 ms): sudo/115804 ppoll(ufds: 0x557be5f955b0, nfds: 4, sigsetsize: 8)                   = 1
     0.196 ( 0.001 ms): sudo/115804 rt_sigaction(sig: TTIN, act: (struct sigaction){.sa_handler = (__sighandler_t)0x557bdbdec4c0,.sa_flags = (long unsigned int)67108864,.sa_restorer = (__sigrestore_t)0x7f50c6627bf0,}, oact: 0x7ffff79f7d80, sigsetsize: 8) = 0
     0.199 ( 0.002 ms): sudo/115804 read(fd: 9</dev/ptmx>, buf: 0x557be6008260, count: 65536)             = 379
     0.201 ( 0.001 ms): sudo/115804 rt_sigaction(sig: TTIN, act: (struct sigaction){.sa_handler = (__sighandler_t)0x1,.sa_flags = (long unsigned int)335544320,.sa_restorer = (__sigrestore_t)0x7f50c6627bf0,}, sigsetsize: 8) = 0
     0.203 ( 0.001 ms): sudo/115804 rt_sigprocmask(nset: 0x557bdbe1a6a0, oset: 0x7ffff79f7d70, sigsetsize: 8) = 0
     0.205 ( 0.001 ms): sudo/115804 rt_sigprocmask(how: SETMASK, nset: 0x7ffff79f7d70, sigsetsize: 8)     = 0
     0.206 ( 0.002 ms): sudo/115804 ppoll(ufds: 0x557be5f955b0, nfds: 5, sigsetsize: 8)                   = 1
     0.209 ( 0.001 ms): sudo/115804 rt_sigaction(sig: TTOU, act: (struct sigaction){.sa_handler = (__sighandler_t)0x557bdbdec4d0,.sa_flags = (long unsigned int)67108864,.sa_restorer = (__sigrestore_t)0x7f50c6627bf0,}, oact: 0x7ffff79f7d80, sigsetsize: 8) = 0
     0.211 ( 0.002 ms): sudo/115804 write(fd: 8</dev/tty>, buf: ): :146484/146484 execve(filenam, count: 379) = 379
     0.213 ( 0.001 ms): sudo/115804 rt_sigaction(sig: TTOU, act: (struct sigaction){.sa_handler = (__sighandler_t)0x1,.sa_flags = (long unsigned int)335544320,.sa_restorer = (__sigrestore_t)0x7f50c6627bf0,}, sigsetsize: 8) = 0
         ? (         ): ptyxis/3622  ... [continued]: ppoll())                                            = 1
     0.215 (         ): sudo/115804 ppoll(ufds: 0x557be5f955b0, nfds: 4, sigsetsize: 8)                ...
     0.196 ( 0.002 ms): ptyxis/3622 write(fd: 4<anon_inode:[eventfd]>, buf: \1\0\0\0\0\0\0\0, count: 8)   = 8
     0.206 ( 0.003 ms): ptyxis/3622 read(fd: 41</dev/ptmx>, buf: 0x5586a84ee428, count: 8136)             = 800
     0.209 ( 0.001 ms): ptyxis/3622 read(fd: 41</dev/ptmx>, buf: 0x5586a84ee747, count: 7337)             = -1 EAGAIN (Resource temporarily unavailable)
     0.221 ( 0.001 ms): ptyxis/3622 write(fd: 4<anon_inode:[eventfd]>, buf: \1\0\0\0\0\0\0\0, count: 8)   = 8
     0.224 ( 0.002 ms): ptyxis/3622 ppoll(ufds: 0x5586a7e8f120, nfds: 10, tsp: 0x7ffdd1fbb470, sigsetsize: 8) = 2
     0.227 ( 0.001 ms): ptyxis/3622 read(fd: 4<anon_inode:[eventfd]>, buf: 0x7ffdd1fbb3a0, count: 8)      = 8
     0.229 ( 0.001 ms): ptyxis/3622 write(fd: 4<anon_inode:[eventfd]>, buf: \1\0\0\0\0\0\0\0, count: 8)   = 8
     0.231 ( 0.001 ms): ptyxis/3622 read(fd: 41</dev/ptmx>, buf: 0x5586a84ee747, count: 7337)             = 380
     0.233 ( 0.001 ms): ptyxis/3622 read(fd: 41</dev/ptmx>, buf: 0x5586a84ee8c2, count: 6958)             = -1 EAGAIN (Resource temporarily unavailable)
     0.234 ( 0.001 ms): ptyxis/3622 write(fd: 4<anon_inode:[eventfd]>, buf: \1\0\0\0\0\0\0\0, count: 8)   = 8
     0.236 ( 0.001 ms): ptyxis/3622 ppoll(ufds: 0x5586a7e8f120, nfds: 10, tsp: 0x7ffdd1fbb470, sigsetsize: 8) = 1
     0.238 ( 0.001 ms): ptyxis/3622 read(fd: 4<anon_inode:[eventfd]>, buf: 0x7ffdd1fbb3a0, count: 8)      = 8
         ? (         ): mdns_service/5565  ... [continued]: recvfrom())                                         = -1 EAGAIN (Resource temporarily unavailable)
     0.241 (         ): ptyxis/3622 ppoll(ufds: 0x5586a7e8f120, nfds: 10, tsp: 0x7ffdd1fbb470, sigsetsize: 8) ...
     0.032 ( 0.627 ms): sleep/146484  ... [continued]: execve())                                           = 0
     1.059 (         ): mdns_service/5565 recvfrom(fd: 292<socket:[81029]>, ubuf: 0x7f6cfdc454c0, size: 9000, addr: 0x7f6cfdc47b00, addr_len: 0x7f6cfdc47a00) ...
     0.676 ( 0.001 ms): sleep/146484 brk()                                                                 = 0x56443e2d4000
     0.689 ( 0.002 ms): sleep/146484 mmap(len: 8192, prot: READ|WRITE, flags: PRIVATE|ANONYMOUS)           = 0x7fe66d41d000
     0.693 ( 0.002 ms): sleep/146484 access(filename: "/etc/ld.so.preload", mode: R)                       = -1 ENOENT (No such file or directory)
     0.698 ( 0.002 ms): sleep/146484 openat(dfd: CWD, filename: "/etc/ld.so.cache", flags: RDONLY|CLOEXEC) = 3
     0.701 ( 0.001 ms): sleep/146484 fstat(fd: 3, statbuf: 0x7ffefb498350)                                 = 0
     0.704 ( 0.003 ms): sleep/146484 mmap(len: 76091, prot: READ, flags: PRIVATE, fd: 3)                   = 0x7fe66d40a000
     0.708 ( 0.001 ms): sleep/146484 close(fd: 3)                                                          = 0
     0.712 ( 0.002 ms): sleep/146484 openat(dfd: CWD, filename: "/lib64/libc.so.6", flags: RDONLY|CLOEXEC) = 3
     0.715 ( 0.001 ms): sleep/146484 read(fd: 3, buf: 0x7ffefb4984b8, count: 832)                          = 832
     0.717 ( 0.001 ms): sleep/146484 pread64(fd: 3, buf: 0x7ffefb4980a0, count: 784, pos: 64)              = 784
     0.719 ( 0.001 ms): sleep/146484 fstat(fd: 3, statbuf: 0x7ffefb498340)                                 = 0
     0.722 ( 0.001 ms): sleep/146484 pread64(fd: 3, buf: 0x7ffefb497f80, count: 784, pos: 64)              = 784
     0.723 ( 0.003 ms): sleep/146484 mmap(len: 2038872, prot: READ|EXEC, flags: PRIVATE|DENYWRITE, fd: 3)  = 0x7fe66d218000
     0.727 ( 0.004 ms): sleep/146484 mmap(addr: 0x7fe66d387000, len: 479232, prot: READ, flags: PRIVATE|FIXED|DENYWRITE, fd: 3, off: 0x16f000) = 0x7fe66d387000
     0.733 ( 0.003 ms): sleep/146484 mmap(addr: 0x7fe66d3fc000, len: 24576, prot: READ|WRITE, flags: PRIVATE|FIXED|DENYWRITE, fd: 3, off: 0x1e3000) = 0x7fe66d3fc000
     0.737 ( 0.002 ms): sleep/146484 mmap(addr: 0x7fe66d402000, len: 31832, prot: READ|WRITE, flags: PRIVATE|FIXED|ANONYMOUS) = 0x7fe66d402000
     0.743 ( 0.001 ms): sleep/146484 close(fd: 3)                                                          = 0
     0.748 ( 0.002 ms): sleep/146484 mmap(len: 12288, prot: READ|WRITE, flags: PRIVATE|ANONYMOUS)          = 0x7fe66d215000
     0.753 ( 0.001 ms): sleep/146484 arch_prctl(option: SET_FS, arg2: 0x7fe66d215740)                      = 0
     0.754 ( 0.001 ms): sleep/146484 set_tid_address(tidptr: 0x7fe66d215a10)                               = 146484 (sleep)
     0.757 ( 0.001 ms): sleep/146484 set_robust_list(head: (struct robust_list_head){.list = (struct robust_list){.next = (struct robust_list *)0x7fe66d215a20,},.futex_offset = (long int)-32,}, len: 24) = 0
     0.759 ( 0.001 ms): sleep/146484 rseq(rseq: (struct rseq){.cpu_id = (__u32)4294967295,}, rseq_len: 32, sig: 1392848979) = 0
     0.780 ( 0.003 ms): sleep/146484 mprotect(start: 0x7fe66d3fc000, len: 16384, prot: READ)               = 0
     0.788 ( 0.002 ms): sleep/146484 mprotect(start: 0x564438854000, len: 4096, prot: READ)                = 0
     0.792 ( 0.002 ms): sleep/146484 mprotect(start: 0x7fe66d459000, len: 8192, prot: READ)                = 0
     0.798 ( 0.001 ms): sleep/146484 prlimit64(resource: STACK, old_rlim: 0x7ffefb498e90)                  = 0
     0.807 ( 0.003 ms): sleep/146484 munmap(addr: 0x7fe66d40a000, len: 76091)                              = 0
     0.817 ( 0.001 ms): sleep/146484 getrandom(ubuf: 0x7fe66d407218, len: 8, flags: NONBLOCK)              = 8
     0.819 ( 0.001 ms): sleep/146484 brk()                                                                 = 0x56443e2d4000
     0.821 ( 0.003 ms): sleep/146484 brk(brk: 0x56443e2f5000)                                              = 0x56443e2f5000
     0.827 ( 0.032 ms): sleep/146484 openat(dfd: CWD, filename: "", flags: RDONLY|CLOEXEC)                 = 3
     0.860 ( 0.001 ms): sleep/146484 fstat(fd: 3, statbuf: 0x7fe66d401800)                                 = 0
     0.862 ( 0.002 ms): sleep/146484 mmap(len: 233242544, prot: READ, flags: PRIVATE, fd: 3)               = 0x7fe65f200000
     0.867 ( 0.001 ms): sleep/146484 close(fd: 3)                                                          = 0
     0.888 ( 0.003 ms): sleep/146484 openat(dfd: CWD, filename: "/usr/share/locale/locale.alias", flags: RDONLY|CLOEXEC) = 3
     0.892 ( 0.001 ms): sleep/146484 fstat(fd: 3, statbuf: 0x7ffefb498a70)                                 = 0
     0.895 ( 0.002 ms): sleep/146484 read(fd: 3, buf: 0x56443e2d5680, count: 4096)                         = 2998
     0.901 ( 0.001 ms): sleep/146484 read(fd: 3, buf: 0x56443e2d5680, count: 4096)                         = 0
     0.903 ( 0.001 ms): sleep/146484 close(fd: 3)                                                          = 0
     0.909 ( 0.002 ms): sleep/146484 openat(dfd: CWD, filename: "/usr/share/locale/en_US.UTF-8/LC_MESSAGES/coreutils.mo") = -1 ENOENT (No such file or directory)
     0.912 ( 0.001 ms): sleep/146484 openat(dfd: CWD, filename: "/usr/share/locale/en_US.utf8/LC_MESSAGES/coreutils.mo") = -1 ENOENT (No such file or directory)
     0.914 ( 0.002 ms): sleep/146484 openat(dfd: CWD, filename: "/usr/share/locale/en_US/LC_MESSAGES/coreutils.mo") = -1 ENOENT (No such file or directory)
     0.916 ( 0.001 ms): sleep/146484 openat(dfd: CWD, filename: "/usr/share/locale/en.UTF-8/LC_MESSAGES/coreutils.mo") = -1 ENOENT (No such file or directory)
     0.918 ( 0.001 ms): sleep/146484 openat(dfd: CWD, filename: "/usr/share/locale/en.utf8/LC_MESSAGES/coreutils.mo") = -1 ENOENT (No such file or directory)
     0.920 ( 0.002 ms): sleep/146484 openat(dfd: CWD, filename: "/usr/share/locale/en/LC_MESSAGES/coreutils.mo") = -1 ENOENT (No such file or directory)
     0.930 ( 0.055 ms): sleep/146484 clock_nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 1 }, rmtp: 0x7ffefb4990f0) = 0
     0.987 ( 0.001 ms): sleep/146484 close(fd: 1)                                                          = 0
     0.989 ( 0.001 ms): sleep/146484 close(fd: 2)                                                          = 0
     0.992 (         ): sleep/146484 exit_group()                                                          = ?

 Summary of events:

 total, 3096 events

   syscall            calls  errors  total       min       avg       max       stddev
                                     (msec)    (msec)    (msec)    (msec)        (%)
   --------------- --------  ------ -------- --------- --------- ---------     ------
   ppoll                317      0    47.372     0.000     0.149     3.804     17.80%
   recvfrom               8      8    18.000     1.986     2.250     2.996      7.19%
   sched_setaffinity       66      0     0.743     0.001     0.011     0.021      6.16%
   execve                 6      5     0.644     0.001     0.107     0.630     97.28%
   write               1268      0     0.548     0.000     0.000     0.005      2.30%
   read                 390     75     0.158     0.000     0.000     0.012      9.19%
   ioctl                138      1     0.119     0.000     0.001     0.011     14.79%
   newfstatat            28     17     0.079     0.001     0.003     0.025     33.91%
   rt_sigaction         446      0     0.077     0.000     0.000     0.002      4.78%
   futex                 20      1     0.077     0.000     0.004     0.037     51.43%
   openat                13      6     0.057     0.001     0.004     0.032     51.97%
   clock_nanosleep        1      0     0.055     0.055     0.055     0.055      0.00%
   rt_sigprocmask       290      0     0.047     0.000     0.000     0.002      5.61%
   mmap                   8      0     0.021     0.002     0.003     0.004     12.60%
   poll                   4      0     0.015     0.000     0.004     0.014     92.77%
   readlink               5      0     0.014     0.001     0.003     0.005     28.80%
   close                 15      0     0.009     0.000     0.001     0.001     12.52%
   pread64               10      0     0.009     0.000     0.001     0.003     26.77%
   recvmsg               17     13     0.008     0.000     0.000     0.002     23.80%
   mprotect               3      0     0.006     0.002     0.002     0.003     15.31%
   sendmsg                5      0     0.006     0.001     0.001     0.002     21.98%
   fstat                  6      0     0.005     0.000     0.001     0.001     23.30%
   brk                    3      0     0.004     0.001     0.001     0.003     39.48%
   munmap                 1      0     0.003     0.003     0.003     0.003      0.00%
   access                 1      1     0.002     0.002     0.002     0.002      0.00%
   timerfd_settime        5      0     0.002     0.000     0.000     0.000     11.96%
   eventfd2               1      0     0.002     0.002     0.002     0.002      0.00%
   sched_getaffinity        2      0     0.001     0.001     0.001     0.001      0.96%
   getrandom              1      0     0.001     0.001     0.001     0.001      0.00%
   rt_sigreturn           1      0     0.001     0.001     0.001     0.001      0.00%
   prlimit64              1      0     0.001     0.001     0.001     0.001      0.00%
   set_tid_address        1      0     0.001     0.001     0.001     0.001      0.00%
   getpid                 6      0     0.001     0.000     0.000     0.000     14.19%
   arch_prctl             1      0     0.001     0.001     0.001     0.001      0.00%
   set_robust_list        1      0     0.001     0.001     0.001     0.001      0.00%
   rseq                   1      0     0.001     0.001     0.001     0.001      0.00%
   fcntl                  3      0     0.001     0.000     0.000     0.000     20.48%
   epoll_wait             2      0     0.001     0.000     0.000     0.000     38.12%
   uname                  1      0     0.000     0.000     0.000     0.000      0.00%
 
> Anyway, I don't mind adding these details later on, so
> 
> Reviewed-by: Howard Chu <howardchu95@gmail.com>

Thanks, applied to perf-tools-next,

- Arnaldo

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v4 1/2] perf trace: Implement syscall summary in BPF
  2025-03-29  1:46 ` [PATCH v4 1/2] perf trace: Implement syscall summary in BPF Howard Chu
  2025-04-23 16:19   ` Arnaldo Carvalho de Melo
@ 2025-04-23 16:26   ` Arnaldo Carvalho de Melo
  2025-04-23 17:41     ` Namhyung Kim
  1 sibling, 1 reply; 10+ messages in thread
From: Arnaldo Carvalho de Melo @ 2025-04-23 16:26 UTC (permalink / raw)
  To: Howard Chu
  Cc: Namhyung Kim, Ian Rogers, Kan Liang, Jiri Olsa, Adrian Hunter,
	Peter Zijlstra, Ingo Molnar, LKML, linux-perf-users, Song Liu,
	bpf

On Fri, Mar 28, 2025 at 06:46:36PM -0700, Howard Chu wrote:
> Hello Namhyung,
> 
> On Tue, Mar 25, 2025 at 9:40 PM Namhyung Kim <namhyung@kernel.org> wrote:
> >
> > When -s/--summary option is used, it doesn't need (augmented) arguments
> > of syscalls.  Let's skip the augmentation and load another small BPF
> > program to collect the statistics in the kernel instead of copying the
> > data to the ring-buffer to calculate the stats in userspace.  This will
> > be much more light-weight than the existing approach and remove any lost
> > events.
> >
> > Let's add a new option --bpf-summary to control this behavior.  I cannot
> > make it default because there's no way to get e_machine in the BPF which
> > is needed for detecting different ABIs like 32-bit compat mode.
> >
> > No functional changes intended except for no more LOST events. :)
> >
> >   $ sudo ./perf trace -as --summary-mode=total --bpf-summary sleep 1
> >
> >    Summary of events:
> >
> >    total, 6194 events
> >
> >      syscall            calls  errors  total       min       avg       max       stddev
> >                                        (msec)    (msec)    (msec)    (msec)        (%)
> >      --------------- --------  ------ -------- --------- --------- ---------     ------
> >      epoll_wait           561      0  4530.843     0.000     8.076   520.941     18.75%
> >      futex                693     45  4317.231     0.000     6.230   500.077     21.98%
> >      poll                 300      0  1040.109     0.000     3.467   120.928     17.02%
> >      clock_nanosleep        1      0  1000.172  1000.172  1000.172  1000.172      0.00%
> >      ppoll                360      0   872.386     0.001     2.423   253.275     41.91%
> >      epoll_pwait           14      0   384.349     0.001    27.453   380.002     98.79%
> >      pselect6              14      0   108.130     7.198     7.724     8.206      0.85%
> >      nanosleep             39      0    43.378     0.069     1.112    10.084     44.23%
> >      ...

I added the following to align sched_[gs]etaffinity,

Thanks,

- Arnaldo


diff --git a/tools/perf/util/bpf-trace-summary.c b/tools/perf/util/bpf-trace-summary.c
index 114d8d9ed9b2d3f3..af37d3bb5f9c42e7 100644
--- a/tools/perf/util/bpf-trace-summary.c
+++ b/tools/perf/util/bpf-trace-summary.c
@@ -139,9 +139,9 @@ static int print_common_stats(struct syscall_data *data, FILE *fp)
 		/* TODO: support other ABIs */
 		name = syscalltbl__name(EM_HOST, node->syscall_nr);
 		if (name)
-			printed += fprintf(fp, "   %-15s", name);
+			printed += fprintf(fp, "   %-17s", name);
 		else
-			printed += fprintf(fp, "   syscall:%-7d", node->syscall_nr);
+			printed += fprintf(fp, "   syscall:%-9d", node->syscall_nr);
 
 		printed += fprintf(fp, " %8u %6u %9.3f %9.3f %9.3f %9.3f %9.2f%%\n",
 				   stat->count, stat->error, total, min, avg, max,

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH v4 1/2] perf trace: Implement syscall summary in BPF
  2025-04-23 16:26   ` Arnaldo Carvalho de Melo
@ 2025-04-23 17:41     ` Namhyung Kim
  2025-04-23 20:50       ` Arnaldo Carvalho de Melo
  0 siblings, 1 reply; 10+ messages in thread
From: Namhyung Kim @ 2025-04-23 17:41 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo
  Cc: Howard Chu, Ian Rogers, Kan Liang, Jiri Olsa, Adrian Hunter,
	Peter Zijlstra, Ingo Molnar, LKML, linux-perf-users, Song Liu,
	bpf

Hi Arnaldo,

On Wed, Apr 23, 2025 at 01:26:48PM -0300, Arnaldo Carvalho de Melo wrote:
> On Fri, Mar 28, 2025 at 06:46:36PM -0700, Howard Chu wrote:
> > Hello Namhyung,
> > 
> > On Tue, Mar 25, 2025 at 9:40 PM Namhyung Kim <namhyung@kernel.org> wrote:
> > >
> > > When -s/--summary option is used, it doesn't need (augmented) arguments
> > > of syscalls.  Let's skip the augmentation and load another small BPF
> > > program to collect the statistics in the kernel instead of copying the
> > > data to the ring-buffer to calculate the stats in userspace.  This will
> > > be much more light-weight than the existing approach and remove any lost
> > > events.
> > >
> > > Let's add a new option --bpf-summary to control this behavior.  I cannot
> > > make it default because there's no way to get e_machine in the BPF which
> > > is needed for detecting different ABIs like 32-bit compat mode.
> > >
> > > No functional changes intended except for no more LOST events. :)
> > >
> > >   $ sudo ./perf trace -as --summary-mode=total --bpf-summary sleep 1
> > >
> > >    Summary of events:
> > >
> > >    total, 6194 events
> > >
> > >      syscall            calls  errors  total       min       avg       max       stddev
> > >                                        (msec)    (msec)    (msec)    (msec)        (%)
> > >      --------------- --------  ------ -------- --------- --------- ---------     ------
> > >      epoll_wait           561      0  4530.843     0.000     8.076   520.941     18.75%
> > >      futex                693     45  4317.231     0.000     6.230   500.077     21.98%
> > >      poll                 300      0  1040.109     0.000     3.467   120.928     17.02%
> > >      clock_nanosleep        1      0  1000.172  1000.172  1000.172  1000.172      0.00%
> > >      ppoll                360      0   872.386     0.001     2.423   253.275     41.91%
> > >      epoll_pwait           14      0   384.349     0.001    27.453   380.002     98.79%
> > >      pselect6              14      0   108.130     7.198     7.724     8.206      0.85%
> > >      nanosleep             39      0    43.378     0.069     1.112    10.084     44.23%
> > >      ...
> 
> I added the following to align sched_[gs]etaffinity,

Thanks for processing the patch and updating this.  But I'm afraid there
are more syscalls with longer names and this is not the only place to
print the syscall names.  Also I think we need to update length of the
time fields.  So I prefer handling them in a separate patch later.

Thanks,
Namhyung
 
> 
> diff --git a/tools/perf/util/bpf-trace-summary.c b/tools/perf/util/bpf-trace-summary.c
> index 114d8d9ed9b2d3f3..af37d3bb5f9c42e7 100644
> --- a/tools/perf/util/bpf-trace-summary.c
> +++ b/tools/perf/util/bpf-trace-summary.c
> @@ -139,9 +139,9 @@ static int print_common_stats(struct syscall_data *data, FILE *fp)
>  		/* TODO: support other ABIs */
>  		name = syscalltbl__name(EM_HOST, node->syscall_nr);
>  		if (name)
> -			printed += fprintf(fp, "   %-15s", name);
> +			printed += fprintf(fp, "   %-17s", name);
>  		else
> -			printed += fprintf(fp, "   syscall:%-7d", node->syscall_nr);
> +			printed += fprintf(fp, "   syscall:%-9d", node->syscall_nr);
>  
>  		printed += fprintf(fp, " %8u %6u %9.3f %9.3f %9.3f %9.3f %9.2f%%\n",
>  				   stat->count, stat->error, total, min, avg, max,

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v4 1/2] perf trace: Implement syscall summary in BPF
  2025-04-23 17:41     ` Namhyung Kim
@ 2025-04-23 20:50       ` Arnaldo Carvalho de Melo
  2025-04-24 22:06         ` Arnaldo Carvalho de Melo
  0 siblings, 1 reply; 10+ messages in thread
From: Arnaldo Carvalho de Melo @ 2025-04-23 20:50 UTC (permalink / raw)
  To: Namhyung Kim
  Cc: Howard Chu, Ian Rogers, Kan Liang, Jiri Olsa, Adrian Hunter,
	Peter Zijlstra, Ingo Molnar, LKML, linux-perf-users, Song Liu,
	bpf

On Wed, Apr 23, 2025 at 10:41:55AM -0700, Namhyung Kim wrote:
> Hi Arnaldo,
> 
> On Wed, Apr 23, 2025 at 01:26:48PM -0300, Arnaldo Carvalho de Melo wrote:
> > On Fri, Mar 28, 2025 at 06:46:36PM -0700, Howard Chu wrote:
> > > On Tue, Mar 25, 2025 at 9:40 PM Namhyung Kim <namhyung@kernel.org> wrote:
> > > >      syscall            calls  errors  total       min       avg       max       stddev
> > > >                                        (msec)    (msec)    (msec)    (msec)        (%)
> > > >      --------------- --------  ------ -------- --------- --------- ---------     ------
> > > >      epoll_wait           561      0  4530.843     0.000     8.076   520.941     18.75%
> > > >      futex                693     45  4317.231     0.000     6.230   500.077     21.98%
> > > >      poll                 300      0  1040.109     0.000     3.467   120.928     17.02%
> > > >      clock_nanosleep        1      0  1000.172  1000.172  1000.172  1000.172      0.00%
> > > >      ppoll                360      0   872.386     0.001     2.423   253.275     41.91%
> > > >      epoll_pwait           14      0   384.349     0.001    27.453   380.002     98.79%
> > > >      pselect6              14      0   108.130     7.198     7.724     8.206      0.85%
> > > >      nanosleep             39      0    43.378     0.069     1.112    10.084     44.23%
> > > >      ...
> > 
> > I added the following to align sched_[gs]etaffinity,
> 
> Thanks for processing the patch and updating this.  But I'm afraid there
> are more syscalls with longer names and this is not the only place to
> print the syscall names.  Also I think we need to update length of the
> time fields.  So I prefer handling them in a separate patch later.

Fair enough, I'm leaving the patch as-is.

- Arnaldo

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v4 1/2] perf trace: Implement syscall summary in BPF
  2025-04-23 20:50       ` Arnaldo Carvalho de Melo
@ 2025-04-24 22:06         ` Arnaldo Carvalho de Melo
  2025-04-25 21:51           ` Namhyung Kim
  0 siblings, 1 reply; 10+ messages in thread
From: Arnaldo Carvalho de Melo @ 2025-04-24 22:06 UTC (permalink / raw)
  To: Namhyung Kim
  Cc: Howard Chu, Ian Rogers, Kan Liang, Jiri Olsa, Adrian Hunter,
	Peter Zijlstra, Ingo Molnar, LKML, linux-perf-users, Song Liu,
	bpf

On Wed, Apr 23, 2025 at 05:50:51PM -0300, Arnaldo Carvalho de Melo wrote:
> On Wed, Apr 23, 2025 at 10:41:55AM -0700, Namhyung Kim wrote:
> > On Wed, Apr 23, 2025 at 01:26:48PM -0300, Arnaldo Carvalho de Melo wrote:
> > > On Fri, Mar 28, 2025 at 06:46:36PM -0700, Howard Chu wrote:
> > > > On Tue, Mar 25, 2025 at 9:40 PM Namhyung Kim <namhyung@kernel.org> wrote:
> > > > >      syscall            calls  errors  total       min       avg       max       stddev

> > > > >      --------------- --------  ------ -------- --------- --------- ---------     ------
> > > > >      epoll_wait           561      0  4530.843     0.000     8.076   520.941     18.75%
> > > > >      futex                693     45  4317.231     0.000     6.230   500.077     21.98%
> > > > >      poll                 300      0  1040.109     0.000     3.467   120.928     17.02%
> > > > >      clock_nanosleep        1      0  1000.172  1000.172  1000.172  1000.172      0.00%
> > > > >      ppoll                360      0   872.386     0.001     2.423   253.275     41.91%
> > > > >      epoll_pwait           14      0   384.349     0.001    27.453   380.002     98.79%
> > > > >      pselect6              14      0   108.130     7.198     7.724     8.206      0.85%
> > > > >      nanosleep             39      0    43.378     0.069     1.112    10.084     44.23%
> > > > >      ...

> > > I added the following to align sched_[gs]etaffinity,

> > Thanks for processing the patch and updating this.  But I'm afraid there
> > are more syscalls with longer names and this is not the only place to
> > print the syscall names.  Also I think we need to update length of the
> > time fields.  So I prefer handling them in a separate patch later.
 
> Fair enough, I'm leaving the patch as-is.

But I still have to look at this:

toolsbuilder@five:~$ time dm
   1   114.52 almalinux:8                   : Ok   gcc (GCC) 8.5.0 20210514 (Red Hat 8.5.0-26) , clang version 18.1.8 (Red Hat 18.1.8-1.module_el8.10.0+3903+ca21d481) flex 2.6.1
   2   111.09 almalinux:9                   : Ok   gcc (GCC) 11.5.0 20240719 (Red Hat 11.5.0-5) , clang version 18.1.8 (AlmaLinux OS Foundation 18.1.8-3.el9) flex 2.6.4
   3: almalinux:9-i386WARNING: image platform (linux/386) does not match the expected platform (linux/amd64)
WARNING: image platform (linux/386) does not match the expected platform (linux/amd64)
   132.71 almalinux:9-i386              : Ok   gcc (GCC) 11.4.1 20231218 (Red Hat 11.4.1-3) , clang version 17.0.6 (AlmaLinux OS Foundation 17.0.6-5.el9) flex 2.6.4
   4    21.54 alpine:3.16                   : FAIL gcc version 11.2.1 20220219 (Alpine 11.2.1_git20220219) 
    bpf-trace-summary.c:(.text+0xf0760): undefined reference to `syscalltbl__name'
    collect2: error: ld returned 1 exit status
   5    16.50 alpine:3.17                   : FAIL gcc version 12.2.1 20220924 (Alpine 12.2.1_git20220924-r4) 
    bpf-trace-summary.c:(.text+0xf2020): undefined reference to `syscalltbl__name'
    collect2: error: ld returned 1 exit status

More info:

perf-6.15.0-rc2/HEAD
perf-6.15.0-rc2/PERF-VERSION-FILE
BUILD_TARBALL_HEAD=24c0c35d4640052c61ed539a777bd3bd60d62bbf
Using built-in specs.
COLLECT_GCC=gcc
COLLECT_LTO_WRAPPER=/usr/libexec/gcc/x86_64-alpine-linux-musl/12.2.1/lto-wrapper
Target: x86_64-alpine-linux-musl
Configured with: /home/buildozer/aports/main/gcc/src/gcc-12-20220924/configure --prefix=/usr --mandir=/usr/share/man --infodir=/usr/share/info --build=x86_64-alpine-linux-musl --host=x86_64-alpine-linux-musl --target=x86_64-alpine-linux-musl --enable-checking=release --disable-fixed-point --disable-libstdcxx-pch --disable-multilib --disable-nls --disable-werror --disable-symvers --enable-__cxa_atexit --enable-default-pie --enable-default-ssp --enable-languages=c,c++,d,objc,go,fortran,ada --disable-libssp --disable-libsanitizer --enable-shared --enable-threads --enable-tls --with-bugurl=https://gitlab.alpinelinux.org/alpine/aports/-/issues --with-system-zlib --with-linker-hash-style=gnu --with-pkgversion='Alpine 12.2.1_git20220924-r4'
Thread model: posix
Supported LTO compression algorithms: zlib
gcc version 12.2.1 20220924 (Alpine 12.2.1_git20220924-r4) 
+ make 'NO_LIBTRACEEVENT=1' 'ARCH=' 'CROSS_COMPILE=' 'EXTRA_CFLAGS=' -C tools/perf 'O=/tmp/build/perf'
make: Entering directory '/git/perf-6.15.0-rc2/tools/perf'
  BUILD:   Doing 'make -j28' parallel build
Warning: Skipped check-headers due to missing ../../include
Makefile.config:563: No elfutils/debuginfod.h found, no debuginfo server support, please install libdebuginfod-dev/elfutils-debuginfod-client-devel or equivalent
Makefile.config:605: No sys/sdt.h found, no SDT events are defined, please install systemtap-sdt-devel or systemtap-sdt-dev
Makefile.config:1085: No libbabeltrace found, disables 'perf data' CTF format support, please install libbabeltrace-dev[el]/libbabeltrace-ctf-dev
Makefile.config:1128: No alternatives command found, you need to set JDIR= to point to the root of your Java directory
Makefile.config:1159: libpfm4 not found, disables libpfm4 support. Please install libpfm4-dev

Auto-detecting system features:
...                                   libdw: [ on  ]
...                                   glibc: [ OFF ]
...                                  libelf: [ on  ]
...                                 libnuma: [ on  ]
...                  numa_num_possible_cpus: [ on  ]
...                                 libperl: [ on  ]
...                               libpython: [ on  ]
...                               libcrypto: [ on  ]
...                             libcapstone: [ on  ]
...                               llvm-perf: [ on  ]
...                                    zlib: [ on  ]
...                                    lzma: [ on  ]
...                               get_cpuid: [ on  ]
...                                     bpf: [ on  ]
...                                  libaio: [ on  ]
...                                 libzstd: [ on  ]

  PERF_VERSION = 6.15.rc2.g24c0c35d4640
  GEN     /tmp/build/perf/common-cmds.h
  GEN     /tmp/build/perf/arch/arm64/include/generated/asm/sysreg-defs.h
  GEN     perf-archive
  GEN     perf-iostat
<SNIP>
  CC      /tmp/build/perf/util/bpf-filter-flex.o
  LD      /tmp/build/perf/util/perf-util-in.o
  LD      /tmp/build/perf/perf-util-in.o
  AR      /tmp/build/perf/libperf-util.a
  CC      /tmp/build/perf/pmu-events/pmu-events.o
  LD      /tmp/build/perf/pmu-events/pmu-events-in.o
  AR      /tmp/build/perf/libpmu-events.a
  LINK    /tmp/build/perf/perf
  GEN     /tmp/build/perf/python/perf.cpython-310-x86_64-linux-gnu.so
/usr/lib/gcc/x86_64-alpine-linux-musl/12.2.1/../../../../x86_64-alpine-linux-musl/bin/ld: /tmp/build/perf/libperf-util.a(perf-util-in.o): in function `print_common_stats':
bpf-trace-summary.c:(.text+0xf2020): undefined reference to `syscalltbl__name'
collect2: error: ld returned 1 exit status
make[2]: *** [Makefile.perf:804: /tmp/build/perf/perf] Error 1
make[2]: *** Waiting for unfinished jobs....
make[1]: *** [Makefile.perf:290: sub-make] Error 2
make: *** [Makefile:76: all] Error 2
make: Leaving directory '/git/perf-6.15.0-rc2/tools/perf'
+ exit 1
toolsbuilder@five:~$ 

I'll take a look tomorrow.

- Arnaldo

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v4 1/2] perf trace: Implement syscall summary in BPF
  2025-04-24 22:06         ` Arnaldo Carvalho de Melo
@ 2025-04-25 21:51           ` Namhyung Kim
  0 siblings, 0 replies; 10+ messages in thread
From: Namhyung Kim @ 2025-04-25 21:51 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo
  Cc: Howard Chu, Ian Rogers, Kan Liang, Jiri Olsa, Adrian Hunter,
	Peter Zijlstra, Ingo Molnar, LKML, linux-perf-users, Song Liu,
	bpf

Hi Arnaldo,

On Thu, Apr 24, 2025 at 07:06:32PM -0300, Arnaldo Carvalho de Melo wrote:
> On Wed, Apr 23, 2025 at 05:50:51PM -0300, Arnaldo Carvalho de Melo wrote:
> > On Wed, Apr 23, 2025 at 10:41:55AM -0700, Namhyung Kim wrote:
> > > On Wed, Apr 23, 2025 at 01:26:48PM -0300, Arnaldo Carvalho de Melo wrote:
> > > > On Fri, Mar 28, 2025 at 06:46:36PM -0700, Howard Chu wrote:
> > > > > On Tue, Mar 25, 2025 at 9:40 PM Namhyung Kim <namhyung@kernel.org> wrote:
> > > > > >      syscall            calls  errors  total       min       avg       max       stddev
> 
> > > > > >      --------------- --------  ------ -------- --------- --------- ---------     ------
> > > > > >      epoll_wait           561      0  4530.843     0.000     8.076   520.941     18.75%
> > > > > >      futex                693     45  4317.231     0.000     6.230   500.077     21.98%
> > > > > >      poll                 300      0  1040.109     0.000     3.467   120.928     17.02%
> > > > > >      clock_nanosleep        1      0  1000.172  1000.172  1000.172  1000.172      0.00%
> > > > > >      ppoll                360      0   872.386     0.001     2.423   253.275     41.91%
> > > > > >      epoll_pwait           14      0   384.349     0.001    27.453   380.002     98.79%
> > > > > >      pselect6              14      0   108.130     7.198     7.724     8.206      0.85%
> > > > > >      nanosleep             39      0    43.378     0.069     1.112    10.084     44.23%
> > > > > >      ...
> 
> > > > I added the following to align sched_[gs]etaffinity,
> 
> > > Thanks for processing the patch and updating this.  But I'm afraid there
> > > are more syscalls with longer names and this is not the only place to
> > > print the syscall names.  Also I think we need to update length of the
> > > time fields.  So I prefer handling them in a separate patch later.
>  
> > Fair enough, I'm leaving the patch as-is.
> 
> But, still have to look at this:
> 
> toolsbuilder@five:~$ time dm
>    1   114.52 almalinux:8                   : Ok   gcc (GCC) 8.5.0 20210514 (Red Hat 8.5.0-26) , clang version 18.1.8 (Red Hat 18.1.8-1.module_el8.10.0+3903+ca21d481) flex 2.6.1
>    2   111.09 almalinux:9                   : Ok   gcc (GCC) 11.5.0 20240719 (Red Hat 11.5.0-5) , clang version 18.1.8 (AlmaLinux OS Foundation 18.1.8-3.el9) flex 2.6.4
>    3: almalinux:9-i386WARNING: image platform (linux/386) does not match the expected platform (linux/amd64)
> WARNING: image platform (linux/386) does not match the expected platform (linux/amd64)
>    132.71 almalinux:9-i386              : Ok   gcc (GCC) 11.4.1 20231218 (Red Hat 11.4.1-3) , clang version 17.0.6 (AlmaLinux OS Foundation 17.0.6-5.el9) flex 2.6.4
>    4    21.54 alpine:3.16                   : FAIL gcc version 11.2.1 20220219 (Alpine 11.2.1_git20220219) 
>     bpf-trace-summary.c:(.text+0xf0760): undefined reference to `syscalltbl__name'
>     collect2: error: ld returned 1 exit status
>    5    16.50 alpine:3.17                   : FAIL gcc version 12.2.1 20220924 (Alpine 12.2.1_git20220924-r4) 
>     bpf-trace-summary.c:(.text+0xf2020): undefined reference to `syscalltbl__name'
>     collect2: error: ld returned 1 exit status
> 
> More info:
> 
> perf-6.15.0-rc2/HEAD
> perf-6.15.0-rc2/PERF-VERSION-FILE
> BUILD_TARBALL_HEAD=24c0c35d4640052c61ed539a777bd3bd60d62bbf
> Using built-in specs.
> COLLECT_GCC=gcc
> COLLECT_LTO_WRAPPER=/usr/libexec/gcc/x86_64-alpine-linux-musl/12.2.1/lto-wrapper
> Target: x86_64-alpine-linux-musl
> Configured with: /home/buildozer/aports/main/gcc/src/gcc-12-20220924/configure --prefix=/usr --mandir=/usr/share/man --infodir=/usr/share/info --build=x86_64-alpine-linux-musl --host=x86_64-alpine-linux-musl --target=x86_64-alpine-linux-musl --enable-checking=release --disable-fixed-point --disable-libstdcxx-pch --disable-multilib --disable-nls --disable-werror --disable-symvers --enable-__cxa_atexit --enable-default-pie --enable-default-ssp --enable-languages=c,c++,d,objc,go,fortran,ada --disable-libssp --disable-libsanitizer --enable-shared --enable-threads --enable-tls --with-bugurl=https://gitlab.alpinelinux.org/alpine/aports/-/issues --with-system-zlib --with-linker-hash-style=gnu --with-pkgversion='Alpine 12.2.1_git20220924-r4'
> Thread model: posix
> Supported LTO compression algorithms: zlib
> gcc version 12.2.1 20220924 (Alpine 12.2.1_git20220924-r4) 
> + make 'NO_LIBTRACEEVENT=1' 'ARCH=' 'CROSS_COMPILE=' 'EXTRA_CFLAGS=' -C tools/perf 'O=/tmp/build/perf'
> make: Entering directory '/git/perf-6.15.0-rc2/tools/perf'
>   BUILD:   Doing 'make -j28' parallel build
> Warning: Skipped check-headers due to missing ../../include
> Makefile.config:563: No elfutils/debuginfod.h found, no debuginfo server support, please install libdebuginfod-dev/elfutils-debuginfod-client-devel or equivalent
> Makefile.config:605: No sys/sdt.h found, no SDT events are defined, please install systemtap-sdt-devel or systemtap-sdt-dev
> Makefile.config:1085: No libbabeltrace found, disables 'perf data' CTF format support, please install libbabeltrace-dev[el]/libbabeltrace-ctf-dev
> Makefile.config:1128: No alternatives command found, you need to set JDIR= to point to the root of your Java directory
> Makefile.config:1159: libpfm4 not found, disables libpfm4 support. Please install libpfm4-dev
> 
> Auto-detecting system features:
> ...                                   libdw: [ on  ]
> ...                                   glibc: [ OFF ]
> ...                                  libelf: [ on  ]
> ...                                 libnuma: [ on  ]
> ...                  numa_num_possible_cpus: [ on  ]
> ...                                 libperl: [ on  ]
> ...                               libpython: [ on  ]
> ...                               libcrypto: [ on  ]
> ...                             libcapstone: [ on  ]
> ...                               llvm-perf: [ on  ]
> ...                                    zlib: [ on  ]
> ...                                    lzma: [ on  ]
> ...                               get_cpuid: [ on  ]
> ...                                     bpf: [ on  ]
> ...                                  libaio: [ on  ]
> ...                                 libzstd: [ on  ]
> 
>   PERF_VERSION = 6.15.rc2.g24c0c35d4640
>   GEN     /tmp/build/perf/common-cmds.h
>   GEN     /tmp/build/perf/arch/arm64/include/generated/asm/sysreg-defs.h
>   GEN     perf-archive
>   GEN     perf-iostat
> <SNIP>
>   CC      /tmp/build/perf/util/bpf-filter-flex.o
>   LD      /tmp/build/perf/util/perf-util-in.o
>   LD      /tmp/build/perf/perf-util-in.o
>   AR      /tmp/build/perf/libperf-util.a
>   CC      /tmp/build/perf/pmu-events/pmu-events.o
>   LD      /tmp/build/perf/pmu-events/pmu-events-in.o
>   AR      /tmp/build/perf/libpmu-events.a
>   LINK    /tmp/build/perf/perf
>   GEN     /tmp/build/perf/python/perf.cpython-310-x86_64-linux-gnu.so
> /usr/lib/gcc/x86_64-alpine-linux-musl/12.2.1/../../../../x86_64-alpine-linux-musl/bin/ld: /tmp/build/perf/libperf-util.a(perf-util-in.o): in function `print_common_stats':
> bpf-trace-summary.c:(.text+0xf2020): undefined reference to `syscalltbl__name'
> collect2: error: ld returned 1 exit status
> make[2]: *** [Makefile.perf:804: /tmp/build/perf/perf] Error 1
> make[2]: *** Waiting for unfinished jobs....
> make[1]: *** [Makefile.perf:290: sub-make] Error 2
> make: *** [Makefile:76: all] Error 2
> make: Leaving directory '/git/perf-6.15.0-rc2/tools/perf'
> + exit 1
> toolsbuilder@five:~$ 
> 
> I'll take a look tomorrow.

Thanks for the report.  I think it's because syscalltbl.c depends on
CONFIG_TRACE but bpf-trace-summary depends on CONFIG_PERF_BPF_SKEL.

In the future, I'd like to get rid of the dependency on libtraceevent in
perf trace and make it possible to use BPF/BTF only.

How about this?

Thanks,
Namhyung


---8<---
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 4f00cde8c3ea63eb..7ae5b4b9330af0ce 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -173,9 +173,12 @@ perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf_off_cpu.o
 perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf-filter.o
 perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf-filter-flex.o
 perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf-filter-bison.o
-perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf-trace-summary.o
 perf-util-$(CONFIG_PERF_BPF_SKEL) += btf.o
 
+ifeq ($(CONFIG_TRACE),y)
+  perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf-trace-summary.o
+endif
+
 ifeq ($(CONFIG_LIBTRACEEVENT),y)
   perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf_lock_contention.o
 endif


^ permalink raw reply related	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2025-04-25 21:51 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-03-26  4:40 [PATCH v4 1/2] perf trace: Implement syscall summary in BPF Namhyung Kim
2025-03-26  4:40 ` [PATCH v4 2/2] perf test: Add perf trace summary test Namhyung Kim
2025-03-29  1:48   ` Howard Chu
2025-03-29  1:46 ` [PATCH v4 1/2] perf trace: Implement syscall summary in BPF Howard Chu
2025-04-23 16:19   ` Arnaldo Carvalho de Melo
2025-04-23 16:26   ` Arnaldo Carvalho de Melo
2025-04-23 17:41     ` Namhyung Kim
2025-04-23 20:50       ` Arnaldo Carvalho de Melo
2025-04-24 22:06         ` Arnaldo Carvalho de Melo
2025-04-25 21:51           ` Namhyung Kim

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).