From: Arnaldo Carvalho de Melo <acme@kernel.org>
To: Ingo Molnar <mingo@kernel.org>, Thomas Gleixner <tglx@linutronix.de>
Cc: Jiri Olsa <jolsa@kernel.org>, Namhyung Kim <namhyung@kernel.org>,
Clark Williams <williams@redhat.com>,
linux-kernel@vger.kernel.org, linux-perf-users@vger.kernel.org,
Andi Kleen <ak@linux.intel.com>,
Arnaldo Carvalho de Melo <acme@redhat.com>
Subject: [PATCH 08/23] perf stat: Use affinity for reading
Date: Tue, 3 Dec 2019 10:55:51 -0300 [thread overview]
Message-ID: <20191203135606.24902-9-acme@kernel.org> (raw)
In-Reply-To: <20191203135606.24902-1-acme@kernel.org>
From: Andi Kleen <ak@linux.intel.com>
Restructure event reading to use affinity to minimize the number of IPIs
needed.
Before on a large test case with 94 CPUs:
  % time     seconds  usecs/call     calls    errors syscall
  ------ ----------- ----------- --------- --------- ----------------
    3.16    0.106079           4     22082           read
After:
    3.43    0.081295           3     22082           read
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Link: http://lore.kernel.org/lkml/20191121001522.180827-11-andi@firstfloor.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
tools/perf/builtin-stat.c | 97 ++++++++++++++++++++++-----------------
tools/perf/util/evsel.h | 1 +
2 files changed, 57 insertions(+), 41 deletions(-)
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index cf8516e701e2..a098c2ebf4ea 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -266,15 +266,10 @@ static int read_single_counter(struct evsel *counter, int cpu,
* Read out the results of a single counter:
* do not aggregate counts across CPUs in system-wide mode
*/
-static int read_counter(struct evsel *counter, struct timespec *rs)
+static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu)
{
int nthreads = perf_thread_map__nr(evsel_list->core.threads);
- int ncpus, cpu, thread;
-
- if (target__has_cpu(&target) && !target__has_per_thread(&target))
- ncpus = perf_evsel__nr_cpus(counter);
- else
- ncpus = 1;
+ int thread;
if (!counter->supported)
return -ENOENT;
@@ -283,40 +278,38 @@ static int read_counter(struct evsel *counter, struct timespec *rs)
nthreads = 1;
for (thread = 0; thread < nthreads; thread++) {
- for (cpu = 0; cpu < ncpus; cpu++) {
- struct perf_counts_values *count;
-
- count = perf_counts(counter->counts, cpu, thread);
-
- /*
- * The leader's group read loads data into its group members
- * (via perf_evsel__read_counter) and sets threir count->loaded.
- */
- if (!perf_counts__is_loaded(counter->counts, cpu, thread) &&
- read_single_counter(counter, cpu, thread, rs)) {
- counter->counts->scaled = -1;
- perf_counts(counter->counts, cpu, thread)->ena = 0;
- perf_counts(counter->counts, cpu, thread)->run = 0;
- return -1;
- }
+ struct perf_counts_values *count;
- perf_counts__set_loaded(counter->counts, cpu, thread, false);
+ count = perf_counts(counter->counts, cpu, thread);
- if (STAT_RECORD) {
- if (perf_evsel__write_stat_event(counter, cpu, thread, count)) {
- pr_err("failed to write stat event\n");
- return -1;
- }
- }
+ /*
+ * The leader's group read loads data into its group members
+ * (via perf_evsel__read_counter()) and sets their count->loaded.
+ */
+ if (!perf_counts__is_loaded(counter->counts, cpu, thread) &&
+ read_single_counter(counter, cpu, thread, rs)) {
+ counter->counts->scaled = -1;
+ perf_counts(counter->counts, cpu, thread)->ena = 0;
+ perf_counts(counter->counts, cpu, thread)->run = 0;
+ return -1;
+ }
+
+ perf_counts__set_loaded(counter->counts, cpu, thread, false);
- if (verbose > 1) {
- fprintf(stat_config.output,
- "%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
- perf_evsel__name(counter),
- cpu,
- count->val, count->ena, count->run);
+ if (STAT_RECORD) {
+ if (perf_evsel__write_stat_event(counter, cpu, thread, count)) {
+ pr_err("failed to write stat event\n");
+ return -1;
}
}
+
+ if (verbose > 1) {
+ fprintf(stat_config.output,
+ "%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
+ perf_evsel__name(counter),
+ cpu,
+ count->val, count->ena, count->run);
+ }
}
return 0;
@@ -325,15 +318,37 @@ static int read_counter(struct evsel *counter, struct timespec *rs)
static void read_counters(struct timespec *rs)
{
struct evsel *counter;
- int ret;
+ struct affinity affinity;
+ int i, ncpus, cpu;
+
+ if (affinity__setup(&affinity) < 0)
+ return;
+
+ ncpus = perf_cpu_map__nr(evsel_list->core.all_cpus);
+ if (!target__has_cpu(&target) || target__has_per_thread(&target))
+ ncpus = 1;
+ evlist__for_each_cpu(evsel_list, i, cpu) {
+ if (i >= ncpus)
+ break;
+ affinity__set(&affinity, cpu);
+
+ evlist__for_each_entry(evsel_list, counter) {
+ if (evsel__cpu_iter_skip(counter, cpu))
+ continue;
+ if (!counter->err) {
+ counter->err = read_counter_cpu(counter, rs,
+ counter->cpu_iter - 1);
+ }
+ }
+ }
+ affinity__cleanup(&affinity);
evlist__for_each_entry(evsel_list, counter) {
- ret = read_counter(counter, rs);
- if (ret)
+ if (counter->err)
pr_debug("failed to read counter %s\n", counter->name);
-
- if (ret == 0 && perf_stat_process_counter(&stat_config, counter))
+ if (counter->err == 0 && perf_stat_process_counter(&stat_config, counter))
pr_warning("failed to process counter %s\n", counter->name);
+ counter->err = 0;
}
}
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index ca82a93960cd..c8af4bc23f8f 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -86,6 +86,7 @@ struct evsel {
struct list_head config_terms;
struct bpf_object *bpf_obj;
int bpf_fd;
+ int err;
bool auto_merge_stats;
bool merged_stat;
const char * metric_expr;
--
2.21.0
next prev parent reply other threads:[~2019-12-03 13:55 UTC|newest]
Thread overview: 28+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-12-03 13:55 [GIT PULL] perf/core improvements and fixes Arnaldo Carvalho de Melo
2019-12-03 13:55 ` [PATCH 01/23] perf cpumap: Maintain cpumaps ordered and without dups Arnaldo Carvalho de Melo
2019-12-03 13:55 ` [PATCH 02/23] perf evlist: Maintain evlist->all_cpus Arnaldo Carvalho de Melo
2019-12-03 13:55 ` [PATCH 03/23] perf evsel: Add iterator to iterate over events ordered by CPU Arnaldo Carvalho de Melo
2019-12-03 13:55 ` [PATCH 04/23] perf evsel: Add functions to close evsel on a CPU Arnaldo Carvalho de Melo
2019-12-03 13:55 ` [PATCH 05/23] perf stat: Use affinity for closing file descriptors Arnaldo Carvalho de Melo
2019-12-03 13:55 ` [PATCH 06/23] perf stat: Factor out open error handling Arnaldo Carvalho de Melo
2019-12-03 13:55 ` [PATCH 07/23] perf stat: Use affinity for opening events Arnaldo Carvalho de Melo
2019-12-03 13:55 ` Arnaldo Carvalho de Melo [this message]
2019-12-03 13:55 ` [PATCH 09/23] perf evsel: Add functions to enable/disable for a specific CPU Arnaldo Carvalho de Melo
2019-12-03 13:55 ` [PATCH 10/23] perf stat: Use affinity for enabling/disabling events Arnaldo Carvalho de Melo
2019-12-03 13:55 ` [PATCH 11/23] perf jit: Move test functionality in to a test Arnaldo Carvalho de Melo
2019-12-03 13:55 ` Arnaldo Carvalho de Melo
2019-12-03 13:55 ` [PATCH 12/23] perf machine: Fill map_symbol->maps in append_inlines() to fix segfault Arnaldo Carvalho de Melo
2019-12-03 13:55 ` [PATCH 13/23] perf bench: Update the copies of x86's mem{cpy,set}_64.S Arnaldo Carvalho de Melo
2019-12-03 13:55 ` [PATCH 14/23] tools arch x86: Sync the msr-index.h copy with the kernel sources Arnaldo Carvalho de Melo
2019-12-03 13:55 ` [PATCH 15/23] tools headers uapi: Sync linux/fscrypt.h " Arnaldo Carvalho de Melo
2019-12-03 13:55 ` [PATCH 16/23] tools headers uapi: Sync linux/stat.h " Arnaldo Carvalho de Melo
2019-12-03 13:56 ` [PATCH 17/23] tools headers kvm: Sync kvm headers " Arnaldo Carvalho de Melo
2019-12-03 13:56 ` [PATCH 18/23] tools headers UAPI: Sync sched.h with the kernel Arnaldo Carvalho de Melo
2019-12-03 15:40 ` Christian Brauner
2019-12-03 13:56 ` [PATCH 19/23] perf beauty: Add CLEAR_SIGHAND support for clone's flags arg Arnaldo Carvalho de Melo
2019-12-03 15:41 ` Christian Brauner
2019-12-03 13:56 ` [PATCH 20/23] tools arch x86: Sync asm/cpufeatures.h with the kernel sources Arnaldo Carvalho de Melo
2019-12-03 13:56 ` [PATCH 21/23] perf kvm: Clarify the 'perf kvm' -i and -o command line options Arnaldo Carvalho de Melo
2019-12-03 13:56 ` [PATCH 22/23] libtraceevent: Fix lib installation with O= Arnaldo Carvalho de Melo
2019-12-03 13:56 ` [PATCH 23/23] libtraceevent: Copy pkg-config file to output folder when using O= Arnaldo Carvalho de Melo
2019-12-04 7:51 ` [GIT PULL] perf/core improvements and fixes Ingo Molnar
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20191203135606.24902-9-acme@kernel.org \
--to=acme@kernel.org \
--cc=acme@redhat.com \
--cc=ak@linux.intel.com \
--cc=jolsa@kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-perf-users@vger.kernel.org \
--cc=mingo@kernel.org \
--cc=namhyung@kernel.org \
--cc=tglx@linutronix.de \
--cc=williams@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.