All of lore.kernel.org
 help / color / mirror / Atom feed
From: Andi Kleen <andi@firstfloor.org>
To: acme@kernel.org
Cc: linux-kernel@vger.kernel.org, jolsa@kernel.org,
	eranian@google.com, kan.liang@linux.intel.com,
	peterz@infradead.org, Andi Kleen <ak@linux.intel.com>
Subject: [PATCH v2 6/9] perf stat: Use affinity for closing file descriptors
Date: Sun, 20 Oct 2019 10:51:59 -0700	[thread overview]
Message-ID: <20191020175202.32456-7-andi@firstfloor.org> (raw)
In-Reply-To: <20191020175202.32456-1-andi@firstfloor.org>

From: Andi Kleen <ak@linux.intel.com>

Closing a perf fd can also trigger an IPI to the target CPU.
Apply the same affinity technique that we already use for reading/enabling
events to closing as well, to optimize the CPU transitions.

Before, on a large test case with 94 CPUs:

% time     seconds  usecs/call     calls    errors syscall
------ ----------- ----------- --------- --------- ----------------
 32.56    3.085463          50     61483           close

After:

 10.54    0.735704          11     61485           close

Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 tools/perf/lib/evsel.c              | 27 +++++++++++++++++++------
 tools/perf/lib/include/perf/evsel.h |  1 +
 tools/perf/util/evlist.c            | 31 +++++++++++++++++++++++++++--
 tools/perf/util/evsel.h             |  1 +
 4 files changed, 52 insertions(+), 8 deletions(-)

diff --git a/tools/perf/lib/evsel.c b/tools/perf/lib/evsel.c
index 5a89857b0381..ea775dacbd2d 100644
--- a/tools/perf/lib/evsel.c
+++ b/tools/perf/lib/evsel.c
@@ -114,16 +114,23 @@ int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus,
 	return err;
 }
 
+static void perf_evsel__close_fd_cpu(struct perf_evsel *evsel, int cpu)
+{
+	int thread;
+
+	for (thread = 0; thread < xyarray__max_y(evsel->fd); ++thread) {
+		if (FD(evsel, cpu, thread) >= 0)
+			close(FD(evsel, cpu, thread));
+		FD(evsel, cpu, thread) = -1;
+	}
+}
+
 void perf_evsel__close_fd(struct perf_evsel *evsel)
 {
-	int cpu, thread;
+	int cpu;
 
 	for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++)
-		for (thread = 0; thread < xyarray__max_y(evsel->fd); ++thread) {
-			if (FD(evsel, cpu, thread) >= 0)
-				close(FD(evsel, cpu, thread));
-			FD(evsel, cpu, thread) = -1;
-		}
+		perf_evsel__close_fd_cpu(evsel, cpu);
 }
 
 void perf_evsel__free_fd(struct perf_evsel *evsel)
@@ -141,6 +148,14 @@ void perf_evsel__close(struct perf_evsel *evsel)
 	perf_evsel__free_fd(evsel);
 }
 
+void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu)
+{
+	if (evsel->fd == NULL)
+		return;
+
+	perf_evsel__close_fd_cpu(evsel, cpu);
+}
+
 int perf_evsel__read_size(struct perf_evsel *evsel)
 {
 	u64 read_format = evsel->attr.read_format;
diff --git a/tools/perf/lib/include/perf/evsel.h b/tools/perf/lib/include/perf/evsel.h
index 4388667f265c..ed10a914cd3f 100644
--- a/tools/perf/lib/include/perf/evsel.h
+++ b/tools/perf/lib/include/perf/evsel.h
@@ -28,6 +28,7 @@ LIBPERF_API void perf_evsel__delete(struct perf_evsel *evsel);
 LIBPERF_API int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus,
 				 struct perf_thread_map *threads);
 LIBPERF_API void perf_evsel__close(struct perf_evsel *evsel);
+LIBPERF_API void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu);
 LIBPERF_API int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread,
 				 struct perf_counts_values *count);
 LIBPERF_API int perf_evsel__enable(struct perf_evsel *evsel);
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 27b4b958eddd..b1b29d473a9f 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -18,6 +18,7 @@
 #include "debug.h"
 #include "units.h"
 #include <internal/lib.h> // page_size
+#include "affinity.h"
 #include "../perf.h"
 #include "asm/bug.h"
 #include "bpf-event.h"
@@ -1174,9 +1175,40 @@ void perf_evlist__set_selected(struct evlist *evlist,
 void evlist__close(struct evlist *evlist)
 {
 	struct evsel *evsel;
+	struct affinity affinity;
+	struct perf_cpu_map *cpus;
+	int i;
+
+	/*
+	 * So far record doesn't set up core.cpus. Also take the plain
+	 * path when the affinity state cannot be set up, so that the
+	 * file descriptors still get closed instead of being leaked.
+	 */
+	if (!evlist->core.cpus || affinity__setup(&affinity) < 0) {
+		evlist__for_each_entry_reverse(evlist, evsel)
+			evsel__close(evsel);
+		return;
+	}
 
-	evlist__for_each_entry_reverse(evlist, evsel)
-		evsel__close(evsel);
+	cpus = evlist__cpu_iter_start(evlist);
+	for (i = 0; i < cpus->nr; i++) {
+		int cpu = cpus->map[i];
+
+		affinity__set(&affinity, cpu);
+
+		evlist__for_each_entry_reverse(evlist, evsel) {
+			if (evlist__cpu_iter_skip(evsel, cpu))
+				continue;
+			perf_evsel__close_cpu(&evsel->core, evsel->cpu_index);
+			evlist__cpu_iter_next(evsel);
+		}
+	}
+	/* Pairs with affinity__setup() above. */
+	affinity__cleanup(&affinity);
+	evlist__for_each_entry_reverse(evlist, evsel) {
+		perf_evsel__free_fd(&evsel->core);
+		perf_evsel__free_id(&evsel->core);
+	}
 }
 
 static int perf_evlist__create_syswide_maps(struct evlist *evlist)
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index cf90019ae744..2e3b011ed09e 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -391,4 +391,5 @@ static inline bool evsel__has_callchain(const struct evsel *evsel)
 struct perf_env *perf_evsel__env(struct evsel *evsel);
 
 int perf_evsel__store_ids(struct evsel *evsel, struct evlist *evlist);
+
 #endif /* __PERF_EVSEL_H */
-- 
2.21.0


  parent reply	other threads:[~2019-10-20 17:52 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-10-20 17:51 Optimize perf stat for large number of events/cpus v2 Andi Kleen
2019-10-20 17:51 ` [PATCH v2 1/9] perf evsel: Always preserve errno while cleaning up perf_event_open failures Andi Kleen
2019-10-22  8:01   ` Jiri Olsa
2019-11-12 11:18   ` [tip: perf/core] " tip-bot2 for Andi Kleen
2019-10-20 17:51 ` [PATCH v2 2/9] perf evsel: Avoid close(-1) Andi Kleen
2019-10-22  8:01   ` Jiri Olsa
2019-11-12 11:18   ` [tip: perf/core] " tip-bot2 for Andi Kleen
2019-10-20 17:51 ` [PATCH v2 3/9] perf pmu: Use file system cache to optimize sysfs access Andi Kleen
2019-10-23  9:47   ` Jiri Olsa
2019-10-20 17:51 ` [PATCH v2 4/9] perf affinity: Add infrastructure to save/restore affinity Andi Kleen
2019-10-23  9:59   ` Jiri Olsa
2019-10-23 13:02     ` Andi Kleen
2019-10-23 14:30       ` Jiri Olsa
2019-10-23 14:52         ` Andi Kleen
2019-10-23 16:16           ` Alexey Budankov
2019-10-23 17:19             ` Andi Kleen
2019-10-23 18:08               ` Alexey Budankov
2019-10-23 22:37                 ` Andi Kleen
2019-10-24  8:46                   ` Alexey Budankov
2019-10-20 17:51 ` [PATCH v2 5/9] perf evsel: Add iterator to iterate over events ordered by CPU Andi Kleen
2019-10-20 17:51 ` Andi Kleen [this message]
2019-10-20 17:52 ` [PATCH v2 7/9] perf stat: Use affinity for opening events Andi Kleen
2019-10-20 17:52 ` [PATCH v2 8/9] perf stat: Use affinity for reading Andi Kleen
2019-10-20 17:52 ` [PATCH v2 9/9] perf stat: Use affinity for enabling/disabling events Andi Kleen
2019-10-23 10:30   ` Jiri Olsa
2019-10-23 13:07     ` Andi Kleen
2019-10-22  8:02 ` Optimize perf stat for large number of events/cpus v2 Jiri Olsa
2019-10-22 14:11   ` Arnaldo Carvalho de Melo

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20191020175202.32456-7-andi@firstfloor.org \
    --to=andi@firstfloor.org \
    --cc=acme@kernel.org \
    --cc=ak@linux.intel.com \
    --cc=eranian@google.com \
    --cc=jolsa@kernel.org \
    --cc=kan.liang@linux.intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=peterz@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.