From: Andi Kleen <andi@firstfloor.org>
To: acme@kernel.org
Cc: linux-kernel@vger.kernel.org, jolsa@kernel.org,
eranian@google.com, kan.liang@linux.intel.com,
peterz@infradead.org, Andi Kleen <ak@linux.intel.com>
Subject: [PATCH v2 3/9] perf pmu: Use file system cache to optimize sysfs access
Date: Sun, 20 Oct 2019 10:51:56 -0700 [thread overview]
Message-ID: <20191020175202.32456-4-andi@firstfloor.org> (raw)
In-Reply-To: <20191020175202.32456-1-andi@firstfloor.org>
From: Andi Kleen <ak@linux.intel.com>
pmu.c does a lot of redundant /sys accesses while parsing aliases
and probing for PMUs. On large systems with a lot of PMUs this
can get expensive (>2s):
% time seconds usecs/call calls errors syscall
------ ----------- ----------- --------- --------- ----------------
27.25 1.227847 8 160888 16976 openat
26.42 1.190481 7 164224 164077 stat
Add a cache to remember if specific file names exist or don't
exist, which eliminates most of this overhead.
Also convert some stat() calls to the slightly cheaper access().
Resulting in:
0.18 0.004166 2 1851 305 open
0.08 0.001970 2 829 622 access
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
tools/perf/util/Build | 1 +
tools/perf/util/fncache.c | 52 ++++++++++++++++++++++++++++++++++++++
tools/perf/util/fncache.h | 8 ++++++
tools/perf/util/pmu.c | 53 ++++++++++++++++++++++++---------------
tools/perf/util/srccode.c | 9 +------
5 files changed, 95 insertions(+), 28 deletions(-)
create mode 100644 tools/perf/util/fncache.c
create mode 100644 tools/perf/util/fncache.h
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 39814b1806a6..2c1504fe924c 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -48,6 +48,7 @@ perf-y += header.o
perf-y += callchain.o
perf-y += values.o
perf-y += debug.o
+perf-y += fncache.o
perf-y += machine.o
perf-y += map.o
perf-y += pstack.o
diff --git a/tools/perf/util/fncache.c b/tools/perf/util/fncache.c
new file mode 100644
index 000000000000..0e6e2370b3af
--- /dev/null
+++ b/tools/perf/util/fncache.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Manage a cache of file names' existence */
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <linux/list.h>
+#include "fncache.h"
+
+struct fncache { /* one cache entry: a file name plus the boolean remembered for it */
+ struct hlist_node nd; /* links the entry into its fncache_hash bucket */
+ bool res; /* value stored by update_fncache() and handed back by lookup_fncache() */
+ char name[]; /* NUL-terminated key, allocated inline right after the struct */
+};
+
+#define FNHSIZE 61 /* number of hash buckets */
+
+static struct hlist_head fncache_hash[FNHSIZE]; /* bucket heads, indexed by shash(name) % FNHSIZE */
+
+unsigned shash(const unsigned char *s) /* hash the NUL-terminated byte string s */
+{
+ unsigned h = 0;
+ while (*s) /* stop at the terminating NUL; an empty string hashes to 0 */
+ h = 65599 * h + *s++; /* multiply-and-add per byte; unsigned arithmetic wraps */
+ return h ^ (h >> 16); /* fold the upper bits into the lower ones before returning */
+}
+
+bool lookup_fncache(const char *name, bool *res) /* true if name is cached; then *res holds the stored value */
+{
+ int h = shash((const unsigned char *)name) % FNHSIZE; /* bucket index for name */
+ struct fncache *n;
+
+ hlist_for_each_entry (n, &fncache_hash[h], nd) { /* scan only the bucket name hashes to */
+ if (!strcmp(n->name, name)) { /* exact string match required */
+ *res = n->res;
+ return true;
+ }
+ }
+ return false; /* not cached: *res is left untouched */
+}
+
+/* Remember res for name. No LRU, only use when bounded in some other way. */
+void update_fncache(const char *name, bool res) /* inserts unconditionally: no check for an existing entry */
+{
+ struct fncache *n = malloc(sizeof(struct fncache) + strlen(name) + 1); /* header + name + NUL */
+ int h = shash((const unsigned char *)name) % FNHSIZE; /* same bucket choice as lookup_fncache() */
+
+ if (!n)
+ return; /* allocation failed: silently skip caching, nothing is reported to the caller */
+ strcpy(n->name, name); /* fits: the allocation above reserved strlen(name) + 1 bytes */
+ n->res = res;
+ hlist_add_head(&n->nd, &fncache_hash[h]); /* newest entry goes first; nothing in this file frees entries */
+}
diff --git a/tools/perf/util/fncache.h b/tools/perf/util/fncache.h
new file mode 100644
index 000000000000..93ca473f5357
--- /dev/null
+++ b/tools/perf/util/fncache.h
@@ -0,0 +1,8 @@
+#ifndef _FCACHE_H /* include guard; NOTE(review): spelled FCACHE although the file is fncache.h */
+#define _FCACHE_H 1
+/* NOTE(review): bool is used below but <stdbool.h> is not included here; includers must already provide it - confirm. */
+unsigned shash(const unsigned char *s); /* string hash; srccode.c now uses this instead of its own copy */
+void update_fncache(const char *name, bool res); /* store res for name in the cache */
+bool lookup_fncache(const char *name, bool *res); /* true when name is cached, with the value in *res */
+
+#endif
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 5608da82ad23..ae5e6e894e79 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -24,6 +24,7 @@
#include "pmu-events/pmu-events.h"
#include "string2.h"
#include "strbuf.h"
+#include "fncache.h"
struct perf_pmu_format {
char *name;
@@ -82,9 +83,9 @@ int perf_pmu__format_parse(char *dir, struct list_head *head)
*/
static int pmu_format(const char *name, struct list_head *format)
{
- struct stat st;
char path[PATH_MAX];
const char *sysfs = sysfs__mountpoint();
+ bool res = false;
if (!sysfs)
return -1;
@@ -92,8 +93,12 @@ static int pmu_format(const char *name, struct list_head *format)
snprintf(path, PATH_MAX,
"%s" EVENT_SOURCE_DEVICE_PATH "%s/format", sysfs, name);
- if (stat(path, &st) < 0)
+ if (lookup_fncache(path, &res) && !res)
+ return 0;
+
+ if (!res && access(path, R_OK) < 0)
return 0; /* no error if format does not exist */
+ update_fncache(path, true);
if (perf_pmu__format_parse(path, format))
return -1;
@@ -470,9 +475,9 @@ static int pmu_aliases_parse(char *dir, struct list_head *head)
*/
static int pmu_aliases(const char *name, struct list_head *head)
{
- struct stat st;
char path[PATH_MAX];
const char *sysfs = sysfs__mountpoint();
+ bool res = false;
if (!sysfs)
return -1;
@@ -480,8 +485,11 @@ static int pmu_aliases(const char *name, struct list_head *head)
snprintf(path, PATH_MAX,
"%s/bus/event_source/devices/%s/events", sysfs, name);
- if (stat(path, &st) < 0)
- return 0; /* no error if 'events' does not exist */
+ if (lookup_fncache(path, &res) && !res)
+ return 0;
+ if (!res && access(path, R_OK) < 0)
+ return 0;
+ update_fncache(path, true);
if (pmu_aliases_parse(path, head))
return -1;
@@ -520,7 +528,6 @@ static int pmu_alias_terms(struct perf_pmu_alias *alias,
*/
static int pmu_type(const char *name, __u32 *type)
{
- struct stat st;
char path[PATH_MAX];
FILE *file;
int ret = 0;
@@ -532,7 +539,7 @@ static int pmu_type(const char *name, __u32 *type)
snprintf(path, PATH_MAX,
"%s" EVENT_SOURCE_DEVICE_PATH "%s/type", sysfs, name);
- if (stat(path, &st) < 0)
+ if (access(path, R_OK) < 0)
return -1;
file = fopen(path, "r");
@@ -623,14 +630,16 @@ static struct perf_cpu_map *pmu_cpumask(const char *name)
static bool pmu_is_uncore(const char *name)
{
char path[PATH_MAX];
- struct perf_cpu_map *cpus;
- const char *sysfs = sysfs__mountpoint();
+ const char *sysfs;
+ bool res;
+ sysfs = sysfs__mountpoint();
snprintf(path, PATH_MAX, CPUS_TEMPLATE_UNCORE, sysfs, name);
- cpus = __pmu_cpumask(path);
- perf_cpu_map__put(cpus);
-
- return !!cpus;
+ if (lookup_fncache(path, &res))
+ return res;
+ res = access(path, R_OK) == 0;
+ update_fncache(path, res);
+ return res;
}
/*
@@ -640,9 +649,9 @@ static bool pmu_is_uncore(const char *name)
*/
static int is_arm_pmu_core(const char *name)
{
- struct stat st;
char path[PATH_MAX];
const char *sysfs = sysfs__mountpoint();
+ bool res;
if (!sysfs)
return 0;
@@ -650,10 +659,11 @@ static int is_arm_pmu_core(const char *name)
/* Look for cpu sysfs (specific to arm) */
scnprintf(path, PATH_MAX, "%s/bus/event_source/devices/%s/cpus",
sysfs, name);
- if (stat(path, &st) == 0)
- return 1;
-
- return 0;
+ if (lookup_fncache(path, &res))
+ return res;
+ res = access(path, R_OK) == 0;
+ update_fncache(path, res);
+ return res;
}
static char *perf_pmu__getcpuid(struct perf_pmu *pmu)
@@ -1519,9 +1529,9 @@ bool pmu_have_event(const char *pname, const char *name)
static FILE *perf_pmu__open_file(struct perf_pmu *pmu, const char *name)
{
- struct stat st;
char path[PATH_MAX];
const char *sysfs;
+ bool res = false;
sysfs = sysfs__mountpoint();
if (!sysfs)
@@ -1530,8 +1540,11 @@ static FILE *perf_pmu__open_file(struct perf_pmu *pmu, const char *name)
snprintf(path, PATH_MAX,
"%s" EVENT_SOURCE_DEVICE_PATH "%s/%s", sysfs, pmu->name, name);
- if (stat(path, &st) < 0)
+ if (lookup_fncache(path, &res) && !res)
+ return NULL;
+ if (!res && access(path, R_OK) < 0)
return NULL;
+ update_fncache(path, true);
return fopen(path, "r");
}
diff --git a/tools/perf/util/srccode.c b/tools/perf/util/srccode.c
index d84ed8b6caaa..c29edaaca863 100644
--- a/tools/perf/util/srccode.c
+++ b/tools/perf/util/srccode.c
@@ -16,6 +16,7 @@
#include "srccode.h"
#include "debug.h"
#include <internal/lib.h> // page_size
+#include "fncache.h"
#define MAXSRCCACHE (32*1024*1024)
#define MAXSRCFILES 64
@@ -36,14 +37,6 @@ static LIST_HEAD(srcfile_list);
static long map_total_sz;
static int num_srcfiles;
-static unsigned shash(unsigned char *s)
-{
- unsigned h = 0;
- while (*s)
- h = 65599 * h + *s++;
- return h ^ (h >> 16);
-}
-
static int countlines(char *map, int maplen)
{
int numl;
--
2.21.0
next prev parent reply other threads:[~2019-10-20 17:52 UTC|newest]
Thread overview: 28+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-10-20 17:51 Optimize perf stat for large number of events/cpus v2 Andi Kleen
2019-10-20 17:51 ` [PATCH v2 1/9] perf evsel: Always preserve errno while cleaning up perf_event_open failures Andi Kleen
2019-10-22 8:01 ` Jiri Olsa
2019-11-12 11:18 ` [tip: perf/core] " tip-bot2 for Andi Kleen
2019-10-20 17:51 ` [PATCH v2 2/9] perf evsel: Avoid close(-1) Andi Kleen
2019-10-22 8:01 ` Jiri Olsa
2019-11-12 11:18 ` [tip: perf/core] " tip-bot2 for Andi Kleen
2019-10-20 17:51 ` Andi Kleen [this message]
2019-10-23 9:47 ` [PATCH v2 3/9] perf pmu: Use file system cache to optimize sysfs access Jiri Olsa
2019-10-20 17:51 ` [PATCH v2 4/9] perf affinity: Add infrastructure to save/restore affinity Andi Kleen
2019-10-23 9:59 ` Jiri Olsa
2019-10-23 13:02 ` Andi Kleen
2019-10-23 14:30 ` Jiri Olsa
2019-10-23 14:52 ` Andi Kleen
2019-10-23 16:16 ` Alexey Budankov
2019-10-23 17:19 ` Andi Kleen
2019-10-23 18:08 ` Alexey Budankov
2019-10-23 22:37 ` Andi Kleen
2019-10-24 8:46 ` Alexey Budankov
2019-10-20 17:51 ` [PATCH v2 5/9] perf evsel: Add iterator to iterate over events ordered by CPU Andi Kleen
2019-10-20 17:51 ` [PATCH v2 6/9] perf stat: Use affinity for closing file descriptors Andi Kleen
2019-10-20 17:52 ` [PATCH v2 7/9] perf stat: Use affinity for opening events Andi Kleen
2019-10-20 17:52 ` [PATCH v2 8/9] perf stat: Use affinity for reading Andi Kleen
2019-10-20 17:52 ` [PATCH v2 9/9] perf stat: Use affinity for enabling/disabling events Andi Kleen
2019-10-23 10:30 ` Jiri Olsa
2019-10-23 13:07 ` Andi Kleen
2019-10-22 8:02 ` Optimize perf stat for large number of events/cpus v2 Jiri Olsa
2019-10-22 14:11 ` Arnaldo Carvalho de Melo
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20191020175202.32456-4-andi@firstfloor.org \
--to=andi@firstfloor.org \
--cc=acme@kernel.org \
--cc=ak@linux.intel.com \
--cc=eranian@google.com \
--cc=jolsa@kernel.org \
--cc=kan.liang@linux.intel.com \
--cc=linux-kernel@vger.kernel.org \
--cc=peterz@infradead.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.