From: Andi Kleen <andi@firstfloor.org>
To: acme@kernel.org
Cc: jolsa@kernel.org, linux-kernel@vger.kernel.org,
Andi Kleen <ak@linux.intel.com>
Subject: [PATCH v7 01/12] perf pmu: Use file system cache to optimize sysfs access
Date: Fri, 15 Nov 2019 21:52:18 -0800 [thread overview]
Message-ID: <20191116055229.62002-2-andi@firstfloor.org> (raw)
In-Reply-To: <20191116055229.62002-1-andi@firstfloor.org>
From: Andi Kleen <ak@linux.intel.com>
pmu.c does a lot of redundant /sys accesses while parsing aliases
and probing for PMUs. On large systems with a lot of PMUs this
can get expensive (>2s):
% time seconds usecs/call calls errors syscall
------ ----------- ----------- --------- --------- ----------------
27.25 1.227847 8 160888 16976 openat
26.42 1.190481 7 164224 164077 stat
Add a cache to remember if specific file names exist or don't
exist, which eliminates most of this overhead.
Also optimize some stat() calls to be slightly cheaper access()
Resulting in:
0.18 0.004166 2 1851 305 open
0.08 0.001970 2 829 622 access
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
v2: Use single lookup function as API (Jiri)
---
tools/perf/util/Build | 1 +
tools/perf/util/fncache.c | 63 +++++++++++++++++++++++++++++++++++++++
tools/perf/util/fncache.h | 7 +++++
tools/perf/util/pmu.c | 34 +++++++--------------
tools/perf/util/srccode.c | 9 +-----
5 files changed, 83 insertions(+), 31 deletions(-)
create mode 100644 tools/perf/util/fncache.c
create mode 100644 tools/perf/util/fncache.h
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index b8e05a147b2b..aab05e2c01a5 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -49,6 +49,7 @@ perf-y += header.o
perf-y += callchain.o
perf-y += values.o
perf-y += debug.o
+perf-y += fncache.o
perf-y += machine.o
perf-y += map.o
perf-y += pstack.o
diff --git a/tools/perf/util/fncache.c b/tools/perf/util/fncache.c
new file mode 100644
index 000000000000..5afcd7edbe7a
--- /dev/null
+++ b/tools/perf/util/fncache.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Manage a cache of file names' existence */
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <linux/list.h>
+#include "fncache.h"
+
+struct fncache {
+ struct hlist_node nd;
+ bool res;
+ char name[];
+};
+
+#define FNHSIZE 61
+
+static struct hlist_head fncache_hash[FNHSIZE];
+
+unsigned shash(const unsigned char *s)
+{
+ unsigned h = 0;
+ while (*s)
+ h = 65599 * h + *s++;
+ return h ^ (h >> 16);
+}
+
+static bool lookup_fncache(const char *name, bool *res)
+{
+ int h = shash((const unsigned char *)name) % FNHSIZE;
+ struct fncache *n;
+
+ hlist_for_each_entry (n, &fncache_hash[h], nd) {
+ if (!strcmp(n->name, name)) {
+ *res = n->res;
+ return true;
+ }
+ }
+ return false;
+}
+
+static void update_fncache(const char *name, bool res)
+{
+ struct fncache *n = malloc(sizeof(struct fncache) + strlen(name) + 1);
+ int h = shash((const unsigned char *)name) % FNHSIZE;
+
+ if (!n)
+ return;
+ strcpy(n->name, name);
+ n->res = res;
+ hlist_add_head(&n->nd, &fncache_hash[h]);
+}
+
+/* No LRU, only use when bounded in some other way. */
+bool file_available(const char *name)
+{
+ bool res;
+
+ if (lookup_fncache(name, &res))
+ return res;
+ res = access(name, R_OK) == 0;
+ update_fncache(name, res);
+ return res;
+}
diff --git a/tools/perf/util/fncache.h b/tools/perf/util/fncache.h
new file mode 100644
index 000000000000..fe020beaefb1
--- /dev/null
+++ b/tools/perf/util/fncache.h
@@ -0,0 +1,7 @@
+#ifndef _FCACHE_H
+#define _FCACHE_H 1
+
+unsigned shash(const unsigned char *s);
+bool file_available(const char *name);
+
+#endif
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index db1e57113f4b..65780494a290 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -24,6 +24,7 @@
#include "pmu-events/pmu-events.h"
#include "string2.h"
#include "strbuf.h"
+#include "fncache.h"
struct perf_pmu_format {
char *name;
@@ -82,7 +83,6 @@ int perf_pmu__format_parse(char *dir, struct list_head *head)
*/
static int pmu_format(const char *name, struct list_head *format)
{
- struct stat st;
char path[PATH_MAX];
const char *sysfs = sysfs__mountpoint();
@@ -92,8 +92,8 @@ static int pmu_format(const char *name, struct list_head *format)
snprintf(path, PATH_MAX,
"%s" EVENT_SOURCE_DEVICE_PATH "%s/format", sysfs, name);
- if (stat(path, &st) < 0)
- return 0; /* no error if format does not exist */
+ if (!file_available(path))
+ return 0;
if (perf_pmu__format_parse(path, format))
return -1;
@@ -475,7 +475,6 @@ static int pmu_aliases_parse(char *dir, struct list_head *head)
*/
static int pmu_aliases(const char *name, struct list_head *head)
{
- struct stat st;
char path[PATH_MAX];
const char *sysfs = sysfs__mountpoint();
@@ -485,8 +484,8 @@ static int pmu_aliases(const char *name, struct list_head *head)
snprintf(path, PATH_MAX,
"%s/bus/event_source/devices/%s/events", sysfs, name);
- if (stat(path, &st) < 0)
- return 0; /* no error if 'events' does not exist */
+ if (!file_available(path))
+ return 0;
if (pmu_aliases_parse(path, head))
return -1;
@@ -525,7 +524,6 @@ static int pmu_alias_terms(struct perf_pmu_alias *alias,
*/
static int pmu_type(const char *name, __u32 *type)
{
- struct stat st;
char path[PATH_MAX];
FILE *file;
int ret = 0;
@@ -537,7 +535,7 @@ static int pmu_type(const char *name, __u32 *type)
snprintf(path, PATH_MAX,
"%s" EVENT_SOURCE_DEVICE_PATH "%s/type", sysfs, name);
- if (stat(path, &st) < 0)
+ if (access(path, R_OK) < 0)
return -1;
file = fopen(path, "r");
@@ -628,14 +626,11 @@ static struct perf_cpu_map *pmu_cpumask(const char *name)
static bool pmu_is_uncore(const char *name)
{
char path[PATH_MAX];
- struct perf_cpu_map *cpus;
- const char *sysfs = sysfs__mountpoint();
+ const char *sysfs;
+ sysfs = sysfs__mountpoint();
snprintf(path, PATH_MAX, CPUS_TEMPLATE_UNCORE, sysfs, name);
- cpus = __pmu_cpumask(path);
- perf_cpu_map__put(cpus);
-
- return !!cpus;
+ return file_available(path);
}
/*
@@ -645,7 +640,6 @@ static bool pmu_is_uncore(const char *name)
*/
static int is_arm_pmu_core(const char *name)
{
- struct stat st;
char path[PATH_MAX];
const char *sysfs = sysfs__mountpoint();
@@ -655,10 +649,7 @@ static int is_arm_pmu_core(const char *name)
/* Look for cpu sysfs (specific to arm) */
scnprintf(path, PATH_MAX, "%s/bus/event_source/devices/%s/cpus",
sysfs, name);
- if (stat(path, &st) == 0)
- return 1;
-
- return 0;
+ return file_available(path);
}
static char *perf_pmu__getcpuid(struct perf_pmu *pmu)
@@ -1534,7 +1525,6 @@ bool pmu_have_event(const char *pname, const char *name)
static FILE *perf_pmu__open_file(struct perf_pmu *pmu, const char *name)
{
- struct stat st;
char path[PATH_MAX];
const char *sysfs;
@@ -1544,10 +1534,8 @@ static FILE *perf_pmu__open_file(struct perf_pmu *pmu, const char *name)
snprintf(path, PATH_MAX,
"%s" EVENT_SOURCE_DEVICE_PATH "%s/%s", sysfs, pmu->name, name);
-
- if (stat(path, &st) < 0)
+ if (!file_available(path))
return NULL;
-
return fopen(path, "r");
}
diff --git a/tools/perf/util/srccode.c b/tools/perf/util/srccode.c
index d84ed8b6caaa..c29edaaca863 100644
--- a/tools/perf/util/srccode.c
+++ b/tools/perf/util/srccode.c
@@ -16,6 +16,7 @@
#include "srccode.h"
#include "debug.h"
#include <internal/lib.h> // page_size
+#include "fncache.h"
#define MAXSRCCACHE (32*1024*1024)
#define MAXSRCFILES 64
@@ -36,14 +37,6 @@ static LIST_HEAD(srcfile_list);
static long map_total_sz;
static int num_srcfiles;
-static unsigned shash(unsigned char *s)
-{
- unsigned h = 0;
- while (*s)
- h = 65599 * h + *s++;
- return h ^ (h >> 16);
-}
-
static int countlines(char *map, int maplen)
{
int numl;
--
2.23.0
next prev parent reply other threads:[~2019-11-16 5:52 UTC|newest]
Thread overview: 18+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-11-16 5:52 Optimize perf stat for large number of events/cpus Andi Kleen
2019-11-16 5:52 ` Andi Kleen [this message]
2019-11-16 5:52 ` [PATCH v7 02/12] perf affinity: Add infrastructure to save/restore affinity Andi Kleen
2019-11-16 5:52 ` [PATCH v7 03/12] perf cpumap: Maintain cpumaps ordered and without dups Andi Kleen
2019-11-16 5:52 ` [PATCH v7 04/12] perf evlist: Maintain evlist->all_cpus Andi Kleen
2019-11-16 5:52 ` [PATCH v7 05/12] perf evsel: Add iterator to iterate over events ordered by CPU Andi Kleen
2019-11-16 5:52 ` [PATCH v7 06/12] perf evsel: Add functions to close evsel on a CPU Andi Kleen
2019-11-16 5:52 ` [PATCH v7 07/12] perf stat: Use affinity for closing file descriptors Andi Kleen
2019-11-16 5:52 ` [PATCH v7 08/12] perf stat: Factor out open error handling Andi Kleen
2019-11-16 5:52 ` [PATCH v7 09/12] perf stat: Use affinity for opening events Andi Kleen
2019-11-20 15:07 ` Jiri Olsa
2019-11-20 22:31 ` Andi Kleen
2019-11-21 15:13 ` Jiri Olsa
2019-11-16 5:52 ` [PATCH v7 10/12] perf stat: Use affinity for reading Andi Kleen
2019-11-20 15:12 ` Jiri Olsa
2019-11-16 5:52 ` [PATCH v7 11/12] perf evsel: Add functions to enable/disable for a specific CPU Andi Kleen
2019-11-16 5:52 ` [PATCH v7 12/12] perf stat: Use affinity for enabling/disabling events Andi Kleen
2019-11-20 15:16 ` Optimize perf stat for large number of events/cpus Jiri Olsa
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20191116055229.62002-2-andi@firstfloor.org \
--to=andi@firstfloor.org \
--cc=acme@kernel.org \
--cc=ak@linux.intel.com \
--cc=jolsa@kernel.org \
--cc=linux-kernel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.