From: Arnaldo Carvalho de Melo <acme@kernel.org>
To: Ingo Molnar <mingo@kernel.org>
Cc: linux-kernel@vger.kernel.org, linux-perf-users@vger.kernel.org,
Jiri Olsa <jolsa@kernel.org>,
Alexander Shishkin <alexander.shishkin@linux.intel.com>,
David Ahern <dsahern@gmail.com>,
Namhyung Kim <namhyung@kernel.org>,
Peter Zijlstra <peterz@infradead.org>,
Arnaldo Carvalho de Melo <acme@redhat.com>
Subject: [PATCH 46/54] perf tools: Add MEM_TOPOLOGY feature to perf data file
Date: Thu, 8 Mar 2018 16:50:21 -0300 [thread overview]
Message-ID: <20180308195029.14991-47-acme@kernel.org> (raw)
In-Reply-To: <20180308195029.14991-1-acme@kernel.org>
From: Jiri Olsa <jolsa@kernel.org>
Adding MEM_TOPOLOGY feature to perf data file,
that will carry physical memory map and its
node assignments.
The format of data in MEM_TOPOLOGY is as follows:
0 - version | for future changes
8 - block_size_bytes | /sys/devices/system/memory/block_size_bytes
16 - count | number of nodes
For each node we store a map of the physical memory
indexes that belong to it:
32 - node id | node index
40 - size | size of bitmap
48 - bitmap | bitmap of memory indexes that belongs to node
| /sys/devices/system/node/node<NODE>/memory<INDEX>
The MEM_TOPOLOGY could be displayed with following
report command:
$ perf report --header-only -I
...
# memory nodes (nr 1, block size 0x8000000):
# 0 [7G]: 0-23,32-69
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20180307155020.32613-8-jolsa@kernel.org
[ Rename 'index' to 'idx', as this breaks the build in rhel5, 6 and other systems where this is used by glibc headers ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
tools/include/linux/bitmap.h | 2 +-
tools/perf/util/env.h | 9 ++
tools/perf/util/header.c | 305 +++++++++++++++++++++++++++++++++++++++++++
tools/perf/util/header.h | 1 +
4 files changed, 316 insertions(+), 1 deletion(-)
diff --git a/tools/include/linux/bitmap.h b/tools/include/linux/bitmap.h
index ca160270fdfa..63440cc8d618 100644
--- a/tools/include/linux/bitmap.h
+++ b/tools/include/linux/bitmap.h
@@ -98,7 +98,7 @@ static inline int test_and_set_bit(int nr, unsigned long *addr)
/**
* bitmap_alloc - Allocate bitmap
- * @nr: Bit to set
+ * @nbits: Number of bits
*/
static inline unsigned long *bitmap_alloc(int nbits)
{
diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
index bf970f57dce0..c4ef2e523367 100644
--- a/tools/perf/util/env.h
+++ b/tools/perf/util/env.h
@@ -27,6 +27,12 @@ struct numa_node {
struct cpu_map *map;
};
+/* Memory map of one node, as carried by the MEM_TOPOLOGY feature. */
+struct memory_node {
+ u64 node; /* node index */
+ u64 size; /* number of bits in the 'set' bitmap */
+ unsigned long *set; /* bitmap of the memory indexes that belong to this node */
+};
+
struct perf_env {
char *hostname;
char *os_release;
@@ -43,6 +49,7 @@ struct perf_env {
int nr_sibling_cores;
int nr_sibling_threads;
int nr_numa_nodes;
+ int nr_memory_nodes;
int nr_pmu_mappings;
int nr_groups;
char *cmdline;
@@ -54,6 +61,8 @@ struct perf_env {
struct cpu_cache_level *caches;
int caches_cnt;
struct numa_node *numa_nodes;
+ struct memory_node *memory_nodes;
+ unsigned long long memory_bsize;
};
extern struct perf_env perf_env;
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index e0c3cad0fd8d..e14b3f7c7212 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -17,6 +17,7 @@
#include <sys/stat.h>
#include <sys/utsname.h>
#include <linux/time64.h>
+#include <dirent.h>
#include "evlist.h"
#include "evsel.h"
@@ -37,6 +38,7 @@
#include "asm/bug.h"
#include "tool.h"
#include "time-utils.h"
+#include "units.h"
#include "sane_ctype.h"
@@ -131,6 +133,25 @@ int do_write(struct feat_fd *ff, const void *buf, size_t size)
return __do_write_buf(ff, buf, size);
}
+/*
+ * Write a bitmap to 'ff': first its size as a u64, then the bitmap
+ * contents as BITS_TO_U64(size) consecutive u64 words.
+ *
+ * Return: 0 if succeeded, -ERR if failed.
+ */
+static int do_write_bitmap(struct feat_fd *ff, unsigned long *set, u64 size)
+{
+	u64 *words = (u64 *) set;
+	int idx = 0, err;
+
+	err = do_write(ff, &size, sizeof(size));
+	if (err < 0)
+		return err;
+
+	while ((u64) idx < BITS_TO_U64(size)) {
+		err = do_write(ff, &words[idx], sizeof(*words));
+		if (err < 0)
+			return err;
+		idx++;
+	}
+
+	return 0;
+}
+
/* Return: 0 if succeded, -ERR if failed. */
int write_padded(struct feat_fd *ff, const void *bf,
size_t count, size_t count_aligned)
@@ -243,6 +264,38 @@ static char *do_read_string(struct feat_fd *ff)
return NULL;
}
+/*
+ * Read a bitmap in the layout produced by do_write_bitmap(): a u64
+ * size followed by BITS_TO_U64(size) u64 words.
+ *
+ * On success *pset points to a newly allocated bitmap that the caller
+ * must free() and *psize holds its size.  Nothing is stored through
+ * either pointer on failure.
+ *
+ * Return: 0 if succeeded, -ERR if failed.
+ */
+static int do_read_bitmap(struct feat_fd *ff, unsigned long **pset, u64 *psize)
+{
+	unsigned long *set;
+	u64 size, nr_words, i, *p;
+	int nbits, ret;
+
+	ret = do_read_u64(ff, &size);
+	if (ret)
+		return ret;
+
+	/*
+	 * The size comes from the file while bitmap_alloc() takes an int.
+	 * Refuse anything that does not survive the conversion, otherwise
+	 * the allocation is smaller than what the loop below fills in.
+	 */
+	nbits = (int) size;
+	if (nbits < 0 || (u64) nbits != size)
+		return -EINVAL;
+
+	set = bitmap_alloc(nbits);
+	if (!set)
+		return -ENOMEM;
+
+	bitmap_zero(set, nbits);
+
+	p = (u64 *) set;
+	nr_words = BITS_TO_U64(size);
+
+	for (i = 0; i < nr_words; i++) {
+		ret = do_read_u64(ff, p + i);
+		if (ret < 0) {
+			free(set);
+			return ret;
+		}
+	}
+
+	*pset = set;
+	*psize = size;
+	return 0;
+}
+
static int write_tracing_data(struct feat_fd *ff,
struct perf_evlist *evlist)
{
@@ -1196,6 +1249,176 @@ static int write_sample_time(struct feat_fd *ff,
sizeof(evlist->last_sample_time));
}
+
+/*
+ * Fill 'n' with the memory map of node 'idx': scan
+ * <sysfs>/devices/system/node/node<idx> for memory<N> entries and set
+ * bit N for each of them in a newly allocated bitmap stored in n->set.
+ * The bitmap is sized to the highest N found plus one.
+ *
+ * Return: 0 on success, -1 if the node directory can't be opened,
+ * -ENOMEM if the bitmap can't be allocated.
+ */
+static int memory_node__read(struct memory_node *n, unsigned long idx)
+{
+	unsigned int phys, size = 0;
+	char path[PATH_MAX];
+	struct dirent *ent;
+	DIR *dir;
+
+/* Visit every memory<N> entry of 'dir', storing N in 'mem'; uses the local 'ent'. */
+#define for_each_memory(mem, dir)					\
+	while ((ent = readdir(dir)))					\
+		if (strcmp(ent->d_name, ".") &&				\
+		    strcmp(ent->d_name, "..") &&			\
+		    sscanf(ent->d_name, "memory%u", &mem) == 1)
+
+	scnprintf(path, PATH_MAX,
+		  "%s/devices/system/node/node%lu",
+		  sysfs__mountpoint(), idx);
+
+	dir = opendir(path);
+	if (!dir) {
+		pr_warning("failed: can't open memory sysfs data\n");
+		return -1;
+	}
+
+	/* First pass: find the highest memory index to size the bitmap. */
+	for_each_memory(phys, dir) {
+		size = max(phys, size);
+	}
+
+	size++;
+
+	n->set = bitmap_alloc(size);
+	if (!n->set) {
+		closedir(dir);
+		return -ENOMEM;
+	}
+
+	bitmap_zero(n->set, size);
+	n->node = idx;
+	n->size = size;
+
+	rewinddir(dir);
+
+	/*
+	 * Second pass: mark the memory indexes present in this node.  The
+	 * directory may have changed since the first pass, so never set a
+	 * bit beyond the size the bitmap was allocated with.
+	 */
+	for_each_memory(phys, dir) {
+		if (phys < size)
+			set_bit(phys, n->set);
+	}
+
+#undef for_each_memory
+
+	closedir(dir);
+	return 0;
+}
+
+/* qsort() comparator: orders struct memory_node entries by ascending node index. */
+static int memory_node__sort(const void *a, const void *b)
+{
+	const struct memory_node *na = a;
+	const struct memory_node *nb = b;
+
+	/*
+	 * Compare rather than subtract: the difference of two u64 values
+	 * does not fit the int return type and could flip the sign.
+	 */
+	if (na->node < nb->node)
+		return -1;
+
+	return na->node > nb->node;
+}
+
+/*
+ * Read the memory map of every node<N> entry found under
+ * <sysfs>/devices/system/node/ into 'nodes', which has room for 'size'
+ * entries, then sort the entries by node index.
+ *
+ * On success *cntp holds the number of entries filled in and the caller
+ * owns the bitmaps in nodes[0 .. *cntp - 1].  On failure after the
+ * directory was opened, every bitmap allocated so far is released and
+ * *cntp is set to 0.
+ *
+ * Return: 0 on success, non-zero on failure.
+ */
+static int build_mem_topology(struct memory_node *nodes, u64 size, u64 *cntp)
+{
+	char path[PATH_MAX];
+	struct dirent *ent;
+	DIR *dir;
+	u64 cnt = 0, i;
+	int ret = 0;
+
+	scnprintf(path, PATH_MAX, "%s/devices/system/node/",
+		  sysfs__mountpoint());
+
+	dir = opendir(path);
+	if (!dir) {
+		pr_warning("failed: can't open node sysfs data\n");
+		return -1;
+	}
+
+	while (!ret && (ent = readdir(dir))) {
+		unsigned int idx;
+		int r;
+
+		if (!strcmp(ent->d_name, ".") ||
+		    !strcmp(ent->d_name, ".."))
+			continue;
+
+		r = sscanf(ent->d_name, "node%u", &idx);
+		if (r != 1)
+			continue;
+
+		/* Leave through the common exit so the directory gets closed. */
+		if (WARN_ONCE(cnt >= size,
+			      "failed to write MEM_TOPOLOGY, way too many nodes\n")) {
+			ret = -1;
+			break;
+		}
+
+		/* Count the entry only once it holds a valid bitmap. */
+		ret = memory_node__read(&nodes[cnt], idx);
+		if (!ret)
+			cnt++;
+	}
+
+	closedir(dir);
+
+	if (ret) {
+		/* The caller gets nothing on failure, so drop what was read. */
+		for (i = 0; i < cnt; i++) {
+			free(nodes[i].set);
+			nodes[i].set = NULL;
+		}
+		*cntp = 0;
+		return ret;
+	}
+
+	*cntp = cnt;
+	qsort(nodes, cnt, sizeof(nodes[0]), memory_node__sort);
+	return 0;
+}
+
+#define MAX_MEMORY_NODES 2000
+
+/*
+ * The MEM_TOPOLOGY holds physical memory map for every
+ * node in system. The format of data is as follows:
+ *
+ *  0 - version          | for future changes
+ *  8 - block_size_bytes | /sys/devices/system/memory/block_size_bytes
+ * 16 - count            | number of nodes
+ *
+ * For each node we store a map of the physical memory indexes
+ * that belong to it (offsets shown for the first node):
+ *
+ * 24 - node id          | node index
+ * 32 - size             | size of bitmap
+ * 40 - size             | same value again, written by do_write_bitmap()
+ * 48 - bitmap           | bitmap of memory indexes that belong to node
+ *
+ * Return: 0 on success, non-zero on failure.
+ */
+static int write_mem_topology(struct feat_fd *ff __maybe_unused,
+			      struct perf_evlist *evlist __maybe_unused)
+{
+	static struct memory_node nodes[MAX_MEMORY_NODES];
+	u64 bsize, version = 1, i, nr;
+	int ret;
+
+	ret = sysfs__read_xll("devices/system/memory/block_size_bytes",
+			      (unsigned long long *) &bsize);
+	if (ret)
+		return ret;
+
+	/* 'nr' is only relied upon once build_mem_topology() succeeded. */
+	ret = build_mem_topology(&nodes[0], MAX_MEMORY_NODES, &nr);
+	if (ret)
+		return ret;
+
+	ret = do_write(ff, &version, sizeof(version));
+	if (ret < 0)
+		goto out;
+
+	ret = do_write(ff, &bsize, sizeof(bsize));
+	if (ret < 0)
+		goto out;
+
+	ret = do_write(ff, &nr, sizeof(nr));
+	if (ret < 0)
+		goto out;
+
+	for (i = 0; i < nr; i++) {
+		struct memory_node *n = &nodes[i];
+
+		ret = do_write(ff, &n->node, sizeof(n->node));
+		if (ret < 0)
+			goto out;
+
+		ret = do_write(ff, &n->size, sizeof(n->size));
+		if (ret < 0)
+			goto out;
+
+		ret = do_write_bitmap(ff, n->set, n->size);
+		if (ret < 0)
+			goto out;
+	}
+
+out:
+	/*
+	 * The bitmaps were allocated while building the topology and are
+	 * not needed once written (or once writing failed): release them
+	 * and clear the pointers kept in the static array.
+	 */
+	for (i = 0; i < nr; i++) {
+		free(nodes[i].set);
+		nodes[i].set = NULL;
+	}
+
+	return ret;
+}
+
static void print_hostname(struct feat_fd *ff, FILE *fp)
{
fprintf(fp, "# hostname : %s\n", ff->ph->env.hostname);
@@ -1543,6 +1766,35 @@ static void print_sample_time(struct feat_fd *ff, FILE *fp)
fprintf(fp, "# sample duration : %10.3f ms\n", d);
}
+/*
+ * Print one memory node to 'fp' as "# <node> [<size>]: <map>", where
+ * <size> is 'bsize' times bitmap_weight() of the node's bitmap, formatted
+ * by unit_number__scnprintf(), and <map> is the bitmap formatted by
+ * bitmap_scnprintf().
+ */
+static void memory_node__fprintf(struct memory_node *n,
+				 unsigned long long bsize, FILE *fp)
+{
+	char size_str[50], map_str[100];
+	unsigned long long bytes = bsize * bitmap_weight(n->set, n->size);
+
+	unit_number__scnprintf(size_str, sizeof(size_str), bytes);
+	bitmap_scnprintf(n->set, n->size, map_str, sizeof(map_str));
+
+	fprintf(fp, "# %3" PRIu64 " [%s]: %s\n", n->node, size_str, map_str);
+}
+
+/*
+ * Print the MEM_TOPOLOGY feature kept in ff->ph->env: a summary line
+ * with the node count and block size, then one line per memory node.
+ */
+static void print_mem_topology(struct feat_fd *ff, FILE *fp)
+{
+	unsigned long long block_size = ff->ph->env.memory_bsize;
+	struct memory_node *node = ff->ph->env.memory_nodes;
+	int count = ff->ph->env.nr_memory_nodes;
+	int left;
+
+	fprintf(fp, "# memory nodes (nr %d, block size 0x%llx):\n",
+		count, block_size);
+
+	for (left = count; left > 0; left--, node++)
+		memory_node__fprintf(node, block_size, fp);
+}
+
static int __event_process_build_id(struct build_id_event *bev,
char *filename,
struct perf_session *session)
@@ -2205,6 +2457,58 @@ static int process_sample_time(struct feat_fd *ff, void *data __maybe_unused)
return 0;
}
+/*
+ * Parse the MEM_TOPOLOGY feature from 'ff' and store the result in
+ * ff->ph->env (memory_bsize, memory_nodes, nr_memory_nodes).  Only
+ * version 1 of the format is accepted.
+ *
+ * Return: 0 on success, -1 on failure, in which case nothing is stored
+ * in the environment and everything allocated here is released.
+ */
+static int process_mem_topology(struct feat_fd *ff,
+				void *data __maybe_unused)
+{
+	struct memory_node *nodes;
+	u64 version, i, j, nr, bsize;
+	int ret = -1;
+
+	if (do_read_u64(ff, &version))
+		return -1;
+
+	if (version != 1)
+		return -1;
+
+	if (do_read_u64(ff, &bsize))
+		return -1;
+
+	if (do_read_u64(ff, &nr))
+		return -1;
+
+	/*
+	 * 'nr' comes from the file: let calloc() reject a count whose
+	 * total size would overflow instead of multiplying by hand.
+	 */
+	nodes = calloc(nr, sizeof(*nodes));
+	if (!nodes)
+		return -1;
+
+	for (i = 0; i < nr; i++) {
+		struct memory_node n;
+
+		if (do_read_u64(ff, &n.node))
+			goto out;
+
+		/*
+		 * The size is stored twice: once as a node field and once
+		 * more in front of the bitmap, where do_read_bitmap() reads
+		 * it again and overwrites n.size.
+		 */
+		if (do_read_u64(ff, &n.size))
+			goto out;
+
+		if (do_read_bitmap(ff, &n.set, &n.size))
+			goto out;
+
+		nodes[i] = n;
+	}
+
+	ff->ph->env.memory_bsize    = bsize;
+	ff->ph->env.memory_nodes    = nodes;
+	ff->ph->env.nr_memory_nodes = nr;
+	ret = 0;
+
+out:
+	if (ret) {
+		/* Entries before the failing one already own a bitmap. */
+		for (j = 0; j < i; j++)
+			free(nodes[j].set);
+		free(nodes);
+	}
+	return ret;
+}
+
struct feature_ops {
int (*write)(struct feat_fd *ff, struct perf_evlist *evlist);
void (*print)(struct feat_fd *ff, FILE *fp);
@@ -2263,6 +2567,7 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = {
FEAT_OPN(STAT, stat, false),
FEAT_OPN(CACHE, cache, true),
FEAT_OPR(SAMPLE_TIME, sample_time, false),
+ FEAT_OPR(MEM_TOPOLOGY, mem_topology, true),
};
struct header_print_data {
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index 942bdec6d70d..90d4577a92dc 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -36,6 +36,7 @@ enum {
HEADER_STAT,
HEADER_CACHE,
HEADER_SAMPLE_TIME,
+ HEADER_MEM_TOPOLOGY,
HEADER_LAST_FEATURE,
HEADER_FEAT_BITS = 256,
};
--
2.14.3
next prev parent reply other threads:[~2018-03-08 19:50 UTC|newest]
Thread overview: 56+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-03-08 19:49 [GIT PULL 00/54] perf/core improvements and fixes Arnaldo Carvalho de Melo
2018-03-08 19:49 ` [PATCH 01/54] perf cgroup: Remove misplaced __maybe_unused Arnaldo Carvalho de Melo
2018-03-08 19:49 ` [PATCH 02/54] perf cgroup: Rename 'struct cgroup_sel' to 'struct cgroup' Arnaldo Carvalho de Melo
2018-03-08 19:49 ` [PATCH 03/54] perf cgroup: Introduce cgroup__delete() Arnaldo Carvalho de Melo
2018-03-08 19:49 ` [PATCH 04/54] perf cgroup: Rename close_cgroup() to cgroup__put() Arnaldo Carvalho de Melo
2018-03-08 19:49 ` [PATCH 05/54] perf cgroup: Introduce cgroup__get() Arnaldo Carvalho de Melo
2018-03-08 19:49 ` [PATCH 06/54] perf cgroup: Introduce find_cgroup() method Arnaldo Carvalho de Melo
2018-03-08 19:49 ` [PATCH 07/54] perf cgroup: Introduce cgroup__new() out of open coded equivalent Arnaldo Carvalho de Melo
2018-03-08 19:49 ` [PATCH 08/54] perf sched: Move thread::shortname to thread_runtime Arnaldo Carvalho de Melo
2018-03-08 19:49 ` [PATCH 09/54] perf sched map: Re-annotate shortname if thread comm changed Arnaldo Carvalho de Melo
2018-03-08 19:49 ` [PATCH 10/54] perf record: Combine some auxtrace initialization into a single function Arnaldo Carvalho de Melo
2018-03-08 19:49 ` [PATCH 11/54] perf cgroup: Add evlist__findnew_cgroup() Arnaldo Carvalho de Melo
2018-03-08 19:49 ` [PATCH 12/54] perf cgroup: Add evlist__add_default_cgroup() Arnaldo Carvalho de Melo
2018-03-08 19:49 ` [PATCH 13/54] perf cgroup: Make the cgroup name be const char * Arnaldo Carvalho de Melo
2018-03-08 19:49 ` [PATCH 14/54] perf trace: Support setting cgroups as targets Arnaldo Carvalho de Melo
2018-03-08 19:49 ` [PATCH 15/54] perf auxtrace: Add missing parameters from kernel-doc comments Arnaldo Carvalho de Melo
2018-03-08 19:49 ` [PATCH 16/54] perf auxtrace: Rename some buffer-queuing functions Arnaldo Carvalho de Melo
2018-03-08 19:49 ` [PATCH 17/54] perf auxtrace: Make auxtrace_queues__add_buffer() return buffer_ptr Arnaldo Carvalho de Melo
2018-03-08 19:49 ` [PATCH 18/54] perf tools: Correct title markers for asciidoctor Arnaldo Carvalho de Melo
2018-03-08 19:49 ` [PATCH 19/54] perf pmu: Support wildcards on pmu name in dynamic pmu events Arnaldo Carvalho de Melo
2018-03-08 19:49 ` [PATCH 20/54] perf pmu: Display pmu name when printing unmerged events in stat Arnaldo Carvalho de Melo
2018-03-08 19:49 ` [PATCH 21/54] perf pmu: Auto-merge PMU events created by prefix or glob match Arnaldo Carvalho de Melo
2018-03-08 19:49 ` [PATCH 22/54] perf evlist: Store 'overwrite' in struct perf_mmap Arnaldo Carvalho de Melo
2018-03-08 19:49 ` [PATCH 23/54] perf mmap: Store mmap scope in struct perf_mmap() Arnaldo Carvalho de Melo
2018-03-08 19:49 ` [PATCH 24/54] perf mmap: Use the stored scope data in perf_mmap__push() Arnaldo Carvalho de Melo
2018-03-08 19:50 ` [PATCH 25/54] perf mmap: Use the stored data in perf_mmap__read_event() Arnaldo Carvalho de Melo
2018-03-08 19:50 ` [PATCH 26/54] perf mmap: Use stored 'overwrite' in perf_mmap__consume() Arnaldo Carvalho de Melo
2018-03-08 19:50 ` [PATCH 27/54] perf mmap: Simplify perf_mmap__consume() Arnaldo Carvalho de Melo
2018-03-08 19:50 ` [PATCH 28/54] perf mmap: Simplify perf_mmap__read_event() Arnaldo Carvalho de Melo
2018-03-08 19:50 ` [PATCH 29/54] perf mmap: Simplify perf_mmap__read_init() Arnaldo Carvalho de Melo
2018-03-08 19:50 ` [PATCH 30/54] perf intel-pt: Fix overlap detection to identify consecutive buffers correctly Arnaldo Carvalho de Melo
2018-03-08 19:50 ` [PATCH 31/54] perf intel-pt: Fix sync_switch Arnaldo Carvalho de Melo
2018-03-08 19:50 ` [PATCH 32/54] perf intel-pt: Fix error recovery from missing TIP packet Arnaldo Carvalho de Melo
2018-03-08 19:50 ` [PATCH 33/54] perf intel-pt: Fix timestamp following overflow Arnaldo Carvalho de Melo
2018-03-08 19:50 ` [PATCH 34/54] perf intel-pt/bts: In auxtrace_record__init_intel() evlist is never NULL Arnaldo Carvalho de Melo
2018-03-08 19:50 ` [PATCH 35/54] perf intel-pt: Get rid of intel_pt_use_buffer_pid_tid() Arnaldo Carvalho de Melo
2018-03-08 19:50 ` [PATCH 36/54] perf intel-pt: Tidy old_buffer handling in intel_pt_get_trace() Arnaldo Carvalho de Melo
2018-03-08 19:50 ` [PATCH 37/54] perf intel-pt: Remove a check for sampling mode Arnaldo Carvalho de Melo
2018-03-08 19:50 ` [PATCH 38/54] perf intel-pt: Adjust overlap-checking to support " Arnaldo Carvalho de Melo
2018-03-08 19:50 ` [PATCH 39/54] perf annotate: Fix s390 target function disassembly Arnaldo Carvalho de Melo
2018-03-08 19:50 ` [PATCH 40/54] perf report: Fix the output for stdio events list Arnaldo Carvalho de Melo
2018-03-08 19:50 ` [PATCH 41/54] perf report: Display perf.data header info Arnaldo Carvalho de Melo
2018-03-08 19:50 ` [PATCH 42/54] perf record: Move machine variable down the function Arnaldo Carvalho de Melo
2018-03-08 19:50 ` [PATCH 43/54] perf record: Remove progname from struct record Arnaldo Carvalho de Melo
2018-03-08 19:50 ` [PATCH 44/54] perf tools: Add refcnt into struct mem_info Arnaldo Carvalho de Melo
2018-03-08 19:50 ` [PATCH 45/54] perf c2c: Use mem_info refcnt logic Arnaldo Carvalho de Melo
2018-03-08 19:50 ` Arnaldo Carvalho de Melo [this message]
2018-03-08 19:50 ` [PATCH 47/54] perf tools: Update tags with .cpp files Arnaldo Carvalho de Melo
2018-03-08 19:50 ` [PATCH 48/54] perf build: Add llvm/clang/cxx make tests into FEATURE_TESTS_EXTRA Arnaldo Carvalho de Melo
2018-03-08 19:50 ` [PATCH 49/54] perf build: Add llvm/clang make targets to FILES Arnaldo Carvalho de Melo
2018-03-08 19:50 ` [PATCH 50/54] perf build: Force llvm/clang test compile output to .make.output Arnaldo Carvalho de Melo
2018-03-08 19:50 ` [PATCH 51/54] perf report: Provide libtraceevent with a kernel symbol resolver Arnaldo Carvalho de Melo
2018-03-08 19:50 ` [PATCH 52/54] perf annotate: Support to display the IPC/Cycle in TUI mode Arnaldo Carvalho de Melo
2018-03-08 19:50 ` [PATCH 53/54] perf annotate: Handle s390 PC relative load and store instruction Arnaldo Carvalho de Melo
2018-03-08 19:50 ` [PATCH 54/54] perf tools: Update quipper information Arnaldo Carvalho de Melo
2018-03-09 7:29 ` [GIT PULL 00/54] perf/core improvements and fixes Ingo Molnar
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20180308195029.14991-47-acme@kernel.org \
--to=acme@kernel.org \
--cc=acme@redhat.com \
--cc=alexander.shishkin@linux.intel.com \
--cc=dsahern@gmail.com \
--cc=jolsa@kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-perf-users@vger.kernel.org \
--cc=mingo@kernel.org \
--cc=namhyung@kernel.org \
--cc=peterz@infradead.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).