* [PATCH 2/2] perf: Replace MAX_NR_CPUS with dynamic alternatives
@ 2019-08-13 21:06 Kyle Meyer
2019-08-13 21:32 ` Arnaldo Carvalho de Melo
0 siblings, 1 reply; 2+ messages in thread
From: Kyle Meyer @ 2019-08-13 21:06 UTC (permalink / raw)
Cc: Kyle Meyer, Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
Alexander Shishkin, Jiri Olsa, Namhyung Kim, linux-kernel,
Russ Anderson, Kyle Meyer
Both cpu__max_cpu and env.nr_cpus_online are dynamic alternatives for
MAX_NR_CPUS, a macro currently used throughout perf. The function cpu__max_cpu
returns the possible number of CPUS as defined in the sysfs, whereas
env.nr_cpus_online is the number of CPUs that were online during the recording
session. MAX_NR_CPUS is still used by DECLARE_BITMAP at compile time, however,
it's replaced elsewhere.
This patch was tested using "perf record -a -g" on both an eight socket 288 CPU
system and a single socket 36 CPU system. Each system was then rebooted single
socket and eight socket before "perf report" was used to read the perf.data out
file. "perf report --header" was used to confirm that each perf.data file had
information on the correct number of CPUs.
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: linux-kernel@vger.kernel.org
Cc: Russ Anderson <russ.anderson@hpe.com>
Signed-off-by: Kyle Meyer <kyle.meyer@hpe.com>
---
| 6 +++---
tools/perf/util/machine.c | 11 ++++++-----
tools/perf/util/session.c | 6 +++---
tools/perf/util/stat.c | 4 ++--
tools/perf/util/svghelper.c | 31 +++++++++++++++----------------
5 files changed, 29 insertions(+), 29 deletions(-)
--git a/tools/perf/util/header.c b/tools/perf/util/header.c
index b04c2b6b28b3..cf8ce839cb47 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -1121,16 +1121,16 @@ static int build_caches(struct cpu_cache_level caches[], u32 size, u32 *cntp)
return 0;
}
-#define MAX_CACHES (MAX_NR_CPUS * 4)
+#define MAX_CACHE_LVL 4
static int write_cache(struct feat_fd *ff,
struct evlist *evlist __maybe_unused)
{
- struct cpu_cache_level caches[MAX_CACHES];
+ struct cpu_cache_level caches[(cpu__max_cpu() * MAX_CACHE_LVL)];
u32 cnt = 0, i, version = 1;
int ret;
- ret = build_caches(caches, MAX_CACHES, &cnt);
+ ret = build_caches(caches, (cpu__max_cpu() * MAX_CACHE_LVL), &cnt);
if (ret)
goto out;
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 5734460fc89e..cf5f4b4eeea0 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -2616,7 +2616,8 @@ int __machine__synthesize_threads(struct machine *machine, struct perf_tool *too
pid_t machine__get_current_tid(struct machine *machine, int cpu)
{
- if (cpu < 0 || cpu >= MAX_NR_CPUS || !machine->current_tid)
+ int nr_cpus_online = machine->env->nr_cpus_online;
+ if (cpu < 0 || cpu >= nr_cpus_online || !machine->current_tid)
return -1;
return machine->current_tid[cpu];
@@ -2626,6 +2627,7 @@ int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid,
pid_t tid)
{
struct thread *thread;
+ int nr_cpus_online = machine->env->nr_cpus_online;
if (cpu < 0)
return -EINVAL;
@@ -2633,16 +2635,15 @@ int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid,
if (!machine->current_tid) {
int i;
- machine->current_tid = calloc(MAX_NR_CPUS, sizeof(pid_t));
+ machine->current_tid = calloc(nr_cpus_online, sizeof(pid_t));
if (!machine->current_tid)
return -ENOMEM;
- for (i = 0; i < MAX_NR_CPUS; i++)
+ for (i = 0; i < nr_cpus_online; i++)
machine->current_tid[i] = -1;
}
- if (cpu >= MAX_NR_CPUS) {
+ if (cpu >= nr_cpus_online) {
pr_err("Requested CPU %d too large. ", cpu);
- pr_err("Consider raising MAX_NR_CPUS\n");
return -EINVAL;
}
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 11e6093c941b..a9d244a94e24 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -2275,6 +2275,7 @@ int perf_session__cpu_bitmap(struct perf_session *session,
{
int i, err = -1;
struct perf_cpu_map *map;
+ int nr_cpus_online = session->header.env.nr_cpus_online;
for (i = 0; i < PERF_TYPE_MAX; ++i) {
struct evsel *evsel;
@@ -2299,9 +2300,8 @@ int perf_session__cpu_bitmap(struct perf_session *session,
for (i = 0; i < map->nr; i++) {
int cpu = map->map[i];
- if (cpu >= MAX_NR_CPUS) {
- pr_err("Requested CPU %d too large. "
- "Consider raising MAX_NR_CPUS\n", cpu);
+ if (cpu >= nr_cpus_online) {
+ pr_err("Requested CPU %d too large\n", cpu);
goto out_delete_map;
}
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index e4e4e3bf8b2b..199008ce936d 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -208,7 +208,7 @@ void perf_evlist__reset_stats(struct evlist *evlist)
static void zero_per_pkg(struct evsel *counter)
{
if (counter->per_pkg_mask)
- memset(counter->per_pkg_mask, 0, MAX_NR_CPUS);
+ memset(counter->per_pkg_mask, 0, cpu__max_cpu());
}
static int check_per_pkg(struct evsel *counter,
@@ -227,7 +227,7 @@ static int check_per_pkg(struct evsel *counter,
return 0;
if (!mask) {
- mask = zalloc(MAX_NR_CPUS);
+ mask = zalloc(cpu__max_cpu());
if (!mask)
return -ENOMEM;
diff --git a/tools/perf/util/svghelper.c b/tools/perf/util/svghelper.c
index 1beeb7291361..6e8433e97f21 100644
--- a/tools/perf/util/svghelper.c
+++ b/tools/perf/util/svghelper.c
@@ -695,7 +695,8 @@ struct topology {
int sib_thr_nr;
};
-static void scan_thread_topology(int *map, struct topology *t, int cpu, int *pos)
+static void scan_thread_topology(int *map, struct topology *t, int cpu,
+int *pos, int max_cpus)
{
int i;
int thr;
@@ -704,28 +705,24 @@ static void scan_thread_topology(int *map, struct topology *t, int cpu, int *pos
if (!test_bit(cpu, cpumask_bits(&t->sib_thr[i])))
continue;
- for_each_set_bit(thr,
- cpumask_bits(&t->sib_thr[i]),
- MAX_NR_CPUS)
+ for_each_set_bit(thr, cpumask_bits(&t->sib_thr[i]), max_cpus)
if (map[thr] == -1)
map[thr] = (*pos)++;
}
}
-static void scan_core_topology(int *map, struct topology *t)
+static void scan_core_topology(int *map, struct topology *t, int max_cpus)
{
int pos = 0;
int i;
int cpu;
for (i = 0; i < t->sib_core_nr; i++)
- for_each_set_bit(cpu,
- cpumask_bits(&t->sib_core[i]),
- MAX_NR_CPUS)
- scan_thread_topology(map, t, cpu, &pos);
+ for_each_set_bit(cpu, cpumask_bits(&t->sib_core[i]), max_cpus)
+ scan_thread_topology(map, t, cpu, &pos, max_cpus);
}
-static int str_to_bitmap(char *s, cpumask_t *b)
+static int str_to_bitmap(char *s, cpumask_t *b, int max_cpus)
{
int i;
int ret = 0;
@@ -738,7 +735,7 @@ static int str_to_bitmap(char *s, cpumask_t *b)
for (i = 0; i < m->nr; i++) {
c = m->map[i];
- if (c >= MAX_NR_CPUS) {
+ if (c >= max_cpus) {
ret = -1;
break;
}
@@ -767,7 +764,8 @@ int svg_build_topology_map(struct perf_env *env)
}
for (i = 0; i < env->nr_sibling_cores; i++) {
- if (str_to_bitmap(env->sibling_cores, &t.sib_core[i])) {
+ if (str_to_bitmap(env->sibling_cores, &t.sib_core[i],
+ env->nr_cpus_online)) {
fprintf(stderr, "topology: can't parse siblings map\n");
goto exit;
}
@@ -776,7 +774,8 @@ int svg_build_topology_map(struct perf_env *env)
}
for (i = 0; i < env->nr_sibling_threads; i++) {
- if (str_to_bitmap(env->sibling_threads, &t.sib_thr[i])) {
+ if (str_to_bitmap(env->sibling_threads, &t.sib_thr[i],
+ env->nr_cpus_online)) {
fprintf(stderr, "topology: can't parse siblings map\n");
goto exit;
}
@@ -784,16 +783,16 @@ int svg_build_topology_map(struct perf_env *env)
env->sibling_threads += strlen(env->sibling_threads) + 1;
}
- topology_map = malloc(sizeof(int) * MAX_NR_CPUS);
+ topology_map = malloc(sizeof(int) * env->nr_cpus_online);
if (!topology_map) {
fprintf(stderr, "topology: no memory\n");
goto exit;
}
- for (i = 0; i < MAX_NR_CPUS; i++)
+ for (i = 0; i < env->nr_cpus_online; i++)
topology_map[i] = -1;
- scan_core_topology(topology_map, &t);
+ scan_core_topology(topology_map, &t, env->nr_cpus_online);
return 0;
--
2.12.3
^ permalink raw reply related [flat|nested] 2+ messages in thread* Re: [PATCH 2/2] perf: Replace MAX_NR_CPUS with dynamic alternatives
2019-08-13 21:06 [PATCH 2/2] perf: Replace MAX_NR_CPUS with dynamic alternatives Kyle Meyer
@ 2019-08-13 21:32 ` Arnaldo Carvalho de Melo
0 siblings, 0 replies; 2+ messages in thread
From: Arnaldo Carvalho de Melo @ 2019-08-13 21:32 UTC (permalink / raw)
To: Kyle Meyer
Cc: Peter Zijlstra, Ingo Molnar, Alexander Shishkin, Jiri Olsa,
Namhyung Kim, linux-kernel, Russ Anderson, Kyle Meyer
Em Tue, Aug 13, 2019 at 04:06:13PM -0500, Kyle Meyer escreveu:
> Both cpu__max_cpu and env.nr_cpus_online are dynamic alternatives for
> MAX_NR_CPUS, a macro currently used throughout perf. The function cpu__max_cpu
> returns the possible number of CPUS as defined in the sysfs, whereas
> env.nr_cpus_online is the number of CPUs that were online during the recording
> session. MAX_NR_CPUS is still used by DECLARE_BITMAP at compile time, however,
> it's replaced elsewhere.
>
> This patch was tested using "perf record -a -g" on both an eight socket 288 CPU
> system and a single socket 36 CPU system. Each system was then rebooted single
> socket and eight socket before "perf report" was used to read the perf.data out
> file. "perf report --header" was used to confirm that each perf.data file had
> information on the correct number of CPUs.
Can you please further break this into multiple patches, one for the
svg code, another for the header and so on.
Thanks,
- Arnaldo
> Cc: Peter Zijlstra <peterz@infradead.org>
> Cc: Ingo Molnar <mingo@redhat.com>
> Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
> Cc: Jiri Olsa <jolsa@redhat.com>
> Cc: Namhyung Kim <namhyung@kernel.org>
> Cc: linux-kernel@vger.kernel.org
> Cc: Russ Anderson <russ.anderson@hpe.com>
> Signed-off-by: Kyle Meyer <kyle.meyer@hpe.com>
> ---
> tools/perf/util/header.c | 6 +++---
> tools/perf/util/machine.c | 11 ++++++-----
> tools/perf/util/session.c | 6 +++---
> tools/perf/util/stat.c | 4 ++--
> tools/perf/util/svghelper.c | 31 +++++++++++++++----------------
> 5 files changed, 29 insertions(+), 29 deletions(-)
>
> diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
> index b04c2b6b28b3..cf8ce839cb47 100644
> --- a/tools/perf/util/header.c
> +++ b/tools/perf/util/header.c
> @@ -1121,16 +1121,16 @@ static int build_caches(struct cpu_cache_level caches[], u32 size, u32 *cntp)
> return 0;
> }
>
> -#define MAX_CACHES (MAX_NR_CPUS * 4)
> +#define MAX_CACHE_LVL 4
>
> static int write_cache(struct feat_fd *ff,
> struct evlist *evlist __maybe_unused)
> {
> - struct cpu_cache_level caches[MAX_CACHES];
> + struct cpu_cache_level caches[(cpu__max_cpu() * MAX_CACHE_LVL)];
> u32 cnt = 0, i, version = 1;
> int ret;
>
> - ret = build_caches(caches, MAX_CACHES, &cnt);
> + ret = build_caches(caches, (cpu__max_cpu() * MAX_CACHE_LVL), &cnt);
> if (ret)
> goto out;
>
> diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
> index 5734460fc89e..cf5f4b4eeea0 100644
> --- a/tools/perf/util/machine.c
> +++ b/tools/perf/util/machine.c
> @@ -2616,7 +2616,8 @@ int __machine__synthesize_threads(struct machine *machine, struct perf_tool *too
>
> pid_t machine__get_current_tid(struct machine *machine, int cpu)
> {
> - if (cpu < 0 || cpu >= MAX_NR_CPUS || !machine->current_tid)
> + int nr_cpus_online = machine->env->nr_cpus_online;
> + if (cpu < 0 || cpu >= nr_cpus_online || !machine->current_tid)
> return -1;
>
> return machine->current_tid[cpu];
> @@ -2626,6 +2627,7 @@ int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid,
> pid_t tid)
> {
> struct thread *thread;
> + int nr_cpus_online = machine->env->nr_cpus_online;
>
> if (cpu < 0)
> return -EINVAL;
> @@ -2633,16 +2635,15 @@ int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid,
> if (!machine->current_tid) {
> int i;
>
> - machine->current_tid = calloc(MAX_NR_CPUS, sizeof(pid_t));
> + machine->current_tid = calloc(nr_cpus_online, sizeof(pid_t));
> if (!machine->current_tid)
> return -ENOMEM;
> - for (i = 0; i < MAX_NR_CPUS; i++)
> + for (i = 0; i < nr_cpus_online; i++)
> machine->current_tid[i] = -1;
> }
>
> - if (cpu >= MAX_NR_CPUS) {
> + if (cpu >= nr_cpus_online) {
> pr_err("Requested CPU %d too large. ", cpu);
> - pr_err("Consider raising MAX_NR_CPUS\n");
> return -EINVAL;
> }
>
> diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
> index 11e6093c941b..a9d244a94e24 100644
> --- a/tools/perf/util/session.c
> +++ b/tools/perf/util/session.c
> @@ -2275,6 +2275,7 @@ int perf_session__cpu_bitmap(struct perf_session *session,
> {
> int i, err = -1;
> struct perf_cpu_map *map;
> + int nr_cpus_online = session->header.env.nr_cpus_online;
>
> for (i = 0; i < PERF_TYPE_MAX; ++i) {
> struct evsel *evsel;
> @@ -2299,9 +2300,8 @@ int perf_session__cpu_bitmap(struct perf_session *session,
> for (i = 0; i < map->nr; i++) {
> int cpu = map->map[i];
>
> - if (cpu >= MAX_NR_CPUS) {
> - pr_err("Requested CPU %d too large. "
> - "Consider raising MAX_NR_CPUS\n", cpu);
> + if (cpu >= nr_cpus_online) {
> + pr_err("Requested CPU %d too large\n", cpu);
> goto out_delete_map;
> }
>
> diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
> index e4e4e3bf8b2b..199008ce936d 100644
> --- a/tools/perf/util/stat.c
> +++ b/tools/perf/util/stat.c
> @@ -208,7 +208,7 @@ void perf_evlist__reset_stats(struct evlist *evlist)
> static void zero_per_pkg(struct evsel *counter)
> {
> if (counter->per_pkg_mask)
> - memset(counter->per_pkg_mask, 0, MAX_NR_CPUS);
> + memset(counter->per_pkg_mask, 0, cpu__max_cpu());
> }
>
> static int check_per_pkg(struct evsel *counter,
> @@ -227,7 +227,7 @@ static int check_per_pkg(struct evsel *counter,
> return 0;
>
> if (!mask) {
> - mask = zalloc(MAX_NR_CPUS);
> + mask = zalloc(cpu__max_cpu());
> if (!mask)
> return -ENOMEM;
>
> diff --git a/tools/perf/util/svghelper.c b/tools/perf/util/svghelper.c
> index 1beeb7291361..6e8433e97f21 100644
> --- a/tools/perf/util/svghelper.c
> +++ b/tools/perf/util/svghelper.c
> @@ -695,7 +695,8 @@ struct topology {
> int sib_thr_nr;
> };
>
> -static void scan_thread_topology(int *map, struct topology *t, int cpu, int *pos)
> +static void scan_thread_topology(int *map, struct topology *t, int cpu,
> +int *pos, int max_cpus)
> {
> int i;
> int thr;
> @@ -704,28 +705,24 @@ static void scan_thread_topology(int *map, struct topology *t, int cpu, int *pos
> if (!test_bit(cpu, cpumask_bits(&t->sib_thr[i])))
> continue;
>
> - for_each_set_bit(thr,
> - cpumask_bits(&t->sib_thr[i]),
> - MAX_NR_CPUS)
> + for_each_set_bit(thr, cpumask_bits(&t->sib_thr[i]), max_cpus)
> if (map[thr] == -1)
> map[thr] = (*pos)++;
> }
> }
>
> -static void scan_core_topology(int *map, struct topology *t)
> +static void scan_core_topology(int *map, struct topology *t, int max_cpus)
> {
> int pos = 0;
> int i;
> int cpu;
>
> for (i = 0; i < t->sib_core_nr; i++)
> - for_each_set_bit(cpu,
> - cpumask_bits(&t->sib_core[i]),
> - MAX_NR_CPUS)
> - scan_thread_topology(map, t, cpu, &pos);
> + for_each_set_bit(cpu, cpumask_bits(&t->sib_core[i]), max_cpus)
> + scan_thread_topology(map, t, cpu, &pos, max_cpus);
> }
>
> -static int str_to_bitmap(char *s, cpumask_t *b)
> +static int str_to_bitmap(char *s, cpumask_t *b, int max_cpus)
> {
> int i;
> int ret = 0;
> @@ -738,7 +735,7 @@ static int str_to_bitmap(char *s, cpumask_t *b)
>
> for (i = 0; i < m->nr; i++) {
> c = m->map[i];
> - if (c >= MAX_NR_CPUS) {
> + if (c >= max_cpus) {
> ret = -1;
> break;
> }
> @@ -767,7 +764,8 @@ int svg_build_topology_map(struct perf_env *env)
> }
>
> for (i = 0; i < env->nr_sibling_cores; i++) {
> - if (str_to_bitmap(env->sibling_cores, &t.sib_core[i])) {
> + if (str_to_bitmap(env->sibling_cores, &t.sib_core[i],
> + env->nr_cpus_online)) {
> fprintf(stderr, "topology: can't parse siblings map\n");
> goto exit;
> }
> @@ -776,7 +774,8 @@ int svg_build_topology_map(struct perf_env *env)
> }
>
> for (i = 0; i < env->nr_sibling_threads; i++) {
> - if (str_to_bitmap(env->sibling_threads, &t.sib_thr[i])) {
> + if (str_to_bitmap(env->sibling_threads, &t.sib_thr[i],
> + env->nr_cpus_online)) {
> fprintf(stderr, "topology: can't parse siblings map\n");
> goto exit;
> }
> @@ -784,16 +783,16 @@ int svg_build_topology_map(struct perf_env *env)
> env->sibling_threads += strlen(env->sibling_threads) + 1;
> }
>
> - topology_map = malloc(sizeof(int) * MAX_NR_CPUS);
> + topology_map = malloc(sizeof(int) * env->nr_cpus_online);
> if (!topology_map) {
> fprintf(stderr, "topology: no memory\n");
> goto exit;
> }
>
> - for (i = 0; i < MAX_NR_CPUS; i++)
> + for (i = 0; i < env->nr_cpus_online; i++)
> topology_map[i] = -1;
>
> - scan_core_topology(topology_map, &t);
> + scan_core_topology(topology_map, &t, env->nr_cpus_online);
>
> return 0;
>
> --
> 2.12.3
--
- Arnaldo
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2019-08-13 21:32 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2019-08-13 21:06 [PATCH 2/2] perf: Replace MAX_NR_CPUS with dynamic alternatives Kyle Meyer
2019-08-13 21:32 ` Arnaldo Carvalho de Melo
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.