* [PATCH 2/2] perf: Replace MAX_NR_CPUS with dynamic alternatives
@ 2019-08-13 21:06 Kyle Meyer
2019-08-13 21:32 ` Arnaldo Carvalho de Melo
0 siblings, 1 reply; 2+ messages in thread
From: Kyle Meyer @ 2019-08-13 21:06 UTC (permalink / raw)
Cc: Kyle Meyer, Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
Alexander Shishkin, Jiri Olsa, Namhyung Kim, linux-kernel,
Russ Anderson, Kyle Meyer
Both cpu__max_cpu and env.nr_cpus_online are dynamic alternatives for
MAX_NR_CPUS, a macro currently used throughout perf. The function cpu__max_cpu
returns the possible number of CPUs as defined in sysfs, whereas
env.nr_cpus_online is the number of CPUs that were online during the recording
session. MAX_NR_CPUS is still used by DECLARE_BITMAP at compile time; however,
it is replaced elsewhere.
This patch was tested using "perf record -a -g" on both an eight-socket, 288-CPU
system and a single-socket, 36-CPU system. Each system was then rebooted single
socket and eight socket before "perf report" was used to read the perf.data
output file. "perf report --header" was used to confirm that each perf.data file
had information on the correct number of CPUs.
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: linux-kernel@vger.kernel.org
Cc: Russ Anderson <russ.anderson@hpe.com>
Signed-off-by: Kyle Meyer <kyle.meyer@hpe.com>
---
tools/perf/util/header.c | 6 +++---
tools/perf/util/machine.c | 11 ++++++-----
tools/perf/util/session.c | 6 +++---
tools/perf/util/stat.c | 4 ++--
tools/perf/util/svghelper.c | 31 +++++++++++++++----------------
5 files changed, 29 insertions(+), 29 deletions(-)
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index b04c2b6b28b3..cf8ce839cb47 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -1121,16 +1121,16 @@ static int build_caches(struct cpu_cache_level caches[], u32 size, u32 *cntp)
return 0;
}
-#define MAX_CACHES (MAX_NR_CPUS * 4)
+#define MAX_CACHE_LVL 4
static int write_cache(struct feat_fd *ff,
struct evlist *evlist __maybe_unused)
{
- struct cpu_cache_level caches[MAX_CACHES];
+ struct cpu_cache_level caches[(cpu__max_cpu() * MAX_CACHE_LVL)];
u32 cnt = 0, i, version = 1;
int ret;
- ret = build_caches(caches, MAX_CACHES, &cnt);
+ ret = build_caches(caches, (cpu__max_cpu() * MAX_CACHE_LVL), &cnt);
if (ret)
goto out;
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 5734460fc89e..cf5f4b4eeea0 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -2616,7 +2616,8 @@ int __machine__synthesize_threads(struct machine *machine, struct perf_tool *too
pid_t machine__get_current_tid(struct machine *machine, int cpu)
{
- if (cpu < 0 || cpu >= MAX_NR_CPUS || !machine->current_tid)
+ int nr_cpus_online = machine->env->nr_cpus_online;
+ if (cpu < 0 || cpu >= nr_cpus_online || !machine->current_tid)
return -1;
return machine->current_tid[cpu];
@@ -2626,6 +2627,7 @@ int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid,
pid_t tid)
{
struct thread *thread;
+ int nr_cpus_online = machine->env->nr_cpus_online;
if (cpu < 0)
return -EINVAL;
@@ -2633,16 +2635,15 @@ int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid,
if (!machine->current_tid) {
int i;
- machine->current_tid = calloc(MAX_NR_CPUS, sizeof(pid_t));
+ machine->current_tid = calloc(nr_cpus_online, sizeof(pid_t));
if (!machine->current_tid)
return -ENOMEM;
- for (i = 0; i < MAX_NR_CPUS; i++)
+ for (i = 0; i < nr_cpus_online; i++)
machine->current_tid[i] = -1;
}
- if (cpu >= MAX_NR_CPUS) {
+ if (cpu >= nr_cpus_online) {
pr_err("Requested CPU %d too large. ", cpu);
- pr_err("Consider raising MAX_NR_CPUS\n");
return -EINVAL;
}
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 11e6093c941b..a9d244a94e24 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -2275,6 +2275,7 @@ int perf_session__cpu_bitmap(struct perf_session *session,
{
int i, err = -1;
struct perf_cpu_map *map;
+ int nr_cpus_online = session->header.env.nr_cpus_online;
for (i = 0; i < PERF_TYPE_MAX; ++i) {
struct evsel *evsel;
@@ -2299,9 +2300,8 @@ int perf_session__cpu_bitmap(struct perf_session *session,
for (i = 0; i < map->nr; i++) {
int cpu = map->map[i];
- if (cpu >= MAX_NR_CPUS) {
- pr_err("Requested CPU %d too large. "
- "Consider raising MAX_NR_CPUS\n", cpu);
+ if (cpu >= nr_cpus_online) {
+ pr_err("Requested CPU %d too large\n", cpu);
goto out_delete_map;
}
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index e4e4e3bf8b2b..199008ce936d 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -208,7 +208,7 @@ void perf_evlist__reset_stats(struct evlist *evlist)
static void zero_per_pkg(struct evsel *counter)
{
if (counter->per_pkg_mask)
- memset(counter->per_pkg_mask, 0, MAX_NR_CPUS);
+ memset(counter->per_pkg_mask, 0, cpu__max_cpu());
}
static int check_per_pkg(struct evsel *counter,
@@ -227,7 +227,7 @@ static int check_per_pkg(struct evsel *counter,
return 0;
if (!mask) {
- mask = zalloc(MAX_NR_CPUS);
+ mask = zalloc(cpu__max_cpu());
if (!mask)
return -ENOMEM;
diff --git a/tools/perf/util/svghelper.c b/tools/perf/util/svghelper.c
index 1beeb7291361..6e8433e97f21 100644
--- a/tools/perf/util/svghelper.c
+++ b/tools/perf/util/svghelper.c
@@ -695,7 +695,8 @@ struct topology {
int sib_thr_nr;
};
-static void scan_thread_topology(int *map, struct topology *t, int cpu, int *pos)
+static void scan_thread_topology(int *map, struct topology *t, int cpu,
+int *pos, int max_cpus)
{
int i;
int thr;
@@ -704,28 +705,24 @@ static void scan_thread_topology(int *map, struct topology *t, int cpu, int *pos
if (!test_bit(cpu, cpumask_bits(&t->sib_thr[i])))
continue;
- for_each_set_bit(thr,
- cpumask_bits(&t->sib_thr[i]),
- MAX_NR_CPUS)
+ for_each_set_bit(thr, cpumask_bits(&t->sib_thr[i]), max_cpus)
if (map[thr] == -1)
map[thr] = (*pos)++;
}
}
-static void scan_core_topology(int *map, struct topology *t)
+static void scan_core_topology(int *map, struct topology *t, int max_cpus)
{
int pos = 0;
int i;
int cpu;
for (i = 0; i < t->sib_core_nr; i++)
- for_each_set_bit(cpu,
- cpumask_bits(&t->sib_core[i]),
- MAX_NR_CPUS)
- scan_thread_topology(map, t, cpu, &pos);
+ for_each_set_bit(cpu, cpumask_bits(&t->sib_core[i]), max_cpus)
+ scan_thread_topology(map, t, cpu, &pos, max_cpus);
}
-static int str_to_bitmap(char *s, cpumask_t *b)
+static int str_to_bitmap(char *s, cpumask_t *b, int max_cpus)
{
int i;
int ret = 0;
@@ -738,7 +735,7 @@ static int str_to_bitmap(char *s, cpumask_t *b)
for (i = 0; i < m->nr; i++) {
c = m->map[i];
- if (c >= MAX_NR_CPUS) {
+ if (c >= max_cpus) {
ret = -1;
break;
}
@@ -767,7 +764,8 @@ int svg_build_topology_map(struct perf_env *env)
}
for (i = 0; i < env->nr_sibling_cores; i++) {
- if (str_to_bitmap(env->sibling_cores, &t.sib_core[i])) {
+ if (str_to_bitmap(env->sibling_cores, &t.sib_core[i],
+ env->nr_cpus_online)) {
fprintf(stderr, "topology: can't parse siblings map\n");
goto exit;
}
@@ -776,7 +774,8 @@ int svg_build_topology_map(struct perf_env *env)
}
for (i = 0; i < env->nr_sibling_threads; i++) {
- if (str_to_bitmap(env->sibling_threads, &t.sib_thr[i])) {
+ if (str_to_bitmap(env->sibling_threads, &t.sib_thr[i],
+ env->nr_cpus_online)) {
fprintf(stderr, "topology: can't parse siblings map\n");
goto exit;
}
@@ -784,16 +783,16 @@ int svg_build_topology_map(struct perf_env *env)
env->sibling_threads += strlen(env->sibling_threads) + 1;
}
- topology_map = malloc(sizeof(int) * MAX_NR_CPUS);
+ topology_map = malloc(sizeof(int) * env->nr_cpus_online);
if (!topology_map) {
fprintf(stderr, "topology: no memory\n");
goto exit;
}
- for (i = 0; i < MAX_NR_CPUS; i++)
+ for (i = 0; i < env->nr_cpus_online; i++)
topology_map[i] = -1;
- scan_core_topology(topology_map, &t);
+ scan_core_topology(topology_map, &t, env->nr_cpus_online);
return 0;
--
2.12.3
^ permalink raw reply related [flat|nested] 2+ messages in thread
* Re: [PATCH 2/2] perf: Replace MAX_NR_CPUS with dynamic alternatives
2019-08-13 21:06 [PATCH 2/2] perf: Replace MAX_NR_CPUS with dynamic alternatives Kyle Meyer
@ 2019-08-13 21:32 ` Arnaldo Carvalho de Melo
0 siblings, 0 replies; 2+ messages in thread
From: Arnaldo Carvalho de Melo @ 2019-08-13 21:32 UTC (permalink / raw)
To: Kyle Meyer
Cc: Peter Zijlstra, Ingo Molnar, Alexander Shishkin, Jiri Olsa,
Namhyung Kim, linux-kernel, Russ Anderson, Kyle Meyer
Em Tue, Aug 13, 2019 at 04:06:13PM -0500, Kyle Meyer escreveu:
> Both cpu__max_cpu and env.nr_cpus_online are dynamic alternatives for
> MAX_NR_CPUS, a macro currently used throughout perf. The function cpu__max_cpu
> returns the possible number of CPUS as defined in the sysfs, whereas
> env.nr_cpus_online is the number of CPUs that were online during the recording
> session. MAX_NR_CPUS is still used by DECLARE_BITMAP at compile time, however,
> it's replaced elsewhere.
>
> This patch was tested using "perf record -a -g" on both an eight socket 288 CPU
> system and a single socket 36 CPU system. Each system was then rebooted single
> socket and eight socket before "perf report" was used to read the perf.data out
> file. "perf report --header" was used to confirm that each perf.data file had
> information on the correct number of CPUs.
Can you please further break this into multiple patches, one for the
svg code, another for the header and so on.
Thanks,
- Arnaldo
> Cc: Peter Zijlstra <peterz@infradead.org>
> Cc: Ingo Molnar <mingo@redhat.com>
> Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
> Cc: Jiri Olsa <jolsa@redhat.com>
> Cc: Namhyung Kim <namhyung@kernel.org>
> Cc: linux-kernel@vger.kernel.org
> Cc: Russ Anderson <russ.anderson@hpe.com>
> Signed-off-by: Kyle Meyer <kyle.meyer@hpe.com>
> ---
> tools/perf/util/header.c | 6 +++---
> tools/perf/util/machine.c | 11 ++++++-----
> tools/perf/util/session.c | 6 +++---
> tools/perf/util/stat.c | 4 ++--
> tools/perf/util/svghelper.c | 31 +++++++++++++++----------------
> 5 files changed, 29 insertions(+), 29 deletions(-)
>
> diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
> index b04c2b6b28b3..cf8ce839cb47 100644
> --- a/tools/perf/util/header.c
> +++ b/tools/perf/util/header.c
> @@ -1121,16 +1121,16 @@ static int build_caches(struct cpu_cache_level caches[], u32 size, u32 *cntp)
> return 0;
> }
>
> -#define MAX_CACHES (MAX_NR_CPUS * 4)
> +#define MAX_CACHE_LVL 4
>
> static int write_cache(struct feat_fd *ff,
> struct evlist *evlist __maybe_unused)
> {
> - struct cpu_cache_level caches[MAX_CACHES];
> + struct cpu_cache_level caches[(cpu__max_cpu() * MAX_CACHE_LVL)];
> u32 cnt = 0, i, version = 1;
> int ret;
>
> - ret = build_caches(caches, MAX_CACHES, &cnt);
> + ret = build_caches(caches, (cpu__max_cpu() * MAX_CACHE_LVL), &cnt);
> if (ret)
> goto out;
>
> diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
> index 5734460fc89e..cf5f4b4eeea0 100644
> --- a/tools/perf/util/machine.c
> +++ b/tools/perf/util/machine.c
> @@ -2616,7 +2616,8 @@ int __machine__synthesize_threads(struct machine *machine, struct perf_tool *too
>
> pid_t machine__get_current_tid(struct machine *machine, int cpu)
> {
> - if (cpu < 0 || cpu >= MAX_NR_CPUS || !machine->current_tid)
> + int nr_cpus_online = machine->env->nr_cpus_online;
> + if (cpu < 0 || cpu >= nr_cpus_online || !machine->current_tid)
> return -1;
>
> return machine->current_tid[cpu];
> @@ -2626,6 +2627,7 @@ int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid,
> pid_t tid)
> {
> struct thread *thread;
> + int nr_cpus_online = machine->env->nr_cpus_online;
>
> if (cpu < 0)
> return -EINVAL;
> @@ -2633,16 +2635,15 @@ int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid,
> if (!machine->current_tid) {
> int i;
>
> - machine->current_tid = calloc(MAX_NR_CPUS, sizeof(pid_t));
> + machine->current_tid = calloc(nr_cpus_online, sizeof(pid_t));
> if (!machine->current_tid)
> return -ENOMEM;
> - for (i = 0; i < MAX_NR_CPUS; i++)
> + for (i = 0; i < nr_cpus_online; i++)
> machine->current_tid[i] = -1;
> }
>
> - if (cpu >= MAX_NR_CPUS) {
> + if (cpu >= nr_cpus_online) {
> pr_err("Requested CPU %d too large. ", cpu);
> - pr_err("Consider raising MAX_NR_CPUS\n");
> return -EINVAL;
> }
>
> diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
> index 11e6093c941b..a9d244a94e24 100644
> --- a/tools/perf/util/session.c
> +++ b/tools/perf/util/session.c
> @@ -2275,6 +2275,7 @@ int perf_session__cpu_bitmap(struct perf_session *session,
> {
> int i, err = -1;
> struct perf_cpu_map *map;
> + int nr_cpus_online = session->header.env.nr_cpus_online;
>
> for (i = 0; i < PERF_TYPE_MAX; ++i) {
> struct evsel *evsel;
> @@ -2299,9 +2300,8 @@ int perf_session__cpu_bitmap(struct perf_session *session,
> for (i = 0; i < map->nr; i++) {
> int cpu = map->map[i];
>
> - if (cpu >= MAX_NR_CPUS) {
> - pr_err("Requested CPU %d too large. "
> - "Consider raising MAX_NR_CPUS\n", cpu);
> + if (cpu >= nr_cpus_online) {
> + pr_err("Requested CPU %d too large\n", cpu);
> goto out_delete_map;
> }
>
> diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
> index e4e4e3bf8b2b..199008ce936d 100644
> --- a/tools/perf/util/stat.c
> +++ b/tools/perf/util/stat.c
> @@ -208,7 +208,7 @@ void perf_evlist__reset_stats(struct evlist *evlist)
> static void zero_per_pkg(struct evsel *counter)
> {
> if (counter->per_pkg_mask)
> - memset(counter->per_pkg_mask, 0, MAX_NR_CPUS);
> + memset(counter->per_pkg_mask, 0, cpu__max_cpu());
> }
>
> static int check_per_pkg(struct evsel *counter,
> @@ -227,7 +227,7 @@ static int check_per_pkg(struct evsel *counter,
> return 0;
>
> if (!mask) {
> - mask = zalloc(MAX_NR_CPUS);
> + mask = zalloc(cpu__max_cpu());
> if (!mask)
> return -ENOMEM;
>
> diff --git a/tools/perf/util/svghelper.c b/tools/perf/util/svghelper.c
> index 1beeb7291361..6e8433e97f21 100644
> --- a/tools/perf/util/svghelper.c
> +++ b/tools/perf/util/svghelper.c
> @@ -695,7 +695,8 @@ struct topology {
> int sib_thr_nr;
> };
>
> -static void scan_thread_topology(int *map, struct topology *t, int cpu, int *pos)
> +static void scan_thread_topology(int *map, struct topology *t, int cpu,
> +int *pos, int max_cpus)
> {
> int i;
> int thr;
> @@ -704,28 +705,24 @@ static void scan_thread_topology(int *map, struct topology *t, int cpu, int *pos
> if (!test_bit(cpu, cpumask_bits(&t->sib_thr[i])))
> continue;
>
> - for_each_set_bit(thr,
> - cpumask_bits(&t->sib_thr[i]),
> - MAX_NR_CPUS)
> + for_each_set_bit(thr, cpumask_bits(&t->sib_thr[i]), max_cpus)
> if (map[thr] == -1)
> map[thr] = (*pos)++;
> }
> }
>
> -static void scan_core_topology(int *map, struct topology *t)
> +static void scan_core_topology(int *map, struct topology *t, int max_cpus)
> {
> int pos = 0;
> int i;
> int cpu;
>
> for (i = 0; i < t->sib_core_nr; i++)
> - for_each_set_bit(cpu,
> - cpumask_bits(&t->sib_core[i]),
> - MAX_NR_CPUS)
> - scan_thread_topology(map, t, cpu, &pos);
> + for_each_set_bit(cpu, cpumask_bits(&t->sib_core[i]), max_cpus)
> + scan_thread_topology(map, t, cpu, &pos, max_cpus);
> }
>
> -static int str_to_bitmap(char *s, cpumask_t *b)
> +static int str_to_bitmap(char *s, cpumask_t *b, int max_cpus)
> {
> int i;
> int ret = 0;
> @@ -738,7 +735,7 @@ static int str_to_bitmap(char *s, cpumask_t *b)
>
> for (i = 0; i < m->nr; i++) {
> c = m->map[i];
> - if (c >= MAX_NR_CPUS) {
> + if (c >= max_cpus) {
> ret = -1;
> break;
> }
> @@ -767,7 +764,8 @@ int svg_build_topology_map(struct perf_env *env)
> }
>
> for (i = 0; i < env->nr_sibling_cores; i++) {
> - if (str_to_bitmap(env->sibling_cores, &t.sib_core[i])) {
> + if (str_to_bitmap(env->sibling_cores, &t.sib_core[i],
> + env->nr_cpus_online)) {
> fprintf(stderr, "topology: can't parse siblings map\n");
> goto exit;
> }
> @@ -776,7 +774,8 @@ int svg_build_topology_map(struct perf_env *env)
> }
>
> for (i = 0; i < env->nr_sibling_threads; i++) {
> - if (str_to_bitmap(env->sibling_threads, &t.sib_thr[i])) {
> + if (str_to_bitmap(env->sibling_threads, &t.sib_thr[i],
> + env->nr_cpus_online)) {
> fprintf(stderr, "topology: can't parse siblings map\n");
> goto exit;
> }
> @@ -784,16 +783,16 @@ int svg_build_topology_map(struct perf_env *env)
> env->sibling_threads += strlen(env->sibling_threads) + 1;
> }
>
> - topology_map = malloc(sizeof(int) * MAX_NR_CPUS);
> + topology_map = malloc(sizeof(int) * env->nr_cpus_online);
> if (!topology_map) {
> fprintf(stderr, "topology: no memory\n");
> goto exit;
> }
>
> - for (i = 0; i < MAX_NR_CPUS; i++)
> + for (i = 0; i < env->nr_cpus_online; i++)
> topology_map[i] = -1;
>
> - scan_core_topology(topology_map, &t);
> + scan_core_topology(topology_map, &t, env->nr_cpus_online);
>
> return 0;
>
> --
> 2.12.3
--
- Arnaldo
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2019-08-13 21:32 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz follow: Atom feed)
-- links below jump to the message on this page --
2019-08-13 21:06 [PATCH 2/2] perf: Replace MAX_NR_CPUS with dynamic alternatives Kyle Meyer
2019-08-13 21:32 ` Arnaldo Carvalho de Melo
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox