From: Namhyung Kim <namhyung@kernel.org>
To: Ian Rogers <irogers@google.com>
Cc: acme@kernel.org, tmricht@linux.ibm.com, agordeev@linux.ibm.com,
gor@linux.ibm.com, hca@linux.ibm.com, jameshongleiwang@126.com,
japo@linux.ibm.com, linux-kernel@vger.kernel.org,
linux-perf-users@vger.kernel.org, linux-s390@vger.kernel.org,
sumanthk@linux.ibm.com
Subject: Re: [PATCH v4 1/2] perf env: Add perf_env__e_machine helper and use in perf_env__arch
Date: Sun, 5 Apr 2026 22:05:02 -0700 [thread overview]
Message-ID: <adM-_viYsw5KEoeV@google.com> (raw)
In-Reply-To: <20260327045025.2276517-2-irogers@google.com>
On Thu, Mar 26, 2026 at 09:50:24PM -0700, Ian Rogers wrote:
> Add a helper that lazily computes the e_machine and falls back to
> EM_HOST. Use the perf_env's arch to compute the e_machine if
> available. Use a binary search for some efficiency in this, but handle
> somewhat complex duplicate rules. Switch perf_env__arch to be derived
> from the e_machine for consistency. This switches arch from being uname
> derived to matching that of the perf binary (via EM_HOST). Update
> session to use the helper, which may mean using EM_HOST when no
> threads are available. This also updates the perf data file header
> that gets the e_machine/e_flags from the session.
>
> Signed-off-by: Ian Rogers <irogers@google.com>
> ---
> tools/perf/util/env.c | 185 ++++++++++++++++++++++++++++++--------
> tools/perf/util/env.h | 1 +
> tools/perf/util/session.c | 14 +--
> 3 files changed, 157 insertions(+), 43 deletions(-)
>
> diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c
> index 93d475a80f14..ae08178870d7 100644
> --- a/tools/perf/util/env.c
> +++ b/tools/perf/util/env.c
> @@ -1,10 +1,12 @@
> // SPDX-License-Identifier: GPL-2.0
> #include "cpumap.h"
> +#include "dwarf-regs.h"
> #include "debug.h"
> #include "env.h"
> #include "util/header.h"
> #include "util/rwsem.h"
> #include <linux/compiler.h>
> +#include <linux/kernel.h>
> #include <linux/ctype.h>
> #include <linux/rbtree.h>
> #include <linux/string.h>
> @@ -588,51 +590,160 @@ void cpu_cache_level__free(struct cpu_cache_level *cache)
> zfree(&cache->size);
> }
>
> +struct arch_to_e_machine {
> + const char *prefix;
> + uint16_t e_machine;
> +};
> +
> /*
> - * Return architecture name in a normalized form.
> - * The conversion logic comes from the Makefile.
> + * A mapping from an arch prefix string to an ELF machine that can be used in a
> + * bsearch. Some arch prefixes are shared and need additional processing as
> + * marked next to the architecture. The prefixes handle both perf's architecture
> + * naming and those from uname.
> */
> -static const char *normalize_arch(char *arch)
> -{
> - if (!strcmp(arch, "x86_64"))
> - return "x86";
> - if (arch[0] == 'i' && arch[2] == '8' && arch[3] == '6')
> - return "x86";
> - if (!strcmp(arch, "sun4u") || !strncmp(arch, "sparc", 5))
> - return "sparc";
> - if (!strncmp(arch, "aarch64", 7) || !strncmp(arch, "arm64", 5))
> - return "arm64";
> - if (!strncmp(arch, "arm", 3) || !strcmp(arch, "sa110"))
> - return "arm";
> - if (!strncmp(arch, "s390", 4))
> - return "s390";
> - if (!strncmp(arch, "parisc", 6))
> - return "parisc";
> - if (!strncmp(arch, "powerpc", 7) || !strncmp(arch, "ppc", 3))
> - return "powerpc";
> - if (!strncmp(arch, "mips", 4))
> - return "mips";
> - if (!strncmp(arch, "sh", 2) && isdigit(arch[2]))
> - return "sh";
> - if (!strncmp(arch, "loongarch", 9))
> - return "loongarch";
> -
> - return arch;
> +static const struct arch_to_e_machine prefix_to_e_machine[] = {
> + {"aarch64", EM_AARCH64},
> + {"alpha", EM_ALPHA},
> + {"arc", EM_ARC},
> + {"arm", EM_ARM}, /* Check also for EM_AARCH64. */
> + {"avr", EM_AVR}, /* Check also for EM_AVR32. */
> + {"bfin", EM_BLACKFIN},
> + {"blackfin", EM_BLACKFIN},
> + {"cris", EM_CRIS},
> + {"csky", EM_CSKY},
> + {"hppa", EM_PARISC},
> + {"i386", EM_386},
> + {"i486", EM_386},
> + {"i586", EM_386},
> + {"i686", EM_386},
> + {"loongarch", EM_LOONGARCH},
> + {"m32r", EM_M32R},
> + {"m68k", EM_68K},
> + {"microblaze", EM_MICROBLAZE},
> + {"mips", EM_MIPS},
> + {"msp430", EM_MSP430},
> + {"parisc", EM_PARISC},
> + {"powerpc", EM_PPC}, /* Check also for EM_PPC64. */
> + {"ppc", EM_PPC}, /* Check also for EM_PPC64. */
> + {"riscv", EM_RISCV},
> + {"s390", EM_S390},
> + {"sa110", EM_ARM},
> + {"sh", EM_SH},
> + {"sparc", EM_SPARC}, /* Check also for EM_SPARCV9. */
> + {"sun4u", EM_SPARC},
> + {"x86", EM_X86_64}, /* Check also for EM_386. */
> + {"xtensa", EM_XTENSA},
> +};
> +
> +static int compare_prefix(const void *key, const void *element)
> +{
> + const char *search_key = key;
> + const struct arch_to_e_machine *map_element = element;
> + size_t prefix_len = strlen(map_element->prefix);
> +
> + return strncmp(search_key, map_element->prefix, prefix_len);
> +}
> +
> +static uint16_t perf_arch_to_e_machine(const char *perf_arch, bool is_64_bit)
> +{
> + /* Binary search for a matching prefix. */
> + const struct arch_to_e_machine *result;
> +
> + if (!perf_arch)
> + return EM_HOST;
> +
> + result = bsearch(perf_arch,
> + prefix_to_e_machine, ARRAY_SIZE(prefix_to_e_machine),
> + sizeof(prefix_to_e_machine[0]),
> + compare_prefix);
> +
> + if (!result) {
> + pr_debug("Unknown perf arch for ELF machine mapping: %s\n", perf_arch);
> + return EM_NONE;
> + }
> +
> + /* Handle conflicting prefixes. */
> + switch (result->e_machine) {
> + case EM_ARM:
> + return !strcmp(perf_arch, "arm64") ? EM_AARCH64 : EM_ARM;
> + case EM_AVR:
> + return !strcmp(perf_arch, "avr32") ? EM_AVR32 : EM_AVR;
> + case EM_PPC:
> + return is_64_bit || strstarts(perf_arch, "ppc64") ? EM_PPC64 : EM_PPC;
I'm curious what name `uname -m` returns for PPC64. Is
"powerpc64" possible?
> + case EM_SPARC:
> + return is_64_bit || !strcmp(perf_arch, "sparc64") ? EM_SPARCV9 : EM_SPARC;
> + case EM_X86_64:
> + return is_64_bit || !strcmp(perf_arch, "x86_64") ? EM_X86_64 : EM_386;
> + default:
> + return result->e_machine;
> + }
> +}
> +
> +static const char *e_machine_to_perf_arch(uint16_t e_machine)
> +{
> + /*
> + * Table for when either the perf arch string differs from uname or
> + * more than one ELF machine shares the prefix.
> + */
> + static const struct arch_to_e_machine extras[] = {
> + {"arm64", EM_AARCH64},
> + {"avr32", EM_AVR32},
> + {"powerpc", EM_PPC},
> + {"powerpc", EM_PPC64},
Here it returns powerpc for both.
> + {"sparc", EM_SPARCV9},
> + {"x86", EM_386},
> + {"x86", EM_X86_64},
> + {"none", EM_NONE},
> + };
> +
> + for (size_t i = 0; i < ARRAY_SIZE(extras); i++) {
> + if (extras[i].e_machine == e_machine)
> + return extras[i].prefix;
> + }
> +
> + for (size_t i = 0; i < ARRAY_SIZE(prefix_to_e_machine); i++) {
> + if (prefix_to_e_machine[i].e_machine == e_machine)
> + return prefix_to_e_machine[i].prefix;
> +
> + }
> + return "unknown";
> +}
> +
> +uint16_t perf_env__e_machine(struct perf_env *env, uint32_t *e_flags)
> +{
> + if (!env) {
> + if (e_flags)
> + *e_flags = EF_HOST;
> +
> + return EM_HOST;
> + }
> + if (env->e_machine == EM_NONE) {
> + env->e_machine = perf_arch_to_e_machine(env->arch, env->kernel_is_64_bit);
> +
> + if (env->e_machine == EM_HOST)
> + env->e_flags = EF_HOST;
> + }
> + if (e_flags)
> + *e_flags = env->e_flags;
> +
> + return env->e_machine;
> }
>
> const char *perf_env__arch(struct perf_env *env)
> {
> - char *arch_name;
> + if (!env)
> + return e_machine_to_perf_arch(EM_HOST);
>
> - if (!env || !env->arch) { /* Assume local operation */
> - static struct utsname uts = { .machine[0] = '\0', };
> - if (uts.machine[0] == '\0' && uname(&uts) < 0)
> - return NULL;
> - arch_name = uts.machine;
> - } else
> - arch_name = env->arch;
> + if (!env->arch) {
> + /*
> + * Lazily compute/allocate arch. The e_machine may have been
> + * read from a data file and so may not be EM_HOST.
> + */
> + uint16_t e_machine = perf_env__e_machine(env, /*e_flags=*/NULL);
>
> - return normalize_arch(arch_name);
> + env->arch = strdup(e_machine_to_perf_arch(e_machine));
> + }
> + return env->arch;
> }
>
> #if defined(HAVE_LIBTRACEEVENT)
> diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
> index a4501cbca375..91ff252712f4 100644
> --- a/tools/perf/util/env.h
> +++ b/tools/perf/util/env.h
> @@ -186,6 +186,7 @@ int perf_env__read_cpu_topology_map(struct perf_env *env);
>
> void cpu_cache_level__free(struct cpu_cache_level *cache);
>
> +uint16_t perf_env__e_machine(struct perf_env *env, uint32_t *e_flags);
> const char *perf_env__arch(struct perf_env *env);
> const char *perf_env__arch_strerrno(struct perf_env *env, int err);
> const char *perf_env__cpuid(struct perf_env *env);
> diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
> index 4b465abfa36c..dcc9bef303aa 100644
> --- a/tools/perf/util/session.c
> +++ b/tools/perf/util/session.c
> @@ -2996,14 +2996,16 @@ uint16_t perf_session__e_machine(struct perf_session *session, uint32_t *e_flags
> return EM_HOST;
> }
>
> + /* Is the env caching an e_machine? */
> env = perf_session__env(session);
> - if (env && env->e_machine != EM_NONE) {
> - if (e_flags)
> - *e_flags = env->e_flags;
> -
> - return env->e_machine;
> - }
> + if (env && env->e_machine != EM_NONE)
> + return perf_env__e_machine(env, e_flags);
>
> + /*
> + * Compute from threads, note this is more accurate than
> + * perf_env__e_machine that falls back on EM_HOST and doesn't consider
> + * mixed 32-bit and 64-bit threads.
> + */
> machines__for_each_thread(&session->machines,
> perf_session__e_machine_cb,
> &args);
> --
> 2.53.0.1018.g2bb0e51243-goog
>
next prev parent reply other threads:[~2026-04-06 5:05 UTC|newest]
Thread overview: 27+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-02-19 11:38 [PATCH v2] perf symbol: Remove psw_idle() from list of idle symbols Thomas Richter
2026-02-19 11:55 ` Jan Polensky
2026-02-23 21:46 ` Namhyung Kim
2026-02-23 23:14 ` Arnaldo Melo
2026-03-02 18:43 ` Arnaldo Carvalho de Melo
2026-03-02 19:44 ` Ian Rogers
2026-03-04 14:34 ` Arnaldo Carvalho de Melo
2026-03-02 23:43 ` [PATCH v1] perf symbol: Lazily compute idle and use the perf_env Ian Rogers
2026-03-24 17:14 ` Ian Rogers
2026-03-25 6:58 ` Namhyung Kim
2026-03-25 15:58 ` Ian Rogers
2026-03-25 16:18 ` [PATCH v2] " Ian Rogers
2026-03-26 7:20 ` Honglei Wang
2026-03-26 15:11 ` Ian Rogers
2026-03-26 17:45 ` [PATCH v3 0/2] perf symbol/env: ELF machine clean up and lazy idle computation Ian Rogers
2026-03-26 17:45 ` [PATCH v3 1/2] perf env: Add perf_env__e_machine helper and use in perf_env__arch Ian Rogers
2026-03-26 17:45 ` [PATCH v3 2/2] perf symbol: Lazily compute idle and use the perf_env Ian Rogers
2026-03-27 6:56 ` Honglei Wang
2026-03-27 4:50 ` [PATCH v4 0/2] perf symbol/env: ELF machine clean up and lazy idle computation Ian Rogers
2026-03-27 4:50 ` [PATCH v4 1/2] perf env: Add perf_env__e_machine helper and use in perf_env__arch Ian Rogers
2026-04-06 5:05 ` Namhyung Kim [this message]
2026-03-27 4:50 ` [PATCH v4 2/2] perf symbol: Lazily compute idle and use the perf_env Ian Rogers
2026-04-06 5:10 ` Namhyung Kim
2026-03-27 6:00 ` [PATCH v2] perf tests task-analyzer: Write test files to tmpdir Ian Rogers
2026-03-31 7:22 ` Namhyung Kim
2026-03-31 17:58 ` Ian Rogers
2026-04-01 3:41 ` Namhyung Kim
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=adM-_viYsw5KEoeV@google.com \
--to=namhyung@kernel.org \
--cc=acme@kernel.org \
--cc=agordeev@linux.ibm.com \
--cc=gor@linux.ibm.com \
--cc=hca@linux.ibm.com \
--cc=irogers@google.com \
--cc=jameshongleiwang@126.com \
--cc=japo@linux.ibm.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-perf-users@vger.kernel.org \
--cc=linux-s390@vger.kernel.org \
--cc=sumanthk@linux.ibm.com \
--cc=tmricht@linux.ibm.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox