From: Thomas Gleixner <tglx@linutronix.de>
To: LKML <linux-kernel@vger.kernel.org>
Cc: x86@kernel.org, Tom Lendacky <thomas.lendacky@amd.com>,
Andrew Cooper <andrew.cooper3@citrix.com>,
Arjan van de Ven <arjan@linux.intel.com>,
Huang Rui <ray.huang@amd.com>, Juergen Gross <jgross@suse.com>,
Dimitri Sivanich <dimitri.sivanich@hpe.com>,
Sohil Mehta <sohil.mehta@intel.com>,
K Prateek Nayak <kprateek.nayak@amd.com>,
Kan Liang <kan.liang@linux.intel.com>,
Zhang Rui <rui.zhang@intel.com>,
"Paul E. McKenney" <paulmck@kernel.org>,
Feng Tang <feng.tang@intel.com>,
Andy Shevchenko <andy@infradead.org>,
Michael Kelley <mhklinux@outlook.com>,
"Peter Zijlstra (Intel)" <peterz@infradead.org>
Subject: [patch v5 06/19] x86/cpu: Provide a sane leaf 0xb/0x1f parser
Date: Tue, 23 Jan 2024 13:53:39 +0100 (CET) [thread overview]
Message-ID: <20240117115908.674834306@linutronix.de> (raw)
In-Reply-To: 20240117115752.863482697@linutronix.de
From: Thomas Gleixner <tglx@linutronix.de>
detect_extended_topology() along with its early() variant is a classic
example of duct tape engineering:
- It evaluates an array of subleafs with a boatload of local variables
for the relevant topology levels instead of using an array to save the
enumerated information and propagate it to the right level
- It has no boundary checks for subleafs
- It prevents updating the die_id with a crude workaround instead of
checking for leaf 0xb which does not provide die information.
- It's broken vs. the number of dies evaluation as it uses:
num_processors[DIE_LEVEL] / num_processors[CORE_LEVEL]
which only "works" correctly if none of the intermediate
topology levels (MODULE/TILE) are enumerated.
There is zero value in trying to "fix" that code as the only proper fix is
to rewrite it from scratch.
Implement a sane parser with proper code documentation, which will be used
for the consolidated topology evaluation in the next step.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Juergen Gross <jgross@suse.com>
Tested-by: Sohil Mehta <sohil.mehta@intel.com>
Tested-by: Michael Kelley <mhklinux@outlook.com>
---
arch/x86/kernel/cpu/Makefile | 2
arch/x86/kernel/cpu/topology.h | 12 +++
arch/x86/kernel/cpu/topology_ext.c | 130 +++++++++++++++++++++++++++++++++++++
3 files changed, 143 insertions(+), 1 deletion(-)
create mode 100644 arch/x86/kernel/cpu/topology_ext.c
---
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -18,7 +18,7 @@ KMSAN_SANITIZE_common.o := n
KCSAN_SANITIZE_common.o := n
obj-y := cacheinfo.o scattered.o
-obj-y += topology_common.o topology.o
+obj-y += topology_common.o topology_ext.o topology.o
obj-y += common.o
obj-y += rdrand.o
obj-y += match.o
--- a/arch/x86/kernel/cpu/topology.h
+++ b/arch/x86/kernel/cpu/topology.h
@@ -16,6 +16,7 @@ void cpu_init_topology(struct cpuinfo_x8
void cpu_parse_topology(struct cpuinfo_x86 *c);
void topology_set_dom(struct topo_scan *tscan, enum x86_topology_domains dom,
unsigned int shift, unsigned int ncpus);
+bool cpu_parse_topology_ext(struct topo_scan *tscan);
static inline u32 topo_shift_apicid(u32 apicid, enum x86_topology_domains dom)
{
@@ -36,4 +37,15 @@ static inline u32 topo_domain_mask(enum
return (1U << x86_topo_system.dom_shifts[dom]) - 1;
}
+/*
+ * Update a domain level after the fact without propagating. Used to fixup
+ * broken CPUID enumerations.
+ *
+ * @tscan: Scan state whose per-domain arrays are modified
+ * @dom:   Domain whose entry is overwritten
+ * @shift: New value for tscan->dom_shifts[@dom]
+ * @ncpus: New value for tscan->dom_ncpus[@dom]
+ *
+ * Only the entry for @dom is written; the entries of all other domains
+ * are left as they are.
+ */
+static inline void topology_update_dom(struct topo_scan *tscan, enum x86_topology_domains dom,
+ unsigned int shift, unsigned int ncpus)
+{
+ tscan->dom_shifts[dom] = shift;
+ tscan->dom_ncpus[dom] = ncpus;
+}
+
#endif /* ARCH_X86_TOPOLOGY_H */
--- /dev/null
+++ b/arch/x86/kernel/cpu/topology_ext.c
@@ -0,0 +1,130 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/cpu.h>
+
+#include <asm/apic.h>
+#include <asm/memtype.h>
+#include <asm/processor.h>
+
+#include "cpu.h"
+
+enum topo_types { /* Level type values as read from a leaf 0xb/0x1f subleaf */
+ INVALID_TYPE = 0, /* A subleaf reporting this type is rejected by the parser */
+ SMT_TYPE = 1,
+ CORE_TYPE = 2,
+ MAX_TYPE_0B = 3, /* Leaf 0xb: types >= this value are treated as unknown */
+ MODULE_TYPE = 3, /* Same value as MAX_TYPE_0B, so not accepted for leaf 0xb */
+ TILE_TYPE = 4,
+ DIE_TYPE = 5,
+ DIEGRP_TYPE = 6,
+ MAX_TYPE_1F = 7, /* Leaf 0x1f: types >= this value are treated as unknown */
+};
+
+/*
+ * Use a lookup table for the case that there are future types > 6 which
+ * describe an intermediate domain level which does not exist today.
+ *
+ * The table is indexed by the level type (enum topo_types) and is shared
+ * by leaf 0xb and leaf 0x1f; the per-leaf maximum type chosen in
+ * topo_subleaf() limits which entries can be reached. INVALID_TYPE has no
+ * initializer, so its slot is zero.
+ */
+static const unsigned int topo_domain_map_0b_1f[MAX_TYPE_1F] = {
+ [SMT_TYPE] = TOPO_SMT_DOMAIN,
+ [CORE_TYPE] = TOPO_CORE_DOMAIN,
+ [MODULE_TYPE] = TOPO_MODULE_DOMAIN,
+ [TILE_TYPE] = TOPO_TILE_DOMAIN,
+ [DIE_TYPE] = TOPO_DIE_DOMAIN,
+ [DIEGRP_TYPE] = TOPO_DIEGRP_DOMAIN,
+};
+/*
+ * Read one subleaf of CPUID leaf 0xb or 0x1f and record the topology
+ * level it describes.
+ *
+ * @tscan:    Scan state; handed to topology_set_dom() and used to reach
+ *            tscan->c->topo.initial_apicid
+ * @leaf:     CPUID leaf to query. Anything other than 0x0b or 0x1f is
+ *            rejected before CPUID is read
+ * @subleaf:  Subleaf index to query
+ * @last_dom: In/out: domain of the most recent subleaf with a known level
+ *            type. It is read to place a subleaf with an unknown type and
+ *            written only when the type is known
+ *
+ * Returns true when the subleaf described a level and that level was
+ * handed to topology_set_dom(). Returns false when @leaf is unsupported,
+ * or when the subleaf reports zero processors or the invalid level type;
+ * nothing is recorded in that case.
+ *
+ * NOTE(review): the eax/ebx/ecx/edx annotations in the local struct assume
+ * that cpuid_subleaf() stores the four registers in exactly that order -
+ * confirm against its definition.
+ */
+static inline bool topo_subleaf(struct topo_scan *tscan, u32 leaf, u32 subleaf,
+ unsigned int *last_dom)
+{
+ unsigned int dom, maxtype;
+ const unsigned int *map;
+ struct {
+ // eax
+ u32 x2apic_shift : 5, // Number of bits to shift APIC ID right
+ // for the topology ID at the next level
+ : 27; // Reserved
+ // ebx
+ u32 num_processors : 16, // Number of processors at current level
+ : 16; // Reserved
+ // ecx
+ u32 level : 8, // Current topology level. Same as sub leaf number
+ type : 8, // Level type. If 0, invalid
+ : 16; // Reserved
+ // edx
+ u32 x2apic_id : 32; // X2APIC ID of the current logical processor
+ } sl;
+
+ switch (leaf) { /* Both leaves share one map; only the accepted type range differs */
+ case 0x0b: maxtype = MAX_TYPE_0B; map = topo_domain_map_0b_1f; break;
+ case 0x1f: maxtype = MAX_TYPE_1F; map = topo_domain_map_0b_1f; break;
+ default: return false;
+ }
+
+ cpuid_subleaf(leaf, subleaf, &sl);
+
+ if (!sl.num_processors || sl.type == INVALID_TYPE)
+ return false; /* Nothing usable here; this ends the caller's subleaf walk */
+
+ if (sl.type >= maxtype) {
+ pr_err_once("Topology: leaf 0x%x:%d Unknown domain type %u\n",
+ leaf, subleaf, sl.type);
+ /*
+ * It really would have been too obvious to make the domain
+ * type space sparse and leave a few reserved types between
+ * the points which might change instead of following the
+ * usual "this can be fixed in software" principle.
+ *
+ * Place the unknown type one domain above the last known
+ * one. *last_dom is not updated on this path, so several
+ * unknown types in a row resolve to the same domain.
+ */
+ dom = *last_dom + 1;
+ } else {
+ dom = map[sl.type];
+ *last_dom = dom;
+ }
+
+ if (!dom) { /* Domain 0 supplies the APIC ID which the other levels are checked against */
+ tscan->c->topo.initial_apicid = sl.x2apic_id;
+ } else if (tscan->c->topo.initial_apicid != sl.x2apic_id) {
+ pr_warn_once(FW_BUG "CPUID leaf 0x%x subleaf %d APIC ID mismatch %x != %x\n",
+ leaf, subleaf, tscan->c->topo.initial_apicid, sl.x2apic_id);
+ }
+
+ topology_set_dom(tscan, dom, sl.x2apic_shift, sl.num_processors);
+ return true;
+}
+/*
+ * Walk all subleafs of one topology leaf and record the levels found.
+ *
+ * @tscan: Scan state which receives the per-domain shift and CPU count
+ * @leaf:  CPUID leaf to walk; passed unchanged to topo_subleaf()
+ *
+ * Returns false when not even subleaf 0 could be parsed. Otherwise the
+ * SMT level is fixed up if required, X86_FEATURE_XTOPOLOGY is set on
+ * tscan->c and true is returned.
+ */
+static bool parse_topology_leaf(struct topo_scan *tscan, u32 leaf)
+{
+ unsigned int last_dom;
+ u32 subleaf;
+
+ /*
+  * Read all available subleafs and populate the levels. The loop has
+  * no body: topo_subleaf() does the work and its first false return
+  * ends the walk, which leaves subleaf at the index that failed.
+  */
+ for (subleaf = 0, last_dom = 0; topo_subleaf(tscan, leaf, subleaf, &last_dom); subleaf++);
+
+ /* If subleaf 0 failed to parse, give up */
+ if (!subleaf)
+ return false;
+
+ /*
+ * There are machines in the wild which have shift 0 in the subleaf
+ * 0, but advertise 2 logical processors at that level. They are
+ * truly SMT.
+ *
+ * Derive the shift from the advertised CPU count and overwrite only
+ * the SMT entry; the CPU count itself is kept as enumerated.
+ */
+ if (!tscan->dom_shifts[TOPO_SMT_DOMAIN] && tscan->dom_ncpus[TOPO_SMT_DOMAIN] > 1) {
+ unsigned int sft = get_count_order(tscan->dom_ncpus[TOPO_SMT_DOMAIN]);
+
+ pr_warn_once(FW_BUG "CPUID leaf 0x%x subleaf 0 has shift level 0 but %u CPUs\n",
+ leaf, tscan->dom_ncpus[TOPO_SMT_DOMAIN]);
+ topology_update_dom(tscan, TOPO_SMT_DOMAIN, sft, tscan->dom_ncpus[TOPO_SMT_DOMAIN]);
+ }
+
+ set_cpu_cap(tscan->c, X86_FEATURE_XTOPOLOGY);
+ return true;
+}
+/*
+ * Parse the extended topology enumeration of the CPU behind @tscan.
+ *
+ * Leaf 0x1f is tried first and leaf 0xb is only consulted when leaf 0x1f
+ * is not available or could not be parsed. A leaf is attempted only when
+ * tscan->c->cpuid_level says it exists.
+ *
+ * Returns true when one of the two leaves was parsed successfully, false
+ * otherwise.
+ */
+bool cpu_parse_topology_ext(struct topo_scan *tscan)
+{
+ /* Intel: Try leaf 0x1F first. */
+ if (tscan->c->cpuid_level >= 0x1f && parse_topology_leaf(tscan, 0x1f))
+ return true;
+
+ /* Intel/AMD: Fall back to leaf 0xB if available */
+ return tscan->c->cpuid_level >= 0x0b && parse_topology_leaf(tscan, 0x0b);
+}
next prev parent reply other threads:[~2024-01-23 12:53 UTC|newest]
Thread overview: 45+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-01-23 12:53 [patch v5 00/19] x86/cpu: Rework topology evaluation Thomas Gleixner
2024-01-23 12:53 ` [patch v5 01/19] x86/cpu: Provide cpuid_read() et al Thomas Gleixner
2024-01-24 12:25 ` Borislav Petkov
2024-01-24 20:02 ` Borislav Petkov
2024-02-12 13:57 ` Thomas Gleixner
2024-01-23 12:53 ` [patch v5 02/19] x86/cpu: Provide cpu_init/parse_topology() Thomas Gleixner
2024-02-01 22:16 ` Sohil Mehta
2024-01-23 12:53 ` [patch v5 03/19] x86/cpu: Add legacy topology parser Thomas Gleixner
2024-01-24 20:12 ` Borislav Petkov
2024-01-23 12:53 ` [patch v5 04/19] x86/cpu: Use common topology code for Centaur and Zhaoxin Thomas Gleixner
2024-01-30 19:09 ` Borislav Petkov
2024-01-23 12:53 ` [patch v5 05/19] x86/cpu: Move __max_die_per_package to common.c Thomas Gleixner
2024-01-23 12:53 ` Thomas Gleixner [this message]
2024-01-30 19:31 ` [patch v5 06/19] x86/cpu: Provide a sane leaf 0xb/0x1f parser Borislav Petkov
2024-02-12 14:17 ` Thomas Gleixner
2024-02-12 15:00 ` Borislav Petkov
2024-02-12 15:08 ` Thomas Gleixner
2024-02-12 15:43 ` Borislav Petkov
2024-02-12 23:02 ` Thomas Gleixner
2024-02-12 15:03 ` Thomas Gleixner
2024-02-12 15:05 ` Borislav Petkov
2024-02-13 14:30 ` [tip: x86/misc] Documentation/maintainer-tip: Add C++ tail comments exception tip-bot2 for Borislav Petkov (AMD)
2024-01-23 12:53 ` [patch v5 07/19] x86/cpu: Use common topology code for Intel Thomas Gleixner
2024-02-01 15:07 ` Borislav Petkov
2024-01-23 12:53 ` [patch v5 08/19] x86/cpu/amd: Provide a separate accessor for Node ID Thomas Gleixner
2024-02-01 15:19 ` Borislav Petkov
2024-01-23 12:53 ` [patch v5 09/19] x86/cpu: Provide an AMD/HYGON specific topology parser Thomas Gleixner
2024-02-01 15:55 ` Borislav Petkov
2024-02-02 12:30 ` Borislav Petkov
2024-01-23 12:53 ` [patch v5 10/19] x86/smpboot: Teach it about topo.amd_node_id Thomas Gleixner
2024-02-06 15:48 ` Borislav Petkov
2024-01-23 12:53 ` [patch v5 11/19] x86/cpu: Use common topology code for AMD Thomas Gleixner
2024-02-06 15:58 ` Borislav Petkov
2024-02-12 14:50 ` Thomas Gleixner
2024-02-12 15:06 ` Borislav Petkov
2024-01-23 12:53 ` [patch v5 12/19] x86/cpu: Use common topology code for HYGON Thomas Gleixner
2024-01-23 12:53 ` [patch v5 13/19] x86/mm/numa: Use core domain size on AMD Thomas Gleixner
2024-02-12 15:56 ` Borislav Petkov
2024-01-23 12:53 ` [patch v5 14/19] x86/cpu: Make topology_amd_node_id() use the actual node info Thomas Gleixner
2024-01-23 12:53 ` [patch v5 15/19] x86/cpu: Remove topology.c Thomas Gleixner
2024-01-23 12:53 ` [patch v5 16/19] x86/cpu: Remove x86_coreid_bits Thomas Gleixner
2024-01-23 12:53 ` [patch v5 17/19] x86/apic: Remove unused phys_pkg_id() callback Thomas Gleixner
2024-01-23 12:53 ` [patch v5 18/19] x86/xen/smp_pv: Remove cpudata fiddling Thomas Gleixner
2024-01-23 12:53 ` [patch v5 19/19] x86/apic/uv: Remove the private leaf 0xb parser Thomas Gleixner
2024-01-31 7:40 ` [patch v5 00/19] x86/cpu: Rework topology evaluation Zhang, Rui
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240117115908.674834306@linutronix.de \
--to=tglx@linutronix.de \
--cc=andrew.cooper3@citrix.com \
--cc=andy@infradead.org \
--cc=arjan@linux.intel.com \
--cc=dimitri.sivanich@hpe.com \
--cc=feng.tang@intel.com \
--cc=jgross@suse.com \
--cc=kan.liang@linux.intel.com \
--cc=kprateek.nayak@amd.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mhklinux@outlook.com \
--cc=paulmck@kernel.org \
--cc=peterz@infradead.org \
--cc=ray.huang@amd.com \
--cc=rui.zhang@intel.com \
--cc=sohil.mehta@intel.com \
--cc=thomas.lendacky@amd.com \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox