public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Thomas Gleixner <tglx@linutronix.de>
To: Feng Tang <feng.tang@intel.com>, Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@redhat.com>, Borislav Petkov <bp@alien8.de>,
	Dave Hansen <dave.hansen@linux.intel.com>,
	"H . Peter Anvin" <hpa@zytor.com>,
	David Woodhouse <dwmw@amazon.co.uk>,
	"Paul E . McKenney" <paulmck@kernel.org>,
	x86@kernel.org, linux-kernel@vger.kernel.org,
	rui.zhang@intel.com, tim.c.chen@intel.com
Subject: Re: [Patch v2 2/2] x86/tsc: use logical_packages as a better estimation of socket numbers
Date: Fri, 23 Jun 2023 01:07:24 +0200	[thread overview]
Message-ID: <87edm36qqb.ffs@tglx> (raw)
In-Reply-To: <87h6qz7et0.ffs@tglx>

On Thu, Jun 22 2023 at 16:27, Thomas Gleixner wrote:
> On Fri, Jun 16 2023 at 15:18, Feng Tang wrote:
> So something like the below should just work.

Well it works in principle, but does not take any of the command line
parameters which limit nr_possible CPUs or the actual kernel
configuration into account. But the principle itself works correctly.

Below is an updated version, which takes them into account.

The data here is from a two socket system with 32 CPUs per socket.

No command line parameters (NR_CPUS=64):

 smpboot: Allowing 64 CPUs, 32 hotplug CPUs
 clocksource: tsc-early: mask: 0xffffffffffffffff max_cycles: 0x1e3306b9ada, max_idle_ns: 440795224413 ns
 smp: Brought up 1 node, 32 CPUs
 smpboot: Max logical packages ACPI enumeration: 2

"possible_cpus=32" (NR_CPUS=64) or
No command line parameter (NR_CPUS=32):

 smpboot: Allowing 32 CPUs, 0 hotplug CPUs
 clocksource: tsc-early: mask: 0xffffffffffffffff max_cycles: 0x1e3306b9ada, max_idle_ns: 440795224413 ns
 smp: Brought up 1 node, 32 CPUs
 smpboot: Max logical packages ACPI enumeration: 1

maxcpus=32
 smpboot: Allowing 64 CPUs, 0 hotplug CPUs
 clocksource: tsc-early: mask: 0xffffffffffffffff max_cycles: 0x1e3306b9ada, max_idle_ns: 440795224413 ns
 smp: Brought up 1 node, 32 CPUs
 smpboot: Max logical packages ACPI enumeration: 2

But that's really all we should do. If the ACPI table enumerates CPUs as
hotpluggable which can never arrive, then so be it.

We have enough parameters to override the BIOS nonsense. Trying to do
more magic MAD table parsing with heuristics is just wrong.

We already have way too many heuristics and workarounds for broken
firmware, but for the problem at hand, we really don't need more.

The only systems I observed so far which have a non-sensical amount of
"hotpluggable" CPUs are high-end server machines. It's a resonable
expectation that machines with high-end price tags come with correct
firmware. Trying to work around that (except with the existing command
line options) is just proliferating this mess. This has to stop.

Thanks,

        tglx
---

--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -509,9 +509,12 @@ extern int default_check_phys_apicid_pre
 #ifdef CONFIG_SMP
 bool apic_id_is_primary_thread(unsigned int id);
 void apic_smt_update(void);
+extern unsigned int apic_to_pkg_shift;
+void logical_packages_update(u32 apicid, bool enabled);
 #else
 static inline bool apic_id_is_primary_thread(unsigned int id) { return false; }
 static inline void apic_smt_update(void) { }
+static inline void logical_packages_update(u32 apicid, bool enabled) { }
 #endif
 
 struct msi_msg;
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -178,6 +178,7 @@ static int acpi_register_lapic(int id, u
 	}
 
 	if (!enabled) {
+		logical_packages_update(acpiid, false);
 		++disabled_cpus;
 		return -EINVAL;
 	}
@@ -189,6 +190,8 @@ static int acpi_register_lapic(int id, u
 	if (cpu >= 0)
 		early_per_cpu(x86_cpu_to_acpiid, cpu) = acpiid;
 
+	logical_packages_update(acpiid, cpu >= 0);
+
 	return cpu;
 }
 
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -692,6 +692,8 @@ static void early_init_amd(struct cpuinf
 		}
 	}
 
+	detect_extended_topology_early(c);
+
 	if (cpu_has(c, X86_FEATURE_TOPOEXT))
 		smp_num_siblings = ((cpuid_ebx(0x8000001e) >> 8) & 0xff) + 1;
 }
--- a/arch/x86/kernel/cpu/topology.c
+++ b/arch/x86/kernel/cpu/topology.c
@@ -29,6 +29,8 @@ unsigned int __max_die_per_package __rea
 EXPORT_SYMBOL(__max_die_per_package);
 
 #ifdef CONFIG_SMP
+unsigned int apic_to_pkg_shift __ro_after_init;
+
 /*
  * Check if given CPUID extended topology "leaf" is implemented
  */
@@ -66,7 +68,7 @@ int detect_extended_topology_early(struc
 {
 #ifdef CONFIG_SMP
 	unsigned int eax, ebx, ecx, edx;
-	int leaf;
+	int leaf, subleaf;
 
 	leaf = detect_extended_topology_leaf(c);
 	if (leaf < 0)
@@ -80,6 +82,14 @@ int detect_extended_topology_early(struc
 	 */
 	c->initial_apicid = edx;
 	smp_num_siblings = max_t(int, smp_num_siblings, LEVEL_MAX_SIBLINGS(ebx));
+
+	for (subleaf = 1; subleaf < 8; subleaf++) {
+		cpuid_count(leaf, subleaf, &eax, &ebx, &ecx, &edx);
+
+		if (ebx == 0 || !LEAFB_SUBTYPE(ecx))
+			break;
+		apic_to_pkg_shift = BITS_SHIFT_NEXT_LEVEL(eax);
+	}
 #endif
 	return 0;
 }
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1501,17 +1501,91 @@ void __init native_smp_prepare_boot_cpu(
 	native_pv_lock_init();
 }
 
+struct logical_pkg {
+	unsigned int	enabled_cpus;
+	unsigned int	disabled_cpus;
+};
+
+/*
+ * Needs to be size of NR_CPUS because virt allows to create the weirdest
+ * topologies just because it can.
+ */
+static struct logical_pkg logical_pkgs[NR_CPUS] __refdata;
+
+void logical_packages_update(u32 apicid, bool enabled)
+{
+	struct logical_pkg *lp;
+	unsigned int pkg;
+
+	if (!apic_to_pkg_shift || system_state != SYSTEM_BOOTING)
+		return;
+
+	pkg = (apicid >> apic_to_pkg_shift);
+
+	lp = logical_pkgs + pkg;
+	if (enabled)
+		lp->enabled_cpus++;
+	else
+		lp->disabled_cpus++;
+
+	if (++pkg > __max_logical_packages)
+		__max_logical_packages = pkg;
+}
+
+static void __init logical_packages_finish_setup(unsigned int possible)
+{
+	unsigned int pkg, maxpkg = 0, maxcpus = 0;
+
+	if (!apic_to_pkg_shift)
+		return;
+
+	/* Scan the enabled CPUs first */
+	for (pkg = 0; pkg < __max_logical_packages; pkg++) {
+		if (!logical_pkgs[pkg].enabled_cpus)
+			continue;
+
+		maxpkg++;
+		maxcpus += logical_pkgs[pkg].enabled_cpus;
+
+		if (maxcpus >= possible) {
+			__max_logical_packages = maxpkg;
+			return;
+		}
+	}
+
+	/* There is still room, scan for disabled CPUs */
+	for (pkg = 0; pkg < __max_logical_packages; pkg++) {
+		if (logical_pkgs[pkg].enabled_cpus || !logical_pkgs[pkg].disabled_cpus)
+			continue;
+
+		maxpkg++;
+		maxcpus += logical_pkgs[pkg].disabled_cpus;
+
+		if (maxcpus >= possible)
+			break;
+	}
+
+	__max_logical_packages = maxpkg;
+}
+
 void __init calculate_max_logical_packages(void)
 {
 	int ncpus;
 
+	if (__max_logical_packages) {
+		pr_info("Max logical packages ACPI enumeration: %u\n",
+		       __max_logical_packages);
+		return;
+	}
+
 	/*
 	 * Today neither Intel nor AMD support heterogeneous systems so
 	 * extrapolate the boot cpu's data to all packages.
 	 */
 	ncpus = cpu_data(0).booted_cores * topology_max_smt_threads();
 	__max_logical_packages = DIV_ROUND_UP(total_cpus, ncpus);
-	pr_info("Max logical packages: %u\n", __max_logical_packages);
+
+	pr_info("Max logical packages estimated: %u\n", __max_logical_packages);
 }
 
 void __init native_smp_cpus_done(unsigned int max_cpus)
@@ -1619,6 +1693,8 @@ early_param("possible_cpus", _setup_poss
 
 	for (i = 0; i < possible; i++)
 		set_cpu_possible(i, true);
+
+	logical_packages_finish_setup(possible);
 }
 
 #ifdef CONFIG_HOTPLUG_CPU




  reply	other threads:[~2023-06-22 23:07 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-06-13  5:25 [Patch v2 1/2] smp: Add helper function to mark possible bad package number Feng Tang
2023-06-13  5:25 ` [Patch v2 2/2] x86/tsc: use logical_packages as a better estimation of socket numbers Feng Tang
2023-06-15  9:20   ` Peter Zijlstra
2023-06-16  6:53     ` Zhang, Rui
2023-06-16  8:02       ` Peter Zijlstra
2023-06-16  8:10         ` Peter Zijlstra
2023-06-16  9:19           ` Zhang, Rui
2023-06-16  9:42             ` Peter Zijlstra
2023-06-16 11:23               ` Zhang, Rui
2023-06-16 11:47                 ` Feng Tang
2023-06-16  8:22         ` Peter Zijlstra
2023-06-19 10:42         ` Feng Tang
2023-06-16  7:18     ` Feng Tang
2023-06-22 14:27       ` Thomas Gleixner
2023-06-22 23:07         ` Thomas Gleixner [this message]
2023-06-23 15:49           ` Zhang, Rui
     [not found]           ` <ZJW0gi5oQQbxf8Df@feng-clx>
2023-06-25 14:51             ` Feng Tang
2023-06-27 11:14               ` Thomas Gleixner
2023-06-29 13:27                 ` Feng Tang
2023-07-17 13:38                   ` Feng Tang
2023-07-26 19:37                     ` Thomas Gleixner
2023-07-27  1:24                       ` Feng Tang
2023-06-23 15:36         ` Zhang, Rui

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87edm36qqb.ffs@tglx \
    --to=tglx@linutronix.de \
    --cc=bp@alien8.de \
    --cc=dave.hansen@linux.intel.com \
    --cc=dwmw@amazon.co.uk \
    --cc=feng.tang@intel.com \
    --cc=hpa@zytor.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=paulmck@kernel.org \
    --cc=peterz@infradead.org \
    --cc=rui.zhang@intel.com \
    --cc=tim.c.chen@intel.com \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox