[patch 36/53] x86/cpu/topology: Rework possible CPU management

public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed

From: Thomas Gleixner <tglx@linutronix.de>
To: LKML <linux-kernel@vger.kernel.org>
Cc: x86@kernel.org, Tom Lendacky <thomas.lendacky@amd.com>,
	Andrew Cooper <andrew.cooper3@citrix.com>,
	Arjan van de Ven <arjan@linux.intel.com>,
	Huang Rui <ray.huang@amd.com>, Juergen Gross <jgross@suse.com>,
	Dimitri Sivanich <dimitri.sivanich@hpe.com>,
	Michael Kelley <mikelley@microsoft.com>,
	Sohil Mehta <sohil.mehta@intel.com>,
	K Prateek Nayak <kprateek.nayak@amd.com>,
	Kan Liang <kan.liang@linux.intel.com>,
	Zhang Rui <rui.zhang@intel.com>,
	"Paul E. McKenney" <paulmck@kernel.org>,
	Feng Tang <feng.tang@intel.com>,
	Andy Shevchenko <andy@infradead.org>
Subject: [patch 36/53] x86/cpu/topology: Rework possible CPU management
Date: Mon,  7 Aug 2023 15:53:30 +0200 (CEST)	[thread overview]
Message-ID: <20230807135028.328142041@linutronix.de> (raw)
In-Reply-To: 20230807130108.853357011@linutronix.de

Managing possible CPUs is an unreadable and incomprehensible maze. Aside from
that it's backwards because it applies command line limits after
registering all APICs.

Rewrite it so that it:

  - Applies the command line limits upfront so that only the allowed number
    of APIC IDs can be registered.

  - Applies possible late restrictions in an understandable way

  - Uses simple min_t() calculations which are trivial to follow.

  - Provides a separate function for resetting to UP mode late in the
    bringup process.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/include/asm/apic.h     |    5 +
 arch/x86/include/asm/topology.h |    4 
 arch/x86/kernel/cpu/topology.c  |  176 ++++++++++++++++++++++++----------------
 arch/x86/kernel/setup.c         |    9 --
 arch/x86/kernel/smpboot.c       |    6 -
 5 files changed, 120 insertions(+), 80 deletions(-)

--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -175,6 +175,9 @@ extern void topology_register_apic(u32 a
 extern void topology_register_boot_apic(u32 apic_id);
 extern int topology_hotplug_apic(u32 apic_id, u32 acpi_id);
 extern void topology_hotunplug_apic(unsigned int cpu);
+extern void topology_apply_cmdline_limits_early(void);
+extern void topology_init_possible_cpus(void);
+extern void topology_reset_possible_cpus_up(void);
 
 #else /* !CONFIG_X86_LOCAL_APIC */
 static inline void lapic_shutdown(void) { }
@@ -190,6 +193,8 @@ static inline void apic_intr_mode_init(v
 static inline void lapic_assign_system_vectors(void) { }
 static inline void lapic_assign_legacy_vector(unsigned int i, bool r) { }
 static inline bool apic_needs_pit(void) { return true; }
+static inline void topology_apply_cmdline_limits_early(void) { }
+static inline void topology_init_possible_cpus(void) { }
 #endif /* !CONFIG_X86_LOCAL_APIC */
 
 #ifdef CONFIG_X86_X2APIC
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -190,6 +190,9 @@ static inline bool topology_is_primary_t
 {
 	return cpumask_test_cpu(cpu, cpu_primary_thread_mask);
 }
+
+void topology_apply_cmdline_limits_early(void);
+
 #else /* CONFIG_SMP */
 #define topology_max_packages()			(1)
 static inline int
@@ -202,6 +205,7 @@ static inline int topology_max_smt_threa
 static inline bool topology_is_primary_thread(unsigned int cpu) { return true; }
 static inline bool topology_smt_supported(void) { return false; }
 static inline unsigned int topology_amd_nodes_per_pkg(void) { return 0; };
+static inline void topology_apply_cmdline_limits_early(void) { }
 #endif /* !CONFIG_SMP */
 
 static inline void arch_fix_phys_package_id(int num, u32 slot)
--- a/arch/x86/kernel/cpu/topology.c
+++ b/arch/x86/kernel/cpu/topology.c
@@ -5,6 +5,7 @@
 #include <xen/xen.h>
 
 #include <asm/apic.h>
+#include <asm/io_apic.h>
 #include <asm/mpspec.h>
 #include <asm/smp.h>
 
@@ -85,73 +86,6 @@ early_initcall(smp_init_primary_thread_m
 static inline void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid) { }
 #endif
 
-static int __initdata setup_possible_cpus = -1;
-
-/*
- * cpu_possible_mask should be static, it cannot change as cpu's
- * are onlined, or offlined. The reason is per-cpu data-structures
- * are allocated by some modules at init time, and don't expect to
- * do this dynamically on cpu arrival/departure.
- * cpu_present_mask on the other hand can change dynamically.
- * In case when cpu_hotplug is not compiled, then we resort to current
- * behaviour, which is cpu_possible == cpu_present.
- * - Ashok Raj
- *
- * Three ways to find out the number of additional hotplug CPUs:
- * - If the BIOS specified disabled CPUs in ACPI/mptables use that.
- * - The user can overwrite it with possible_cpus=NUM
- * - Otherwise don't reserve additional CPUs.
- * We do this because additional CPUs waste a lot of memory.
- * -AK
- */
-__init void prefill_possible_map(void)
-{
-	unsigned int num_processors = topo_info.nr_assigned_cpus;
-	unsigned int disabled_cpus = topo_info.nr_disabled_cpus;
-	int i, possible;
-
-	i = setup_max_cpus ?: 1;
-	if (setup_possible_cpus == -1) {
-		possible = topo_info.nr_assigned_cpus;
-#ifdef CONFIG_HOTPLUG_CPU
-		if (setup_max_cpus)
-			possible += num_processors;
-#else
-		if (possible > i)
-			possible = i;
-#endif
-	} else
-		possible = setup_possible_cpus;
-
-	total_cpus = max_t(int, possible, num_processors + disabled_cpus);
-
-	/* nr_cpu_ids could be reduced via nr_cpus= */
-	if (possible > nr_cpu_ids) {
-		pr_warn("%d Processors exceeds NR_CPUS limit of %u\n",
-			possible, nr_cpu_ids);
-		possible = nr_cpu_ids;
-	}
-
-#ifdef CONFIG_HOTPLUG_CPU
-	if (!setup_max_cpus)
-#endif
-	if (possible > i) {
-		pr_warn("%d Processors exceeds max_cpus limit of %u\n",
-			possible, setup_max_cpus);
-		possible = i;
-	}
-
-	set_nr_cpu_ids(possible);
-
-	pr_info("Allowing %d CPUs, %d hotplug CPUs\n",
-		possible, max_t(int, possible - num_processors, 0));
-
-	reset_cpu_possible_mask();
-
-	for (i = 0; i < possible; i++)
-		set_cpu_possible(i, true);
-}
-
 static int topo_lookup_cpuid(u32 apic_id)
 {
 	int i;
@@ -294,12 +228,114 @@ void topology_hotunplug_apic(unsigned in
 }
 #endif
 
-static int __init _setup_possible_cpus(char *str)
+#ifdef CONFIG_SMP
+static unsigned int max_possible_cpus __initdata = NR_CPUS;
+
+/**
+ * topology_apply_cmdline_limits_early - Apply topology command line limits early
+ *
+ * Ensure that command line limits are in effect before firmware parsing
+ * takes place.
+ */
+void __init topology_apply_cmdline_limits_early(void)
+{
+	unsigned int possible = nr_cpu_ids;
+
+	/* 'maxcpus=0' 'nosmp' 'nolapic' 'disableapic' 'noapic' */
+	if (!setup_max_cpus || ioapic_is_disabled || apic_is_disabled)
+		possible = 1;
+
+	/* 'possible_cpus=N' */
+	possible = min_t(unsigned int, max_possible_cpus, possible);
+
+	if (possible < nr_cpu_ids) {
+		pr_info("Limiting to %u possible CPUs\n", possible);
+		set_nr_cpu_ids(possible);
+	}
+}
+
+static __init bool restrict_to_up(void)
+{
+	if (!smp_found_config || ioapic_is_disabled)
+		return true;
+	/*
+	 * XEN PV is special as it does not advertise the local APIC
+	 * properly, but provides a fake topology for it so that the
+	 * infrastructure works. So don't apply the restrictions vs. APIC
+	 * here.
+	 */
+	if (xen_pv_domain())
+		return false;
+
+	return apic_is_disabled;
+}
+
+void __init topology_init_possible_cpus(void)
+{
+	unsigned int assigned = topo_info.nr_assigned_cpus;
+	unsigned int disabled = topo_info.nr_disabled_cpus;
+	unsigned int total = assigned + disabled;
+	unsigned int cpu, allowed = 1;
+
+	if (!restrict_to_up()) {
+		if (WARN_ON_ONCE(assigned > nr_cpu_ids)) {
+			disabled += assigned - nr_cpu_ids;
+			assigned = nr_cpu_ids;
+		}
+		allowed = min_t(unsigned int, total, nr_cpu_ids);
+	}
+
+	if (total > allowed)
+		pr_warn("%u possible CPUs exceed the limit of %u\n", total, allowed);
+
+	assigned = min_t(unsigned int, allowed, assigned);
+	disabled = allowed - assigned;
+
+	topo_info.nr_assigned_cpus = assigned;
+	topo_info.nr_disabled_cpus = disabled;
+
+	total_cpus = allowed;
+	set_nr_cpu_ids(allowed);
+
+	pr_info("Allowing %u present CPUs plus %u hotplug CPUs\n", assigned, disabled);
+	if (topo_info.nr_rejected_cpus)
+		pr_info("Rejected CPUs %u\n", topo_info.nr_rejected_cpus);
+
+	init_cpu_present(cpumask_of(0));
+	init_cpu_possible(cpumask_of(0));
+
+	for (cpu = 0; cpu < allowed; cpu++) {
+		u32 apicid = cpuid_to_apicid[cpu];
+
+		set_cpu_possible(cpu, true);
+
+		if (apicid == BAD_APICID)
+			continue;
+
+		set_cpu_present(cpu, test_bit(apicid, phys_cpu_present_map));
+	}
+}
+
+/*
+ * Late SMP disable after sizing CPU masks when APIC/IOAPIC setup failed.
+ */
+void __init topology_reset_possible_cpus_up(void)
 {
-	get_option(&str, &setup_possible_cpus);
+	init_cpu_present(cpumask_of(0));
+	init_cpu_possible(cpumask_of(0));
+
+	bitmap_zero(phys_cpu_present_map, MAX_LOCAL_APIC);
+	if (topo_info.boot_cpu_apic_id != BAD_APICID)
+		set_bit(topo_info.boot_cpu_apic_id, phys_cpu_present_map);
+}
+
+static int __init setup_possible_cpus(char *str)
+{
+	get_option(&str, &max_possible_cpus);
 	return 0;
 }
-early_param("possible_cpus", _setup_possible_cpus);
+early_param("possible_cpus", setup_possible_cpus);
+#endif
 
 static int __init apic_set_disabled_cpu_apicid(char *arg)
 {
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1258,6 +1258,8 @@ void __init setup_arch(char **cmdline_p)
 
 	early_quirks();
 
+	topology_apply_cmdline_limits_early();
+
 	/*
 	 * Parse SMP configuration. Try ACPI first and then the platform
 	 * specific parser.
@@ -1265,13 +1267,10 @@ void __init setup_arch(char **cmdline_p)
 	acpi_boot_init();
 	x86_init.mpparse.parse_smp_cfg();
 
-	/*
-	 * Systems w/o ACPI and mptables might not have it mapped the local
-	 * APIC yet, but prefill_possible_map() might need to access it.
-	 */
+	/* Last opportunity to detect and map the local APIC */
 	init_apic_mappings();
 
-	prefill_possible_map();
+	topology_init_possible_cpus();
 
 	init_cpu_to_node();
 	init_gi_nodes();
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1156,11 +1156,7 @@ static __init void disable_smp(void)
 	pr_info("SMP disabled\n");
 
 	disable_ioapic_support();
-
-	init_cpu_present(cpumask_of(0));
-	init_cpu_possible(cpumask_of(0));
-
-	reset_phys_cpu_present_map(smp_found_config ? boot_cpu_physical_apicid : 0);
+	topology_reset_possible_cpus_up();
 
 	cpumask_set_cpu(0, topology_sibling_cpumask(0));
 	cpumask_set_cpu(0, topology_core_cpumask(0));

next prev parent reply	other threads:[~2023-08-07 13:54 UTC|newest]

Thread overview: 98+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-08-07 13:52 [patch 00/53] x86/topology: The final installment Thomas Gleixner
2023-08-07 13:52 ` [patch 01/53] x86/cpu/topology: Cure off by one in fake_topology() Thomas Gleixner
2023-08-07 13:52 ` [patch 02/53] x86/cpu/topology: Make the APIC mismatch warnings complete Thomas Gleixner
2023-08-07 14:28   ` Arjan van de Ven
2023-08-07 14:54     ` Thomas Gleixner
2023-08-07 13:52 ` [patch 03/53] x86/platform/ce4100: Dont override x86_init.mpparse.setup_ioapic_ids Thomas Gleixner
2023-08-07 15:20   ` Andy Shevchenko
2023-08-07 13:52 ` [patch 04/53] x86/ioapic: Replace some more set bit nonsense Thomas Gleixner
2023-08-07 13:52 ` [patch 05/53] x86/apic: Get rid of get_physical_broadcast() Thomas Gleixner
2023-08-07 15:24   ` Andy Shevchenko
2023-08-07 13:52 ` [patch 06/53] x86/ioapic: Make io_apic_get_unique_id() simpler Thomas Gleixner
2023-08-07 13:52 ` [patch 07/53] x86/ioapic: Simplify setup_ioapic_ids_from_mpc_nocheck() Thomas Gleixner
2023-08-07 13:52 ` [patch 08/53] x86/apic: Remove check_apicid_used() and ioapic_phys_id_map() Thomas Gleixner
2023-08-07 13:52 ` [patch 09/53] x86/mpparse: Rename default_find_smp_config() Thomas Gleixner
2023-08-07 16:03   ` Andy Shevchenko
2023-08-07 17:21     ` Thomas Gleixner
2023-08-07 13:52 ` [patch 10/53] x86/mpparse: Provide separate early/late callbacks Thomas Gleixner
2023-08-07 13:52 ` [patch 11/53] x86/mpparse: Prepare for callback separation Thomas Gleixner
2023-08-07 13:52 ` [patch 12/53] x86/dtb: Rename x86_dtb_init() Thomas Gleixner
2023-08-07 13:52 ` [patch 13/53] x86/platform/ce4100: Prepare for separate mpparse callbacks Thomas Gleixner
2023-08-07 13:52 ` [patch 14/53] x86/platform/intel-mid: " Thomas Gleixner
2023-08-07 16:07   ` Andy Shevchenko
2023-08-07 13:52 ` [patch 15/53] x86/jailhouse: " Thomas Gleixner
2023-08-07 13:52 ` [patch 16/53] x86/xen/smp_pv: " Thomas Gleixner
2023-08-07 13:53 ` [patch 17/53] x86/mpparse: Switch to new init callbacks Thomas Gleixner
2023-08-07 13:53 ` [patch 18/53] x86/mm/numa: Move early mptable evaluation into common code Thomas Gleixner
2023-08-07 13:53 ` [patch 19/53] x86/mpparse: Remove the physid_t bitmap wrapper Thomas Gleixner
2023-08-08 11:37   ` Andy Shevchenko
2023-08-07 13:53 ` [patch 20/53] x86/apic: Remove the pointless writeback of boot_cpu_physical_apicid Thomas Gleixner
2023-08-07 13:53 ` [patch 21/53] x86/apic: Remove yet another dubious callback Thomas Gleixner
2023-08-07 13:53 ` [patch 22/53] x86/apic: Use a proper define for invalid ACPI CPU ID Thomas Gleixner
2023-08-07 13:53 ` [patch 23/53] x86/cpu/topology: Move registration out of APIC code Thomas Gleixner
2023-08-07 13:53 ` [patch 24/53] x86/cpu/topology: Provide separate APIC registration functions Thomas Gleixner
2023-08-11 12:32   ` Zhang, Rui
2023-08-07 13:53 ` [patch 25/53] x86/acpi: Use new " Thomas Gleixner
2023-08-07 15:27   ` Peter Zijlstra
2023-08-07 15:35     ` Andrew Cooper
2023-08-07 15:41     ` Thomas Gleixner
2023-08-07 13:53 ` [patch 26/53] x86/jailhouse: Use new APIC registration function Thomas Gleixner
2023-08-07 13:53 ` [patch 27/53] x86/of: Use new APIC registration functions Thomas Gleixner
2023-08-07 13:53 ` [patch 28/53] x86/mpparse: Use new APIC registration function Thomas Gleixner
2023-08-07 13:53 ` [patch 29/53] x86/acpi: Dont invoke topology_register_apic() for XEN PV Thomas Gleixner
2023-08-07 13:53 ` [patch 30/53] x86/xen/smp_pv: Register fake APICs Thomas Gleixner
2023-08-07 13:53 ` [patch 31/53] x86/cpu/topology: Confine topology information Thomas Gleixner
2023-08-07 13:53 ` [patch 32/53] x86/cpu/topology: Simplify APIC registration Thomas Gleixner
2023-08-07 13:53 ` [patch 33/53] x86/cpu/topology: Use a data structure for topology info Thomas Gleixner
2023-08-07 13:53 ` [patch 34/53] x86/smpboot: Make error message actually useful Thomas Gleixner
2023-08-07 13:53 ` [patch 35/53] x86/cpu/topology: Sanitize the APIC admission logic Thomas Gleixner
2023-08-07 13:53 ` Thomas Gleixner [this message]
2023-08-14  8:29   ` [patch 36/53] x86/cpu/topology: Rework possible CPU management Zhang, Rui
2023-08-07 13:53 ` [patch 37/53] x86/cpu: Detect real BSP on crash kernels Thomas Gleixner
2024-01-08 14:11   ` Zhang, Rui
2024-01-08 14:54     ` Thomas Gleixner
2024-01-08 16:13       ` Thomas Gleixner
2024-01-09  1:54         ` Zhang, Rui
2024-01-10 14:19           ` Thomas Gleixner
2024-01-10 15:14             ` Thomas Gleixner
2024-01-11  1:52               ` Zhang, Rui
2024-01-12  9:14               ` Zhang, Rui
2024-01-12 15:39                 ` Thomas Gleixner
2024-01-13  7:35                   ` Zhang, Rui
2024-01-15  9:41                     ` Thomas Gleixner
2023-08-07 13:53 ` [patch 38/53] x86/topology: Add a mechanism to track topology via APIC IDs Thomas Gleixner
2023-08-07 13:53 ` [patch 39/53] x86/cpu/topology: Reject unknown APIC IDs on ACPI hotplug Thomas Gleixner
2023-08-07 13:53 ` [patch 40/53] x86/cpu/topology: Assign hotpluggable CPUIDs during init Thomas Gleixner
2023-08-07 13:53 ` [patch 41/53] x86/xen/smp_pv: Count number of vCPUs early Thomas Gleixner
2023-08-07 13:53 ` [patch 42/53] x86/cpu/topology: Let XEN/PV use topology from CPUID/MADT Thomas Gleixner
2023-08-07 13:53 ` [patch 43/53] x86/cpu/topology: Use topology bitmaps for sizing Thomas Gleixner
2023-08-07 13:53 ` [patch 44/53] x86/cpu/topology: Mop up primary thread mask handling Thomas Gleixner
2023-08-07 13:53 ` [patch 45/53] x86/cpu/topology: Simplify cpu_mark_primary_thread() Thomas Gleixner
2023-08-07 13:53 ` [patch 46/53] x86/cpu/topology: Provide logical pkg/die mapping Thomas Gleixner
2023-08-07 13:53 ` [patch 47/53] x86/cpu/topology: Use topology logical mapping mechanism Thomas Gleixner
2023-08-07 13:53 ` [patch 48/53] x86/cpu/topology: Retrieve cores per package from topology bitmaps Thomas Gleixner
2023-08-07 13:53 ` [patch 49/53] x86: Use topology functions instead of smp_num_siblings where applicable Thomas Gleixner
2023-08-07 13:53 ` [patch 50/53] x86/cpu/topology: Rename smp_num_siblings Thomas Gleixner
2023-08-07 13:53 ` [patch 51/53] x86/cpu/topology: Rename topology_max_die_per_package() Thomas Gleixner
2023-08-07 13:53 ` [patch 52/53] x86/cpu/topology: Provide __num_[cores|threads]_per_package Thomas Gleixner
2023-08-07 13:53 ` [patch 53/53] x86/cpu/topology: Get rid of cpuinfo::x86_max_cores Thomas Gleixner
2023-08-11 15:44   ` Zhang, Rui
2023-12-14 14:00   ` Zhang, Rui
2023-08-08  7:40 ` [patch 00/53] x86/topology: The final installment Juergen Gross
2023-08-08 11:20   ` Andrew Cooper
2023-08-08 18:55     ` Thomas Gleixner
2023-08-08 18:29 ` Sohil Mehta
2023-08-08 19:10   ` Thomas Gleixner
2023-08-08 20:30     ` Sohil Mehta
2023-08-08 20:41       ` Thomas Gleixner
2023-08-08 22:10         ` Peter Zijlstra
2023-08-08 22:58           ` Sohil Mehta
2023-08-08 23:20             ` Thomas Gleixner
2023-08-09 16:55               ` Sohil Mehta
2023-08-10  3:28               ` Zhang, Rui
2023-08-09 16:50             ` Qiuxu Zhuo
2023-08-09 17:23               ` Sohil Mehta
2023-08-10  1:33                 ` Zhuo, Qiuxu
2023-08-08 20:57       ` Thomas Gleixner
2023-08-09 16:12 ` Qiuxu Zhuo
2023-08-12 13:51 ` Michael Kelley (LINUX)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230807135028.328142041@linutronix.de \
    --to=tglx@linutronix.de \
    --cc=andrew.cooper3@citrix.com \
    --cc=andy@infradead.org \
    --cc=arjan@linux.intel.com \
    --cc=dimitri.sivanich@hpe.com \
    --cc=feng.tang@intel.com \
    --cc=jgross@suse.com \
    --cc=kan.liang@linux.intel.com \
    --cc=kprateek.nayak@amd.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mikelley@microsoft.com \
    --cc=paulmck@kernel.org \
    --cc=ray.huang@amd.com \
    --cc=rui.zhang@intel.com \
    --cc=sohil.mehta@intel.com \
    --cc=thomas.lendacky@amd.com \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox