public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Thomas Gleixner <tglx@linutronix.de>
To: Laura Nao <laura.nao@collabora.com>
Cc: kernel@collabora.com, laura.nao@collabora.com,
	linux-kernel@vger.kernel.org, regressions@leemhuis.info,
	regressions@lists.linux.dev, x86@kernel.org
Subject: Re: [REGRESSION] mainline boot regression on AMD Stoney Ridge Chromebooks
Date: Wed, 10 Apr 2024 15:57:24 +0200	[thread overview]
Message-ID: <878r1l48xn.ffs@tglx> (raw)
In-Reply-To: <20240410081529.126363-1-laura.nao@collabora.com>

Laura!

On Wed, Apr 10 2024 at 10:15, Laura Nao wrote:
> On 4/9/24 14:25, Thomas Gleixner wrote:
>> Can you please replace that patch with the one below?
>
> So, with this patch applied on top of ace278e7eca6 the kernel doesn't
> boot anymore - reference test job: 
> https://lava.collabora.dev/scheduler/job/13324010
>
> I see the only change between the second and third patch you provided,
> besides the debug prints, is:
>
> -	if (!topo_is_converted(c))
> -		return;
> -

Right. So this limits the area to search significantly.

> Printing the debug information without this probably doesn't really help, 
> but just in case it's useful: I tried excluding the change above from the
> patch while leaving everything else unchanged - reference test job:
> https://lava.collabora.dev/scheduler/job/13324298 (also pasted the
> kernel log here for easier consultation:
> https://pastebin.com/raw/TQBDvCah)
>
> Hope this helps,

It does. Good idea!

I just moved the exit check a bit so we should see the scan info. That
should tell me what goes south.

Thanks,

        tglx
---
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1616,6 +1616,13 @@ void __init early_cpu_init(void)
 #endif
 	}
 	early_identify_cpu(&boot_cpu_data);
+
+	pr_info("Max cores: %u\n", boot_cpu_data.x86_max_cores);
+	pr_info("pkg %u die %u core %u nid %u\n", boot_cpu_data.topo.pkg_id,
+		boot_cpu_data.topo.die_id, boot_cpu_data.topo.core_id,
+		boot_cpu_data.topo.amd_node_id);
+	pr_info("SNS %u\n", smp_num_siblings);
+	pr_info("NPP %u MDPP %u\n", __amd_nodes_per_pkg, __max_die_per_package);
 }
 
 static bool detect_null_seg_behavior(void)
--- a/arch/x86/kernel/cpu/topology_amd.c
+++ b/arch/x86/kernel/cpu/topology_amd.c
@@ -29,7 +29,17 @@ static bool parse_8000_0008(struct topo_
 	if (!sft)
 		sft = get_count_order(ecx.cpu_nthreads + 1);
 
-	topology_set_dom(tscan, TOPO_SMT_DOMAIN, sft, ecx.cpu_nthreads + 1);
+	/*
+	 * cpu_nthreads describes the number of threads in the package.
+	 * sft is the number of APIC ID bits per package.
+	 *
+	 * As the number of actual threads per core is not described in
+	 * this leaf, just set the CORE domain shift and let the later
+	 * parsers set SMT shift. Assume one thread per core by default
+	 * which is correct if there are no other CPUID leafs to parse.
+	 */
+	topology_update_dom(tscan, TOPO_SMT_DOMAIN, 0, 1);
+	topology_set_dom(tscan, TOPO_CORE_DOMAIN, sft, ecx.cpu_nthreads + 1);
 	return true;
 }
 
@@ -73,12 +83,14 @@ static bool parse_8000_001e(struct topo_
 	tscan->c->topo.initial_apicid = leaf.ext_apic_id;
 
 	/*
-	 * If leaf 0xb is available, then SMT shift is set already. If not
-	 * take it from ecx.threads_per_core and use topo_update_dom() -
-	 * topology_set_dom() would propagate and overwrite the already
-	 * propagated CORE level.
+	 * If leaf 0xb is available, then the domain shifts are set
+	 * already and there is nothing to do here.
 	 */
 	if (!has_0xb) {
+		/*
+		 * Leaf 0x80000008 set the CORE domain shift already.
+		 * Update the SMT domain, but do not propagate it.
+		 */
 		unsigned int nthreads = leaf.core_nthreads + 1;
 
 		topology_update_dom(tscan, TOPO_SMT_DOMAIN, get_count_order(nthreads), nthreads);
--- a/arch/x86/kernel/cpu/topology_common.c
+++ b/arch/x86/kernel/cpu/topology_common.c
@@ -133,6 +133,10 @@ static void parse_topology(struct topo_s
 	tscan->ebx1_nproc_shift = get_count_order(ebx.nproc);
 
 	switch (c->x86_vendor) {
+	case X86_VENDOR_AMD:
+		if (IS_ENABLED(CONFIG_CPU_SUP_AMD))
+			cpu_parse_topology_amd(tscan);
+		break;
 	case X86_VENDOR_CENTAUR:
 	case X86_VENDOR_ZHAOXIN:
 		parse_legacy(tscan);
@@ -162,6 +166,9 @@ static void topo_set_ids(struct topo_sca
 
 	if (c->x86_vendor == X86_VENDOR_AMD)
 		cpu_topology_fixup_amd(tscan);
+
+	pr_info("pkg %u die %u core %u nid %u\n", c->topo.pkg_id, c->topo.die_id,
+		c->topo.core_id, c->topo.amd_node_id);
 }
 
 static void topo_set_max_cores(struct topo_scan *tscan)
@@ -175,6 +182,7 @@ static void topo_set_max_cores(struct to
 	 */
 	tscan->c->x86_max_cores = tscan->dom_ncpus[TOPO_DIEGRP_DOMAIN] >>
 		x86_topo_system.dom_shifts[TOPO_SMT_DOMAIN];
+	pr_info("Max cores: %u\n", tscan->c->x86_max_cores);
 }
 
 void cpu_parse_topology(struct cpuinfo_x86 *c)
@@ -215,20 +223,26 @@ void __init cpu_init_topology(struct cpu
 
 	parse_topology(&tscan, true);
 
-	if (!topo_is_converted(c))
-		return;
-
 	/* Copy the shift values and calculate the unit sizes. */
 	memcpy(x86_topo_system.dom_shifts, tscan.dom_shifts, sizeof(x86_topo_system.dom_shifts));
 
 	dom = TOPO_SMT_DOMAIN;
 	x86_topo_system.dom_size[dom] = 1U << x86_topo_system.dom_shifts[dom];
+	pr_info("Dom %u Sft: %u Sz: %u\n", dom, x86_topo_system.dom_size[dom],
+		x86_topo_system.dom_shifts[dom]);
 
 	for (dom++; dom < TOPO_MAX_DOMAIN; dom++) {
 		sft = x86_topo_system.dom_shifts[dom] - x86_topo_system.dom_shifts[dom - 1];
 		x86_topo_system.dom_size[dom] = 1U << sft;
+		pr_info("Dom %u Sft: %u Sz: %u\n", dom, x86_topo_system.dom_size[dom],
+			x86_topo_system.dom_shifts[dom]);
 	}
 
+	pr_info("NPP %u\n", tscan.amd_nodes_per_pkg);
+
+	if (!topo_is_converted(c))
+		return;
+
 	topo_set_ids(&tscan);
 	topo_set_max_cores(&tscan);
 
@@ -238,6 +252,7 @@ void __init cpu_init_topology(struct cpu
 	 * changes further down the road to get it right during early boot.
 	 */
 	smp_num_siblings = tscan.dom_ncpus[TOPO_SMT_DOMAIN];
+	pr_info("SNS %u\n", smp_num_siblings);
 
 	/*
 	 * Neither it's clear whether there are as many dies as the APIC
@@ -252,4 +267,6 @@ void __init cpu_init_topology(struct cpu
 	 */
 	if (c->x86_vendor == X86_VENDOR_AMD || c->x86_vendor == X86_VENDOR_HYGON)
 		__amd_nodes_per_pkg = __max_die_per_package = tscan.amd_nodes_per_pkg;
+
+	pr_info("NPP %u MDPP %u\n", __amd_nodes_per_pkg, __max_die_per_package);
 }

  reply	other threads:[~2024-04-10 13:57 UTC|newest]

Thread overview: 37+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-03-22 17:52 [REGRESSION] mainline boot regression on AMD Stoney Ridge Chromebooks Laura Nao
2024-03-28  9:44 ` Laura Nao
2024-03-28 11:50   ` Laura Nao
2024-04-04  8:24     ` Linux regression tracking (Thorsten Leemhuis)
2024-04-04  9:26       ` Laura Nao
2024-04-04 13:01       ` Thomas Gleixner
2024-04-04 13:05         ` Thomas Gleixner
2024-04-04 15:23           ` Laura Nao
2024-04-04 16:14             ` Thomas Gleixner
2024-04-04 18:06               ` Thomas Gleixner
2024-04-04 19:14                 ` Thomas Gleixner
2024-04-04 20:05                   ` Thomas Gleixner
2024-04-05  8:14                     ` Laura Nao
2024-04-05  8:42                       ` Thomas Gleixner
2024-04-05 10:32                         ` Laura Nao
2024-04-05 13:38                           ` Thomas Gleixner
2024-04-05 13:58                             ` Laura Nao
2024-04-05 14:59                               ` Thomas Gleixner
2024-04-08  8:20                                 ` Thomas Gleixner
2024-04-08 11:06                                   ` Laura Nao
2024-04-08 14:19                                     ` Thomas Gleixner
2024-04-09 10:07                                       ` Laura Nao
2024-04-09 12:25                                         ` Thomas Gleixner
2024-04-10  8:15                                           ` Laura Nao
2024-04-10 13:57                                             ` Thomas Gleixner [this message]
2024-04-10 16:11                                               ` Laura Nao
2024-04-10 19:34                                                 ` Thomas Gleixner
2024-04-10 19:45                                                   ` [patch 0/2] x86/cpu/amd: Fixup the topology rework fallout Thomas Gleixner
2024-04-10 19:45                                                     ` [patch 1/2] x86/cpu/amd: Make the CPUID 0x80000008 parser correct Thomas Gleixner
2024-04-11 12:45                                                       ` [tip: x86/urgent] " tip-bot2 for Thomas Gleixner
2024-04-12 10:12                                                       ` tip-bot2 for Thomas Gleixner
2024-04-10 19:45                                                     ` [patch 2/2] x86/cpu/amd: Make the NODEID_MSR union actually work Thomas Gleixner
2024-04-11 12:45                                                       ` [tip: x86/urgent] " tip-bot2 for Thomas Gleixner
2024-04-12 10:12                                                       ` tip-bot2 for Thomas Gleixner
2024-04-11 11:27                                                     ` [patch 0/2] x86/cpu/amd: Fixup the topology rework fallout Laura Nao
2024-04-11 11:37                                                       ` Linux regression tracking (Thorsten Leemhuis)
2024-04-11 12:14                                                         ` Thomas Gleixner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=878r1l48xn.ffs@tglx \
    --to=tglx@linutronix.de \
    --cc=kernel@collabora.com \
    --cc=laura.nao@collabora.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=regressions@leemhuis.info \
    --cc=regressions@lists.linux.dev \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox