The Linux Kernel Mailing List
 help / color / mirror / Atom feed
From: Chen Yu <yu.c.chen@intel.com>
To: kprateek.nayak@amd.com, tim.c.chen@linux.intel.com, peterz@infradead.org
Cc: pan.deng@intel.com, mingo@kernel.org,
	linux-kernel@vger.kernel.org, tianyou.li@intel.com
Subject: [PATCH 3/3] x86/sbm: Derive leaf granularity from LLC cacheinfo instead of topology domain
Date: Sun, 10 May 2026 23:59:19 +0800	[thread overview]
Message-ID: <20260510155920.2587431-4-yu.c.chen@intel.com> (raw)
In-Reply-To: <20260510155920.2587431-1-yu.c.chen@intel.com>

From: Tim Chen <tim.c.chen@linux.intel.com>

Instead of using the topology domain shift (TOPO_TILE_DOMAIN), which may
not align with the actual LLC boundary, derive the SBM parameters from the
authoritative LLC cache identification that the kernel already performs
during boot CPU identification.

The LLC shift is recorded for the boot CPU in:

- intel_cacheinfo_0x4(): iterates CPUID 0x04 leaves, identifies L2/L3
- cacheinfo_amd_init_llc_id() and cacheinfo_hygon_init_llc_id():
  identify the LLC for AMD and Hygon CPUs

In addition, if an architecture does not define its own SBM index
helpers, provide a default implementation, similar to cpumask. Rename
arch_sbm_max_apicid to sbm_max_apicid and move it into an x86 file,
as it is specific to x86.
---
 arch/x86/include/asm/apic.h     |  1 +
 arch/x86/include/asm/sbm.h      | 13 ++++---------
 arch/x86/kernel/cpu/cacheinfo.c | 20 +++++++++++++++++++-
 arch/x86/kernel/cpu/common.c    |  8 ++++++++
 arch/x86/kernel/cpu/topology.c  | 12 ++----------
 include/linux/sbm.h             |  9 +++++++--
 kernel/sched/core.c             |  2 ++
 kernel/sched/fair.c             |  6 ++++++
 kernel/sched/sched.h            |  1 +
 kernel/sched/topology.c         |  6 ++++++
 lib/sbm.c                       | 28 ++++++++++++++++++++++++----
 11 files changed, 80 insertions(+), 26 deletions(-)

diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 24012a91ac1e..90406d8d6af1 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -55,6 +55,7 @@ static inline void x86_32_probe_apic(void) { }
 
 extern u32 cpuid_to_apicid[];
 extern u32 apicid_to_cpuid[];
+extern unsigned int sbm_max_apicid;
 
 #define CPU_ACPIID_INVALID	U32_MAX
 
diff --git a/arch/x86/include/asm/sbm.h b/arch/x86/include/asm/sbm.h
index 9a4d283347d1..f48d3a985972 100644
--- a/arch/x86/include/asm/sbm.h
+++ b/arch/x86/include/asm/sbm.h
@@ -1,12 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 #include <asm/apic.h>
 
-static __always_inline u32 arch_sbm_cpu_to_idx(unsigned int cpu)
-{
-	return cpuid_to_apicid[cpu];
-}
-
-static __always_inline u32 arch_sbm_idx_to_cpu(unsigned int idx)
-{
-	return apicid_to_cpuid[idx];
-}
+#define arch_sbm_cpu_to_idx(cpu) \
+    ((u32)(cpuid_to_apicid[(cpu)]))
+#define arch_sbm_idx_to_cpu(idx) \
+    ((u32)(apicid_to_cpuid[(idx)]))
diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c
index 51a95b07831f..fa59fa6828a6 100644
--- a/arch/x86/kernel/cpu/cacheinfo.c
+++ b/arch/x86/kernel/cpu/cacheinfo.c
@@ -21,6 +21,8 @@
 #include <asm/smp.h>
 #include <asm/tlbflush.h>
 
+#include <linux/sbm.h>
+
 #include "cpu.h"
 
 /* Shared last level cache maps */
@@ -317,12 +319,16 @@ void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, u16 die_id)
 	if (c->x86 < 0x17) {
 		/* Pre-Zen: LLC is at the node level */
 		c->topo.llc_id = die_id;
+		if (c == &boot_cpu_data)
+			arch_sbm_shift = topology_get_domain_shift(TOPO_DIE_DOMAIN);
 	} else if (c->x86 == 0x17 && c->x86_model <= 0x1F) {
 		/*
 		 * Family 17h up to 1F models: LLC is at the core
 		 * complex level.  Core complex ID is ApicId[3].
 		 */
 		c->topo.llc_id = c->topo.apicid >> 3;
+		if (c == &boot_cpu_data)
+			arch_sbm_shift = 3;
 	} else {
 		/*
 		 * Newer families: LLC ID is calculated from the number
@@ -331,8 +337,11 @@ void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, u16 die_id)
 		u32 llc_index = find_num_cache_leaves(c) - 1;
 		struct _cpuid4_info id4 = {};
 
-		if (!amd_fill_cpuid4_info(llc_index, &id4))
+		if (!amd_fill_cpuid4_info(llc_index, &id4)) {
 			c->topo.llc_id = get_cache_id(c->topo.apicid, &id4);
+			if (c == &boot_cpu_data)
+				arch_sbm_shift = get_count_order(1 + id4.eax.split.num_threads_sharing);
+		}
 	}
 }
 
@@ -346,6 +355,8 @@ void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c)
 	 * at the core complex level.  Core complex ID is ApicId[3].
 	 */
 	c->topo.llc_id = c->topo.apicid >> 3;
+	if (c == &boot_cpu_data)
+		arch_sbm_shift = 3;
 }
 
 void init_amd_cacheinfo(struct cpuinfo_x86 *c)
@@ -425,6 +436,7 @@ static bool intel_cacheinfo_0x4(struct cpuinfo_x86 *c)
 	struct cpu_cacheinfo *ci = get_cpu_cacheinfo(c->cpu_index);
 	unsigned int l2_id = BAD_APICID, l3_id = BAD_APICID;
 	unsigned int l1d = 0, l1i = 0, l2 = 0, l3 = 0;
+	unsigned int llc_nthreads = 0;
 
 	if (c->cpuid_level < 4)
 		return false;
@@ -461,6 +473,7 @@ static bool intel_cacheinfo_0x4(struct cpuinfo_x86 *c)
 		case 3:
 			l3 = id4.size / 1024;
 			l3_id = calc_cache_topo_id(c, &id4);
+			llc_nthreads = 1 + id4.eax.split.num_threads_sharing;
 			break;
 		default:
 			break;
@@ -469,6 +482,11 @@ static bool intel_cacheinfo_0x4(struct cpuinfo_x86 *c)
 
 	c->topo.l2c_id = l2_id;
 	c->topo.llc_id = (l3_id == BAD_APICID) ? l2_id : l3_id;
+
+	/* Save LLC shift for SBM (boot CPU only) */
+	if (c == &boot_cpu_data && llc_nthreads)
+		arch_sbm_shift = get_count_order(llc_nthreads);
+
 	intel_cacheinfo_done(c, l3, l2, l1i, l1d);
 	return true;
 }
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index a8ff4376c286..5c590d8a3e78 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -28,6 +28,7 @@
 #include <linux/stackprotector.h>
 #include <linux/utsname.h>
 #include <linux/efi.h>
+#include <linux/sbm.h>
 
 #include <asm/alternative.h>
 #include <asm/cmdline.h>
@@ -70,6 +71,7 @@
 #include <asm/set_memory.h>
 #include <asm/traps.h>
 #include <asm/sev.h>
+#include <asm/apic.h>
 #include <asm/tdx.h>
 #include <asm/posted_intr.h>
 #include <asm/runtime-const.h>
@@ -2561,6 +2563,12 @@ void __init arch_cpu_finalize_init(void)
 
 	identify_boot_cpu();
 
+	arch_sbm_leafs = 1 + (sbm_max_apicid >> arch_sbm_shift);
+	arch_sbm_mask  = (1 << arch_sbm_shift) - 1;
+	arch_sbm_bits  = arch_sbm_shift;
+	pr_info("SBM: shift(%d) leafs(%d) APIC(%x)\n",
+		arch_sbm_shift, arch_sbm_leafs, sbm_max_apicid);
+
 	select_idle_routine();
 
 	/*
diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c
index 751b7517f2d5..9245456791b0 100644
--- a/arch/x86/kernel/cpu/topology.c
+++ b/arch/x86/kernel/cpu/topology.c
@@ -50,10 +50,7 @@ DECLARE_BITMAP(phys_cpu_present_map, MAX_LOCAL_APIC) __read_mostly;
 u32 cpuid_to_apicid[] __ro_after_init = { [0 ... NR_CPUS - 1] = BAD_APICID, };
 u32 apicid_to_cpuid[MAX_LOCAL_APIC] = { 0 };
 
-u32 arch_sbm_leafs	__ro_after_init;
-u32 arch_sbm_shift	__ro_after_init;
-u32 arch_sbm_mask	__ro_after_init;
-u32 arch_sbm_bits	__ro_after_init;
+u32 sbm_max_apicid	__ro_after_init;
 
 /* Bitmaps to mark registered APICs at each topology domain */
 static struct { DECLARE_BITMAP(map, MAX_LOCAL_APIC); } apic_maps[TOPO_MAX_DOMAIN] __ro_after_init;
@@ -565,12 +562,7 @@ void __init topology_init_possible_cpus(void)
 	for_each_possible_cpu(cpu)
 		apicid = max(apicid, cpuid_to_apicid[cpu]);
 
-	arch_sbm_shift = x86_topo_system.dom_shifts[TOPO_DIE_DOMAIN - 1];
-	arch_sbm_leafs = 1 + (apicid >> arch_sbm_shift);
-	arch_sbm_mask = (1 << arch_sbm_shift) - 1;
-	arch_sbm_bits = arch_sbm_shift;
-
-	pr_info("SBM: shift(%d) leafs(%d) APIC(%x)\n", arch_sbm_shift, arch_sbm_leafs, apicid);
+	sbm_max_apicid = apicid;
 }
 
 /*
diff --git a/include/linux/sbm.h b/include/linux/sbm.h
index 8d60f4bc7004..be940bcf1ae9 100644
--- a/include/linux/sbm.h
+++ b/include/linux/sbm.h
@@ -12,8 +12,13 @@ extern unsigned int arch_sbm_shift;
 extern unsigned int arch_sbm_mask;
 extern unsigned int arch_sbm_bits;
 
-extern unsigned int arch_sbm_cpu_to_idx(unsigned int cpu);
-extern unsigned int arch_sbm_idx_to_cpu(unsigned int idx);
+#ifndef arch_sbm_cpu_to_idx
+#define arch_sbm_cpu_to_idx(cpu) (cpu)
+#endif
+
+#ifndef arch_sbm_idx_to_cpu
+#define arch_sbm_idx_to_cpu(idx) (idx)
+#endif
 
 enum sbm_type {
 	st_root = 0,
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 496dff740dca..2be95fa3c002 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -8567,6 +8567,8 @@ void __init sched_init_smp(void)
 
 	sched_init_dl_servers();
 
+	init_sched_fair_class_smp();
+
 	sched_smp_initialized = true;
 }
 
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 474ee0362998..ae95610721b4 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -14033,6 +14033,12 @@ __init void init_sched_fair_class(void)
 #ifdef CONFIG_NO_HZ_COMMON
 	nohz.next_balance = jiffies;
 	nohz.next_blocked = jiffies;
+#endif
+}
+
+void __init init_sched_fair_class_smp(void)
+{
+#ifdef CONFIG_NO_HZ_COMMON
 	nohz.sbm = sbm_alloc();
 #endif
 }
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 43bbf0693cca..9e45396a1512 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2878,6 +2878,7 @@ extern void update_max_interval(void);
 extern void init_sched_dl_class(void);
 extern void init_sched_rt_class(void);
 extern void init_sched_fair_class(void);
+extern void init_sched_fair_class_smp(void);
 
 extern void resched_curr(struct rq *rq);
 extern void resched_curr_lazy(struct rq *rq);
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 32dcddaead82..f62a10c869fa 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -5,6 +5,7 @@
 
 #include <linux/sched/isolation.h>
 #include <linux/bsearch.h>
+#include <linux/sbm.h>
 #include "sched.h"
 
 DEFINE_MUTEX(sched_domains_mutex);
@@ -21,6 +22,11 @@ void sched_domains_mutex_unlock(void)
 static cpumask_var_t sched_domains_tmpmask;
 static cpumask_var_t sched_domains_tmpmask2;
 
+u32 arch_sbm_leafs     __ro_after_init;
+u32 arch_sbm_shift     __ro_after_init;
+u32 arch_sbm_mask      __ro_after_init;
+u32 arch_sbm_bits      __ro_after_init;
+
 static int __init sched_debug_setup(char *str)
 {
 	sched_debug_verbose = true;
diff --git a/lib/sbm.c b/lib/sbm.c
index 76670ce14291..45003e7b5621 100644
--- a/lib/sbm.c
+++ b/lib/sbm.c
@@ -1,13 +1,33 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 #include <linux/sbm.h>
+#include <linux/cpumask.h>
 
 struct sbm *sbm_alloc(void)
 {
-	unsigned int nr = arch_sbm_leafs;
-	unsigned int nbits = 1U << arch_sbm_shift;
-	unsigned int nlongs = BITS_TO_LONGS(nbits);
-	struct sbm_root *root = kzalloc_flex(*root, leafs, nr);
+	unsigned int nr;
+	unsigned int nbits;
+	unsigned int nlongs;
+	struct sbm_root *root;
 	struct sbm_leaf *leaf;
+
+	if (!arch_sbm_shift) {
+		unsigned int max_idx = num_possible_cpus();
+
+		/*
+		 * unsigned long is the base unit for bitmap in sbm_leaf.
+		 * Use that for default bitmap size for compact bitmap
+		 * without unused bits.
+		 */
+		arch_sbm_shift = BYTES_TO_BITS(sizeof(unsigned long));
+		arch_sbm_leafs = 1 + (max_idx >> arch_sbm_shift);
+		arch_sbm_mask = (1 << arch_sbm_shift) - 1;
+		arch_sbm_bits = arch_sbm_shift;
+	}
+
+	nr = arch_sbm_leafs;
+	nbits = 1U << arch_sbm_shift;
+	nlongs = BITS_TO_LONGS(nbits);
+	root = kzalloc_flex(*root, leafs, nr);
 	if (!root)
 		return NULL;
 
-- 
2.25.1


  parent reply	other threads:[~2026-05-10 16:08 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <729726b9-c669-41e2-887d-bdf9da703034@amd.com>
2026-05-10 15:59 ` [PATCH v2 1/4] sched/rt: Optimize cpupri_vec layout to mitigate cache line contention Chen Yu
2026-05-10 15:59   ` [PATCH 1/3] x86/sbm: Fix domain shift calculation and sbm_find_next_bit() Chen Yu
2026-05-10 15:59   ` [PATCH 2/3] lib/sbm: Use dynamically sized bitmap in sbm_leaf Chen Yu
2026-05-10 15:59   ` Chen Yu [this message]
2026-05-11  7:48     ` [PATCH 3/3] x86/sbm: Derive leaf granularity from LLC cacheinfo instead of topology domain K Prateek Nayak
2026-05-12  9:29       ` Chen, Yu C

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260510155920.2587431-4-yu.c.chen@intel.com \
    --to=yu.c.chen@intel.com \
    --cc=kprateek.nayak@amd.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@kernel.org \
    --cc=pan.deng@intel.com \
    --cc=peterz@infradead.org \
    --cc=tianyou.li@intel.com \
    --cc=tim.c.chen@linux.intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox