From: Chen Yu <yu.c.chen@intel.com>
To: kprateek.nayak@amd.com, tim.c.chen@linux.intel.com, peterz@infradead.org
Cc: pan.deng@intel.com, mingo@kernel.org,
linux-kernel@vger.kernel.org, tianyou.li@intel.com
Subject: [PATCH 3/3] x86/sbm: Derive leaf granularity from LLC cacheinfo instead of topology domain
Date: Sun, 10 May 2026 23:59:19 +0800 [thread overview]
Message-ID: <20260510155920.2587431-4-yu.c.chen@intel.com> (raw)
In-Reply-To: <20260510155920.2587431-1-yu.c.chen@intel.com>
From: Tim Chen <tim.c.chen@linux.intel.com>
Instead of using topology domain shifts (TOPO_TILE_DOMAIN) which may not
align with the actual LLC boundary, derive SBM parameters from the
authoritative LLC cache identification that the kernel already performs
during boot CPU identification.
- intel_cacheinfo_0x4(): iterates CPUID 0x04 leaves, identifies L2/L3
- cacheinfo_amd_init_llc_id(): identifies LLC for AMD/Hygon CPUs
In addition, if an architecture does not provide its own SBM
implementation, fall back to a default one, similar to cpumask.
Rename arch_sbm_max_apicid to sbm_max_apicid and move it into the
x86 code, since it is x86-specific.
---
arch/x86/include/asm/apic.h | 1 +
arch/x86/include/asm/sbm.h | 13 ++++---------
arch/x86/kernel/cpu/cacheinfo.c | 20 +++++++++++++++++++-
arch/x86/kernel/cpu/common.c | 8 ++++++++
arch/x86/kernel/cpu/topology.c | 12 ++----------
include/linux/sbm.h | 9 +++++++--
kernel/sched/core.c | 2 ++
kernel/sched/fair.c | 6 ++++++
kernel/sched/sched.h | 1 +
kernel/sched/topology.c | 6 ++++++
lib/sbm.c | 28 ++++++++++++++++++++++++----
11 files changed, 80 insertions(+), 26 deletions(-)
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 24012a91ac1e..90406d8d6af1 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -55,6 +55,7 @@ static inline void x86_32_probe_apic(void) { }
extern u32 cpuid_to_apicid[];
extern u32 apicid_to_cpuid[];
+extern unsigned int sbm_max_apicid;
#define CPU_ACPIID_INVALID U32_MAX
diff --git a/arch/x86/include/asm/sbm.h b/arch/x86/include/asm/sbm.h
index 9a4d283347d1..f48d3a985972 100644
--- a/arch/x86/include/asm/sbm.h
+++ b/arch/x86/include/asm/sbm.h
@@ -1,12 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
#include <asm/apic.h>
-static __always_inline u32 arch_sbm_cpu_to_idx(unsigned int cpu)
-{
- return cpuid_to_apicid[cpu];
-}
-
-static __always_inline u32 arch_sbm_idx_to_cpu(unsigned int idx)
-{
- return apicid_to_cpuid[idx];
-}
+#define arch_sbm_cpu_to_idx(cpu) \
+ ((u32)(cpuid_to_apicid[(cpu)]))
+#define arch_sbm_idx_to_cpu(idx) \
+ ((u32)(apicid_to_cpuid[(idx)]))
diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c
index 51a95b07831f..fa59fa6828a6 100644
--- a/arch/x86/kernel/cpu/cacheinfo.c
+++ b/arch/x86/kernel/cpu/cacheinfo.c
@@ -21,6 +21,8 @@
#include <asm/smp.h>
#include <asm/tlbflush.h>
+#include <linux/sbm.h>
+
#include "cpu.h"
/* Shared last level cache maps */
@@ -317,12 +319,16 @@ void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, u16 die_id)
if (c->x86 < 0x17) {
/* Pre-Zen: LLC is at the node level */
c->topo.llc_id = die_id;
+ if (c == &boot_cpu_data)
+ arch_sbm_shift = topology_get_domain_shift(TOPO_DIE_DOMAIN);
} else if (c->x86 == 0x17 && c->x86_model <= 0x1F) {
/*
* Family 17h up to 1F models: LLC is at the core
* complex level. Core complex ID is ApicId[3].
*/
c->topo.llc_id = c->topo.apicid >> 3;
+ if (c == &boot_cpu_data)
+ arch_sbm_shift = 3;
} else {
/*
* Newer families: LLC ID is calculated from the number
@@ -331,8 +337,11 @@ void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, u16 die_id)
u32 llc_index = find_num_cache_leaves(c) - 1;
struct _cpuid4_info id4 = {};
- if (!amd_fill_cpuid4_info(llc_index, &id4))
+ if (!amd_fill_cpuid4_info(llc_index, &id4)) {
c->topo.llc_id = get_cache_id(c->topo.apicid, &id4);
+ if (c == &boot_cpu_data)
+ arch_sbm_shift = get_count_order(1 + id4.eax.split.num_threads_sharing);
+ }
}
}
@@ -346,6 +355,8 @@ void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c)
* at the core complex level. Core complex ID is ApicId[3].
*/
c->topo.llc_id = c->topo.apicid >> 3;
+ if (c == &boot_cpu_data)
+ arch_sbm_shift = 3;
}
void init_amd_cacheinfo(struct cpuinfo_x86 *c)
@@ -425,6 +436,7 @@ static bool intel_cacheinfo_0x4(struct cpuinfo_x86 *c)
struct cpu_cacheinfo *ci = get_cpu_cacheinfo(c->cpu_index);
unsigned int l2_id = BAD_APICID, l3_id = BAD_APICID;
unsigned int l1d = 0, l1i = 0, l2 = 0, l3 = 0;
+ unsigned int llc_nthreads = 0;
if (c->cpuid_level < 4)
return false;
@@ -461,6 +473,7 @@ static bool intel_cacheinfo_0x4(struct cpuinfo_x86 *c)
case 3:
l3 = id4.size / 1024;
l3_id = calc_cache_topo_id(c, &id4);
+ llc_nthreads = 1 + id4.eax.split.num_threads_sharing;
break;
default:
break;
@@ -469,6 +482,11 @@ static bool intel_cacheinfo_0x4(struct cpuinfo_x86 *c)
c->topo.l2c_id = l2_id;
c->topo.llc_id = (l3_id == BAD_APICID) ? l2_id : l3_id;
+
+ /* Save LLC shift for SBM (boot CPU only) */
+ if (c == &boot_cpu_data && llc_nthreads)
+ arch_sbm_shift = get_count_order(llc_nthreads);
+
intel_cacheinfo_done(c, l3, l2, l1i, l1d);
return true;
}
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index a8ff4376c286..5c590d8a3e78 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -28,6 +28,7 @@
#include <linux/stackprotector.h>
#include <linux/utsname.h>
#include <linux/efi.h>
+#include <linux/sbm.h>
#include <asm/alternative.h>
#include <asm/cmdline.h>
@@ -70,6 +71,7 @@
#include <asm/set_memory.h>
#include <asm/traps.h>
#include <asm/sev.h>
+#include <asm/apic.h>
#include <asm/tdx.h>
#include <asm/posted_intr.h>
#include <asm/runtime-const.h>
@@ -2561,6 +2563,12 @@ void __init arch_cpu_finalize_init(void)
identify_boot_cpu();
+ arch_sbm_leafs = 1 + (sbm_max_apicid >> arch_sbm_shift);
+ arch_sbm_mask = (1 << arch_sbm_shift) - 1;
+ arch_sbm_bits = arch_sbm_shift;
+ pr_info("SBM: shift(%d) leafs(%d) APIC(%x)\n",
+ arch_sbm_shift, arch_sbm_leafs, sbm_max_apicid);
+
select_idle_routine();
/*
diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c
index 751b7517f2d5..9245456791b0 100644
--- a/arch/x86/kernel/cpu/topology.c
+++ b/arch/x86/kernel/cpu/topology.c
@@ -50,10 +50,7 @@ DECLARE_BITMAP(phys_cpu_present_map, MAX_LOCAL_APIC) __read_mostly;
u32 cpuid_to_apicid[] __ro_after_init = { [0 ... NR_CPUS - 1] = BAD_APICID, };
u32 apicid_to_cpuid[MAX_LOCAL_APIC] = { 0 };
-u32 arch_sbm_leafs __ro_after_init;
-u32 arch_sbm_shift __ro_after_init;
-u32 arch_sbm_mask __ro_after_init;
-u32 arch_sbm_bits __ro_after_init;
+u32 sbm_max_apicid __ro_after_init;
/* Bitmaps to mark registered APICs at each topology domain */
static struct { DECLARE_BITMAP(map, MAX_LOCAL_APIC); } apic_maps[TOPO_MAX_DOMAIN] __ro_after_init;
@@ -565,12 +562,7 @@ void __init topology_init_possible_cpus(void)
for_each_possible_cpu(cpu)
apicid = max(apicid, cpuid_to_apicid[cpu]);
- arch_sbm_shift = x86_topo_system.dom_shifts[TOPO_DIE_DOMAIN - 1];
- arch_sbm_leafs = 1 + (apicid >> arch_sbm_shift);
- arch_sbm_mask = (1 << arch_sbm_shift) - 1;
- arch_sbm_bits = arch_sbm_shift;
-
- pr_info("SBM: shift(%d) leafs(%d) APIC(%x)\n", arch_sbm_shift, arch_sbm_leafs, apicid);
+ sbm_max_apicid = apicid;
}
/*
diff --git a/include/linux/sbm.h b/include/linux/sbm.h
index 8d60f4bc7004..be940bcf1ae9 100644
--- a/include/linux/sbm.h
+++ b/include/linux/sbm.h
@@ -12,8 +12,13 @@ extern unsigned int arch_sbm_shift;
extern unsigned int arch_sbm_mask;
extern unsigned int arch_sbm_bits;
-extern unsigned int arch_sbm_cpu_to_idx(unsigned int cpu);
-extern unsigned int arch_sbm_idx_to_cpu(unsigned int idx);
+#ifndef arch_sbm_cpu_to_idx
+#define arch_sbm_cpu_to_idx(cpu) (cpu)	/* default: index is the CPU number */
+#endif
+
+#ifndef arch_sbm_idx_to_cpu
+#define arch_sbm_idx_to_cpu(idx) (idx)	/* default: CPU number is the index */
+#endif
enum sbm_type {
st_root = 0,
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 496dff740dca..2be95fa3c002 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -8567,6 +8567,8 @@ void __init sched_init_smp(void)
sched_init_dl_servers();
+ init_sched_fair_class_smp();
+
sched_smp_initialized = true;
}
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 474ee0362998..ae95610721b4 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -14033,6 +14033,12 @@ __init void init_sched_fair_class(void)
#ifdef CONFIG_NO_HZ_COMMON
nohz.next_balance = jiffies;
nohz.next_blocked = jiffies;
+#endif
+}
+
+void __init init_sched_fair_class_smp(void)
+{
+#ifdef CONFIG_NO_HZ_COMMON
nohz.sbm = sbm_alloc();
#endif
}
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 43bbf0693cca..9e45396a1512 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2878,6 +2878,7 @@ extern void update_max_interval(void);
extern void init_sched_dl_class(void);
extern void init_sched_rt_class(void);
extern void init_sched_fair_class(void);
+extern void init_sched_fair_class_smp(void);
extern void resched_curr(struct rq *rq);
extern void resched_curr_lazy(struct rq *rq);
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 32dcddaead82..f62a10c869fa 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -5,6 +5,7 @@
#include <linux/sched/isolation.h>
#include <linux/bsearch.h>
+#include <linux/sbm.h>
#include "sched.h"
DEFINE_MUTEX(sched_domains_mutex);
@@ -21,6 +22,11 @@ void sched_domains_mutex_unlock(void)
static cpumask_var_t sched_domains_tmpmask;
static cpumask_var_t sched_domains_tmpmask2;
+u32 arch_sbm_leafs __ro_after_init;
+u32 arch_sbm_shift __ro_after_init;
+u32 arch_sbm_mask __ro_after_init;
+u32 arch_sbm_bits __ro_after_init;
+
static int __init sched_debug_setup(char *str)
{
sched_debug_verbose = true;
diff --git a/lib/sbm.c b/lib/sbm.c
index 76670ce14291..45003e7b5621 100644
--- a/lib/sbm.c
+++ b/lib/sbm.c
@@ -1,13 +1,33 @@
/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/sbm.h>
+#include <linux/cpumask.h>
struct sbm *sbm_alloc(void)
{
- unsigned int nr = arch_sbm_leafs;
- unsigned int nbits = 1U << arch_sbm_shift;
- unsigned int nlongs = BITS_TO_LONGS(nbits);
- struct sbm_root *root = kzalloc_flex(*root, leafs, nr);
+ unsigned int nr;
+ unsigned int nbits;
+ unsigned int nlongs;
+ struct sbm_root *root;
struct sbm_leaf *leaf;
+
+	if (!arch_sbm_shift) {
+		unsigned int max_idx = nr_cpu_ids - 1;
+
+		/*
+		 * Default geometry: one unsigned long per leaf, indexed by CPU
+		 * number (highest is nr_cpu_ids - 1). The shift is the log2 of
+		 * the leaf width, so that 1U << shift == BITS_PER_LONG.
+		 */
+		arch_sbm_shift = get_count_order(BITS_PER_LONG);
+		arch_sbm_leafs = 1 + (max_idx >> arch_sbm_shift);
+		arch_sbm_mask = (1 << arch_sbm_shift) - 1;
+		arch_sbm_bits = arch_sbm_shift;
+	}
+
+ nr = arch_sbm_leafs;
+ nbits = 1U << arch_sbm_shift;
+ nlongs = BITS_TO_LONGS(nbits);
+ root = kzalloc_flex(*root, leafs, nr);
if (!root)
return NULL;
--
2.25.1
next prev parent reply other threads:[~2026-05-10 16:08 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
[not found] <729726b9-c669-41e2-887d-bdf9da703034@amd.com>
2026-05-10 15:59 ` [PATCH v2 1/4] sched/rt: Optimize cpupri_vec layout to mitigate cache line contention Chen Yu
2026-05-10 15:59 ` [PATCH 1/3] x86/sbm: Fix domain shift calculation and sbm_find_next_bit() Chen Yu
2026-05-10 15:59 ` [PATCH 2/3] lib/sbm: Use dynamically sized bitmap in sbm_leaf Chen Yu
2026-05-10 15:59 ` Chen Yu [this message]
2026-05-11 7:48 ` [PATCH 3/3] x86/sbm: Derive leaf granularity from LLC cacheinfo instead of topology domain K Prateek Nayak
2026-05-12 9:29 ` Chen, Yu C
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260510155920.2587431-4-yu.c.chen@intel.com \
--to=yu.c.chen@intel.com \
--cc=kprateek.nayak@amd.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@kernel.org \
--cc=pan.deng@intel.com \
--cc=peterz@infradead.org \
--cc=tianyou.li@intel.com \
--cc=tim.c.chen@linux.intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox