From: Chen Yu <yu.c.chen@intel.com>
To: kprateek.nayak@amd.com, tim.c.chen@linux.intel.com, peterz@infradead.org
Cc: pan.deng@intel.com, mingo@kernel.org,
linux-kernel@vger.kernel.org, tianyou.li@intel.com
Subject: [PATCH 3/3] x86/sbm: Derive leaf granularity from LLC cacheinfo instead of topology domain
Date: Sun, 10 May 2026 23:59:19 +0800 [thread overview]
Message-ID: <20260510155920.2587431-4-yu.c.chen@intel.com> (raw)
In-Reply-To: <20260510155920.2587431-1-yu.c.chen@intel.com>
From: Tim Chen <tim.c.chen@linux.intel.com>
Instead of using topology domain shifts (TOPO_TILE_DOMAIN) which may not
align with the actual LLC boundary, derive SBM parameters from the
authoritative LLC cache identification that the kernel already performs
during boot CPU identification.
- intel_cacheinfo_0x4(): iterates CPUID 0x04 leaves, identifies L2/L3
- cacheinfo_amd_init_llc_id(): identifies LLC for AMD/Hygon CPUs
In addition, when an architecture does not provide its own SBM index
mapping, fall back to a default implementation that indexes by CPU
number, similar to cpumask. Rename arch_sbm_max_apicid to sbm_max_apicid
and move it into the x86 code, since it is x86-specific.
---
arch/x86/include/asm/apic.h | 1 +
arch/x86/include/asm/sbm.h | 13 ++++---------
arch/x86/kernel/cpu/cacheinfo.c | 20 +++++++++++++++++++-
arch/x86/kernel/cpu/common.c | 8 ++++++++
arch/x86/kernel/cpu/topology.c | 12 ++----------
include/linux/sbm.h | 9 +++++++--
kernel/sched/core.c | 2 ++
kernel/sched/fair.c | 6 ++++++
kernel/sched/sched.h | 1 +
kernel/sched/topology.c | 6 ++++++
lib/sbm.c | 28 ++++++++++++++++++++++++----
11 files changed, 80 insertions(+), 26 deletions(-)
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 24012a91ac1e..90406d8d6af1 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -55,6 +55,7 @@ static inline void x86_32_probe_apic(void) { }
extern u32 cpuid_to_apicid[];
extern u32 apicid_to_cpuid[];
+extern unsigned int sbm_max_apicid;
#define CPU_ACPIID_INVALID U32_MAX
diff --git a/arch/x86/include/asm/sbm.h b/arch/x86/include/asm/sbm.h
index 9a4d283347d1..f48d3a985972 100644
--- a/arch/x86/include/asm/sbm.h
+++ b/arch/x86/include/asm/sbm.h
@@ -1,12 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
#include <asm/apic.h>
-static __always_inline u32 arch_sbm_cpu_to_idx(unsigned int cpu)
-{
- return cpuid_to_apicid[cpu];
-}
-
-static __always_inline u32 arch_sbm_idx_to_cpu(unsigned int idx)
-{
- return apicid_to_cpuid[idx];
-}
+#define arch_sbm_cpu_to_idx(cpu) \
+ ((u32)(cpuid_to_apicid[(cpu)]))
+#define arch_sbm_idx_to_cpu(idx) \
+ ((u32)(apicid_to_cpuid[(idx)]))
diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c
index 51a95b07831f..fa59fa6828a6 100644
--- a/arch/x86/kernel/cpu/cacheinfo.c
+++ b/arch/x86/kernel/cpu/cacheinfo.c
@@ -21,6 +21,8 @@
#include <asm/smp.h>
#include <asm/tlbflush.h>
+#include <linux/sbm.h>
+
#include "cpu.h"
/* Shared last level cache maps */
@@ -317,12 +319,16 @@ void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, u16 die_id)
if (c->x86 < 0x17) {
/* Pre-Zen: LLC is at the node level */
c->topo.llc_id = die_id;
+ if (c == &boot_cpu_data)
+ arch_sbm_shift = topology_get_domain_shift(TOPO_DIE_DOMAIN);
} else if (c->x86 == 0x17 && c->x86_model <= 0x1F) {
/*
* Family 17h up to 1F models: LLC is at the core
* complex level. Core complex ID is ApicId[3].
*/
c->topo.llc_id = c->topo.apicid >> 3;
+ if (c == &boot_cpu_data)
+ arch_sbm_shift = 3;
} else {
/*
* Newer families: LLC ID is calculated from the number
@@ -331,8 +337,11 @@ void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, u16 die_id)
u32 llc_index = find_num_cache_leaves(c) - 1;
struct _cpuid4_info id4 = {};
- if (!amd_fill_cpuid4_info(llc_index, &id4))
+ if (!amd_fill_cpuid4_info(llc_index, &id4)) {
c->topo.llc_id = get_cache_id(c->topo.apicid, &id4);
+ if (c == &boot_cpu_data)
+ arch_sbm_shift = get_count_order(1 + id4.eax.split.num_threads_sharing);
+ }
}
}
@@ -346,6 +355,8 @@ void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c)
* at the core complex level. Core complex ID is ApicId[3].
*/
c->topo.llc_id = c->topo.apicid >> 3;
+ if (c == &boot_cpu_data)
+ arch_sbm_shift = 3;
}
void init_amd_cacheinfo(struct cpuinfo_x86 *c)
@@ -425,6 +436,7 @@ static bool intel_cacheinfo_0x4(struct cpuinfo_x86 *c)
struct cpu_cacheinfo *ci = get_cpu_cacheinfo(c->cpu_index);
unsigned int l2_id = BAD_APICID, l3_id = BAD_APICID;
unsigned int l1d = 0, l1i = 0, l2 = 0, l3 = 0;
+ unsigned int llc_nthreads = 0;
if (c->cpuid_level < 4)
return false;
@@ -461,6 +473,7 @@ static bool intel_cacheinfo_0x4(struct cpuinfo_x86 *c)
case 3:
l3 = id4.size / 1024;
l3_id = calc_cache_topo_id(c, &id4);
+ llc_nthreads = 1 + id4.eax.split.num_threads_sharing;
break;
default:
break;
@@ -469,6 +482,11 @@ static bool intel_cacheinfo_0x4(struct cpuinfo_x86 *c)
c->topo.l2c_id = l2_id;
c->topo.llc_id = (l3_id == BAD_APICID) ? l2_id : l3_id;
+
+ /* Save LLC shift for SBM (boot CPU only) */
+ if (c == &boot_cpu_data && llc_nthreads)
+ arch_sbm_shift = get_count_order(llc_nthreads);
+
intel_cacheinfo_done(c, l3, l2, l1i, l1d);
return true;
}
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index a8ff4376c286..5c590d8a3e78 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -28,6 +28,7 @@
#include <linux/stackprotector.h>
#include <linux/utsname.h>
#include <linux/efi.h>
+#include <linux/sbm.h>
#include <asm/alternative.h>
#include <asm/cmdline.h>
@@ -70,6 +71,7 @@
#include <asm/set_memory.h>
#include <asm/traps.h>
#include <asm/sev.h>
+#include <asm/apic.h>
#include <asm/tdx.h>
#include <asm/posted_intr.h>
#include <asm/runtime-const.h>
@@ -2561,6 +2563,12 @@ void __init arch_cpu_finalize_init(void)
identify_boot_cpu();
+ arch_sbm_leafs = 1 + (sbm_max_apicid >> arch_sbm_shift);
+ arch_sbm_mask = (1 << arch_sbm_shift) - 1;
+ arch_sbm_bits = arch_sbm_shift;
+ pr_info("SBM: shift(%d) leafs(%d) APIC(%x)\n",
+ arch_sbm_shift, arch_sbm_leafs, sbm_max_apicid);
+
select_idle_routine();
/*
diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c
index 751b7517f2d5..9245456791b0 100644
--- a/arch/x86/kernel/cpu/topology.c
+++ b/arch/x86/kernel/cpu/topology.c
@@ -50,10 +50,7 @@ DECLARE_BITMAP(phys_cpu_present_map, MAX_LOCAL_APIC) __read_mostly;
u32 cpuid_to_apicid[] __ro_after_init = { [0 ... NR_CPUS - 1] = BAD_APICID, };
u32 apicid_to_cpuid[MAX_LOCAL_APIC] = { 0 };
-u32 arch_sbm_leafs __ro_after_init;
-u32 arch_sbm_shift __ro_after_init;
-u32 arch_sbm_mask __ro_after_init;
-u32 arch_sbm_bits __ro_after_init;
+u32 sbm_max_apicid __ro_after_init;
/* Bitmaps to mark registered APICs at each topology domain */
static struct { DECLARE_BITMAP(map, MAX_LOCAL_APIC); } apic_maps[TOPO_MAX_DOMAIN] __ro_after_init;
@@ -565,12 +562,7 @@ void __init topology_init_possible_cpus(void)
for_each_possible_cpu(cpu)
apicid = max(apicid, cpuid_to_apicid[cpu]);
- arch_sbm_shift = x86_topo_system.dom_shifts[TOPO_DIE_DOMAIN - 1];
- arch_sbm_leafs = 1 + (apicid >> arch_sbm_shift);
- arch_sbm_mask = (1 << arch_sbm_shift) - 1;
- arch_sbm_bits = arch_sbm_shift;
-
- pr_info("SBM: shift(%d) leafs(%d) APIC(%x)\n", arch_sbm_shift, arch_sbm_leafs, apicid);
+ sbm_max_apicid = apicid;
}
/*
diff --git a/include/linux/sbm.h b/include/linux/sbm.h
index 8d60f4bc7004..be940bcf1ae9 100644
--- a/include/linux/sbm.h
+++ b/include/linux/sbm.h
@@ -12,8 +12,13 @@ extern unsigned int arch_sbm_shift;
extern unsigned int arch_sbm_mask;
extern unsigned int arch_sbm_bits;
-extern unsigned int arch_sbm_cpu_to_idx(unsigned int cpu);
-extern unsigned int arch_sbm_idx_to_cpu(unsigned int idx);
+#ifndef arch_sbm_cpu_to_idx
+#define arch_sbm_cpu_to_idx(cpu) (cpu)
+#endif
+
+#ifndef arch_sbm_idx_to_cpu
+#define arch_sbm_idx_to_cpu(idx) (idx)	/* default: SBM index is the CPU number */
+#endif
enum sbm_type {
st_root = 0,
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 496dff740dca..2be95fa3c002 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -8567,6 +8567,8 @@ void __init sched_init_smp(void)
sched_init_dl_servers();
+ init_sched_fair_class_smp();
+
sched_smp_initialized = true;
}
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 474ee0362998..ae95610721b4 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -14033,6 +14033,12 @@ __init void init_sched_fair_class(void)
#ifdef CONFIG_NO_HZ_COMMON
nohz.next_balance = jiffies;
nohz.next_blocked = jiffies;
+#endif
+}
+
+void __init init_sched_fair_class_smp(void)
+{
+#ifdef CONFIG_NO_HZ_COMMON
nohz.sbm = sbm_alloc();
#endif
}
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 43bbf0693cca..9e45396a1512 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2878,6 +2878,7 @@ extern void update_max_interval(void);
extern void init_sched_dl_class(void);
extern void init_sched_rt_class(void);
extern void init_sched_fair_class(void);
+extern void init_sched_fair_class_smp(void);
extern void resched_curr(struct rq *rq);
extern void resched_curr_lazy(struct rq *rq);
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 32dcddaead82..f62a10c869fa 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -5,6 +5,7 @@
#include <linux/sched/isolation.h>
#include <linux/bsearch.h>
+#include <linux/sbm.h>
#include "sched.h"
DEFINE_MUTEX(sched_domains_mutex);
@@ -21,6 +22,11 @@ void sched_domains_mutex_unlock(void)
static cpumask_var_t sched_domains_tmpmask;
static cpumask_var_t sched_domains_tmpmask2;
+u32 arch_sbm_leafs __ro_after_init;
+u32 arch_sbm_shift __ro_after_init;
+u32 arch_sbm_mask __ro_after_init;
+u32 arch_sbm_bits __ro_after_init;
+
static int __init sched_debug_setup(char *str)
{
sched_debug_verbose = true;
diff --git a/lib/sbm.c b/lib/sbm.c
index 76670ce14291..45003e7b5621 100644
--- a/lib/sbm.c
+++ b/lib/sbm.c
@@ -1,13 +1,33 @@
/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/sbm.h>
+#include <linux/cpumask.h>
struct sbm *sbm_alloc(void)
{
- unsigned int nr = arch_sbm_leafs;
- unsigned int nbits = 1U << arch_sbm_shift;
- unsigned int nlongs = BITS_TO_LONGS(nbits);
- struct sbm_root *root = kzalloc_flex(*root, leafs, nr);
+ unsigned int nr;
+ unsigned int nbits;
+ unsigned int nlongs;
+ struct sbm_root *root;
struct sbm_leaf *leaf;
+
+	if (!arch_sbm_shift) {
+		unsigned int max_idx = nr_cpu_ids - 1;
+
+		/*
+		 * No arch-provided granularity: size each leaf as a single
+		 * unsigned long (BITS_PER_LONG bits) for a compact bitmap.
+		 * The shift is log2 of that size, not the bit count itself.
+		 */
+		arch_sbm_shift = get_count_order(BITS_PER_LONG);
+		arch_sbm_leafs = 1 + (max_idx >> arch_sbm_shift);
+		arch_sbm_mask = (1 << arch_sbm_shift) - 1;
+		arch_sbm_bits = arch_sbm_shift;
+	}
+
+ nr = arch_sbm_leafs;
+ nbits = 1U << arch_sbm_shift;
+ nlongs = BITS_TO_LONGS(nbits);
+ root = kzalloc_flex(*root, leafs, nr);
if (!root)
return NULL;
--
2.25.1
next prev parent reply other threads:[~2026-05-10 16:08 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
[not found] <729726b9-c669-41e2-887d-bdf9da703034@amd.com>
2026-05-10 15:59 ` [PATCH v2 1/4] sched/rt: Optimize cpupri_vec layout to mitigate cache line contention Chen Yu
2026-05-10 15:59 ` [PATCH 1/3] x86/sbm: Fix domain shift calculation and sbm_find_next_bit() Chen Yu
2026-05-10 15:59 ` [PATCH 2/3] lib/sbm: Use dynamically sized bitmap in sbm_leaf Chen Yu
2026-05-10 15:59 ` Chen Yu [this message]
2026-05-11 7:48 ` [PATCH 3/3] x86/sbm: Derive leaf granularity from LLC cacheinfo instead of topology domain K Prateek Nayak
2026-05-12 9:29 ` Chen, Yu C
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260510155920.2587431-4-yu.c.chen@intel.com \
--to=yu.c.chen@intel.com \
--cc=kprateek.nayak@amd.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@kernel.org \
--cc=pan.deng@intel.com \
--cc=peterz@infradead.org \
--cc=tianyou.li@intel.com \
--cc=tim.c.chen@linux.intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.