From: K Prateek Nayak <kprateek.nayak@amd.com>
To: "Chen, Yu C" <yu.c.chen@intel.com>
Cc: <linux-tip-commits@vger.kernel.org>,
<linux-kernel@vger.kernel.org>,
Shrikanth Hegde <sshegde@linux.ibm.com>,
Valentin Schneider <vschneid@redhat.com>,
Dietmar Eggemann <dietmar.eggemann@arm.com>, <x86@kernel.org>,
Nathan Chancellor <nathan@kernel.org>,
Peter Zijlstra <peterz@infradead.org>
Subject: Re: [tip: sched/core] sched/topology: Compute sd_weight considering cpuset partitions
Date: Sat, 21 Mar 2026 14:29:05 +0530 [thread overview]
Message-ID: <7fad91ea-e6cd-43c8-abe3-16d7843247ed@amd.com> (raw)
In-Reply-To: <470ce693-ee2e-414e-930b-d6581d649110@intel.com>
Hello Chenyu,
On 3/21/2026 1:17 PM, Chen, Yu C wrote:
> On 3/21/2026 3:33 PM, Chen, Yu C wrote:
>> On 3/21/2026 11:36 AM, K Prateek Nayak wrote:
>>> sd->span_weight = cpumask_weight(sched_domain_span(sd));
>>>
>>> which should have crashed too if we had a NULL pointer in the
>>> cpumask range. So I'm at a loss. Maybe the pc points to a
>>> different location in your build?
>>>
>>
>> A wild guess, the major change is that we access sd->span, before
>> initializing the sd structure with *sd = { ... }. The sd is allocated
>> via alloc_percpu() uninitialized, the span at the end of the sd structure
>> remain uninitialized. It is unclear how cpumask_weight(sd->span) might be
>> affected by this uninitialized state. Before this patch, after *sd = { ... }
>> is executed, the contents of sd->span are explicitly set to 0, which might
>> be safer?
>>
>
> I replied too fast, please ignore above comments, the sd->span should have been
> set via cpumask_and(sd_span, cpu_map, tl->mask(tl, cpu))
So I managed to reproduce the crash and it is actually crashing at:
last->next = first;
in build_sched_groups(). If I print the span before and after we do
the *sd = { ... }, I see:
[ 0.056301] span before: 0
[ 0.056559] span after:
[ 0.056686] span double check:
The "double check" line prints cpumask_pr_args(sched_domain_span(sd)).
This solves the crash on top of this patch:
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 79bab80af8f2..b347ae5d2786 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -1693,6 +1693,8 @@ sd_init(struct sched_domain_topology_level *tl,
.name = tl->name,
};
+ cpumask_and(sd_span, cpu_map, tl->mask(tl, cpu));
+
WARN_ONCE((sd->flags & (SD_SHARE_CPUCAPACITY | SD_ASYM_CPUCAPACITY)) ==
(SD_SHARE_CPUCAPACITY | SD_ASYM_CPUCAPACITY),
"CPU capacity asymmetry not supported on SMT\n");
---
And I see:
[ 0.056479] span before: 0
[ 0.056749] span after: 0
[ 0.056881] span double check: 0
But since span[] is a flexible array member at the end of struct sched_domain,
doing a *sd = { ... } shouldn't modify it, since its size isn't known at
compile time and the compiler should only overwrite the fixed-size fields.
Is there a compiler angle I'm missing here?
The cpumask_and() that comes first looks like:
@ kernel/sched/topology.c:1649: cpumask_and(sd_span, cpu_map, tl->mask(tl, cpu));
ldr r3, [r9] @ MEM[(const struct cpumask * (*<T2127>) (struct sched_domain_topology_level *, int) *)tl_317], MEM[(const struct cpumask * (*<T2127>) (struct sched_domain_topology_level *, int) *)tl_317]
@ kernel/sched/topology.c:1646: u64 now = sched_clock();
strd r0, [sp, #28] @,,
@ kernel/sched/topology.c:1649: cpumask_and(sd_span, cpu_map, tl->mask(tl, cpu));
mov r1, r6 @, i
mov r0, r9 @, ivtmp.1798
@ ./include/linux/bitmap.h:329: return (*dst = *src1 & *src2 & BITMAP_LAST_WORD_MASK(nbits)) != 0;
mov r4, fp @ tmp740, sd
@ kernel/sched/topology.c:1649: cpumask_and(sd_span, cpu_map, tl->mask(tl, cpu));
blx r3 @ MEM[(const struct cpumask * (*<T2127>) (struct sched_domain_topology_level *, int) *)tl_317]
@ ./include/linux/bitmap.h:329: return (*dst = *src1 & *src2 & BITMAP_LAST_WORD_MASK(nbits)) != 0;
ldr r3, [r0] @ MEM[(const long unsigned int *)_356], MEM[(const long unsigned int *)_356]
ldr r0, [r7] @ MEM[(const long unsigned int *)cpu_map_104(D)], MEM[(const long unsigned int *)cpu_map_104(D)]
and r0, r0, r3 @ tmp736, MEM[(const long unsigned int *)cpu_map_104(D)], MEM[(const long unsigned int *)_356]
@ ./include/linux/bitmap.h:329: return (*dst = *src1 & *src2 & BITMAP_LAST_WORD_MASK(nbits)) != 0;
uxth r0, r0 @ _360, tmp736
@ ./include/linux/bitmap.h:329: return (*dst = *src1 & *src2 & BITMAP_LAST_WORD_MASK(nbits)) != 0;
str r0, [r4, #292]! @ _360, MEM[(long unsigned int *)sd_352 + 292B]
---
*sd assignment looks as follows in my disassembly:
.L1867:
@ kernel/sched/topology.c:1660: *sd = (struct sched_domain){
ldr ip, [sp, #48] @ tmp1203, %sfp
mov r2, #296 @,
mov r0, fp @, sd
mov r1, #0 @,
ldr r3, [ip] @ jiffies.324_453, jiffies
str r3, [sp, #36] @ jiffies.324_453, %sfp
ldr ip, [ip] @ jiffies.326_454, jiffies
@ kernel/sched/topology.c:1693: .name = tl->name,
ldr r3, [r9, #28] @ _455, MEM[(char * *)tl_317 + 28B]
str r3, [sp, #16] @ _455, %sfp
@ kernel/sched/topology.c:1660: *sd = (struct sched_domain){
str ip, [sp, #8] @ jiffies.326_454, %sfp
bl memset @
ldr r3, [sp, #36] @ jiffies.324_453, %sfp
ldr r2, [sp, #28] @ now, %sfp
str r3, [fp, #48] @ jiffies.324_453, sd_352->last_balance
ldr r3, [sp, #16] @ _455, %sfp
ldr ip, [sp, #8] @ jiffies.326_454, %sfp
str r2, [fp, #72] @ now, sd_352->newidle_stamp
str r3, [fp, #272] @ _455, sd_352->name
mov r3, #16 @ tmp1502,
ldr r2, [sp, #32] @ now, %sfp
str r3, [fp, #20] @ tmp1502, sd_352->busy_factor
@ kernel/sched/topology.c:1678: | sd_flags
orr r3, r4, #4096 @ _452, sd_flags,
@ kernel/sched/topology.c:1696: WARN_ONCE((sd->flags & (SD_SHARE_CPUCAPACITY | SD_ASYM_CPUCAPACITY)) ==
and r4, r4, #160 @ tmp779, sd_flags,
@ kernel/sched/topology.c:1678: | sd_flags
orr r3, r3, #23 @ _452, _452,
@ kernel/sched/topology.c:1660: *sd = (struct sched_domain){
str r2, [fp, #76] @ now, sd_352->newidle_stamp
@ kernel/sched/topology.c:1696: WARN_ONCE((sd->flags & (SD_SHARE_CPUCAPACITY | SD_ASYM_CPUCAPACITY)) ==
cmp r4, #160 @ tmp779,
@ kernel/sched/topology.c:1660: *sd = (struct sched_domain){
mov r2, #512 @ tmp776,
str ip, [fp, #88] @ jiffies.326_454, sd_352->last_decay_max_lb_cost
str r2, [fp, #60] @ tmp776, sd_352->newidle_call
str r2, [fp, #68] @ tmp776, sd_352->newidle_ratio
@ kernel/sched/topology.c:1662: .max_interval = 2*sd_weight,
lsl r2, r10, #1 @ tmp773, _484,
@ kernel/sched/topology.c:1660: *sd = (struct sched_domain){
str r5, [fp, #4] @ sd, sd_352->child
str r2, [fp, #16] @ tmp773, sd_352->max_interval
mov r2, #117 @ tmp775,
str r10, [fp, #12] @ _484, sd_352->min_interval
str r2, [fp, #24] @ tmp775, sd_352->imbalance_pct
mov r2, #256 @ tmp777,
str r10, [fp, #52] @ _484, sd_352->balance_interval
str r3, [fp, #40] @ _452, sd_352->flags
str r2, [fp, #64] @ tmp777, sd_352->newidle_success
---
If I add the new cpumask_and() I get the following after *sd assignment:
@ kernel/sched/topology.c:1696: cpumask_and(sd_span, cpu_map, tl->mask(tl, cpu));
ldr r3, [r9] @ MEM[(const struct cpumask * (*<T2127>) (struct sched_domain_topology_level *, int) *)tl_317], MEM[(const struct cpumask * (*<T2127>) (struct sched_domain_topology_level *, int) *)tl_317]
blx r3 @ MEM[(const struct cpumask * (*<T2127>) (struct sched_domain_topology_level *, int) *)tl_317]
@ ./include/linux/bitmap.h:329: return (*dst = *src1 & *src2 & BITMAP_LAST_WORD_MASK(nbits)) != 0;
ldr r3, [r7] @ MEM[(const long unsigned int *)cpu_map_104(D)], MEM[(const long unsigned int *)cpu_map_104(D)]
ldr r2, [r0] @ MEM[(const long unsigned int *)_457], MEM[(const long unsigned int *)_457]
and r3, r3, r2 @ tmp788, MEM[(const long unsigned int *)cpu_map_104(D)], MEM[(const long unsigned int *)_457]
@ ./include/linux/bitmap.h:329: return (*dst = *src1 & *src2 & BITMAP_LAST_WORD_MASK(nbits)) != 0;
uxth r3, r3 @ tmp791, tmp788
@ ./include/linux/bitmap.h:329: return (*dst = *src1 & *src2 & BITMAP_LAST_WORD_MASK(nbits)) != 0;
str r3, [fp, #292] @ tmp791, MEM[(long unsigned int *)sd_352 + 292B]
---
Both cpumask_and() calls seem to store to:
MEM[(long unsigned int *)sd_352 + 292B]
So I'm at a loss as to why this happens. Let me dig a little more.
--
Thanks and Regards,
Prateek
next prev parent reply other threads:[~2026-03-21 8:59 UTC|newest]
Thread overview: 56+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-03-12 4:44 [PATCH v4 0/9] sched/topology: Optimize sd->shared allocation K Prateek Nayak
2026-03-12 4:44 ` [PATCH v4 1/9] sched/topology: Compute sd_weight considering cpuset partitions K Prateek Nayak
2026-03-12 9:34 ` Peter Zijlstra
2026-03-12 9:59 ` K Prateek Nayak
2026-03-12 10:01 ` Peter Zijlstra
2026-03-12 10:09 ` K Prateek Nayak
2026-03-18 8:08 ` [tip: sched/core] " tip-bot2 for K Prateek Nayak
2026-03-20 23:58 ` Nathan Chancellor
2026-03-21 3:36 ` K Prateek Nayak
2026-03-21 7:33 ` Chen, Yu C
2026-03-21 7:47 ` Chen, Yu C
2026-03-21 8:59 ` K Prateek Nayak [this message]
2026-03-21 9:45 ` K Prateek Nayak
2026-03-21 10:13 ` K Prateek Nayak
2026-03-21 12:48 ` Chen, Yu C
2026-03-24 2:54 ` K Prateek Nayak
2026-03-21 14:13 ` Shrikanth Hegde
2026-03-21 15:14 ` K Prateek Nayak
2026-03-21 16:38 ` [PATCH] sched/topology: Initialize sd_span after assignment to *sd K Prateek Nayak
2026-03-23 9:08 ` Shrikanth Hegde
2026-03-23 17:34 ` K Prateek Nayak
2026-03-23 9:36 ` Peter Zijlstra
2026-03-23 13:24 ` Jon Hunter
2026-03-23 15:36 ` Chen, Yu C
2026-03-23 17:24 ` K Prateek Nayak
2026-03-23 22:41 ` Nathan Chancellor
2026-03-24 9:10 ` [tip: sched/core] sched/topology: Fix sched_domain_span() tip-bot2 for Peter Zijlstra
2026-03-12 4:44 ` [PATCH v4 2/9] sched/topology: Extract "imb_numa_nr" calculation into a separate helper K Prateek Nayak
2026-03-12 13:37 ` kernel test robot
2026-03-12 15:42 ` K Prateek Nayak
2026-03-12 16:02 ` Peter Zijlstra
2026-03-16 0:18 ` Dietmar Eggemann
2026-03-16 3:41 ` K Prateek Nayak
2026-03-16 8:24 ` Dietmar Eggemann
2026-03-16 8:50 ` K Prateek Nayak
2026-03-18 8:08 ` [tip: sched/core] " tip-bot2 for K Prateek Nayak
2026-03-12 4:44 ` [PATCH v4 3/9] sched/topology: Allocate per-CPU sched_domain_shared in s_data K Prateek Nayak
2026-03-18 8:08 ` [tip: sched/core] " tip-bot2 for K Prateek Nayak
2026-03-12 4:44 ` [PATCH v4 4/9] sched/topology: Switch to assigning "sd->shared" from s_data K Prateek Nayak
2026-03-18 8:08 ` [tip: sched/core] " tip-bot2 for K Prateek Nayak
2026-03-12 4:44 ` [PATCH v4 5/9] sched/topology: Remove sched_domain_shared allocation with sd_data K Prateek Nayak
2026-03-18 8:08 ` [tip: sched/core] " tip-bot2 for K Prateek Nayak
2026-03-12 4:44 ` [PATCH v4 6/9] sched/core: Check for rcu_read_lock_any_held() in idle_get_state() K Prateek Nayak
2026-03-12 9:46 ` Peter Zijlstra
2026-03-12 10:06 ` K Prateek Nayak
2026-03-18 8:08 ` [tip: sched/core] " tip-bot2 for K Prateek Nayak
2026-03-12 4:44 ` [PATCH v4 7/9] sched/fair: Remove superfluous rcu_read_lock() in the wakeup path K Prateek Nayak
2026-03-15 23:36 ` Dietmar Eggemann
2026-03-16 3:19 ` K Prateek Nayak
2026-03-18 8:08 ` [tip: sched/core] PM: EM: Switch to rcu_dereference_all() in " tip-bot2 for Dietmar Eggemann
2026-03-18 8:08 ` [tip: sched/core] sched/fair: Remove superfluous rcu_read_lock() in the " tip-bot2 for K Prateek Nayak
2026-03-12 4:44 ` [PATCH v4 8/9] sched/fair: Simplify the entry condition for update_idle_cpu_scan() K Prateek Nayak
2026-03-18 8:08 ` [tip: sched/core] " tip-bot2 for K Prateek Nayak
2026-03-12 4:44 ` [PATCH v4 9/9] sched/fair: Simplify SIS_UTIL handling in select_idle_cpu() K Prateek Nayak
2026-03-18 8:08 ` [tip: sched/core] " tip-bot2 for K Prateek Nayak
2026-03-16 0:22 ` [PATCH v4 0/9] sched/topology: Optimize sd->shared allocation Dietmar Eggemann
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=7fad91ea-e6cd-43c8-abe3-16d7843247ed@amd.com \
--to=kprateek.nayak@amd.com \
--cc=dietmar.eggemann@arm.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-tip-commits@vger.kernel.org \
--cc=nathan@kernel.org \
--cc=peterz@infradead.org \
--cc=sshegde@linux.ibm.com \
--cc=vschneid@redhat.com \
--cc=x86@kernel.org \
--cc=yu.c.chen@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox