From: K Prateek Nayak <kprateek.nayak@amd.com>
To: "Chen, Yu C" <yu.c.chen@intel.com>
Cc: <linux-tip-commits@vger.kernel.org>,
<linux-kernel@vger.kernel.org>,
Shrikanth Hegde <sshegde@linux.ibm.com>,
Valentin Schneider <vschneid@redhat.com>,
Dietmar Eggemann <dietmar.eggemann@arm.com>, <x86@kernel.org>,
Nathan Chancellor <nathan@kernel.org>,
Peter Zijlstra <peterz@infradead.org>
Subject: Re: [tip: sched/core] sched/topology: Compute sd_weight considering cpuset partitions
Date: Sat, 21 Mar 2026 14:29:05 +0530 [thread overview]
Message-ID: <7fad91ea-e6cd-43c8-abe3-16d7843247ed@amd.com> (raw)
In-Reply-To: <470ce693-ee2e-414e-930b-d6581d649110@intel.com>
Hello Chenyu,
On 3/21/2026 1:17 PM, Chen, Yu C wrote:
> On 3/21/2026 3:33 PM, Chen, Yu C wrote:
>> On 3/21/2026 11:36 AM, K Prateek Nayak wrote:
>>> sd->span_weight = cpumask_weight(sched_domain_span(sd));
>>>
>>> which should have crashed too if we had a NULL pointer in the
>>> cpumask range. So I'm at a loss. Maybe the pc points to a
>>> different location in your build?
>>>
>>
>> A wild guess, the major change is that we access sd->span, before
>> initializing the sd structure with *sd = { ... }. The sd is allocated
>> via alloc_percpu() uninitialized, the span at the end of the sd structure
>> remain uninitialized. It is unclear how cpumask_weight(sd->span) might be
>> affected by this uninitialized state. Before this patch, after *sd = { ... }
>> is executed, the contents of sd->span are explicitly set to 0, which might
>> be safer?
>>
>
> I replied too fast, please ignore above comments, the sd->span should have been
> set via cpumask_and(sd_span, cpu_map, tl->mask(tl, cpu))
So I managed to reproduce the crash and it is actually crashing at:
last->next = first;
in build_sched_groups(). If I print the span befora nd after we do
the *sd = { ... }, I see:
[ 0.056301] span before: 0
[ 0.056559] span after:
[ 0.056686] span double check:
double check does a cpumask_pr_args(sched_domain_span(sd)).
This solves the crash on top of this patch:
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 79bab80af8f2..b347ae5d2786 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -1693,6 +1693,8 @@ sd_init(struct sched_domain_topology_level *tl,
.name = tl->name,
};
+ cpumask_and(sd_span, cpu_map, tl->mask(tl, cpu));
+
WARN_ONCE((sd->flags & (SD_SHARE_CPUCAPACITY | SD_ASYM_CPUCAPACITY)) ==
(SD_SHARE_CPUCAPACITY | SD_ASYM_CPUCAPACITY),
"CPU capacity asymmetry not supported on SMT\n");
---
And I see:
[ 0.056479] span before: 0
[ 0.056749] span after: 0
[ 0.056881] span double check: 0
But since span[] is a variable array at the end of sched_domain struct,
doing a *sd = { ... } shouldn't modify it since the size isn't known at
compile time and the compiler will only overwrite the fixed fields.
Is there a compiler angle I'm missing here?
The cpumask_and() that comes first looks like:
@ kernel/sched/topology.c:1649: cpumask_and(sd_span, cpu_map, tl->mask(tl, cpu));
ldr r3, [r9] @ MEM[(const struct cpumask * (*<T2127>) (struct sched_domain_topology_level *, int) *)tl_317], MEM[(const struct cpumask * (*<T2127>) (struct sched_domain_topology_level *, int) *)tl_317]
@ kernel/sched/topology.c:1646: u64 now = sched_clock();
strd r0, [sp, #28] @,,
@ kernel/sched/topology.c:1649: cpumask_and(sd_span, cpu_map, tl->mask(tl, cpu));
mov r1, r6 @, i
mov r0, r9 @, ivtmp.1798
@ ./include/linux/bitmap.h:329: return (*dst = *src1 & *src2 & BITMAP_LAST_WORD_MASK(nbits)) != 0;
mov r4, fp @ tmp740, sd
@ kernel/sched/topology.c:1649: cpumask_and(sd_span, cpu_map, tl->mask(tl, cpu));
blx r3 @ MEM[(const struct cpumask * (*<T2127>) (struct sched_domain_topology_level *, int) *)tl_317]
@ ./include/linux/bitmap.h:329: return (*dst = *src1 & *src2 & BITMAP_LAST_WORD_MASK(nbits)) != 0;
ldr r3, [r0] @ MEM[(const long unsigned int *)_356], MEM[(const long unsigned int *)_356]
ldr r0, [r7] @ MEM[(const long unsigned int *)cpu_map_104(D)], MEM[(const long unsigned int *)cpu_map_104(D)]
and r0, r0, r3 @ tmp736, MEM[(const long unsigned int *)cpu_map_104(D)], MEM[(const long unsigned int *)_356]
@ ./include/linux/bitmap.h:329: return (*dst = *src1 & *src2 & BITMAP_LAST_WORD_MASK(nbits)) != 0;
uxth r0, r0 @ _360, tmp736
@ ./include/linux/bitmap.h:329: return (*dst = *src1 & *src2 & BITMAP_LAST_WORD_MASK(nbits)) != 0;
str r0, [r4, #292]! @ _360, MEM[(long unsigned int *)sd_352 + 292B]
---
*sd assignment looks as follows in my disassembly:
.L1867:
@ kernel/sched/topology.c:1660: *sd = (struct sched_domain){
ldr ip, [sp, #48] @ tmp1203, %sfp
mov r2, #296 @,
mov r0, fp @, sd
mov r1, #0 @,
ldr r3, [ip] @ jiffies.324_453, jiffies
str r3, [sp, #36] @ jiffies.324_453, %sfp
ldr ip, [ip] @ jiffies.326_454, jiffies
@ kernel/sched/topology.c:1693: .name = tl->name,
ldr r3, [r9, #28] @ _455, MEM[(char * *)tl_317 + 28B]
str r3, [sp, #16] @ _455, %sfp
@ kernel/sched/topology.c:1660: *sd = (struct sched_domain){
str ip, [sp, #8] @ jiffies.326_454, %sfp
bl memset @
ldr r3, [sp, #36] @ jiffies.324_453, %sfp
ldr r2, [sp, #28] @ now, %sfp
str r3, [fp, #48] @ jiffies.324_453, sd_352->last_balance
ldr r3, [sp, #16] @ _455, %sfp
ldr ip, [sp, #8] @ jiffies.326_454, %sfp
str r2, [fp, #72] @ now, sd_352->newidle_stamp
str r3, [fp, #272] @ _455, sd_352->name
mov r3, #16 @ tmp1502,
ldr r2, [sp, #32] @ now, %sfp
str r3, [fp, #20] @ tmp1502, sd_352->busy_factor
@ kernel/sched/topology.c:1678: | sd_flags
orr r3, r4, #4096 @ _452, sd_flags,
@ kernel/sched/topology.c:1696: WARN_ONCE((sd->flags & (SD_SHARE_CPUCAPACITY | SD_ASYM_CPUCAPACITY)) ==
and r4, r4, #160 @ tmp779, sd_flags,
@ kernel/sched/topology.c:1678: | sd_flags
orr r3, r3, #23 @ _452, _452,
@ kernel/sched/topology.c:1660: *sd = (struct sched_domain){
str r2, [fp, #76] @ now, sd_352->newidle_stamp
@ kernel/sched/topology.c:1696: WARN_ONCE((sd->flags & (SD_SHARE_CPUCAPACITY | SD_ASYM_CPUCAPACITY)) ==
cmp r4, #160 @ tmp779,
@ kernel/sched/topology.c:1660: *sd = (struct sched_domain){
mov r2, #512 @ tmp776,
str ip, [fp, #88] @ jiffies.326_454, sd_352->last_decay_max_lb_cost
str r2, [fp, #60] @ tmp776, sd_352->newidle_call
str r2, [fp, #68] @ tmp776, sd_352->newidle_ratio
@ kernel/sched/topology.c:1662: .max_interval = 2*sd_weight,
lsl r2, r10, #1 @ tmp773, _484,
@ kernel/sched/topology.c:1660: *sd = (struct sched_domain){
str r5, [fp, #4] @ sd, sd_352->child
str r2, [fp, #16] @ tmp773, sd_352->max_interval
mov r2, #117 @ tmp775,
str r10, [fp, #12] @ _484, sd_352->min_interval
str r2, [fp, #24] @ tmp775, sd_352->imbalance_pct
mov r2, #256 @ tmp777,
str r10, [fp, #52] @ _484, sd_352->balance_interval
str r3, [fp, #40] @ _452, sd_352->flags
str r2, [fp, #64] @ tmp777, sd_352->newidle_success
---
If I add the new cpumask_and() I get the following after *sd assignment:
@ kernel/sched/topology.c:1696: cpumask_and(sd_span, cpu_map, tl->mask(tl, cpu));
ldr r3, [r9] @ MEM[(const struct cpumask * (*<T2127>) (struct sched_domain_topology_level *, int) *)tl_317], MEM[(const struct cpumask * (*<T2127>) (struct sched_domain_topology_level *, int) *)tl_317]
blx r3 @ MEM[(const struct cpumask * (*<T2127>) (struct sched_domain_topology_level *, int) *)tl_317]
@ ./include/linux/bitmap.h:329: return (*dst = *src1 & *src2 & BITMAP_LAST_WORD_MASK(nbits)) != 0;
ldr r3, [r7] @ MEM[(const long unsigned int *)cpu_map_104(D)], MEM[(const long unsigned int *)cpu_map_104(D)]
ldr r2, [r0] @ MEM[(const long unsigned int *)_457], MEM[(const long unsigned int *)_457]
and r3, r3, r2 @ tmp788, MEM[(const long unsigned int *)cpu_map_104(D)], MEM[(const long unsigned int *)_457]
@ ./include/linux/bitmap.h:329: return (*dst = *src1 & *src2 & BITMAP_LAST_WORD_MASK(nbits)) != 0;
uxth r3, r3 @ tmp791, tmp788
@ ./include/linux/bitmap.h:329: return (*dst = *src1 & *src2 & BITMAP_LAST_WORD_MASK(nbits)) != 0;
str r3, [fp, #292] @ tmp791, MEM[(long unsigned int *)sd_352 + 292B]
---
Both cpumask_and() seems to store to:
MEM[(long unsigned int *)sd_352 + 292B]
So I'm at a loss why this happens. Let me dig little more.
--
Thanks and Regards,
Prateek
next prev parent reply other threads:[~2026-03-21 8:59 UTC|newest]
Thread overview: 56+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-03-12 4:44 [PATCH v4 0/9] sched/topology: Optimize sd->shared allocation K Prateek Nayak
2026-03-12 4:44 ` [PATCH v4 1/9] sched/topology: Compute sd_weight considering cpuset partitions K Prateek Nayak
2026-03-12 9:34 ` Peter Zijlstra
2026-03-12 9:59 ` K Prateek Nayak
2026-03-12 10:01 ` Peter Zijlstra
2026-03-12 10:09 ` K Prateek Nayak
2026-03-18 8:08 ` [tip: sched/core] " tip-bot2 for K Prateek Nayak
2026-03-20 23:58 ` Nathan Chancellor
2026-03-21 3:36 ` K Prateek Nayak
2026-03-21 7:33 ` Chen, Yu C
2026-03-21 7:47 ` Chen, Yu C
2026-03-21 8:59 ` K Prateek Nayak [this message]
2026-03-21 9:45 ` K Prateek Nayak
2026-03-21 10:13 ` K Prateek Nayak
2026-03-21 12:48 ` Chen, Yu C
2026-03-24 2:54 ` K Prateek Nayak
2026-03-21 14:13 ` Shrikanth Hegde
2026-03-21 15:14 ` K Prateek Nayak
2026-03-21 16:38 ` [PATCH] sched/topology: Initialize sd_span after assignment to *sd K Prateek Nayak
2026-03-23 9:08 ` Shrikanth Hegde
2026-03-23 17:34 ` K Prateek Nayak
2026-03-23 9:36 ` Peter Zijlstra
2026-03-23 13:24 ` Jon Hunter
2026-03-23 15:36 ` Chen, Yu C
2026-03-23 17:24 ` K Prateek Nayak
2026-03-23 22:41 ` Nathan Chancellor
2026-03-24 9:10 ` [tip: sched/core] sched/topology: Fix sched_domain_span() tip-bot2 for Peter Zijlstra
2026-03-12 4:44 ` [PATCH v4 2/9] sched/topology: Extract "imb_numa_nr" calculation into a separate helper K Prateek Nayak
2026-03-12 13:37 ` kernel test robot
2026-03-12 15:42 ` K Prateek Nayak
2026-03-12 16:02 ` Peter Zijlstra
2026-03-16 0:18 ` Dietmar Eggemann
2026-03-16 3:41 ` K Prateek Nayak
2026-03-16 8:24 ` Dietmar Eggemann
2026-03-16 8:50 ` K Prateek Nayak
2026-03-18 8:08 ` [tip: sched/core] " tip-bot2 for K Prateek Nayak
2026-03-12 4:44 ` [PATCH v4 3/9] sched/topology: Allocate per-CPU sched_domain_shared in s_data K Prateek Nayak
2026-03-18 8:08 ` [tip: sched/core] " tip-bot2 for K Prateek Nayak
2026-03-12 4:44 ` [PATCH v4 4/9] sched/topology: Switch to assigning "sd->shared" from s_data K Prateek Nayak
2026-03-18 8:08 ` [tip: sched/core] " tip-bot2 for K Prateek Nayak
2026-03-12 4:44 ` [PATCH v4 5/9] sched/topology: Remove sched_domain_shared allocation with sd_data K Prateek Nayak
2026-03-18 8:08 ` [tip: sched/core] " tip-bot2 for K Prateek Nayak
2026-03-12 4:44 ` [PATCH v4 6/9] sched/core: Check for rcu_read_lock_any_held() in idle_get_state() K Prateek Nayak
2026-03-12 9:46 ` Peter Zijlstra
2026-03-12 10:06 ` K Prateek Nayak
2026-03-18 8:08 ` [tip: sched/core] " tip-bot2 for K Prateek Nayak
2026-03-12 4:44 ` [PATCH v4 7/9] sched/fair: Remove superfluous rcu_read_lock() in the wakeup path K Prateek Nayak
2026-03-15 23:36 ` Dietmar Eggemann
2026-03-16 3:19 ` K Prateek Nayak
2026-03-18 8:08 ` [tip: sched/core] PM: EM: Switch to rcu_dereference_all() in " tip-bot2 for Dietmar Eggemann
2026-03-18 8:08 ` [tip: sched/core] sched/fair: Remove superfluous rcu_read_lock() in the " tip-bot2 for K Prateek Nayak
2026-03-12 4:44 ` [PATCH v4 8/9] sched/fair: Simplify the entry condition for update_idle_cpu_scan() K Prateek Nayak
2026-03-18 8:08 ` [tip: sched/core] " tip-bot2 for K Prateek Nayak
2026-03-12 4:44 ` [PATCH v4 9/9] sched/fair: Simplify SIS_UTIL handling in select_idle_cpu() K Prateek Nayak
2026-03-18 8:08 ` [tip: sched/core] " tip-bot2 for K Prateek Nayak
2026-03-16 0:22 ` [PATCH v4 0/9] sched/topology: Optimize sd->shared allocation Dietmar Eggemann
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=7fad91ea-e6cd-43c8-abe3-16d7843247ed@amd.com \
--to=kprateek.nayak@amd.com \
--cc=dietmar.eggemann@arm.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-tip-commits@vger.kernel.org \
--cc=nathan@kernel.org \
--cc=peterz@infradead.org \
--cc=sshegde@linux.ibm.com \
--cc=vschneid@redhat.com \
--cc=x86@kernel.org \
--cc=yu.c.chen@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.