public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: K Prateek Nayak <kprateek.nayak@amd.com>
To: "Chen, Yu C" <yu.c.chen@intel.com>
Cc: <linux-tip-commits@vger.kernel.org>,
	<linux-kernel@vger.kernel.org>,
	Shrikanth Hegde <sshegde@linux.ibm.com>,
	Valentin Schneider <vschneid@redhat.com>,
	Dietmar Eggemann <dietmar.eggemann@arm.com>, <x86@kernel.org>,
	Nathan Chancellor <nathan@kernel.org>,
	Peter Zijlstra <peterz@infradead.org>
Subject: Re: [tip: sched/core] sched/topology: Compute sd_weight considering cpuset partitions
Date: Sat, 21 Mar 2026 14:29:05 +0530	[thread overview]
Message-ID: <7fad91ea-e6cd-43c8-abe3-16d7843247ed@amd.com> (raw)
In-Reply-To: <470ce693-ee2e-414e-930b-d6581d649110@intel.com>

Hello Chenyu,

On 3/21/2026 1:17 PM, Chen, Yu C wrote:
> On 3/21/2026 3:33 PM, Chen, Yu C wrote:
>> On 3/21/2026 11:36 AM, K Prateek Nayak wrote:
>>>    sd->span_weight = cpumask_weight(sched_domain_span(sd));
>>>
>>> which should have crashed too if we had a NULL pointer in the
>>> cpumask range. So I'm at a loss. Maybe the pc points to a
>>> different location in your build?
>>>
>>
>> A wild guess, the major change is that we access sd->span, before
>> initializing  the sd structure with *sd = { ... }. The sd is allocated
>> via alloc_percpu() uninitialized, the span at the end of the sd structure
>> remain uninitialized. It is unclear how cpumask_weight(sd->span) might be
>> affected by this uninitialized state. Before this patch, after *sd = { ... }
>> is executed, the contents of  sd->span are explicitly set to 0, which might
>> be safer?
>>
> 
> I replied too fast, please ignore above comments, the sd->span should have been
> set via cpumask_and(sd_span, cpu_map, tl->mask(tl, cpu))

So I managed to reproduce the crash and it is actually crashing at:

  last->next = first;

in build_sched_groups(). If I print the span before and after we do
the *sd = { ... }, I see:

  [    0.056301] span before: 0
  [    0.056559] span after:
  [    0.056686] span double check:

double check does a cpumask_pr_args(sched_domain_span(sd)).
This solves the crash on top of this patch:

diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 79bab80af8f2..b347ae5d2786 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -1693,6 +1693,8 @@ sd_init(struct sched_domain_topology_level *tl,
 		.name			= tl->name,
 	};
 
+	cpumask_and(sd_span, cpu_map, tl->mask(tl, cpu));
+
 	WARN_ONCE((sd->flags & (SD_SHARE_CPUCAPACITY | SD_ASYM_CPUCAPACITY)) ==
 		  (SD_SHARE_CPUCAPACITY | SD_ASYM_CPUCAPACITY),
 		  "CPU capacity asymmetry not supported on SMT\n");
---

And I see:

  [    0.056479] span before: 0
  [    0.056749] span after: 0
  [    0.056881] span double check: 0


But since span[] is a flexible array member at the end of struct sched_domain,
doing a *sd = { ... } shouldn't modify it since the size isn't known at
compile time and the compiler will only overwrite the fixed fields.

Is there a compiler angle I'm missing here?

The cpumask_and() that comes first looks like:

@ kernel/sched/topology.c:1649:         cpumask_and(sd_span, cpu_map, tl->mask(tl, cpu));
        ldr     r3, [r9]        @ MEM[(const struct cpumask * (*<T2127>) (struct sched_domain_topology_level *, int) *)tl_317], MEM[(const struct cpumask * (*<T2127>) (struct sched_domain_topology_level *, int) *)tl_317]
@ kernel/sched/topology.c:1646:         u64 now = sched_clock();
        strd    r0, [sp, #28]   @,,
@ kernel/sched/topology.c:1649:         cpumask_and(sd_span, cpu_map, tl->mask(tl, cpu));
        mov     r1, r6  @, i
        mov     r0, r9  @, ivtmp.1798
@ ./include/linux/bitmap.h:329:                 return (*dst = *src1 & *src2 & BITMAP_LAST_WORD_MASK(nbits)) != 0;
        mov     r4, fp  @ tmp740, sd
@ kernel/sched/topology.c:1649:         cpumask_and(sd_span, cpu_map, tl->mask(tl, cpu));
        blx     r3              @ MEM[(const struct cpumask * (*<T2127>) (struct sched_domain_topology_level *, int) *)tl_317]
@ ./include/linux/bitmap.h:329:                 return (*dst = *src1 & *src2 & BITMAP_LAST_WORD_MASK(nbits)) != 0;
        ldr     r3, [r0]        @ MEM[(const long unsigned int *)_356], MEM[(const long unsigned int *)_356]
        ldr     r0, [r7]        @ MEM[(const long unsigned int *)cpu_map_104(D)], MEM[(const long unsigned int *)cpu_map_104(D)]
        and     r0, r0, r3      @ tmp736, MEM[(const long unsigned int *)cpu_map_104(D)], MEM[(const long unsigned int *)_356]
@ ./include/linux/bitmap.h:329:                 return (*dst = *src1 & *src2 & BITMAP_LAST_WORD_MASK(nbits)) != 0;
        uxth    r0, r0  @ _360, tmp736
@ ./include/linux/bitmap.h:329:                 return (*dst = *src1 & *src2 & BITMAP_LAST_WORD_MASK(nbits)) != 0;
        str     r0, [r4, #292]! @ _360, MEM[(long unsigned int *)sd_352 + 292B]
---

*sd assignment looks as follows in my disassembly:

.L1867:
@ kernel/sched/topology.c:1660:         *sd = (struct sched_domain){
        ldr     ip, [sp, #48]   @ tmp1203, %sfp
        mov     r2, #296        @,
        mov     r0, fp  @, sd
        mov     r1, #0  @,
        ldr     r3, [ip]        @ jiffies.324_453, jiffies
        str     r3, [sp, #36]   @ jiffies.324_453, %sfp
        ldr     ip, [ip]        @ jiffies.326_454, jiffies
@ kernel/sched/topology.c:1693:                 .name                   = tl->name,
        ldr     r3, [r9, #28]   @ _455, MEM[(char * *)tl_317 + 28B]
        str     r3, [sp, #16]   @ _455, %sfp
@ kernel/sched/topology.c:1660:         *sd = (struct sched_domain){
        str     ip, [sp, #8]    @ jiffies.326_454, %sfp
        bl      memset          @
        ldr     r3, [sp, #36]   @ jiffies.324_453, %sfp
        ldr     r2, [sp, #28]   @ now, %sfp
        str     r3, [fp, #48]   @ jiffies.324_453, sd_352->last_balance
        ldr     r3, [sp, #16]   @ _455, %sfp
        ldr     ip, [sp, #8]    @ jiffies.326_454, %sfp
        str     r2, [fp, #72]   @ now, sd_352->newidle_stamp
        str     r3, [fp, #272]  @ _455, sd_352->name
        mov     r3, #16 @ tmp1502,
        ldr     r2, [sp, #32]   @ now, %sfp
        str     r3, [fp, #20]   @ tmp1502, sd_352->busy_factor
@ kernel/sched/topology.c:1678:                                         | sd_flags
        orr     r3, r4, #4096   @ _452, sd_flags,
@ kernel/sched/topology.c:1696:         WARN_ONCE((sd->flags & (SD_SHARE_CPUCAPACITY | SD_ASYM_CPUCAPACITY)) ==
        and     r4, r4, #160    @ tmp779, sd_flags,
@ kernel/sched/topology.c:1678:                                         | sd_flags
        orr     r3, r3, #23     @ _452, _452,
@ kernel/sched/topology.c:1660:         *sd = (struct sched_domain){
        str     r2, [fp, #76]   @ now, sd_352->newidle_stamp
@ kernel/sched/topology.c:1696:         WARN_ONCE((sd->flags & (SD_SHARE_CPUCAPACITY | SD_ASYM_CPUCAPACITY)) ==
        cmp     r4, #160        @ tmp779,
@ kernel/sched/topology.c:1660:         *sd = (struct sched_domain){
        mov     r2, #512        @ tmp776,
        str     ip, [fp, #88]   @ jiffies.326_454, sd_352->last_decay_max_lb_cost
        str     r2, [fp, #60]   @ tmp776, sd_352->newidle_call
        str     r2, [fp, #68]   @ tmp776, sd_352->newidle_ratio
@ kernel/sched/topology.c:1662:                 .max_interval           = 2*sd_weight,
        lsl     r2, r10, #1     @ tmp773, _484,
@ kernel/sched/topology.c:1660:         *sd = (struct sched_domain){
        str     r5, [fp, #4]    @ sd, sd_352->child
        str     r2, [fp, #16]   @ tmp773, sd_352->max_interval
        mov     r2, #117        @ tmp775,
        str     r10, [fp, #12]  @ _484, sd_352->min_interval
        str     r2, [fp, #24]   @ tmp775, sd_352->imbalance_pct
        mov     r2, #256        @ tmp777,
        str     r10, [fp, #52]  @ _484, sd_352->balance_interval
        str     r3, [fp, #40]   @ _452, sd_352->flags
        str     r2, [fp, #64]   @ tmp777, sd_352->newidle_success
---


If I add the new cpumask_and() I get the following after *sd assignment:

@ kernel/sched/topology.c:1696:         cpumask_and(sd_span, cpu_map, tl->mask(tl, cpu));
        ldr     r3, [r9]        @ MEM[(const struct cpumask * (*<T2127>) (struct sched_domain_topology_level *, int) *)tl_317], MEM[(const struct cpumask * (*<T2127>) (struct sched_domain_topology_level *, int) *)tl_317]
        blx     r3              @ MEM[(const struct cpumask * (*<T2127>) (struct sched_domain_topology_level *, int) *)tl_317]
@ ./include/linux/bitmap.h:329:                 return (*dst = *src1 & *src2 & BITMAP_LAST_WORD_MASK(nbits)) != 0;
        ldr     r3, [r7]        @ MEM[(const long unsigned int *)cpu_map_104(D)], MEM[(const long unsigned int *)cpu_map_104(D)]
        ldr     r2, [r0]        @ MEM[(const long unsigned int *)_457], MEM[(const long unsigned int *)_457]
        and     r3, r3, r2      @ tmp788, MEM[(const long unsigned int *)cpu_map_104(D)], MEM[(const long unsigned int *)_457]
@ ./include/linux/bitmap.h:329:                 return (*dst = *src1 & *src2 & BITMAP_LAST_WORD_MASK(nbits)) != 0;
        uxth    r3, r3  @ tmp791, tmp788
@ ./include/linux/bitmap.h:329:                 return (*dst = *src1 & *src2 & BITMAP_LAST_WORD_MASK(nbits)) != 0;
        str     r3, [fp, #292]  @ tmp791, MEM[(long unsigned int *)sd_352 + 292B]
---


Both cpumask_and() calls seem to store to:

  MEM[(long unsigned int *)sd_352 + 292B]

So I'm at a loss why this happens. Let me dig a little more.

-- 
Thanks and Regards,
Prateek


  reply	other threads:[~2026-03-21  8:59 UTC|newest]

Thread overview: 56+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-03-12  4:44 [PATCH v4 0/9] sched/topology: Optimize sd->shared allocation K Prateek Nayak
2026-03-12  4:44 ` [PATCH v4 1/9] sched/topology: Compute sd_weight considering cpuset partitions K Prateek Nayak
2026-03-12  9:34   ` Peter Zijlstra
2026-03-12  9:59     ` K Prateek Nayak
2026-03-12 10:01       ` Peter Zijlstra
2026-03-12 10:09         ` K Prateek Nayak
2026-03-18  8:08   ` [tip: sched/core] " tip-bot2 for K Prateek Nayak
2026-03-20 23:58     ` Nathan Chancellor
2026-03-21  3:36       ` K Prateek Nayak
2026-03-21  7:33         ` Chen, Yu C
2026-03-21  7:47           ` Chen, Yu C
2026-03-21  8:59             ` K Prateek Nayak [this message]
2026-03-21  9:45               ` K Prateek Nayak
2026-03-21 10:13                 ` K Prateek Nayak
2026-03-21 12:48                   ` Chen, Yu C
2026-03-24  2:54                     ` K Prateek Nayak
2026-03-21 14:13                   ` Shrikanth Hegde
2026-03-21 15:14                     ` K Prateek Nayak
2026-03-21 16:38       ` [PATCH] sched/topology: Initialize sd_span after assignment to *sd K Prateek Nayak
2026-03-23  9:08         ` Shrikanth Hegde
2026-03-23 17:34           ` K Prateek Nayak
2026-03-23  9:36         ` Peter Zijlstra
2026-03-23 13:24           ` Jon Hunter
2026-03-23 15:36           ` Chen, Yu C
2026-03-23 17:24           ` K Prateek Nayak
2026-03-23 22:41           ` Nathan Chancellor
2026-03-24  9:10           ` [tip: sched/core] sched/topology: Fix sched_domain_span() tip-bot2 for Peter Zijlstra
2026-03-12  4:44 ` [PATCH v4 2/9] sched/topology: Extract "imb_numa_nr" calculation into a separate helper K Prateek Nayak
2026-03-12 13:37   ` kernel test robot
2026-03-12 15:42     ` K Prateek Nayak
2026-03-12 16:02       ` Peter Zijlstra
2026-03-16  0:18   ` Dietmar Eggemann
2026-03-16  3:41     ` K Prateek Nayak
2026-03-16  8:24       ` Dietmar Eggemann
2026-03-16  8:50         ` K Prateek Nayak
2026-03-18  8:08   ` [tip: sched/core] " tip-bot2 for K Prateek Nayak
2026-03-12  4:44 ` [PATCH v4 3/9] sched/topology: Allocate per-CPU sched_domain_shared in s_data K Prateek Nayak
2026-03-18  8:08   ` [tip: sched/core] " tip-bot2 for K Prateek Nayak
2026-03-12  4:44 ` [PATCH v4 4/9] sched/topology: Switch to assigning "sd->shared" from s_data K Prateek Nayak
2026-03-18  8:08   ` [tip: sched/core] " tip-bot2 for K Prateek Nayak
2026-03-12  4:44 ` [PATCH v4 5/9] sched/topology: Remove sched_domain_shared allocation with sd_data K Prateek Nayak
2026-03-18  8:08   ` [tip: sched/core] " tip-bot2 for K Prateek Nayak
2026-03-12  4:44 ` [PATCH v4 6/9] sched/core: Check for rcu_read_lock_any_held() in idle_get_state() K Prateek Nayak
2026-03-12  9:46   ` Peter Zijlstra
2026-03-12 10:06     ` K Prateek Nayak
2026-03-18  8:08   ` [tip: sched/core] " tip-bot2 for K Prateek Nayak
2026-03-12  4:44 ` [PATCH v4 7/9] sched/fair: Remove superfluous rcu_read_lock() in the wakeup path K Prateek Nayak
2026-03-15 23:36   ` Dietmar Eggemann
2026-03-16  3:19     ` K Prateek Nayak
2026-03-18  8:08     ` [tip: sched/core] PM: EM: Switch to rcu_dereference_all() in " tip-bot2 for Dietmar Eggemann
2026-03-18  8:08   ` [tip: sched/core] sched/fair: Remove superfluous rcu_read_lock() in the " tip-bot2 for K Prateek Nayak
2026-03-12  4:44 ` [PATCH v4 8/9] sched/fair: Simplify the entry condition for update_idle_cpu_scan() K Prateek Nayak
2026-03-18  8:08   ` [tip: sched/core] " tip-bot2 for K Prateek Nayak
2026-03-12  4:44 ` [PATCH v4 9/9] sched/fair: Simplify SIS_UTIL handling in select_idle_cpu() K Prateek Nayak
2026-03-18  8:08   ` [tip: sched/core] " tip-bot2 for K Prateek Nayak
2026-03-16  0:22 ` [PATCH v4 0/9] sched/topology: Optimize sd->shared allocation Dietmar Eggemann

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=7fad91ea-e6cd-43c8-abe3-16d7843247ed@amd.com \
    --to=kprateek.nayak@amd.com \
    --cc=dietmar.eggemann@arm.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-tip-commits@vger.kernel.org \
    --cc=nathan@kernel.org \
    --cc=peterz@infradead.org \
    --cc=sshegde@linux.ibm.com \
    --cc=vschneid@redhat.com \
    --cc=x86@kernel.org \
    --cc=yu.c.chen@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox