All of lore.kernel.org
 help / color / mirror / Atom feed
From: K Prateek Nayak <kprateek.nayak@amd.com>
To: "Chen, Yu C" <yu.c.chen@intel.com>
Cc: <linux-tip-commits@vger.kernel.org>,
	<linux-kernel@vger.kernel.org>,
	Shrikanth Hegde <sshegde@linux.ibm.com>,
	Valentin Schneider <vschneid@redhat.com>,
	Dietmar Eggemann <dietmar.eggemann@arm.com>, <x86@kernel.org>,
	Nathan Chancellor <nathan@kernel.org>,
	Peter Zijlstra <peterz@infradead.org>
Subject: Re: [tip: sched/core] sched/topology: Compute sd_weight considering cpuset partitions
Date: Sat, 21 Mar 2026 14:29:05 +0530	[thread overview]
Message-ID: <7fad91ea-e6cd-43c8-abe3-16d7843247ed@amd.com> (raw)
In-Reply-To: <470ce693-ee2e-414e-930b-d6581d649110@intel.com>

Hello Chenyu,

On 3/21/2026 1:17 PM, Chen, Yu C wrote:
> On 3/21/2026 3:33 PM, Chen, Yu C wrote:
>> On 3/21/2026 11:36 AM, K Prateek Nayak wrote:
>>>    sd->span_weight = cpumask_weight(sched_domain_span(sd));
>>>
>>> which should have crashed too if we had a NULL pointer in the
>>> cpumask range. So I'm at a loss. Maybe the pc points to a
>>> different location in your build?
>>>
>>
>> A wild guess, the major change is that we access sd->span, before
>> initializing  the sd structure with *sd = { ... }. The sd is allocated
>> via alloc_percpu() uninitialized, the span at the end of the sd structure
>> remain uninitialized. It is unclear how cpumask_weight(sd->span) might be
>> affected by this uninitialized state. Before this patch, after *sd = { ... }
>> is executed, the contents of  sd->span are explicitly set to 0, which might
>> be safer?
>>
> 
> I replied too fast, please ignore above comments, the sd->span should have been
> set via cpumask_and(sd_span, cpu_map, tl->mask(tl, cpu))

So I managed to reproduce the crash and it is actually crashing at:

  last->next = first;

in build_sched_groups(). If I print the span befora nd after we do
the *sd = { ... }, I see:

  [    0.056301] span before: 0
  [    0.056559] span after:
  [    0.056686] span double check:

double check does a cpumask_pr_args(sched_domain_span(sd)).
This solves the crash on top of this patch:

diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 79bab80af8f2..b347ae5d2786 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -1693,6 +1693,8 @@ sd_init(struct sched_domain_topology_level *tl,
 		.name			= tl->name,
 	};
 
+	cpumask_and(sd_span, cpu_map, tl->mask(tl, cpu));
+
 	WARN_ONCE((sd->flags & (SD_SHARE_CPUCAPACITY | SD_ASYM_CPUCAPACITY)) ==
 		  (SD_SHARE_CPUCAPACITY | SD_ASYM_CPUCAPACITY),
 		  "CPU capacity asymmetry not supported on SMT\n");
---

And I see:

  [    0.056479] span before: 0
  [    0.056749] span after: 0
  [    0.056881] span double check: 0


But since span[] is a variable array at the end of sched_domain struct,
doing a *sd = { ... } shouldn't modify it since the size isn't known at
compile time and the compiler will only overwrite the fixed fields.

Is there a compiler angle I'm missing here?

The cpumask_and() that comes first looks like:

@ kernel/sched/topology.c:1649:         cpumask_and(sd_span, cpu_map, tl->mask(tl, cpu));
        ldr     r3, [r9]        @ MEM[(const struct cpumask * (*<T2127>) (struct sched_domain_topology_level *, int) *)tl_317], MEM[(const struct cpumask * (*<T2127>) (struct sched_domain_topology_level *, int) *)tl_317]
@ kernel/sched/topology.c:1646:         u64 now = sched_clock();
        strd    r0, [sp, #28]   @,,
@ kernel/sched/topology.c:1649:         cpumask_and(sd_span, cpu_map, tl->mask(tl, cpu));
        mov     r1, r6  @, i
        mov     r0, r9  @, ivtmp.1798
@ ./include/linux/bitmap.h:329:                 return (*dst = *src1 & *src2 & BITMAP_LAST_WORD_MASK(nbits)) != 0;
        mov     r4, fp  @ tmp740, sd
@ kernel/sched/topology.c:1649:         cpumask_and(sd_span, cpu_map, tl->mask(tl, cpu));
        blx     r3              @ MEM[(const struct cpumask * (*<T2127>) (struct sched_domain_topology_level *, int) *)tl_317]
@ ./include/linux/bitmap.h:329:                 return (*dst = *src1 & *src2 & BITMAP_LAST_WORD_MASK(nbits)) != 0;
        ldr     r3, [r0]        @ MEM[(const long unsigned int *)_356], MEM[(const long unsigned int *)_356]
        ldr     r0, [r7]        @ MEM[(const long unsigned int *)cpu_map_104(D)], MEM[(const long unsigned int *)cpu_map_104(D)]
        and     r0, r0, r3      @ tmp736, MEM[(const long unsigned int *)cpu_map_104(D)], MEM[(const long unsigned int *)_356]
@ ./include/linux/bitmap.h:329:                 return (*dst = *src1 & *src2 & BITMAP_LAST_WORD_MASK(nbits)) != 0;
        uxth    r0, r0  @ _360, tmp736
@ ./include/linux/bitmap.h:329:                 return (*dst = *src1 & *src2 & BITMAP_LAST_WORD_MASK(nbits)) != 0;
        str     r0, [r4, #292]! @ _360, MEM[(long unsigned int *)sd_352 + 292B]
---

*sd assignment looks as follows in my disassembly:

.L1867:
@ kernel/sched/topology.c:1660:         *sd = (struct sched_domain){
        ldr     ip, [sp, #48]   @ tmp1203, %sfp
        mov     r2, #296        @,
        mov     r0, fp  @, sd
        mov     r1, #0  @,
        ldr     r3, [ip]        @ jiffies.324_453, jiffies
        str     r3, [sp, #36]   @ jiffies.324_453, %sfp
        ldr     ip, [ip]        @ jiffies.326_454, jiffies
@ kernel/sched/topology.c:1693:                 .name                   = tl->name,
        ldr     r3, [r9, #28]   @ _455, MEM[(char * *)tl_317 + 28B]
        str     r3, [sp, #16]   @ _455, %sfp
@ kernel/sched/topology.c:1660:         *sd = (struct sched_domain){
        str     ip, [sp, #8]    @ jiffies.326_454, %sfp
        bl      memset          @
        ldr     r3, [sp, #36]   @ jiffies.324_453, %sfp
        ldr     r2, [sp, #28]   @ now, %sfp
        str     r3, [fp, #48]   @ jiffies.324_453, sd_352->last_balance
        ldr     r3, [sp, #16]   @ _455, %sfp
        ldr     ip, [sp, #8]    @ jiffies.326_454, %sfp
        str     r2, [fp, #72]   @ now, sd_352->newidle_stamp
        str     r3, [fp, #272]  @ _455, sd_352->name
        mov     r3, #16 @ tmp1502,
        ldr     r2, [sp, #32]   @ now, %sfp
        str     r3, [fp, #20]   @ tmp1502, sd_352->busy_factor
@ kernel/sched/topology.c:1678:                                         | sd_flags
        orr     r3, r4, #4096   @ _452, sd_flags,
@ kernel/sched/topology.c:1696:         WARN_ONCE((sd->flags & (SD_SHARE_CPUCAPACITY | SD_ASYM_CPUCAPACITY)) ==
        and     r4, r4, #160    @ tmp779, sd_flags,
@ kernel/sched/topology.c:1678:                                         | sd_flags
        orr     r3, r3, #23     @ _452, _452,
@ kernel/sched/topology.c:1660:         *sd = (struct sched_domain){
        str     r2, [fp, #76]   @ now, sd_352->newidle_stamp
@ kernel/sched/topology.c:1696:         WARN_ONCE((sd->flags & (SD_SHARE_CPUCAPACITY | SD_ASYM_CPUCAPACITY)) ==
        cmp     r4, #160        @ tmp779,
@ kernel/sched/topology.c:1660:         *sd = (struct sched_domain){
        mov     r2, #512        @ tmp776,
        str     ip, [fp, #88]   @ jiffies.326_454, sd_352->last_decay_max_lb_cost
        str     r2, [fp, #60]   @ tmp776, sd_352->newidle_call
        str     r2, [fp, #68]   @ tmp776, sd_352->newidle_ratio
@ kernel/sched/topology.c:1662:                 .max_interval           = 2*sd_weight,
        lsl     r2, r10, #1     @ tmp773, _484,
@ kernel/sched/topology.c:1660:         *sd = (struct sched_domain){
        str     r5, [fp, #4]    @ sd, sd_352->child
        str     r2, [fp, #16]   @ tmp773, sd_352->max_interval
        mov     r2, #117        @ tmp775,
        str     r10, [fp, #12]  @ _484, sd_352->min_interval
        str     r2, [fp, #24]   @ tmp775, sd_352->imbalance_pct
        mov     r2, #256        @ tmp777,
        str     r10, [fp, #52]  @ _484, sd_352->balance_interval
        str     r3, [fp, #40]   @ _452, sd_352->flags
        str     r2, [fp, #64]   @ tmp777, sd_352->newidle_success
---


If I add the new cpumask_and() I get the following after *sd assignment:

@ kernel/sched/topology.c:1696:         cpumask_and(sd_span, cpu_map, tl->mask(tl, cpu));
        ldr     r3, [r9]        @ MEM[(const struct cpumask * (*<T2127>) (struct sched_domain_topology_level *, int) *)tl_317], MEM[(const struct cpumask * (*<T2127>) (struct sched_domain_topology_level *, int) *)tl_317]
        blx     r3              @ MEM[(const struct cpumask * (*<T2127>) (struct sched_domain_topology_level *, int) *)tl_317]
@ ./include/linux/bitmap.h:329:                 return (*dst = *src1 & *src2 & BITMAP_LAST_WORD_MASK(nbits)) != 0;
        ldr     r3, [r7]        @ MEM[(const long unsigned int *)cpu_map_104(D)], MEM[(const long unsigned int *)cpu_map_104(D)]
        ldr     r2, [r0]        @ MEM[(const long unsigned int *)_457], MEM[(const long unsigned int *)_457]
        and     r3, r3, r2      @ tmp788, MEM[(const long unsigned int *)cpu_map_104(D)], MEM[(const long unsigned int *)_457]
@ ./include/linux/bitmap.h:329:                 return (*dst = *src1 & *src2 & BITMAP_LAST_WORD_MASK(nbits)) != 0;
        uxth    r3, r3  @ tmp791, tmp788
@ ./include/linux/bitmap.h:329:                 return (*dst = *src1 & *src2 & BITMAP_LAST_WORD_MASK(nbits)) != 0;
        str     r3, [fp, #292]  @ tmp791, MEM[(long unsigned int *)sd_352 + 292B]
---


Both cpumask_and() seems to store to:

  MEM[(long unsigned int *)sd_352 + 292B]

So I'm at a loss why this happens. Let me dig little more.

-- 
Thanks and Regards,
Prateek


  reply	other threads:[~2026-03-21  8:59 UTC|newest]

Thread overview: 56+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-03-12  4:44 [PATCH v4 0/9] sched/topology: Optimize sd->shared allocation K Prateek Nayak
2026-03-12  4:44 ` [PATCH v4 1/9] sched/topology: Compute sd_weight considering cpuset partitions K Prateek Nayak
2026-03-12  9:34   ` Peter Zijlstra
2026-03-12  9:59     ` K Prateek Nayak
2026-03-12 10:01       ` Peter Zijlstra
2026-03-12 10:09         ` K Prateek Nayak
2026-03-18  8:08   ` [tip: sched/core] " tip-bot2 for K Prateek Nayak
2026-03-20 23:58     ` Nathan Chancellor
2026-03-21  3:36       ` K Prateek Nayak
2026-03-21  7:33         ` Chen, Yu C
2026-03-21  7:47           ` Chen, Yu C
2026-03-21  8:59             ` K Prateek Nayak [this message]
2026-03-21  9:45               ` K Prateek Nayak
2026-03-21 10:13                 ` K Prateek Nayak
2026-03-21 12:48                   ` Chen, Yu C
2026-03-24  2:54                     ` K Prateek Nayak
2026-03-21 14:13                   ` Shrikanth Hegde
2026-03-21 15:14                     ` K Prateek Nayak
2026-03-21 16:38       ` [PATCH] sched/topology: Initialize sd_span after assignment to *sd K Prateek Nayak
2026-03-23  9:08         ` Shrikanth Hegde
2026-03-23 17:34           ` K Prateek Nayak
2026-03-23  9:36         ` Peter Zijlstra
2026-03-23 13:24           ` Jon Hunter
2026-03-23 15:36           ` Chen, Yu C
2026-03-23 17:24           ` K Prateek Nayak
2026-03-23 22:41           ` Nathan Chancellor
2026-03-24  9:10           ` [tip: sched/core] sched/topology: Fix sched_domain_span() tip-bot2 for Peter Zijlstra
2026-03-12  4:44 ` [PATCH v4 2/9] sched/topology: Extract "imb_numa_nr" calculation into a separate helper K Prateek Nayak
2026-03-12 13:37   ` kernel test robot
2026-03-12 15:42     ` K Prateek Nayak
2026-03-12 16:02       ` Peter Zijlstra
2026-03-16  0:18   ` Dietmar Eggemann
2026-03-16  3:41     ` K Prateek Nayak
2026-03-16  8:24       ` Dietmar Eggemann
2026-03-16  8:50         ` K Prateek Nayak
2026-03-18  8:08   ` [tip: sched/core] " tip-bot2 for K Prateek Nayak
2026-03-12  4:44 ` [PATCH v4 3/9] sched/topology: Allocate per-CPU sched_domain_shared in s_data K Prateek Nayak
2026-03-18  8:08   ` [tip: sched/core] " tip-bot2 for K Prateek Nayak
2026-03-12  4:44 ` [PATCH v4 4/9] sched/topology: Switch to assigning "sd->shared" from s_data K Prateek Nayak
2026-03-18  8:08   ` [tip: sched/core] " tip-bot2 for K Prateek Nayak
2026-03-12  4:44 ` [PATCH v4 5/9] sched/topology: Remove sched_domain_shared allocation with sd_data K Prateek Nayak
2026-03-18  8:08   ` [tip: sched/core] " tip-bot2 for K Prateek Nayak
2026-03-12  4:44 ` [PATCH v4 6/9] sched/core: Check for rcu_read_lock_any_held() in idle_get_state() K Prateek Nayak
2026-03-12  9:46   ` Peter Zijlstra
2026-03-12 10:06     ` K Prateek Nayak
2026-03-18  8:08   ` [tip: sched/core] " tip-bot2 for K Prateek Nayak
2026-03-12  4:44 ` [PATCH v4 7/9] sched/fair: Remove superfluous rcu_read_lock() in the wakeup path K Prateek Nayak
2026-03-15 23:36   ` Dietmar Eggemann
2026-03-16  3:19     ` K Prateek Nayak
2026-03-18  8:08     ` [tip: sched/core] PM: EM: Switch to rcu_dereference_all() in " tip-bot2 for Dietmar Eggemann
2026-03-18  8:08   ` [tip: sched/core] sched/fair: Remove superfluous rcu_read_lock() in the " tip-bot2 for K Prateek Nayak
2026-03-12  4:44 ` [PATCH v4 8/9] sched/fair: Simplify the entry condition for update_idle_cpu_scan() K Prateek Nayak
2026-03-18  8:08   ` [tip: sched/core] " tip-bot2 for K Prateek Nayak
2026-03-12  4:44 ` [PATCH v4 9/9] sched/fair: Simplify SIS_UTIL handling in select_idle_cpu() K Prateek Nayak
2026-03-18  8:08   ` [tip: sched/core] " tip-bot2 for K Prateek Nayak
2026-03-16  0:22 ` [PATCH v4 0/9] sched/topology: Optimize sd->shared allocation Dietmar Eggemann

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=7fad91ea-e6cd-43c8-abe3-16d7843247ed@amd.com \
    --to=kprateek.nayak@amd.com \
    --cc=dietmar.eggemann@arm.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-tip-commits@vger.kernel.org \
    --cc=nathan@kernel.org \
    --cc=peterz@infradead.org \
    --cc=sshegde@linux.ibm.com \
    --cc=vschneid@redhat.com \
    --cc=x86@kernel.org \
    --cc=yu.c.chen@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.