All of lore.kernel.org
 help / color / mirror / Atom feed
* [BUG] in sched fair on v3.13-rc1+ doing hotplug
@ 2013-11-27 13:13 Steven Rostedt
  2013-11-27 13:22 ` Peter Zijlstra
  0 siblings, 1 reply; 3+ messages in thread
From: Steven Rostedt @ 2013-11-27 13:13 UTC (permalink / raw)
  To: LKML
  Cc: Ingo Molnar, Peter Zijlstra, Preeti U Murthy, svaidy,
	vincent.guittot, bitbucket, benh, anton, Morten.Rasmussen, pjt,
	mikey, Linus Torvalds

[-- Attachment #1: Type: text/plain, Size: 3636 bytes --]

Running my tests on 3.13-rc1 I hit the following crash with my cpu
hotplug stress test (attached):

[   82.455572] BUG: unable to handle kernel paging request at 0000016d00000010
[   82.456109] IP: [<ffffffff81069183>] set_cpu_sd_state_idle+0x42/0x54
[   82.456109] PGD 0 
[   82.456109] Oops: 0000 [#1] PREEMPT SMP 
[   82.456109] Modules linked in: ipt_MASQUERADE sunrpc ip6t_REJECT nf_conntrack_ipv6 nf_defrag_ipv6 ip6table_filter ip6_tables uinput snd_hda_codec_idt snd_hda_intel snd_hda_codec
 snd_hwdep kvm_intel kvm snd_seq snd_seq_device snd_pcm snd_page_alloc shpchp snd_timer snd microcode soundcore i2c_i801 pata_acpi firewire_ohci firewire_core crc_itu_t ata_generic
 i915 drm_kms_helper drm i2c_algo_bit i2c_core video
[   82.456109] CPU: 3 PID: 0 Comm: swapper/3 Not tainted 3.13.0-rc1-test+ #33
[   82.456109] Hardware name: To Be Filled By O.E.M. To Be Filled By O.E.M./To be filled by O.E.M., BIOS SDBLI944.86P 05/08/2007
[   82.456109] task: ffff88007a329290 ti: ffff88007a32a000 task.ti: ffff88007a32a000
[   82.456109] RIP: 0010:[<ffffffff81069183>]  [<ffffffff81069183>] set_cpu_sd_state_idle+0x42/0x54
[   82.456109] RSP: 0018:ffff88007a32bea8  EFLAGS: 00010246
[   82.456109] RAX: 0000016d00000000 RBX: 0000000000000003 RCX: ffff88007a32a000
[   82.456109] RDX: ffff88007d58d1f8 RSI: ffffffff8107a025 RDI: ffffffff8150104e
[   82.456109] RBP: ffff88007a32beb8 R08: ffff88007a32a000 R09: 0000000000000001
[   82.456109] R10: 0000000000000001 R11: 0000000000000400 R12: ffff88007a32a000
[   82.456109] R13: ffff88007a32bfd8 R14: 0000000000000000 R15: 0000000000000000
[   82.456109] FS:  0000000000000000(0000) GS:ffff88007d580000(0000) knlGS:0000000000000000
[   82.456109] CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
[   82.456109] CR2: 0000016d00000010 CR3: 0000000077e9a000 CR4: 00000000000007e0
[   82.456109] Stack:
[   82.456109]  ffff88007a32a000 ffff88007a32a000 ffff88007a32bed8 ffffffff8108b38e
[   82.456109]  ffffffff8150104e ffff88007a32a000 ffff88007a32bf18 ffffffff81079ed3
[   82.456109]  ffff88007a32bf08 2c5c9774106fa5f0 50a2f20e3f906ad7 000000000000cd80
[   82.456109] Call Trace:
[   82.456109]  [<ffffffff8108b38e>] tick_nohz_idle_enter+0x38/0x6d
[   82.456109]  [<ffffffff8150104e>] ? schedule_preempt_disabled+0x22/0x24
[   82.456109]  [<ffffffff81079ed3>] cpu_startup_entry+0x77/0x1d0
[   82.456109]  [<ffffffff810263c1>] start_secondary+0x254/0x25b
[   82.456109] Code: 48 63 c3 48 c7 c2 f8 d1 00 00 48 03 14 c5 30 ea ac 81 48 8b 02 48 85 c0 74 19 83 78 4c 00 75 13 c7 40 4c 01 00 00 00 48 8b 40 10 <48> 8b 40 10 f0 ff 48 1c e8 05 58 01 00 41 5b 5b 5d c3 66 66 66 
[   82.456109] RIP  [<ffffffff81069183>] set_cpu_sd_state_idle+0x42/0x54
[   82.456109]  RSP <ffff88007a32bea8>
[   82.456109] CR2: 0000016d00000010
[   82.456109] ---[ end trace 725ad8735a6099b4 ]---
[   82.456109] Kernel panic - not syncing: Attempted to kill the idle task!

According to gdb, the bug happened here:

/*
 * Mark the current CPU's 'sd_busy' sched domain as busy: clear its
 * nohz_idle flag and increment the nr_busy_cpus counter reached through
 * sd->groups->sgp. Does nothing when the per-CPU sd_busy pointer is NULL
 * or when nohz_idle is already 0.
 *
 * NOTE(review): the oops in this report names set_cpu_sd_state_idle as the
 * faulting function, while this listing is set_cpu_sd_state_busy -- confirm
 * which of the two gdb actually resolved.
 */
static inline void set_cpu_sd_state_busy(void)
{
        struct sched_domain *sd;
        int cpu = smp_processor_id();

        /* The per-CPU sd_busy pointer is only dereferenced inside this RCU read-side section. */
        rcu_read_lock();
        sd = rcu_dereference(per_cpu(sd_busy, cpu));

        /* Bail out if there is no domain, or it is not currently flagged nohz_idle. */
        if (!sd || !sd->nohz_idle)
                goto unlock;
        sd->nohz_idle = 0;

        /*
         * sd is known non-NULL here, but sd->groups and ->sgp are followed
         * without any check; the reporter marks this line as the crash site.
         */
        atomic_inc(&sd->groups->sgp->nr_busy_cpus);  <<---- BUG
unlock:
        rcu_read_unlock();
}



I bisected it down to this commit:

commit 37dc6b50cee97954c4e6edcd5b1fa614b76038ee
Author: Preeti U Murthy <preeti@linux.vnet.ibm.com>
Date:   Wed Oct 30 08:42:52 2013 +0530

    sched: Remove unnecessary iteration over sched domains to update nr_busy_cpus

I can send a config if you want.

-- Steve

[-- Attachment #2: stress-cpu-hotplug --]
[-- Type: application/octet-stream, Size: 906 bytes --]

#!/bin/bash
#
# CPU hotplug stress test: walks through every non-zero bitmask over the
# discovered CPUs and, for each mask, toggles the online state of every CPU
# whose bit is set by writing 0 or 1 to its sysfs 'online' file. The whole
# sequence of masks is run twice.

# Upper bound on how many CPUs take part in the mask (bounds the number of
# iterations at 2**MAXCPUS - 1 per pass).
MAXCPUS=12

# find cpus
# The glob only matches names whose first digit is 1-9, so cpu0 is never
# included.

CPUS=`ls -d /sys/devices/system/cpu/cpu[1-9]*`
# NOTE(review): NR is computed here but never read anywhere in this script.
NR=`echo $CPUS | wc -w`

# Build three parallel arrays indexed by discovery order:
#   CPUONLINE[i] - path of the CPU's 'online' file
#   ENB[i]       - current online state as read from that file
#   CPU[i]       - directory basename (e.g. cpu1), used for logging
let x=0
for cpu in $CPUS; do
    file=$cpu/online
    CPUONLINE[$x]=$file
    ENB[$x]=`cat $file`
    CPU[$x]=`basename $cpu`
    let x=$x+1
done

# Number of CPUs found.
let MAXCNT=$x

# Number of mask bits to use: the CPU count, clamped to MAXCPUS.
MSKCNT=$MAXCNT

if [ $MAXCNT -gt $MAXCPUS ]; then
    MSKCNT=$MAXCPUS
fi

# From here on MSKCNT holds 2**bits (decremented to the largest mask below).
let MSKCNT=2**$MSKCNT

# hotplug MASK
#   For every set bit i in MASK, flip the recorded state of CPU index i
#   (ENB[i]) and queue an "echo 0|1 > .../online" command for it. After the
#   whole mask has been scanned, print a summary line and run the queued
#   commands with eval.
#   Note: x is not declared local, so this overwrites the global x used by
#   the discovery loop above (MAXCNT has already been captured by then).
hotplug() {
    MSK=$1
    ECHO="$MSK"
    CMD=""
    x=0
    while [ $MSK -gt 0 ]; do
	# Test the lowest bit of the remaining mask.
	let bit=$MSK'&'1
	if [ $bit -eq 1 ]; then
	    # Toggle: a CPU recorded as online is taken down, otherwise brought up.
	    if [ ${ENB[$x]} -eq 1 ]; then
		cmd="disabling"
		ENB[$x]=0
		num=0
	    else
		cmd="enabling"
		ENB[$x]=1
		num=1
	    fi
	    CMD="$CMD echo $num > ${CPUONLINE[$x]};"
	    ECHO="$ECHO $cmd ${CPU[$x]}"
	fi
	# Advance to the next CPU index / next mask bit.
	let x=$x+1
	let MSK=$MSK'>>'1
    done
    echo $ECHO
    # CMD contains shell redirections, so it has to be evaluated, not just expanded.
    eval $CMD
}

# Largest mask value: all participating bits set.
let MSKCNT=$MSKCNT-1
# Two full passes over masks 1..MSKCNT.
for i in `seq $MSKCNT`; do
    hotplug $i
done
for i in `seq $MSKCNT`; do
    hotplug $i
done

exit 0

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [BUG] in sched fair on v3.13-rc1+ doing hotplug
  2013-11-27 13:13 [BUG] in sched fair on v3.13-rc1+ doing hotplug Steven Rostedt
@ 2013-11-27 13:22 ` Peter Zijlstra
  2013-11-27 14:08   ` Steven Rostedt
  0 siblings, 1 reply; 3+ messages in thread
From: Peter Zijlstra @ 2013-11-27 13:22 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: LKML, Ingo Molnar, Preeti U Murthy, svaidy, vincent.guittot,
	bitbucket, benh, anton, Morten.Rasmussen, pjt, mikey,
	Linus Torvalds

On Wed, Nov 27, 2013 at 08:13:45AM -0500, Steven Rostedt wrote:
>         atomic_inc(&sd->groups->sgp->nr_busy_cpus);  <<---- BUG
> unlock:
>         rcu_read_unlock();
> }

tip/sched/urgent has this one:

---
commit 42eb088ed246a5a817bb45a8b32fe234cf1c0f8b
Author: Peter Zijlstra <peterz@infradead.org>
Date:   Tue Nov 19 16:41:49 2013 +0100

    sched: Avoid NULL dereference on sd_busy
    
    Commit 37dc6b50cee9 ("sched: Remove unnecessary iteration over sched
    domains to update nr_busy_cpus") forgot to clear 'sd_busy' under some
    conditions leading to a possible NULL deref in set_cpu_sd_state_idle().
    
    Reported-by: Anton Blanchard <anton@samba.org>
    Cc: Preeti U Murthy <preeti@linux.vnet.ibm.com>
    Signed-off-by: Peter Zijlstra <peterz@infradead.org>
    Link: http://lkml.kernel.org/r/20131118113701.GF3866@twins.programming.kicks-ass.net
    Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index c1808606ee5f..a1591ca7eb5a 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4910,8 +4910,9 @@ static void update_top_cache_domain(int cpu)
 	if (sd) {
 		id = cpumask_first(sched_domain_span(sd));
 		size = cpumask_weight(sched_domain_span(sd));
-		rcu_assign_pointer(per_cpu(sd_busy, cpu), sd->parent);
+		sd = sd->parent; /* sd_busy */
 	}
+	rcu_assign_pointer(per_cpu(sd_busy, cpu), sd);
 
 	rcu_assign_pointer(per_cpu(sd_llc, cpu), sd);
 	per_cpu(sd_llc_size, cpu) = size;

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [BUG] in sched fair on v3.13-rc1+ doing hotplug
  2013-11-27 13:22 ` Peter Zijlstra
@ 2013-11-27 14:08   ` Steven Rostedt
  0 siblings, 0 replies; 3+ messages in thread
From: Steven Rostedt @ 2013-11-27 14:08 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: LKML, Ingo Molnar, Preeti U Murthy, svaidy, vincent.guittot,
	bitbucket, benh, anton, Morten.Rasmussen, pjt, mikey,
	Linus Torvalds

On Wed, 27 Nov 2013 14:22:43 +0100
Peter Zijlstra <peterz@infradead.org> wrote:

> On Wed, Nov 27, 2013 at 08:13:45AM -0500, Steven Rostedt wrote:
> >         atomic_inc(&sd->groups->sgp->nr_busy_cpus);  <<---- BUG
> > unlock:
> >         rcu_read_unlock();
> > }
> 
> tip/sched/urgent has this one:
> 
> ---
> commit 42eb088ed246a5a817bb45a8b32fe234cf1c0f8b
> Author: Peter Zijlstra <peterz@infradead.org>
> Date:   Tue Nov 19 16:41:49 2013 +0100
> 
>     sched: Avoid NULL dereference on sd_busy

After applying this patch, the bug does go away.

Thanks!

-- Steve

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2013-11-27 14:09 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-11-27 13:13 [BUG] in sched fair on v3.13-rc1+ doing hotplug Steven Rostedt
2013-11-27 13:22 ` Peter Zijlstra
2013-11-27 14:08   ` Steven Rostedt

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.