From: Sachin Sant <sachinp@in.ibm.com>
To: Peter Zijlstra <peterz@infradead.org>
Cc: ego@in.ibm.com, LKML <linux-kernel@vger.kernel.org>,
Stephen Rothwell <sfr@canb.auug.org.au>,
linux-next@vger.kernel.org, Ingo Molnar <mingo@elte.hu>,
Mike Galbraith <efault@gmx.de>
Subject: Re: -next: Nov 12 - kernel BUG at kernel/sched.c:7359!
Date: Mon, 23 Nov 2009 15:23:43 +0530 [thread overview]
Message-ID: <4B0A5BA7.8020604@in.ibm.com> (raw)
In-Reply-To: <1258108281.22655.5.camel@laptop>
Peter Zijlstra wrote:
> Well, it boots for me, but then, I've not been able to reproduce any
> issues anyway :/
>
> /me goes try a PREEMPT=n kernel, since that is what Mike reports boot
> funnies with..
>
> Full running diff against -tip:
>
Peter, I can still recreate this issue with today's next (20091123).
Looks like the following patch hasn't been merged yet.
Thanks
-Sachin
> ---
> diff --git a/kernel/sched.c b/kernel/sched.c
> index 1f2e99d..7089063 100644
> --- a/kernel/sched.c
> +++ b/kernel/sched.c
> @@ -2374,17 +2374,24 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
> if (task_contributes_to_load(p))
> rq->nr_uninterruptible--;
> p->state = TASK_WAKING;
> - task_rq_unlock(rq, &flags);
> + __task_rq_unlock(rq);
>
> +again:
> cpu = p->sched_class->select_task_rq(p, SD_BALANCE_WAKE, wake_flags);
> + if (!cpu_online(cpu))
> + cpu = cpumask_any_and(&p->cpus_allowed, cpu_online_mask);
> + if (cpu >= nr_cpu_ids) {
> + printk(KERN_ERR "Breaking affinity on %d/%s\n", p->pid, p->comm);
> + cpuset_cpus_allowed_locked(p, &p->cpus_allowed);
> + goto again;
> + }
> +
> if (cpu != orig_cpu) {
> - local_irq_save(flags);
> rq = cpu_rq(cpu);
> update_rq_clock(rq);
> set_task_cpu(p, cpu);
> - local_irq_restore(flags);
> }
> - rq = task_rq_lock(p, &flags);
> + rq = __task_rq_lock(p);
>
> WARN_ON(p->state != TASK_WAKING);
> cpu = task_cpu(p);
> @@ -7620,6 +7627,8 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
> unsigned long flags;
> struct rq *rq;
>
> + printk(KERN_ERR "migration call\n");
> +
> switch (action) {
>
> case CPU_UP_PREPARE:
> @@ -9186,6 +9195,8 @@ int __init sched_create_sysfs_power_savings_entries(struct sysdev_class *cls)
> static int update_sched_domains(struct notifier_block *nfb,
> unsigned long action, void *hcpu)
> {
> + printk(KERN_ERR "update_sched_domains\n");
> +
> switch (action) {
> case CPU_ONLINE:
> case CPU_ONLINE_FROZEN:
> diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
> index 5488a5d..0ff21af 100644
> --- a/kernel/sched_fair.c
> +++ b/kernel/sched_fair.c
> @@ -1345,6 +1345,37 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
> }
>
> /*
> + * Try and locate an idle CPU in the sched_domain.
> + */
> +static int
> +select_idle_sibling(struct task_struct *p, struct sched_domain *sd, int target)
> +{
> + int cpu = smp_processor_id();
> + int prev_cpu = task_cpu(p);
> + int i;
> +
> + /*
> + * If this domain spans both cpu and prev_cpu (see the SD_WAKE_AFFINE
> + * test in select_task_rq_fair) and the prev_cpu is idle then that's
> + * always a better target than the current cpu.
> + */
> + if (target == cpu && !cpu_rq(prev_cpu)->cfs.nr_running)
> + return prev_cpu;
> +
> + /*
> + * Otherwise, iterate the domain and find an eligible idle cpu.
> + */
> + for_each_cpu_and(i, sched_domain_span(sd), &p->cpus_allowed) {
> + if (!cpu_rq(i)->cfs.nr_running) {
> + target = i;
> + break;
> + }
> + }
> +
> + return target;
> +}
> +
> +/*
> * sched_balance_self: balance the current task (running on cpu) in domains
> * that have the 'flag' flag set. In practice, this is SD_BALANCE_FORK and
> * SD_BALANCE_EXEC.
> @@ -1398,37 +1429,34 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flag
> want_sd = 0;
> }
>
> - if (want_affine && (tmp->flags & SD_WAKE_AFFINE)) {
> - int candidate = -1, i;
> + /*
> + * While iterating the domains looking for a spanning
> + * WAKE_AFFINE domain, adjust the affine target to any idle cpu
> + * in cache sharing domains along the way.
> + */
> + if (want_affine) {
> + int target = -1;
>
> + /*
> + * If both cpu and prev_cpu are part of this domain,
> + * cpu is a valid SD_WAKE_AFFINE target.
> + */
> if (cpumask_test_cpu(prev_cpu, sched_domain_span(tmp)))
> - candidate = cpu;
> + target = cpu;
>
> /*
> - * Check for an idle shared cache.
> + * If there's an idle sibling in this domain, make that
> + * the wake_affine target instead of the current cpu.
> */
> - if (tmp->flags & SD_PREFER_SIBLING) {
> - if (candidate == cpu) {
> - if (!cpu_rq(prev_cpu)->cfs.nr_running)
> - candidate = prev_cpu;
> - }
> + if (tmp->flags & SD_PREFER_SIBLING)
> + target = select_idle_sibling(p, tmp, target);
>
> - if (candidate == -1 || candidate == cpu) {
> - for_each_cpu(i, sched_domain_span(tmp)) {
> - if (!cpumask_test_cpu(i, &p->cpus_allowed))
> - continue;
> - if (!cpu_rq(i)->cfs.nr_running) {
> - candidate = i;
> - break;
> - }
> - }
> + if (target >= 0) {
> + if (tmp->flags & SD_WAKE_AFFINE) {
> + affine_sd = tmp;
> + want_affine = 0;
> }
> - }
> -
> - if (candidate >= 0) {
> - affine_sd = tmp;
> - want_affine = 0;
> - cpu = candidate;
> + cpu = target;
> }
> }
>
>
>
>
--
---------------------------------
Sachin Sant
IBM Linux Technology Center
India Systems and Technology Labs
Bangalore, India
---------------------------------
next prev parent reply other threads:[~2009-11-23 9:53 UTC|newest]
Thread overview: 24+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-11-12 8:51 linux-next: Tree for November 12 Stephen Rothwell
2009-11-12 11:53 ` -next: Nov 12 - kernel BUG at kernel/sched.c:7359! Sachin Sant
2009-11-12 12:10 ` Peter Zijlstra
2009-11-12 12:23 ` Sachin Sant
2009-11-12 12:27 ` Peter Zijlstra
2009-11-12 17:10 ` Peter Zijlstra
2009-11-13 9:00 ` Sachin Sant
2009-11-13 9:06 ` Peter Zijlstra
2009-11-13 9:58 ` Gautham R Shenoy
2009-11-13 10:16 ` Peter Zijlstra
2009-11-13 10:31 ` Peter Zijlstra
2009-11-13 10:49 ` Peter Zijlstra
2009-11-13 11:44 ` Sachin Sant
2009-11-13 16:12 ` Mike Galbraith
2009-11-23 9:53 ` Sachin Sant [this message]
2009-11-25 13:42 ` Peter Zijlstra
2009-11-26 4:39 ` Sachin Sant
2009-12-04 12:06 ` Sachin Sant
2009-12-04 12:16 ` Peter Zijlstra
2009-12-07 6:16 ` Sachin Sant
2009-12-12 7:09 ` Max Krasnyansky
2009-11-12 17:40 ` linux-next: Tree for November 12 (acpi/processor.h) Randy Dunlap
2009-11-12 18:09 ` linux-next: Tree for November 12 (acpi_processor_get_bios_limit) Randy Dunlap
2009-11-12 23:46 ` [PATCH -next] staging/line6: fix printk formats Randy Dunlap
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4B0A5BA7.8020604@in.ibm.com \
--to=sachinp@in.ibm.com \
--cc=efault@gmx.de \
--cc=ego@in.ibm.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-next@vger.kernel.org \
--cc=mingo@elte.hu \
--cc=peterz@infradead.org \
--cc=sfr@canb.auug.org.au \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.